Why is sum(bigint) significantly faster than sum(integer) in PostgreSQL v10?











up vote
0
down vote

favorite












create table t_num_type(
n_bigint bigint,
n_numeric numeric,
n_int int
);

insert into t_num_type
select generate_series(1,10000000),
generate_series(1,10000000),
generate_series(1,10000000);


1» n_bigint



explain (analyze,buffers,format text)
select sum(n_bigint) from t_num_type;
Finalize Aggregate (cost=116778.56..116778.57 rows=1 width=32) (actual time=1221.663..1221.664 rows=1 loops=1)
Buffers: shared hit=23090
-> Gather (cost=116778.34..116778.55 rows=2 width=32) (actual time=1221.592..1221.643 rows=3 loops=1)
Workers Planned: 2
Workers Launched: 2
Buffers: shared hit=23090
-> Partial Aggregate (cost=115778.34..115778.35 rows=1 width=32) (actual time=1217.558..1217.559 rows=1 loops=3)
Buffers: shared hit=63695
-> Parallel Seq Scan on t_num_type (cost=0.00..105361.67 rows=4166667 width=8) (actual time=0.021..747.748 rows=3333333 loops=3)
Buffers: shared hit=63695
Planning time: 0.265 ms
Execution time: 1237.360 ms


2» numeric



explain (analyze,buffers,format text)
select sum(n_numeric) from t_num_type;
Finalize Aggregate (cost=116778.56..116778.57 rows=1 width=32) (actual time=1576.562..1576.562 rows=1 loops=1)
Buffers: shared hit=22108
-> Gather (cost=116778.34..116778.55 rows=2 width=32) (actual time=1576.502..1576.544 rows=3 loops=1)
Workers Planned: 2
Workers Launched: 2
Buffers: shared hit=22108
-> Partial Aggregate (cost=115778.34..115778.35 rows=1 width=32) (actual time=1572.446..1572.446 rows=1 loops=3)
Buffers: shared hit=63695
-> Parallel Seq Scan on t_num_type (cost=0.00..105361.67 rows=4166667 width=6) (actual time=0.028..781.808 rows=3333333 loops=3)
Buffers: shared hit=63695
Planning time: 0.157 ms
Execution time: 1592.559 ms


3» n_int



explain (analyze,buffers,format text)
select sum(n_int) from t_num_type;
Finalize Aggregate (cost=116778.55..116778.56 rows=1 width=8) (actual time=1247.065..1247.065 rows=1 loops=1)
Buffers: shared hit=23367
-> Gather (cost=116778.33..116778.54 rows=2 width=8) (actual time=1247.006..1247.055 rows=3 loops=1)
Workers Planned: 2
Workers Launched: 2
Buffers: shared hit=23367
-> Partial Aggregate (cost=115778.33..115778.34 rows=1 width=8) (actual time=1242.524..1242.524 rows=1 loops=3)
Buffers: shared hit=63695
-> Parallel Seq Scan on t_num_type (cost=0.00..105361.67 rows=4166667 width=4) (actual time=0.028..786.940 rows=3333333 loops=3)
Buffers: shared hit=63695
Planning time: 0.196 ms
Execution time: 1263.352 ms









share|improve this question




















  • 2




    Could you edit your question to include a clearer description of the question in the body? The title refers to "the efficiency being increased significantly", but it's not clear what numbers we're comparing - are we comparing the execution time of these three queries, an unshown difference between Postgres versions, or something else?
    – IMSoP
    7 hours ago

















up vote
0
down vote

favorite












create table t_num_type(
n_bigint bigint,
n_numeric numeric,
n_int int
);

insert into t_num_type
select generate_series(1,10000000),
generate_series(1,10000000),
generate_series(1,10000000);


1» n_bigint



explain (analyze,buffers,format text)
select sum(n_bigint) from t_num_type;
Finalize Aggregate (cost=116778.56..116778.57 rows=1 width=32) (actual time=1221.663..1221.664 rows=1 loops=1)
Buffers: shared hit=23090
-> Gather (cost=116778.34..116778.55 rows=2 width=32) (actual time=1221.592..1221.643 rows=3 loops=1)
Workers Planned: 2
Workers Launched: 2
Buffers: shared hit=23090
-> Partial Aggregate (cost=115778.34..115778.35 rows=1 width=32) (actual time=1217.558..1217.559 rows=1 loops=3)
Buffers: shared hit=63695
-> Parallel Seq Scan on t_num_type (cost=0.00..105361.67 rows=4166667 width=8) (actual time=0.021..747.748 rows=3333333 loops=3)
Buffers: shared hit=63695
Planning time: 0.265 ms
Execution time: 1237.360 ms


2» numeric



explain (analyze,buffers,format text)
select sum(n_numeric) from t_num_type;
Finalize Aggregate (cost=116778.56..116778.57 rows=1 width=32) (actual time=1576.562..1576.562 rows=1 loops=1)
Buffers: shared hit=22108
-> Gather (cost=116778.34..116778.55 rows=2 width=32) (actual time=1576.502..1576.544 rows=3 loops=1)
Workers Planned: 2
Workers Launched: 2
Buffers: shared hit=22108
-> Partial Aggregate (cost=115778.34..115778.35 rows=1 width=32) (actual time=1572.446..1572.446 rows=1 loops=3)
Buffers: shared hit=63695
-> Parallel Seq Scan on t_num_type (cost=0.00..105361.67 rows=4166667 width=6) (actual time=0.028..781.808 rows=3333333 loops=3)
Buffers: shared hit=63695
Planning time: 0.157 ms
Execution time: 1592.559 ms


3» n_int



explain (analyze,buffers,format text)
select sum(n_int) from t_num_type;
Finalize Aggregate (cost=116778.55..116778.56 rows=1 width=8) (actual time=1247.065..1247.065 rows=1 loops=1)
Buffers: shared hit=23367
-> Gather (cost=116778.33..116778.54 rows=2 width=8) (actual time=1247.006..1247.055 rows=3 loops=1)
Workers Planned: 2
Workers Launched: 2
Buffers: shared hit=23367
-> Partial Aggregate (cost=115778.33..115778.34 rows=1 width=8) (actual time=1242.524..1242.524 rows=1 loops=3)
Buffers: shared hit=63695
-> Parallel Seq Scan on t_num_type (cost=0.00..105361.67 rows=4166667 width=4) (actual time=0.028..786.940 rows=3333333 loops=3)
Buffers: shared hit=63695
Planning time: 0.196 ms
Execution time: 1263.352 ms









share|improve this question




















  • 2




    Could you edit your question to include a clearer description of the question in the body? The title refers to "the efficiency being increased significantly", but it's not clear what numbers we're comparing - are we comparing the execution time of these three queries, an unshown difference between Postgres versions, or something else?
    – IMSoP
    7 hours ago















up vote
0
down vote

favorite









up vote
0
down vote

favorite











create table t_num_type(
n_bigint bigint,
n_numeric numeric,
n_int int
);

insert into t_num_type
select generate_series(1,10000000),
generate_series(1,10000000),
generate_series(1,10000000);


1» n_bigint



explain (analyze,buffers,format text)
select sum(n_bigint) from t_num_type;
Finalize Aggregate (cost=116778.56..116778.57 rows=1 width=32) (actual time=1221.663..1221.664 rows=1 loops=1)
Buffers: shared hit=23090
-> Gather (cost=116778.34..116778.55 rows=2 width=32) (actual time=1221.592..1221.643 rows=3 loops=1)
Workers Planned: 2
Workers Launched: 2
Buffers: shared hit=23090
-> Partial Aggregate (cost=115778.34..115778.35 rows=1 width=32) (actual time=1217.558..1217.559 rows=1 loops=3)
Buffers: shared hit=63695
-> Parallel Seq Scan on t_num_type (cost=0.00..105361.67 rows=4166667 width=8) (actual time=0.021..747.748 rows=3333333 loops=3)
Buffers: shared hit=63695
Planning time: 0.265 ms
Execution time: 1237.360 ms


2» numeric



explain (analyze,buffers,format text)
select sum(n_numeric) from t_num_type;
Finalize Aggregate (cost=116778.56..116778.57 rows=1 width=32) (actual time=1576.562..1576.562 rows=1 loops=1)
Buffers: shared hit=22108
-> Gather (cost=116778.34..116778.55 rows=2 width=32) (actual time=1576.502..1576.544 rows=3 loops=1)
Workers Planned: 2
Workers Launched: 2
Buffers: shared hit=22108
-> Partial Aggregate (cost=115778.34..115778.35 rows=1 width=32) (actual time=1572.446..1572.446 rows=1 loops=3)
Buffers: shared hit=63695
-> Parallel Seq Scan on t_num_type (cost=0.00..105361.67 rows=4166667 width=6) (actual time=0.028..781.808 rows=3333333 loops=3)
Buffers: shared hit=63695
Planning time: 0.157 ms
Execution time: 1592.559 ms


3» n_int



explain (analyze,buffers,format text)
select sum(n_int) from t_num_type;
Finalize Aggregate (cost=116778.55..116778.56 rows=1 width=8) (actual time=1247.065..1247.065 rows=1 loops=1)
Buffers: shared hit=23367
-> Gather (cost=116778.33..116778.54 rows=2 width=8) (actual time=1247.006..1247.055 rows=3 loops=1)
Workers Planned: 2
Workers Launched: 2
Buffers: shared hit=23367
-> Partial Aggregate (cost=115778.33..115778.34 rows=1 width=8) (actual time=1242.524..1242.524 rows=1 loops=3)
Buffers: shared hit=63695
-> Parallel Seq Scan on t_num_type (cost=0.00..105361.67 rows=4166667 width=4) (actual time=0.028..786.940 rows=3333333 loops=3)
Buffers: shared hit=63695
Planning time: 0.196 ms
Execution time: 1263.352 ms









share|improve this question















create table t_num_type(
n_bigint bigint,
n_numeric numeric,
n_int int
);

insert into t_num_type
select generate_series(1,10000000),
generate_series(1,10000000),
generate_series(1,10000000);


1» n_bigint



explain (analyze,buffers,format text)
select sum(n_bigint) from t_num_type;
Finalize Aggregate (cost=116778.56..116778.57 rows=1 width=32) (actual time=1221.663..1221.664 rows=1 loops=1)
Buffers: shared hit=23090
-> Gather (cost=116778.34..116778.55 rows=2 width=32) (actual time=1221.592..1221.643 rows=3 loops=1)
Workers Planned: 2
Workers Launched: 2
Buffers: shared hit=23090
-> Partial Aggregate (cost=115778.34..115778.35 rows=1 width=32) (actual time=1217.558..1217.559 rows=1 loops=3)
Buffers: shared hit=63695
-> Parallel Seq Scan on t_num_type (cost=0.00..105361.67 rows=4166667 width=8) (actual time=0.021..747.748 rows=3333333 loops=3)
Buffers: shared hit=63695
Planning time: 0.265 ms
Execution time: 1237.360 ms


2» numeric



explain (analyze,buffers,format text)
select sum(n_numeric) from t_num_type;
Finalize Aggregate (cost=116778.56..116778.57 rows=1 width=32) (actual time=1576.562..1576.562 rows=1 loops=1)
Buffers: shared hit=22108
-> Gather (cost=116778.34..116778.55 rows=2 width=32) (actual time=1576.502..1576.544 rows=3 loops=1)
Workers Planned: 2
Workers Launched: 2
Buffers: shared hit=22108
-> Partial Aggregate (cost=115778.34..115778.35 rows=1 width=32) (actual time=1572.446..1572.446 rows=1 loops=3)
Buffers: shared hit=63695
-> Parallel Seq Scan on t_num_type (cost=0.00..105361.67 rows=4166667 width=6) (actual time=0.028..781.808 rows=3333333 loops=3)
Buffers: shared hit=63695
Planning time: 0.157 ms
Execution time: 1592.559 ms


3» n_int



explain (analyze,buffers,format text)
select sum(n_int) from t_num_type;
Finalize Aggregate (cost=116778.55..116778.56 rows=1 width=8) (actual time=1247.065..1247.065 rows=1 loops=1)
Buffers: shared hit=23367
-> Gather (cost=116778.33..116778.54 rows=2 width=8) (actual time=1247.006..1247.055 rows=3 loops=1)
Workers Planned: 2
Workers Launched: 2
Buffers: shared hit=23367
-> Partial Aggregate (cost=115778.33..115778.34 rows=1 width=8) (actual time=1242.524..1242.524 rows=1 loops=3)
Buffers: shared hit=63695
-> Parallel Seq Scan on t_num_type (cost=0.00..105361.67 rows=4166667 width=4) (actual time=0.028..786.940 rows=3333333 loops=3)
Buffers: shared hit=63695
Planning time: 0.196 ms
Execution time: 1263.352 ms






postgresql






share|improve this question















share|improve this question













share|improve this question




share|improve this question








edited 5 hours ago









Laurenz Albe

40.8k92745




40.8k92745










asked 7 hours ago









dodo

85




85








  • 2




    Could you edit your question to include a clearer description of the question in the body? The title refers to "the efficiency being increased significantly", but it's not clear what numbers we're comparing - are we comparing the execution time of these three queries, an unshown difference between Postgres versions, or something else?
    – IMSoP
    7 hours ago
















  • 2




    Could you edit your question to include a clearer description of the question in the body? The title refers to "the efficiency being increased significantly", but it's not clear what numbers we're comparing - are we comparing the execution time of these three queries, an unshown difference between Postgres versions, or something else?
    – IMSoP
    7 hours ago










2




2




Could you edit your question to include a clearer description of the question in the body? The title refers to "the efficiency being increased significantly", but it's not clear what numbers we're comparing - are we comparing the execution time of these three queries, an unshown difference between Postgres versions, or something else?
– IMSoP
7 hours ago






Could you edit your question to include a clearer description of the question in the body? The title refers to "the efficiency being increased significantly", but it's not clear what numbers we're comparing - are we comparing the execution time of these three queries, an unshown difference between Postgres versions, or something else?
– IMSoP
7 hours ago














1 Answer
1






active

oldest

votes

















up vote
0
down vote













First, you should repeat the experiment several times to see if the difference remains the same. Caching and other effects cause a certain fluctuation in query times.



I'd expect the difference between integer and bigint to be negligible in the long run. Both summation operations should be implemented in hardware.



numeric should be significantly slower, because operations on these binary coded decimals is implemented in C in the database engine.



If bigint summation remains faster even in repeated experiments, my only explanation is tuple deforming: to get to the third column, PostgreSQL has to process the first two columns.






share|improve this answer





















    Your Answer






    StackExchange.ifUsing("editor", function () {
    StackExchange.using("externalEditor", function () {
    StackExchange.using("snippets", function () {
    StackExchange.snippets.init();
    });
    });
    }, "code-snippets");

    StackExchange.ready(function() {
    var channelOptions = {
    tags: "".split(" "),
    id: "1"
    };
    initTagRenderer("".split(" "), "".split(" "), channelOptions);

    StackExchange.using("externalEditor", function() {
    // Have to fire editor after snippets, if snippets enabled
    if (StackExchange.settings.snippets.snippetsEnabled) {
    StackExchange.using("snippets", function() {
    createEditor();
    });
    }
    else {
    createEditor();
    }
    });

    function createEditor() {
    StackExchange.prepareEditor({
    heartbeatType: 'answer',
    convertImagesToLinks: true,
    noModals: true,
    showLowRepImageUploadWarning: true,
    reputationToPostImages: 10,
    bindNavPrevention: true,
    postfix: "",
    imageUploader: {
    brandingHtml: "Powered by u003ca class="icon-imgur-white" href="https://imgur.com/"u003eu003c/au003e",
    contentPolicyHtml: "User contributions licensed under u003ca href="https://creativecommons.org/licenses/by-sa/3.0/"u003ecc by-sa 3.0 with attribution requiredu003c/au003e u003ca href="https://stackoverflow.com/legal/content-policy"u003e(content policy)u003c/au003e",
    allowUrls: true
    },
    onDemand: true,
    discardSelector: ".discard-answer"
    ,immediatelyShowMarkdownHelp:true
    });


    }
    });














     

    draft saved


    draft discarded


















    StackExchange.ready(
    function () {
    StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53264063%2fwhy-is-sumbigint-significantly-faster-than-suminteger-in-postgresql-v10%23new-answer', 'question_page');
    }
    );

    Post as a guest
































    1 Answer
    1






    active

    oldest

    votes








    1 Answer
    1






    active

    oldest

    votes









    active

    oldest

    votes






    active

    oldest

    votes








    up vote
    0
    down vote













    First, you should repeat the experiment several times to see if the difference remains the same. Caching and other effects cause a certain fluctuation in query times.



    I'd expect the difference between integer and bigint to be negligible in the long run. Both summation operations should be implemented in hardware.



    numeric should be significantly slower, because operations on these binary coded decimals is implemented in C in the database engine.



    If bigint summation remains faster even in repeated experiments, my only explanation is tuple deforming: to get to the third column, PostgreSQL has to process the first two columns.






    share|improve this answer

























      up vote
      0
      down vote













      First, you should repeat the experiment several times to see if the difference remains the same. Caching and other effects cause a certain fluctuation in query times.



      I'd expect the difference between integer and bigint to be negligible in the long run. Both summation operations should be implemented in hardware.



      numeric should be significantly slower, because operations on these binary coded decimals is implemented in C in the database engine.



      If bigint summation remains faster even in repeated experiments, my only explanation is tuple deforming: to get to the third column, PostgreSQL has to process the first two columns.






      share|improve this answer























        up vote
        0
        down vote










        up vote
        0
        down vote









        First, you should repeat the experiment several times to see if the difference remains the same. Caching and other effects cause a certain fluctuation in query times.



        I'd expect the difference between integer and bigint to be negligible in the long run. Both summation operations should be implemented in hardware.



        numeric should be significantly slower, because operations on these binary coded decimals is implemented in C in the database engine.



        If bigint summation remains faster even in repeated experiments, my only explanation is tuple deforming: to get to the third column, PostgreSQL has to process the first two columns.






        share|improve this answer












        First, you should repeat the experiment several times to see if the difference remains the same. Caching and other effects cause a certain fluctuation in query times.



        I'd expect the difference between integer and bigint to be negligible in the long run. Both summation operations should be implemented in hardware.



        numeric should be significantly slower, because operations on these binary coded decimals is implemented in C in the database engine.



        If bigint summation remains faster even in repeated experiments, my only explanation is tuple deforming: to get to the third column, PostgreSQL has to process the first two columns.







        share|improve this answer












        share|improve this answer



        share|improve this answer










        answered 5 hours ago









        Laurenz Albe

        40.8k92745




        40.8k92745






























             

            draft saved


            draft discarded



















































             


            draft saved


            draft discarded














            StackExchange.ready(
            function () {
            StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53264063%2fwhy-is-sumbigint-significantly-faster-than-suminteger-in-postgresql-v10%23new-answer', 'question_page');
            }
            );

            Post as a guest




















































































            Popular posts from this blog

            Biblatex bibliography style without URLs when DOI exists (in Overleaf with Zotero bibliography)

            ComboBox Display Member on multiple fields

            Is it possible to collect Nectar points via Trainline?