Why is sum(bigint) significantly faster than sum(integer) in PostgreSQL v10?
up vote
0
down vote
favorite
create table t_num_type(
n_bigint bigint,
n_numeric numeric,
n_int int
);
insert into t_num_type
select generate_series(1,10000000),
generate_series(1,10000000),
generate_series(1,10000000);
1) n_bigint
explain (analyze,buffers,format text)
select sum(n_bigint) from t_num_type;
Finalize Aggregate (cost=116778.56..116778.57 rows=1 width=32) (actual time=1221.663..1221.664 rows=1 loops=1)
Buffers: shared hit=23090
-> Gather (cost=116778.34..116778.55 rows=2 width=32) (actual time=1221.592..1221.643 rows=3 loops=1)
Workers Planned: 2
Workers Launched: 2
Buffers: shared hit=23090
-> Partial Aggregate (cost=115778.34..115778.35 rows=1 width=32) (actual time=1217.558..1217.559 rows=1 loops=3)
Buffers: shared hit=63695
-> Parallel Seq Scan on t_num_type (cost=0.00..105361.67 rows=4166667 width=8) (actual time=0.021..747.748 rows=3333333 loops=3)
Buffers: shared hit=63695
Planning time: 0.265 ms
Execution time: 1237.360 ms
2) n_numeric
explain (analyze,buffers,format text)
select sum(n_numeric) from t_num_type;
Finalize Aggregate (cost=116778.56..116778.57 rows=1 width=32) (actual time=1576.562..1576.562 rows=1 loops=1)
Buffers: shared hit=22108
-> Gather (cost=116778.34..116778.55 rows=2 width=32) (actual time=1576.502..1576.544 rows=3 loops=1)
Workers Planned: 2
Workers Launched: 2
Buffers: shared hit=22108
-> Partial Aggregate (cost=115778.34..115778.35 rows=1 width=32) (actual time=1572.446..1572.446 rows=1 loops=3)
Buffers: shared hit=63695
-> Parallel Seq Scan on t_num_type (cost=0.00..105361.67 rows=4166667 width=6) (actual time=0.028..781.808 rows=3333333 loops=3)
Buffers: shared hit=63695
Planning time: 0.157 ms
Execution time: 1592.559 ms
3) n_int
explain (analyze,buffers,format text)
select sum(n_int) from t_num_type;
Finalize Aggregate (cost=116778.55..116778.56 rows=1 width=8) (actual time=1247.065..1247.065 rows=1 loops=1)
Buffers: shared hit=23367
-> Gather (cost=116778.33..116778.54 rows=2 width=8) (actual time=1247.006..1247.055 rows=3 loops=1)
Workers Planned: 2
Workers Launched: 2
Buffers: shared hit=23367
-> Partial Aggregate (cost=115778.33..115778.34 rows=1 width=8) (actual time=1242.524..1242.524 rows=1 loops=3)
Buffers: shared hit=63695
-> Parallel Seq Scan on t_num_type (cost=0.00..105361.67 rows=4166667 width=4) (actual time=0.028..786.940 rows=3333333 loops=3)
Buffers: shared hit=63695
Planning time: 0.196 ms
Execution time: 1263.352 ms
postgresql
add a comment |
up vote
0
down vote
favorite
create table t_num_type(
n_bigint bigint,
n_numeric numeric,
n_int int
);
insert into t_num_type
select generate_series(1,10000000),
generate_series(1,10000000),
generate_series(1,10000000);
1» n_bigint
explain (analyze,buffers,format text)
select sum(n_bigint) from t_num_type;
Finalize Aggregate (cost=116778.56..116778.57 rows=1 width=32) (actual time=1221.663..1221.664 rows=1 loops=1)
Buffers: shared hit=23090
-> Gather (cost=116778.34..116778.55 rows=2 width=32) (actual time=1221.592..1221.643 rows=3 loops=1)
Workers Planned: 2
Workers Launched: 2
Buffers: shared hit=23090
-> Partial Aggregate (cost=115778.34..115778.35 rows=1 width=32) (actual time=1217.558..1217.559 rows=1 loops=3)
Buffers: shared hit=63695
-> Parallel Seq Scan on t_num_type (cost=0.00..105361.67 rows=4166667 width=8) (actual time=0.021..747.748 rows=3333333 loops=3)
Buffers: shared hit=63695
Planning time: 0.265 ms
Execution time: 1237.360 ms
2» numeric
explain (analyze,buffers,format text)
select sum(n_numeric) from t_num_type;
Finalize Aggregate (cost=116778.56..116778.57 rows=1 width=32) (actual time=1576.562..1576.562 rows=1 loops=1)
Buffers: shared hit=22108
-> Gather (cost=116778.34..116778.55 rows=2 width=32) (actual time=1576.502..1576.544 rows=3 loops=1)
Workers Planned: 2
Workers Launched: 2
Buffers: shared hit=22108
-> Partial Aggregate (cost=115778.34..115778.35 rows=1 width=32) (actual time=1572.446..1572.446 rows=1 loops=3)
Buffers: shared hit=63695
-> Parallel Seq Scan on t_num_type (cost=0.00..105361.67 rows=4166667 width=6) (actual time=0.028..781.808 rows=3333333 loops=3)
Buffers: shared hit=63695
Planning time: 0.157 ms
Execution time: 1592.559 ms
3» n_int
explain (analyze,buffers,format text)
select sum(n_int) from t_num_type;
Finalize Aggregate (cost=116778.55..116778.56 rows=1 width=8) (actual time=1247.065..1247.065 rows=1 loops=1)
Buffers: shared hit=23367
-> Gather (cost=116778.33..116778.54 rows=2 width=8) (actual time=1247.006..1247.055 rows=3 loops=1)
Workers Planned: 2
Workers Launched: 2
Buffers: shared hit=23367
-> Partial Aggregate (cost=115778.33..115778.34 rows=1 width=8) (actual time=1242.524..1242.524 rows=1 loops=3)
Buffers: shared hit=63695
-> Parallel Seq Scan on t_num_type (cost=0.00..105361.67 rows=4166667 width=4) (actual time=0.028..786.940 rows=3333333 loops=3)
Buffers: shared hit=63695
Planning time: 0.196 ms
Execution time: 1263.352 ms
postgresql
2
Could you edit your question to include a clearer description of the question in the body? The title refers to "the efficiency being increased significantly", but it's not clear what numbers we're comparing - are we comparing the execution time of these three queries, an unshown difference between Postgres versions, or something else?
– IMSoP
7 hours ago
add a comment |
up vote
0
down vote
favorite
up vote
0
down vote
favorite
create table t_num_type(
n_bigint bigint,
n_numeric numeric,
n_int int
);
insert into t_num_type
select generate_series(1,10000000),
generate_series(1,10000000),
generate_series(1,10000000);
1» n_bigint
explain (analyze,buffers,format text)
select sum(n_bigint) from t_num_type;
Finalize Aggregate (cost=116778.56..116778.57 rows=1 width=32) (actual time=1221.663..1221.664 rows=1 loops=1)
Buffers: shared hit=23090
-> Gather (cost=116778.34..116778.55 rows=2 width=32) (actual time=1221.592..1221.643 rows=3 loops=1)
Workers Planned: 2
Workers Launched: 2
Buffers: shared hit=23090
-> Partial Aggregate (cost=115778.34..115778.35 rows=1 width=32) (actual time=1217.558..1217.559 rows=1 loops=3)
Buffers: shared hit=63695
-> Parallel Seq Scan on t_num_type (cost=0.00..105361.67 rows=4166667 width=8) (actual time=0.021..747.748 rows=3333333 loops=3)
Buffers: shared hit=63695
Planning time: 0.265 ms
Execution time: 1237.360 ms
2» numeric
explain (analyze,buffers,format text)
select sum(n_numeric) from t_num_type;
Finalize Aggregate (cost=116778.56..116778.57 rows=1 width=32) (actual time=1576.562..1576.562 rows=1 loops=1)
Buffers: shared hit=22108
-> Gather (cost=116778.34..116778.55 rows=2 width=32) (actual time=1576.502..1576.544 rows=3 loops=1)
Workers Planned: 2
Workers Launched: 2
Buffers: shared hit=22108
-> Partial Aggregate (cost=115778.34..115778.35 rows=1 width=32) (actual time=1572.446..1572.446 rows=1 loops=3)
Buffers: shared hit=63695
-> Parallel Seq Scan on t_num_type (cost=0.00..105361.67 rows=4166667 width=6) (actual time=0.028..781.808 rows=3333333 loops=3)
Buffers: shared hit=63695
Planning time: 0.157 ms
Execution time: 1592.559 ms
3» n_int
explain (analyze,buffers,format text)
select sum(n_int) from t_num_type;
Finalize Aggregate (cost=116778.55..116778.56 rows=1 width=8) (actual time=1247.065..1247.065 rows=1 loops=1)
Buffers: shared hit=23367
-> Gather (cost=116778.33..116778.54 rows=2 width=8) (actual time=1247.006..1247.055 rows=3 loops=1)
Workers Planned: 2
Workers Launched: 2
Buffers: shared hit=23367
-> Partial Aggregate (cost=115778.33..115778.34 rows=1 width=8) (actual time=1242.524..1242.524 rows=1 loops=3)
Buffers: shared hit=63695
-> Parallel Seq Scan on t_num_type (cost=0.00..105361.67 rows=4166667 width=4) (actual time=0.028..786.940 rows=3333333 loops=3)
Buffers: shared hit=63695
Planning time: 0.196 ms
Execution time: 1263.352 ms
postgresql
create table t_num_type(
n_bigint bigint,
n_numeric numeric,
n_int int
);
insert into t_num_type
select generate_series(1,10000000),
generate_series(1,10000000),
generate_series(1,10000000);
1» n_bigint
explain (analyze,buffers,format text)
select sum(n_bigint) from t_num_type;
Finalize Aggregate (cost=116778.56..116778.57 rows=1 width=32) (actual time=1221.663..1221.664 rows=1 loops=1)
Buffers: shared hit=23090
-> Gather (cost=116778.34..116778.55 rows=2 width=32) (actual time=1221.592..1221.643 rows=3 loops=1)
Workers Planned: 2
Workers Launched: 2
Buffers: shared hit=23090
-> Partial Aggregate (cost=115778.34..115778.35 rows=1 width=32) (actual time=1217.558..1217.559 rows=1 loops=3)
Buffers: shared hit=63695
-> Parallel Seq Scan on t_num_type (cost=0.00..105361.67 rows=4166667 width=8) (actual time=0.021..747.748 rows=3333333 loops=3)
Buffers: shared hit=63695
Planning time: 0.265 ms
Execution time: 1237.360 ms
2» numeric
explain (analyze,buffers,format text)
select sum(n_numeric) from t_num_type;
Finalize Aggregate (cost=116778.56..116778.57 rows=1 width=32) (actual time=1576.562..1576.562 rows=1 loops=1)
Buffers: shared hit=22108
-> Gather (cost=116778.34..116778.55 rows=2 width=32) (actual time=1576.502..1576.544 rows=3 loops=1)
Workers Planned: 2
Workers Launched: 2
Buffers: shared hit=22108
-> Partial Aggregate (cost=115778.34..115778.35 rows=1 width=32) (actual time=1572.446..1572.446 rows=1 loops=3)
Buffers: shared hit=63695
-> Parallel Seq Scan on t_num_type (cost=0.00..105361.67 rows=4166667 width=6) (actual time=0.028..781.808 rows=3333333 loops=3)
Buffers: shared hit=63695
Planning time: 0.157 ms
Execution time: 1592.559 ms
3» n_int
explain (analyze,buffers,format text)
select sum(n_int) from t_num_type;
Finalize Aggregate (cost=116778.55..116778.56 rows=1 width=8) (actual time=1247.065..1247.065 rows=1 loops=1)
Buffers: shared hit=23367
-> Gather (cost=116778.33..116778.54 rows=2 width=8) (actual time=1247.006..1247.055 rows=3 loops=1)
Workers Planned: 2
Workers Launched: 2
Buffers: shared hit=23367
-> Partial Aggregate (cost=115778.33..115778.34 rows=1 width=8) (actual time=1242.524..1242.524 rows=1 loops=3)
Buffers: shared hit=63695
-> Parallel Seq Scan on t_num_type (cost=0.00..105361.67 rows=4166667 width=4) (actual time=0.028..786.940 rows=3333333 loops=3)
Buffers: shared hit=63695
Planning time: 0.196 ms
Execution time: 1263.352 ms
postgresql
postgresql
edited 5 hours ago
Laurenz Albe
40.8k92745
40.8k92745
asked 7 hours ago
dodo
85
85
2
Could you edit your question to include a clearer description of the question in the body? The title refers to "the efficiency being increased significantly", but it's not clear what numbers we're comparing - are we comparing the execution time of these three queries, an unshown difference between Postgres versions, or something else?
– IMSoP
7 hours ago
add a comment |
2
Could you edit your question to include a clearer description of the question in the body? The title refers to "the efficiency being increased significantly", but it's not clear what numbers we're comparing - are we comparing the execution time of these three queries, an unshown difference between Postgres versions, or something else?
– IMSoP
7 hours ago
2
2
Could you edit your question to include a clearer description of the question in the body? The title refers to "the efficiency being increased significantly", but it's not clear what numbers we're comparing - are we comparing the execution time of these three queries, an unshown difference between Postgres versions, or something else?
– IMSoP
7 hours ago
Could you edit your question to include a clearer description of the question in the body? The title refers to "the efficiency being increased significantly", but it's not clear what numbers we're comparing - are we comparing the execution time of these three queries, an unshown difference between Postgres versions, or something else?
– IMSoP
7 hours ago
add a comment |
1 Answer
1
active
oldest
votes
up vote
0
down vote
First, you should repeat the experiment several times to see if the difference remains the same. Caching and other effects cause a certain fluctuation in query times.
I'd expect the difference between integer
and bigint
to be negligible in the long run. Both summation operations should be implemented in hardware.
numeric
should be significantly slower, because operations on these binary-coded decimals are implemented in C in the database engine rather than in hardware.
If bigint
summation remains faster even in repeated experiments, my only explanation is tuple deforming: to get to the third column, PostgreSQL has to process the first two columns.
add a comment |
1 Answer
1
active
oldest
votes
1 Answer
1
active
oldest
votes
active
oldest
votes
active
oldest
votes
up vote
0
down vote
First, you should repeat the experiment several times to see if the difference remains the same. Caching and other effects cause a certain fluctuation in query times.
I'd expect the difference between integer
and bigint
to be negligible in the long run. Both summation operations should be implemented in hardware.
numeric
should be significantly slower, because operations on these binary-coded decimals are implemented in C in the database engine rather than in hardware.
If bigint
summation remains faster even in repeated experiments, my only explanation is tuple deforming: to get to the third column, PostgreSQL has to process the first two columns.
add a comment |
up vote
0
down vote
First, you should repeat the experiment several times to see if the difference remains the same. Caching and other effects cause a certain fluctuation in query times.
I'd expect the difference between integer
and bigint
to be negligible in the long run. Both summation operations should be implemented in hardware.
numeric
should be significantly slower, because operations on these binary-coded decimals are implemented in C in the database engine rather than in hardware.
If bigint
summation remains faster even in repeated experiments, my only explanation is tuple deforming: to get to the third column, PostgreSQL has to process the first two columns.
add a comment |
up vote
0
down vote
up vote
0
down vote
First, you should repeat the experiment several times to see if the difference remains the same. Caching and other effects cause a certain fluctuation in query times.
I'd expect the difference between integer
and bigint
to be negligible in the long run. Both summation operations should be implemented in hardware.
numeric
should be significantly slower, because operations on these binary-coded decimals are implemented in C in the database engine rather than in hardware.
If bigint
summation remains faster even in repeated experiments, my only explanation is tuple deforming: to get to the third column, PostgreSQL has to process the first two columns.
First, you should repeat the experiment several times to see if the difference remains the same. Caching and other effects cause a certain fluctuation in query times.
I'd expect the difference between integer
and bigint
to be negligible in the long run. Both summation operations should be implemented in hardware.
numeric
should be significantly slower, because operations on these binary-coded decimals are implemented in C in the database engine rather than in hardware.
If bigint
summation remains faster even in repeated experiments, my only explanation is tuple deforming: to get to the third column, PostgreSQL has to process the first two columns.
answered 5 hours ago
Laurenz Albe
40.8k92745
40.8k92745
add a comment |
add a comment |
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
StackExchange.ready(
function () {
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53264063%2fwhy-is-sumbigint-significantly-faster-than-suminteger-in-postgresql-v10%23new-answer', 'question_page');
}
);
Post as a guest
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function () {
StackExchange.helpers.onClickDraftSave('#login-link');
});
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
2
Could you edit your question to include a clearer description of the question in the body? The title refers to "the efficiency being increased significantly", but it's not clear what numbers we're comparing - are we comparing the execution time of these three queries, an unshown difference between Postgres versions, or something else?
– IMSoP
7 hours ago