mirror of
https://github.com/timescale/timescaledb.git
synced 2025-05-16 10:33:27 +08:00
Fix vectorized hash agg in GroupAgg mode in reverse order (#7686)
The hash grouping strategies do not preserve the input order when the compressed batches need to be reversed. Disable them in this case.
This commit is contained in:
parent
d496a186d0
commit
6ce2fc0df4
1
.unreleased/hash-groupagg-bug
Normal file
1
.unreleased/hash-groupagg-bug
Normal file
@ -0,0 +1 @@
|
||||
Fixes: #7686 Potential wrong aggregation result when using vectorized aggregation with hash grouping in reverse order
|
@ -43,7 +43,9 @@ typedef struct GroupingPolicyHash GroupingPolicyHash;
|
||||
* results are emitted into the output slot. This is done in the order of unique
|
||||
* grouping key indexes, thereby preserving the incoming key order. This
|
||||
* guarantees that this policy works correctly even in a Partial GroupAggregate
|
||||
* node, even though it's not optimal performance-wise.
|
||||
* node, even though it's not optimal performance-wise. We only support the
|
||||
* direct order of records in batch though, not reverse. This is checked at
|
||||
* planning time.
|
||||
*/
|
||||
typedef struct GroupingPolicyHash
|
||||
{
|
||||
|
@ -505,6 +505,8 @@ get_vectorized_grouping_type(Agg *agg, CustomScan *custom, List *resolved_target
|
||||
/*
|
||||
* We support hashed vectorized grouping by one fixed-size by-value
|
||||
* compressed column.
|
||||
* We can use our hash table for GroupAggregate as well, because it preserves
|
||||
* the input order of the keys, but only for the direct order, not reverse.
|
||||
*/
|
||||
if (num_grouping_columns == 1)
|
||||
{
|
||||
@ -729,6 +731,21 @@ try_insert_vector_agg_node(Plan *plan)
|
||||
return plan;
|
||||
}
|
||||
|
||||
/*
|
||||
* The hash grouping strategies do not preserve the input key order when the
|
||||
* reverse ordering is requested, so in this case they cannot work in
|
||||
* GroupAggregate mode.
|
||||
*/
|
||||
if (grouping_type != VAGT_Batch && agg->aggstrategy != AGG_HASHED)
|
||||
{
|
||||
List *settings = linitial(custom->custom_private);
|
||||
const bool reverse = list_nth_int(settings, DCS_Reverse);
|
||||
if (reverse)
|
||||
{
|
||||
return plan;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Build supplementary info to determine whether we can vectorize the
|
||||
* aggregate FILTER clauses.
|
||||
|
236
tsl/test/expected/vector_agg_groupagg.out
Normal file
236
tsl/test/expected/vector_agg_groupagg.out
Normal file
@ -0,0 +1,236 @@
|
||||
-- This file and its contents are licensed under the Timescale License.
|
||||
-- Please see the included NOTICE for copyright information and
|
||||
-- LICENSE-TIMESCALE for a copy of the license.
|
||||
-- Check that the vectorized aggregation works properly in the GroupAggregate
|
||||
-- mode.
|
||||
\pset null $
|
||||
set max_parallel_workers_per_gather = 0;
|
||||
set enable_hashagg to off;
|
||||
--set timescaledb.enable_vectorized_aggregation to off;
|
||||
--set timescaledb.debug_require_vector_agg to 'forbid';
|
||||
create table groupagg(t int, s int, value int);
|
||||
select create_hypertable('groupagg', 't', chunk_time_interval => 10000);
|
||||
NOTICE: adding not-null constraint to column "t"
|
||||
create_hypertable
|
||||
-----------------------
|
||||
(1,public,groupagg,t)
|
||||
(1 row)
|
||||
|
||||
insert into groupagg
|
||||
select
|
||||
xslow * 10,
|
||||
case when xfast = 13 then null else xfast end,
|
||||
xfast * 7 + xslow * 3
|
||||
from generate_series(10, 99) xfast,
|
||||
generate_series(1, 1487) xslow
|
||||
;
|
||||
alter table groupagg set (timescaledb.compress, timescaledb.compress_segmentby = '',
|
||||
timescaledb.compress_orderby = 's');
|
||||
select count(compress_chunk(x)) from show_chunks('groupagg') x;
|
||||
count
|
||||
-------
|
||||
2
|
||||
(1 row)
|
||||
|
||||
vacuum analyze groupagg;
|
||||
set timescaledb.debug_require_vector_agg to 'require';
|
||||
select s, sum(value) from groupagg group by s order by s limit 10;
|
||||
s | sum
|
||||
----+---------
|
||||
10 | 3423074
|
||||
11 | 3433483
|
||||
12 | 3443892
|
||||
14 | 3464710
|
||||
15 | 3475119
|
||||
16 | 3485528
|
||||
17 | 3495937
|
||||
18 | 3506346
|
||||
19 | 3516755
|
||||
20 | 3527164
|
||||
(10 rows)
|
||||
|
||||
-- The hash grouping policies do not support the GroupAggregate mode in the
|
||||
-- reverse order.
|
||||
set timescaledb.debug_require_vector_agg to 'forbid';
|
||||
select s, sum(value) from groupagg group by s order by s desc limit 10;
|
||||
s | sum
|
||||
----+---------
|
||||
$ | 3454301
|
||||
99 | 4349475
|
||||
98 | 4339066
|
||||
97 | 4328657
|
||||
96 | 4318248
|
||||
95 | 4307839
|
||||
94 | 4297430
|
||||
93 | 4287021
|
||||
92 | 4276612
|
||||
91 | 4266203
|
||||
(10 rows)
|
||||
|
||||
reset timescaledb.debug_require_vector_agg;
|
||||
-- Also test the NULLS FIRST order.
|
||||
select count(decompress_chunk(x)) from show_chunks('groupagg') x;
|
||||
count
|
||||
-------
|
||||
2
|
||||
(1 row)
|
||||
|
||||
alter table groupagg set (timescaledb.compress, timescaledb.compress_segmentby = '',
|
||||
timescaledb.compress_orderby = 's nulls first');
|
||||
select count(compress_chunk(x)) from show_chunks('groupagg') x;
|
||||
count
|
||||
-------
|
||||
2
|
||||
(1 row)
|
||||
|
||||
vacuum analyze groupagg;
|
||||
set timescaledb.debug_require_vector_agg to 'require';
|
||||
select s, sum(value) from groupagg group by s order by s nulls first limit 10;
|
||||
s | sum
|
||||
----+---------
|
||||
$ | 3454301
|
||||
10 | 3423074
|
||||
11 | 3433483
|
||||
12 | 3443892
|
||||
14 | 3464710
|
||||
15 | 3475119
|
||||
16 | 3485528
|
||||
17 | 3495937
|
||||
18 | 3506346
|
||||
19 | 3516755
|
||||
(10 rows)
|
||||
|
||||
reset timescaledb.debug_require_vector_agg;
|
||||
-- More tests for dictionary encoding. This is not vectorized at the moment.
|
||||
set timescaledb.debug_require_vector_agg to 'forbid';
|
||||
create table text_table(ts int);
|
||||
select create_hypertable('text_table', 'ts', chunk_time_interval => 3);
|
||||
NOTICE: adding not-null constraint to column "ts"
|
||||
create_hypertable
|
||||
-------------------------
|
||||
(3,public,text_table,t)
|
||||
(1 row)
|
||||
|
||||
alter table text_table set (timescaledb.compress);
|
||||
WARNING: there was some uncertainty picking the default segment by for the hypertable: You do not have any indexes on columns that can be used for segment_by and thus we are not using segment_by for compression. Please make sure you are not missing any indexes
|
||||
NOTICE: default segment by for hypertable "text_table" is set to ""
|
||||
NOTICE: default order by for hypertable "text_table" is set to "ts DESC"
|
||||
insert into text_table select 0 /*, default */ from generate_series(1, 1000) x;
|
||||
select count(compress_chunk(x)) from show_chunks('text_table') x;
|
||||
count
|
||||
-------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
alter table text_table add column a text default 'default';
|
||||
alter table text_table set (timescaledb.compress,
|
||||
timescaledb.compress_segmentby = '', timescaledb.compress_orderby = 'a');
|
||||
insert into text_table select 1, '' from generate_series(1, 1000) x;
|
||||
insert into text_table select 2, 'same' from generate_series(1, 1000) x;
|
||||
insert into text_table select 3, 'different' || x from generate_series(1, 1000) x;
|
||||
insert into text_table select 4, case when x % 2 = 0 then null else 'same-with-nulls' end from generate_series(1, 1000) x;
|
||||
insert into text_table select 5, case when x % 2 = 0 then null else 'different-with-nulls' || x end from generate_series(1, 1000) x;
|
||||
select count(compress_chunk(x)) from show_chunks('text_table') x;
|
||||
count
|
||||
-------
|
||||
2
|
||||
(1 row)
|
||||
|
||||
vacuum analyze text_table;
|
||||
-- Test the GroupAggregate by a text column. Not vectorized for now.
|
||||
select a, count(*) from text_table group by a order by a limit 10;
|
||||
a | count
|
||||
-------------------------+-------
|
||||
| 1000
|
||||
default | 1000
|
||||
different-with-nulls1 | 1
|
||||
different-with-nulls101 | 1
|
||||
different-with-nulls103 | 1
|
||||
different-with-nulls105 | 1
|
||||
different-with-nulls107 | 1
|
||||
different-with-nulls109 | 1
|
||||
different-with-nulls11 | 1
|
||||
different-with-nulls111 | 1
|
||||
(10 rows)
|
||||
|
||||
-- The hash grouping policies do not support the GroupAggregate mode in the
|
||||
-- reverse order.
|
||||
select a, count(*) from text_table group by a order by a desc limit 10;
|
||||
a | count
|
||||
-----------------+-------
|
||||
$ | 1000
|
||||
same-with-nulls | 500
|
||||
same | 1000
|
||||
different999 | 1
|
||||
different998 | 1
|
||||
different997 | 1
|
||||
different996 | 1
|
||||
different995 | 1
|
||||
different994 | 1
|
||||
different993 | 1
|
||||
(10 rows)
|
||||
|
||||
-- with NULLS FIRST
|
||||
select count(decompress_chunk(x)) from show_chunks('text_table') x;
|
||||
count
|
||||
-------
|
||||
2
|
||||
(1 row)
|
||||
|
||||
alter table text_table set (timescaledb.compress,
|
||||
timescaledb.compress_segmentby = '', timescaledb.compress_orderby = 'a nulls first');
|
||||
select count(compress_chunk(x)) from show_chunks('text_table') x;
|
||||
count
|
||||
-------
|
||||
2
|
||||
(1 row)
|
||||
|
||||
select a, count(*) from text_table group by a order by a nulls first limit 10;
|
||||
a | count
|
||||
-------------------------+-------
|
||||
$ | 1000
|
||||
| 1000
|
||||
default | 1000
|
||||
different-with-nulls1 | 1
|
||||
different-with-nulls101 | 1
|
||||
different-with-nulls103 | 1
|
||||
different-with-nulls105 | 1
|
||||
different-with-nulls107 | 1
|
||||
different-with-nulls109 | 1
|
||||
different-with-nulls11 | 1
|
||||
(10 rows)
|
||||
|
||||
-- TODO verify that this works with the serialized hash grouping strategy
|
||||
select ts, a, count(*) from text_table group by ts, a order by ts, a limit 10;
|
||||
ts | a | count
|
||||
----+---------------+-------
|
||||
0 | default | 1000
|
||||
1 | | 1000
|
||||
2 | same | 1000
|
||||
3 | different1 | 1
|
||||
3 | different10 | 1
|
||||
3 | different100 | 1
|
||||
3 | different1000 | 1
|
||||
3 | different101 | 1
|
||||
3 | different102 | 1
|
||||
3 | different103 | 1
|
||||
(10 rows)
|
||||
|
||||
select a, ts, count(*) from text_table group by a, ts order by a desc, ts desc limit 10;
|
||||
a | ts | count
|
||||
-----------------+----+-------
|
||||
$ | 5 | 500
|
||||
$ | 4 | 500
|
||||
same-with-nulls | 4 | 500
|
||||
same | 2 | 1000
|
||||
different999 | 3 | 1
|
||||
different998 | 3 | 1
|
||||
different997 | 3 | 1
|
||||
different996 | 3 | 1
|
||||
different995 | 3 | 1
|
||||
different994 | 3 | 1
|
||||
(10 rows)
|
||||
|
||||
reset max_parallel_workers_per_gather;
|
||||
reset timescaledb.debug_require_vector_agg;
|
||||
reset enable_hashagg;
|
@ -45,6 +45,7 @@ set(TEST_FILES
|
||||
skip_scan.sql
|
||||
transparent_decompression_join_index.sql
|
||||
vector_agg_functions.sql
|
||||
vector_agg_groupagg.sql
|
||||
vector_agg_param.sql
|
||||
vectorized_aggregation.sql)
|
||||
|
||||
|
101
tsl/test/sql/vector_agg_groupagg.sql
Normal file
101
tsl/test/sql/vector_agg_groupagg.sql
Normal file
@ -0,0 +1,101 @@
|
||||
-- This file and its contents are licensed under the Timescale License.
|
||||
-- Please see the included NOTICE for copyright information and
|
||||
-- LICENSE-TIMESCALE for a copy of the license.
|
||||
|
||||
-- Check that the vectorized aggregation works properly in the GroupAggregate
|
||||
-- mode.
|
||||
|
||||
\pset null $
|
||||
|
||||
set max_parallel_workers_per_gather = 0;
|
||||
set enable_hashagg to off;
|
||||
|
||||
--set timescaledb.enable_vectorized_aggregation to off;
|
||||
--set timescaledb.debug_require_vector_agg to 'forbid';
|
||||
|
||||
create table groupagg(t int, s int, value int);
|
||||
select create_hypertable('groupagg', 't', chunk_time_interval => 10000);
|
||||
|
||||
insert into groupagg
|
||||
select
|
||||
xslow * 10,
|
||||
case when xfast = 13 then null else xfast end,
|
||||
xfast * 7 + xslow * 3
|
||||
from generate_series(10, 99) xfast,
|
||||
generate_series(1, 1487) xslow
|
||||
;
|
||||
|
||||
alter table groupagg set (timescaledb.compress, timescaledb.compress_segmentby = '',
|
||||
timescaledb.compress_orderby = 's');
|
||||
select count(compress_chunk(x)) from show_chunks('groupagg') x;
|
||||
vacuum analyze groupagg;
|
||||
|
||||
|
||||
set timescaledb.debug_require_vector_agg to 'require';
|
||||
select s, sum(value) from groupagg group by s order by s limit 10;
|
||||
|
||||
-- The hash grouping policies do not support the GroupAggregate mode in the
|
||||
-- reverse order.
|
||||
set timescaledb.debug_require_vector_agg to 'forbid';
|
||||
select s, sum(value) from groupagg group by s order by s desc limit 10;
|
||||
reset timescaledb.debug_require_vector_agg;
|
||||
|
||||
-- Also test the NULLS FIRST order.
|
||||
select count(decompress_chunk(x)) from show_chunks('groupagg') x;
|
||||
alter table groupagg set (timescaledb.compress, timescaledb.compress_segmentby = '',
|
||||
timescaledb.compress_orderby = 's nulls first');
|
||||
select count(compress_chunk(x)) from show_chunks('groupagg') x;
|
||||
vacuum analyze groupagg;
|
||||
|
||||
set timescaledb.debug_require_vector_agg to 'require';
|
||||
select s, sum(value) from groupagg group by s order by s nulls first limit 10;
|
||||
reset timescaledb.debug_require_vector_agg;
|
||||
|
||||
|
||||
-- More tests for dictionary encoding. This is not vectorized at the moment.
|
||||
set timescaledb.debug_require_vector_agg to 'forbid';
|
||||
create table text_table(ts int);
|
||||
select create_hypertable('text_table', 'ts', chunk_time_interval => 3);
|
||||
alter table text_table set (timescaledb.compress);
|
||||
|
||||
insert into text_table select 0 /*, default */ from generate_series(1, 1000) x;
|
||||
select count(compress_chunk(x)) from show_chunks('text_table') x;
|
||||
|
||||
alter table text_table add column a text default 'default';
|
||||
alter table text_table set (timescaledb.compress,
|
||||
timescaledb.compress_segmentby = '', timescaledb.compress_orderby = 'a');
|
||||
|
||||
insert into text_table select 1, '' from generate_series(1, 1000) x;
|
||||
insert into text_table select 2, 'same' from generate_series(1, 1000) x;
|
||||
insert into text_table select 3, 'different' || x from generate_series(1, 1000) x;
|
||||
insert into text_table select 4, case when x % 2 = 0 then null else 'same-with-nulls' end from generate_series(1, 1000) x;
|
||||
insert into text_table select 5, case when x % 2 = 0 then null else 'different-with-nulls' || x end from generate_series(1, 1000) x;
|
||||
|
||||
select count(compress_chunk(x)) from show_chunks('text_table') x;
|
||||
vacuum analyze text_table;
|
||||
|
||||
|
||||
-- Test the GroupAggregate by a text column. Not vectorized for now.
|
||||
select a, count(*) from text_table group by a order by a limit 10;
|
||||
|
||||
-- The hash grouping policies do not support the GroupAggregate mode in the
|
||||
-- reverse order.
|
||||
select a, count(*) from text_table group by a order by a desc limit 10;
|
||||
|
||||
|
||||
-- with NULLS FIRST
|
||||
select count(decompress_chunk(x)) from show_chunks('text_table') x;
|
||||
alter table text_table set (timescaledb.compress,
|
||||
timescaledb.compress_segmentby = '', timescaledb.compress_orderby = 'a nulls first');
|
||||
select count(compress_chunk(x)) from show_chunks('text_table') x;
|
||||
|
||||
select a, count(*) from text_table group by a order by a nulls first limit 10;
|
||||
|
||||
|
||||
-- TODO verify that this works with the serialized hash grouping strategy
|
||||
select ts, a, count(*) from text_table group by ts, a order by ts, a limit 10;
|
||||
select a, ts, count(*) from text_table group by a, ts order by a desc, ts desc limit 10;
|
||||
|
||||
reset max_parallel_workers_per_gather;
|
||||
reset timescaledb.debug_require_vector_agg;
|
||||
reset enable_hashagg;
|
Loading…
x
Reference in New Issue
Block a user