diff --git a/CHANGELOG.md b/CHANGELOG.md index ea9479875..0673f366d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,10 +36,12 @@ accidentally triggering the load of a previous DB version.** * #5642 Fix ALTER TABLE SET with normal tables * #5666 Reduce memory usage for distributed analyze * #5668 Fix subtransaction resource owner +* #5680 Fix DISTINCT query with JOIN on multiple segmentby columns **Thanks** * @kovetskiy and @DZDomi for reporting peformance regression in Realtime Continuous Aggregates * @ollz272 for reporting an issue with interpolate error messages +* @ericdevries for reporting an issue with DISTINCT queries using segmentby columns of a compressed hypertable ## 2.10.3 (2023-04-26) diff --git a/tsl/src/nodes/decompress_chunk/decompress_chunk.c b/tsl/src/nodes/decompress_chunk/decompress_chunk.c index 7b738e90f..a079161a4 100644 --- a/tsl/src/nodes/decompress_chunk/decompress_chunk.c +++ b/tsl/src/nodes/decompress_chunk/decompress_chunk.c @@ -1290,12 +1290,14 @@ add_segmentby_to_equivalence_class(EquivalenceClass *cur_ec, CompressionInfo *in Var *var; Assert(!bms_overlap(cur_em->em_relids, info->compressed_rel->relids)); - /* only consider EquivalenceMembers that are vars of the uncompressed chunk */ - if (!IsA(cur_em->em_expr, Var)) + /* only consider EquivalenceMembers that are Vars, possibly with RelabelType, of the + * uncompressed chunk */ + var = (Var *) cur_em->em_expr; + while (var && IsA(var, RelabelType)) + var = (Var *) ((RelabelType *) var)->arg; + if (!(var && IsA(var, Var))) continue; - var = castNode(Var, cur_em->em_expr); - if ((Index) var->varno != info->chunk_rel->relid) continue; @@ -1303,7 +1305,7 @@ add_segmentby_to_equivalence_class(EquivalenceClass *cur_ec, CompressionInfo *in * be set on the em */ Assert(bms_overlap(cur_em->em_relids, uncompressed_chunk_relids)); - context->current_col_info = get_compression_info_for_em((Node *) cur_em->em_expr, context); + context->current_col_info = get_compression_info_for_em((Node *) var,
context); if (context->current_col_info == NULL) continue; diff --git a/tsl/test/expected/transparent_decompression_join_index.out b/tsl/test/expected/transparent_decompression_join_index.out new file mode 100644 index 000000000..23e62a270 --- /dev/null +++ b/tsl/test/expected/transparent_decompression_join_index.out @@ -0,0 +1,101 @@ +-- This file and its contents are licensed under the Timescale License. +-- Please see the included NOTICE for copyright information and +-- LICENSE-TIMESCALE for a copy of the license. +\c :TEST_DBNAME :ROLE_SUPERUSER +-- github issue 5585 +create table test ( + time timestamptz not null, + a varchar(255) not null, + b int, + c int +); +SELECT create_hypertable('test', 'time'); +WARNING: column type "character varying" used for "a" does not follow best practices + create_hypertable +------------------- + (1,public,test,t) +(1 row) + +insert into test values +('2020-01-01 00:00'::timestamptz, 'lat', 1, 2), +('2020-01-01 00:01'::timestamptz, 'lat', 1, 2), +('2020-01-01 00:01'::timestamptz, 'lat', 2, 2), +('2020-01-01 00:03'::timestamptz, 'lat', 1, 2), +('2020-01-01 00:01'::timestamptz, 'lon', 1, 2); +create table test_copy as select * from test; +-- compress the chunk +alter table test set (timescaledb.compress, timescaledb.compress_segmentby='a, b'); +select compress_chunk(show_chunks('test')); + compress_chunk +---------------------------------------- + _timescaledb_internal._hyper_1_1_chunk +(1 row) + +-- force an index scan +set enable_seqscan = 'off'; +-- disable jit to avoid test flakiness +set jit = off; +explain with query_params as ( + select distinct a, b + from test_copy + where test_copy.a IN ('lat', 'lon') + and test_copy.b IN (1) +) +select + test.time, + test.a = q.a as "this should never be false", + test.a, + test.b, + test.c, + q.* +from +test inner join query_params q + on q.a = test.a and q.b = test.b +where test.time between '2020-01-01 00:00' and '2020-01-01 00:02' +order by test.time; + QUERY PLAN 
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Sort (cost=10000000024.30..10000000024.30 rows=1 width=541) + Sort Key: _hyper_1_1_chunk."time" + -> Nested Loop (cost=10000000014.27..10000000024.29 rows=1 width=541) + -> Unique (cost=10000000012.11..10000000012.12 rows=1 width=520) + -> Sort (cost=10000000012.11..10000000012.11 rows=1 width=520) + Sort Key: test_copy.a + -> Seq Scan on test_copy (cost=10000000000.00..10000000012.10 rows=1 width=520) + Filter: (((a)::text = ANY ('{lat,lon}'::text[])) AND (b = 1)) + -> Custom Scan (DecompressChunk) on _hyper_1_1_chunk (cost=2.15..2.15 rows=1000 width=20) + Filter: (("time" >= 'Wed Jan 01 00:00:00 2020 PST'::timestamp with time zone) AND ("time" <= 'Wed Jan 01 00:02:00 2020 PST'::timestamp with time zone) AND ((test_copy.a)::text = a) AND (test_copy.b = b)) + -> Index Scan using compress_hyper_2_2_chunk__compressed_hypertable_2_a_b__ts_meta_ on compress_hyper_2_2_chunk (cost=0.13..2.15 rows=1 width=604) + Index Cond: ((a = (test_copy.a)::text) AND (b = test_copy.b)) + Filter: ((_ts_meta_max_1 >= 'Wed Jan 01 00:00:00 2020 PST'::timestamp with time zone) AND (_ts_meta_min_1 <= 'Wed Jan 01 00:02:00 2020 PST'::timestamp with time zone)) +(13 rows) + +with query_params as ( + select distinct a, b + from test_copy + where test_copy.a IN ('lat', 'lon') + and test_copy.b IN (1) +) +select + test.time, + test.a = q.a as "this should never be false", + test.a, + test.b, + test.c, + q.* +from +test inner join query_params q + on q.a = test.a and q.b = test.b +where test.time between '2020-01-01 00:00' and '2020-01-01 00:02' +order by test.time; + time | this should never be false | a | b | c | a | b +------------------------------+----------------------------+-----+---+---+-----+--- + Wed Jan 01 00:00:00 2020 PST | t | lat | 1 | 2 | lat | 1 + Wed Jan 01 
00:01:00 2020 PST | t | lat | 1 | 2 | lat | 1 + Wed Jan 01 00:01:00 2020 PST | t | lon | 1 | 2 | lon | 1 +(3 rows) + +reset enable_seqscan; +reset jit; +drop table test; +drop table test_copy; diff --git a/tsl/test/sql/CMakeLists.txt b/tsl/test/sql/CMakeLists.txt index bd59c7f6b..a1050593b 100644 --- a/tsl/test/sql/CMakeLists.txt +++ b/tsl/test/sql/CMakeLists.txt @@ -105,7 +105,8 @@ if(CMAKE_BUILD_TYPE MATCHES Debug) tsl_tables.sql license_tsl.sql fixed_schedules.sql - recompress_chunk_segmentwise.sql) + recompress_chunk_segmentwise.sql + transparent_decompression_join_index.sql) endif(CMAKE_BUILD_TYPE MATCHES Debug) if((${PG_VERSION_MAJOR} GREATER_EQUAL "14")) diff --git a/tsl/test/sql/transparent_decompression_join_index.sql b/tsl/test/sql/transparent_decompression_join_index.sql new file mode 100644 index 000000000..e4898047d --- /dev/null +++ b/tsl/test/sql/transparent_decompression_join_index.sql @@ -0,0 +1,76 @@ +-- This file and its contents are licensed under the Timescale License. +-- Please see the included NOTICE for copyright information and +-- LICENSE-TIMESCALE for a copy of the license. 
+
+\c :TEST_DBNAME :ROLE_SUPERUSER
+
+-- github issue 5585
+create table test (
+    time timestamptz not null,
+    a varchar(255) not null,
+    b int,
+    c int
+);
+
+SELECT create_hypertable('test', 'time');
+
+insert into test values
+('2020-01-01 00:00'::timestamptz, 'lat', 1, 2),
+('2020-01-01 00:01'::timestamptz, 'lat', 1, 2),
+('2020-01-01 00:01'::timestamptz, 'lat', 2, 2),
+('2020-01-01 00:03'::timestamptz, 'lat', 1, 2),
+('2020-01-01 00:01'::timestamptz, 'lon', 1, 2);
+
+create table test_copy as select * from test;
+
+-- compress the chunk
+alter table test set (timescaledb.compress, timescaledb.compress_segmentby='a, b');
+select compress_chunk(show_chunks('test'));
+-- force an index scan
+set enable_seqscan = 'off';
+-- disable jit to avoid test flakiness
+set jit = off;
+
+explain with query_params as (
+    select distinct a, b
+    from test_copy
+    where test_copy.a IN ('lat', 'lon')
+    and test_copy.b IN (1)
+)
+select
+    test.time,
+    test.a = q.a as "this should never be false",
+    test.a,
+    test.b,
+    test.c,
+    q.*
+from
+test inner join query_params q
+    on q.a = test.a and q.b = test.b
+where test.time between '2020-01-01 00:00' and '2020-01-01 00:02'
+order by test.time;
+
+with query_params as (
+    select distinct a, b
+    from test_copy
+    where test_copy.a IN ('lat', 'lon')
+    and test_copy.b IN (1)
+)
+select
+    test.time,
+    test.a = q.a as "this should never be false",
+    test.a,
+    test.b,
+    test.c,
+    q.*
+from
+test inner join query_params q
+    on q.a = test.a and q.b = test.b
+where test.time between '2020-01-01 00:00' and '2020-01-01 00:02'
+order by test.time;
+
+reset enable_seqscan;
+reset jit;
+
+drop table test;
+drop table test_copy;