From 19dd7bbd7a09de25af7c233a5923ac7eaef809de Mon Sep 17 00:00:00 2001 From: Konstantina Skovola Date: Fri, 12 May 2023 17:47:06 +0300 Subject: [PATCH] Fix DISTINCT query with JOIN on multiple segmentby columns Previously when adding equivalence class members for the compressed chunk's variables, we would only consider Vars. This led us to ignore cases where the Var was wrapped in a RelabelType, returning inaccurate results. Fixed the issue by accepting Vars with RelabelType for segmentby equivalence class. Fixes #5585 --- CHANGELOG.md | 2 + .../nodes/decompress_chunk/decompress_chunk.c | 12 ++- .../transparent_decompression_join_index.out | 101 ++++++++++++++++++ tsl/test/sql/CMakeLists.txt | 3 +- .../transparent_decompression_join_index.sql | 76 +++++++++++++ 5 files changed, 188 insertions(+), 6 deletions(-) create mode 100644 tsl/test/expected/transparent_decompression_join_index.out create mode 100644 tsl/test/sql/transparent_decompression_join_index.sql diff --git a/CHANGELOG.md b/CHANGELOG.md index ea9479875..0673f366d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,10 +36,12 @@ accidentally triggering the load of a previous DB version.** * #5642 Fix ALTER TABLE SET with normal tables * #5666 Reduce memory usage for distributed analyze * #5668 Fix subtransaction resource owner +* #5680 Fix DISTINCT query with JOIN on multiple segmentby columns **Thanks** * @kovetskiy and @DZDomi for reporting peformance regression in Realtime Continuous Aggregates * @ollz272 for reporting an issue with interpolate error messages +* @ericdevries for reporting an issue with DISTINCT queries using segmentby columns of compressed hypertable ## 2.10.3 (2023-04-26) diff --git a/tsl/src/nodes/decompress_chunk/decompress_chunk.c b/tsl/src/nodes/decompress_chunk/decompress_chunk.c index 7b738e90f..a079161a4 100644 --- a/tsl/src/nodes/decompress_chunk/decompress_chunk.c +++ b/tsl/src/nodes/decompress_chunk/decompress_chunk.c @@ -1290,12 +1290,14 @@ add_segmentby_to_equivalence_class(EquivalenceClass *cur_ec, CompressionInfo *in Var *var; Assert(!bms_overlap(cur_em->em_relids, info->compressed_rel->relids)); - /* only consider EquivalenceMembers that are vars of the uncompressed chunk */ - if (!IsA(cur_em->em_expr, Var)) + /* only consider EquivalenceMembers that are Vars, possibly with RelabelType, of the + * uncompressed chunk */ + var = (Var *) cur_em->em_expr; + while (var && IsA(var, RelabelType)) + var = (Var *) ((RelabelType *) var)->arg; + if (!(var && IsA(var, Var))) continue; - var = castNode(Var, cur_em->em_expr); - if ((Index) var->varno != info->chunk_rel->relid) continue; @@ -1303,7 +1305,7 @@ add_segmentby_to_equivalence_class(EquivalenceClass *cur_ec, CompressionInfo *in * be set on the em */ Assert(bms_overlap(cur_em->em_relids, uncompressed_chunk_relids)); - context->current_col_info = get_compression_info_for_em((Node *) cur_em->em_expr, context); + context->current_col_info = get_compression_info_for_em((Node *) var, context); if (context->current_col_info == NULL) continue; diff --git a/tsl/test/expected/transparent_decompression_join_index.out b/tsl/test/expected/transparent_decompression_join_index.out new file mode 100644 index 000000000..23e62a270 --- /dev/null +++ b/tsl/test/expected/transparent_decompression_join_index.out @@ -0,0 +1,101 @@ +-- This file and its contents are licensed under the Timescale License. +-- Please see the included NOTICE for copyright information and +-- LICENSE-TIMESCALE for a copy of the license. 
+\c :TEST_DBNAME :ROLE_SUPERUSER +-- github issue 5585 +create table test ( + time timestamptz not null, + a varchar(255) not null, + b int, + c int +); +SELECT create_hypertable('test', 'time'); +WARNING: column type "character varying" used for "a" does not follow best practices + create_hypertable +------------------- + (1,public,test,t) +(1 row) + +insert into test values +('2020-01-01 00:00'::timestamptz, 'lat', 1, 2), +('2020-01-01 00:01'::timestamptz, 'lat', 1, 2), +('2020-01-01 00:01'::timestamptz, 'lat', 2, 2), +('2020-01-01 00:03'::timestamptz, 'lat', 1, 2), +('2020-01-01 00:01'::timestamptz, 'lon', 1, 2); +create table test_copy as select * from test; +-- compress the chunk +alter table test set (timescaledb.compress, timescaledb.compress_segmentby='a, b'); +select compress_chunk(show_chunks('test')); + compress_chunk +---------------------------------------- + _timescaledb_internal._hyper_1_1_chunk +(1 row) + +-- force an index scan +set enable_seqscan = 'off'; +-- disable jit to avoid test flakiness +set jit = off; +explain with query_params as ( + select distinct a, b + from test_copy + where test_copy.a IN ('lat', 'lon') + and test_copy.b IN (1) +) +select + test.time, + test.a = q.a as "this should never be false", + test.a, + test.b, + test.c, + q.* +from +test inner join query_params q + on q.a = test.a and q.b = test.b +where test.time between '2020-01-01 00:00' and '2020-01-01 00:02' +order by test.time; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Sort (cost=10000000024.30..10000000024.30 rows=1 width=541) + Sort Key: _hyper_1_1_chunk."time" + -> Nested Loop (cost=10000000014.27..10000000024.29 rows=1 width=541) + -> Unique (cost=10000000012.11..10000000012.12 rows=1 width=520) + -> Sort (cost=10000000012.11..10000000012.11 rows=1 width=520) + Sort Key: test_copy.a + -> Seq Scan on test_copy (cost=10000000000.00..10000000012.10 rows=1 width=520) + Filter: (((a)::text = ANY ('{lat,lon}'::text[])) AND (b = 1)) + -> Custom Scan (DecompressChunk) on _hyper_1_1_chunk (cost=2.15..2.15 rows=1000 width=20) + Filter: (("time" >= 'Wed Jan 01 00:00:00 2020 PST'::timestamp with time zone) AND ("time" <= 'Wed Jan 01 00:02:00 2020 PST'::timestamp with time zone) AND ((test_copy.a)::text = a) AND (test_copy.b = b)) + -> Index Scan using compress_hyper_2_2_chunk__compressed_hypertable_2_a_b__ts_meta_ on compress_hyper_2_2_chunk (cost=0.13..2.15 rows=1 width=604) + Index Cond: ((a = (test_copy.a)::text) AND (b = test_copy.b)) + Filter: ((_ts_meta_max_1 >= 'Wed Jan 01 00:00:00 2020 PST'::timestamp with time zone) AND (_ts_meta_min_1 <= 'Wed Jan 01 00:02:00 2020 PST'::timestamp with time zone)) +(13 rows) + +with query_params as ( + select distinct a, b + from test_copy + where test_copy.a IN ('lat', 'lon') + and test_copy.b IN (1) +) +select + test.time, + test.a = q.a as "this should never be false", + test.a, + test.b, + test.c, + q.* +from +test inner join query_params q + on q.a = test.a and q.b = test.b +where test.time between '2020-01-01 00:00' and '2020-01-01 00:02' +order by test.time; + time | this should never be false | a | b | c | a | b +------------------------------+----------------------------+-----+---+---+-----+--- + Wed Jan 01 00:00:00 2020 PST | t | lat | 1 | 2 | lat | 1 + Wed Jan 01 00:01:00 2020 PST | t | lat | 1 | 2 | lat | 1 + Wed Jan 01 00:01:00 2020 PST | t | lon | 1 | 2 | 
lon | 1 +(3 rows) + +reset enable_seqscan; +reset jit; +drop table test; +drop table test_copy; diff --git a/tsl/test/sql/CMakeLists.txt b/tsl/test/sql/CMakeLists.txt index bd59c7f6b..a1050593b 100644 --- a/tsl/test/sql/CMakeLists.txt +++ b/tsl/test/sql/CMakeLists.txt @@ -105,7 +105,8 @@ if(CMAKE_BUILD_TYPE MATCHES Debug) tsl_tables.sql license_tsl.sql fixed_schedules.sql - recompress_chunk_segmentwise.sql) + recompress_chunk_segmentwise.sql + transparent_decompression_join_index.sql) endif(CMAKE_BUILD_TYPE MATCHES Debug) if((${PG_VERSION_MAJOR} GREATER_EQUAL "14")) diff --git a/tsl/test/sql/transparent_decompression_join_index.sql b/tsl/test/sql/transparent_decompression_join_index.sql new file mode 100644 index 000000000..e4898047d --- /dev/null +++ b/tsl/test/sql/transparent_decompression_join_index.sql @@ -0,0 +1,76 @@ +-- This file and its contents are licensed under the Timescale License. +-- Please see the included NOTICE for copyright information and +-- LICENSE-TIMESCALE for a copy of the license. + +\c :TEST_DBNAME :ROLE_SUPERUSER + +-- github issue 5585 +create table test ( + time timestamptz not null, + a varchar(255) not null, + b int, + c int +); + +SELECT create_hypertable('test', 'time'); + +insert into test values +('2020-01-01 00:00'::timestamptz, 'lat', 1, 2), +('2020-01-01 00:01'::timestamptz, 'lat', 1, 2), +('2020-01-01 00:01'::timestamptz, 'lat', 2, 2), +('2020-01-01 00:03'::timestamptz, 'lat', 1, 2), +('2020-01-01 00:01'::timestamptz, 'lon', 1, 2); + +create table test_copy as select * from test; + +-- compress the chunk +alter table test set (timescaledb.compress, timescaledb.compress_segmentby='a, b'); +select compress_chunk(show_chunks('test')); +-- force an index scan +set enable_seqscan = 'off'; +-- disable jit to avoid test flakiness +set jit = off; + +explain with query_params as ( + select distinct a, b + from test_copy + where test_copy.a IN ('lat', 'lon') + and test_copy.b IN (1) +) +select + test.time, + test.a = q.a as "this should never be false", + test.a, + test.b, + test.c, + q.* +from +test inner join query_params q + on q.a = test.a and q.b = test.b +where test.time between '2020-01-01 00:00' and '2020-01-01 00:02' +order by test.time; + +with query_params as ( + select distinct a, b + from test_copy + where test_copy.a IN ('lat', 'lon') + and test_copy.b IN (1) +) +select + test.time, + test.a = q.a as "this should never be false", + test.a, + test.b, + test.c, + q.* +from +test inner join query_params q + on q.a = test.a and q.b = test.b +where test.time between '2020-01-01 00:00' and '2020-01-01 00:02' +order by test.time; + +reset enable_seqscan; +reset jit; + +drop table test; +drop table test_copy;
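Note on the fix (illustration only, not part of the patch): the heart of the change is the RelabelType-stripping loop added to add_segmentby_to_equivalence_class() in decompress_chunk.c. In the regression test the segmentby column "a" is declared varchar(255), so when it is compared against text values the planner wraps the Var in a RelabelType node, and the previous IsA(cur_em->em_expr, Var) check rejected that equivalence member, which per the commit message led to inaccurate results for the DISTINCT + JOIN query. Below is a minimal standalone sketch of the idiom, assuming standard PostgreSQL headers; the helper name em_expr_as_var is illustrative only and does not appear in the patch, which inlines the loop instead.

#include <postgres.h>
#include <nodes/nodes.h>
#include <nodes/primnodes.h>

/*
 * Return the Var underneath any RelabelType wrappers, or NULL if the
 * expression is not a (possibly relabeled) Var. Hypothetical helper
 * mirroring the logic the patch inlines.
 */
static Var *
em_expr_as_var(Expr *expr)
{
	Node *node = (Node *) expr;

	/*
	 * A varchar column compared against a text value is planned as
	 * RelabelType(Var); loop in case of nested binary-compatible
	 * coercions.
	 */
	while (node != NULL && IsA(node, RelabelType))
		node = (Node *) ((RelabelType *) node)->arg;

	if (node != NULL && IsA(node, Var))
		return (Var *) node;

	return NULL;
}

Using a loop rather than a single unwrap mirrors the patch and covers chained RelabelType nodes the planner can produce; a NULL result corresponds to the patched code's continue, i.e. the equivalence member is simply skipped.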