Fix DISTINCT query with JOIN on multiple segmentby columns

Previously, when adding equivalence class members for the compressed
chunk's variables, we only considered bare Vars. As a result, a Var
wrapped in a RelabelType was ignored, which could make queries
return incorrect results.

Fix the issue by also accepting Vars wrapped in a RelabelType
when building the segmentby equivalence class.

Fixes #5585
This commit is contained in:
Konstantina Skovola 2023-05-12 17:47:06 +03:00 committed by Konstantina Skovola
parent fb65086b55
commit 19dd7bbd7a
5 changed files with 188 additions and 6 deletions

View File

@ -36,10 +36,12 @@ accidentally triggering the load of a previous DB version.**
* #5642 Fix ALTER TABLE SET with normal tables * #5642 Fix ALTER TABLE SET with normal tables
* #5666 Reduce memory usage for distributed analyze * #5666 Reduce memory usage for distributed analyze
* #5668 Fix subtransaction resource owner * #5668 Fix subtransaction resource owner
* #5680 Fix DISTINCT query with JOIN on multiple segmentby columns
**Thanks** **Thanks**
* @kovetskiy and @DZDomi for reporting performance regression in Realtime Continuous Aggregates * @kovetskiy and @DZDomi for reporting performance regression in Realtime Continuous Aggregates
* @ollz272 for reporting an issue with interpolate error messages * @ollz272 for reporting an issue with interpolate error messages
* @ericdevries for reporting an issue with DISTINCT queries using segmentby columns of compressed hypertable
## 2.10.3 (2023-04-26) ## 2.10.3 (2023-04-26)

View File

@ -1290,12 +1290,14 @@ add_segmentby_to_equivalence_class(EquivalenceClass *cur_ec, CompressionInfo *in
Var *var; Var *var;
Assert(!bms_overlap(cur_em->em_relids, info->compressed_rel->relids)); Assert(!bms_overlap(cur_em->em_relids, info->compressed_rel->relids));
/* only consider EquivalenceMembers that are vars of the uncompressed chunk */ /* only consider EquivalenceMembers that are Vars, possibly with RelabelType, of the
if (!IsA(cur_em->em_expr, Var)) * uncompressed chunk */
var = (Var *) cur_em->em_expr;
while (var && IsA(var, RelabelType))
var = (Var *) ((RelabelType *) var)->arg;
if (!(var && IsA(var, Var)))
continue; continue;
var = castNode(Var, cur_em->em_expr);
if ((Index) var->varno != info->chunk_rel->relid) if ((Index) var->varno != info->chunk_rel->relid)
continue; continue;
@ -1303,7 +1305,7 @@ add_segmentby_to_equivalence_class(EquivalenceClass *cur_ec, CompressionInfo *in
* be set on the em */ * be set on the em */
Assert(bms_overlap(cur_em->em_relids, uncompressed_chunk_relids)); Assert(bms_overlap(cur_em->em_relids, uncompressed_chunk_relids));
context->current_col_info = get_compression_info_for_em((Node *) cur_em->em_expr, context); context->current_col_info = get_compression_info_for_em((Node *) var, context);
if (context->current_col_info == NULL) if (context->current_col_info == NULL)
continue; continue;

View File

@ -0,0 +1,101 @@
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
\c :TEST_DBNAME :ROLE_SUPERUSER
-- github issue 5585
create table test (
time timestamptz not null,
a varchar(255) not null,
b int,
c int
);
SELECT create_hypertable('test', 'time');
WARNING: column type "character varying" used for "a" does not follow best practices
create_hypertable
-------------------
(1,public,test,t)
(1 row)
insert into test values
('2020-01-01 00:00'::timestamptz, 'lat', 1, 2),
('2020-01-01 00:01'::timestamptz, 'lat', 1, 2),
('2020-01-01 00:01'::timestamptz, 'lat', 2, 2),
('2020-01-01 00:03'::timestamptz, 'lat', 1, 2),
('2020-01-01 00:01'::timestamptz, 'lon', 1, 2);
create table test_copy as select * from test;
-- compress the chunk
alter table test set (timescaledb.compress, timescaledb.compress_segmentby='a, b');
select compress_chunk(show_chunks('test'));
compress_chunk
----------------------------------------
_timescaledb_internal._hyper_1_1_chunk
(1 row)
-- force an index scan
set enable_seqscan = 'off';
-- disable jit to avoid test flakiness
set jit = off;
explain with query_params as (
select distinct a, b
from test_copy
where test_copy.a IN ('lat', 'lon')
and test_copy.b IN (1)
)
select
test.time,
test.a = q.a as "this should never be false",
test.a,
test.b,
test.c,
q.*
from
test inner join query_params q
on q.a = test.a and q.b = test.b
where test.time between '2020-01-01 00:00' and '2020-01-01 00:02'
order by test.time;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Sort (cost=10000000024.30..10000000024.30 rows=1 width=541)
Sort Key: _hyper_1_1_chunk."time"
-> Nested Loop (cost=10000000014.27..10000000024.29 rows=1 width=541)
-> Unique (cost=10000000012.11..10000000012.12 rows=1 width=520)
-> Sort (cost=10000000012.11..10000000012.11 rows=1 width=520)
Sort Key: test_copy.a
-> Seq Scan on test_copy (cost=10000000000.00..10000000012.10 rows=1 width=520)
Filter: (((a)::text = ANY ('{lat,lon}'::text[])) AND (b = 1))
-> Custom Scan (DecompressChunk) on _hyper_1_1_chunk (cost=2.15..2.15 rows=1000 width=20)
Filter: (("time" >= 'Wed Jan 01 00:00:00 2020 PST'::timestamp with time zone) AND ("time" <= 'Wed Jan 01 00:02:00 2020 PST'::timestamp with time zone) AND ((test_copy.a)::text = a) AND (test_copy.b = b))
-> Index Scan using compress_hyper_2_2_chunk__compressed_hypertable_2_a_b__ts_meta_ on compress_hyper_2_2_chunk (cost=0.13..2.15 rows=1 width=604)
Index Cond: ((a = (test_copy.a)::text) AND (b = test_copy.b))
Filter: ((_ts_meta_max_1 >= 'Wed Jan 01 00:00:00 2020 PST'::timestamp with time zone) AND (_ts_meta_min_1 <= 'Wed Jan 01 00:02:00 2020 PST'::timestamp with time zone))
(13 rows)
with query_params as (
select distinct a, b
from test_copy
where test_copy.a IN ('lat', 'lon')
and test_copy.b IN (1)
)
select
test.time,
test.a = q.a as "this should never be false",
test.a,
test.b,
test.c,
q.*
from
test inner join query_params q
on q.a = test.a and q.b = test.b
where test.time between '2020-01-01 00:00' and '2020-01-01 00:02'
order by test.time;
time | this should never be false | a | b | c | a | b
------------------------------+----------------------------+-----+---+---+-----+---
Wed Jan 01 00:00:00 2020 PST | t | lat | 1 | 2 | lat | 1
Wed Jan 01 00:01:00 2020 PST | t | lat | 1 | 2 | lat | 1
Wed Jan 01 00:01:00 2020 PST | t | lon | 1 | 2 | lon | 1
(3 rows)
reset enable_seqscan;
reset jit;
drop table test;
drop table test_copy;

View File

@ -105,7 +105,8 @@ if(CMAKE_BUILD_TYPE MATCHES Debug)
tsl_tables.sql tsl_tables.sql
license_tsl.sql license_tsl.sql
fixed_schedules.sql fixed_schedules.sql
recompress_chunk_segmentwise.sql) recompress_chunk_segmentwise.sql
transparent_decompression_join_index.sql)
endif(CMAKE_BUILD_TYPE MATCHES Debug) endif(CMAKE_BUILD_TYPE MATCHES Debug)
if((${PG_VERSION_MAJOR} GREATER_EQUAL "14")) if((${PG_VERSION_MAJOR} GREATER_EQUAL "14"))

View File

@ -0,0 +1,76 @@
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
--
-- Regression test: joining a compressed hypertable to a DISTINCT subquery
-- on several segmentby columns must return correct rows when the
-- compressed chunk is read through an index scan.
\c :TEST_DBNAME :ROLE_SUPERUSER
-- github issue 5585
-- Hypertable under test. "a" is deliberately a varchar column so that
-- comparisons on it go through a cast to text; "a" and "b" become the
-- segmentby columns further down.
create table test (
time timestamptz not null,
a varchar(255) not null,
b int,
c int
);
SELECT create_hypertable('test', 'time');
-- Five rows within a few minutes of each other: four with a = 'lat'
-- (one of them with b = 2) and one with a = 'lon'.
insert into test values
('2020-01-01 00:00'::timestamptz, 'lat', 1, 2),
('2020-01-01 00:01'::timestamptz, 'lat', 1, 2),
('2020-01-01 00:01'::timestamptz, 'lat', 2, 2),
('2020-01-01 00:03'::timestamptz, 'lat', 1, 2),
('2020-01-01 00:01'::timestamptz, 'lon', 1, 2);
-- Plain (non-hypertable) copy taken before compression; it is the source
-- of the DISTINCT (a, b) pairs used as the other side of the join.
create table test_copy as select * from test;
-- compress the chunk
alter table test set (timescaledb.compress, timescaledb.compress_segmentby='a, b');
select compress_chunk(show_chunks('test'));
-- force an index scan
set enable_seqscan = 'off';
-- disable jit to avoid test flakiness
set jit = off;
-- Show the plan for the query from the issue. The CTE yields the distinct
-- (a, b) pairs with a in ('lat', 'lon') and b = 1; the outer query joins
-- them to the compressed hypertable on both segmentby columns.
-- NOTE(review): plain "explain" prints cost estimates, which may differ
-- between PostgreSQL versions; "explain (costs off)" might be more stable
-- here - confirm before changing, since the expected output must follow.
explain with query_params as (
select distinct a, b
from test_copy
where test_copy.a IN ('lat', 'lon')
and test_copy.b IN (1)
)
select
test.time,
test.a = q.a as "this should never be false",
test.a,
test.b,
test.c,
q.*
from
test inner join query_params q
on q.a = test.a and q.b = test.b
where test.time between '2020-01-01 00:00' and '2020-01-01 00:02'
order by test.time;
-- Run the same query for real. Because the join condition includes
-- q.a = test.a, the "this should never be false" column must be true in
-- every returned row. With the data above, three rows match: ('lat', 1)
-- at 00:00 and 00:01, and ('lon', 1) at 00:01.
-- NOTE(review): two matching rows share time 00:01, so "order by test.time"
-- alone does not fix their relative order - consider a tie-breaker.
with query_params as (
select distinct a, b
from test_copy
where test_copy.a IN ('lat', 'lon')
and test_copy.b IN (1)
)
select
test.time,
test.a = q.a as "this should never be false",
test.a,
test.b,
test.c,
q.*
from
test inner join query_params q
on q.a = test.a and q.b = test.b
where test.time between '2020-01-01 00:00' and '2020-01-01 00:02'
order by test.time;
-- Restore the session settings changed above and drop the test tables.
reset enable_seqscan;
reset jit;
drop table test;
drop table test_copy;