1
0
mirror of https://github.com/timescale/timescaledb.git synced 2025-05-17 11:03:36 +08:00

Set correct collation for segmentby columns of compressed chunks

Currently the segmentby columns of a compressed chunk are created
without the collation (and type modifier) of the original column, so
some queries return rows in the wrong order if there is an index on the
compressed chunk. The fix only applies to newly created compressed
chunks. We could add a migration that corrects the existing compressed
chunks, but it seems too heavy and does not lend itself well to
automation -- we would have to recreate any indexes on those chunks. So
existing compressed chunks continue to return wrong results.
This commit is contained in:
Alexander Kuzmenkov 2022-04-14 19:13:09 +03:00 committed by Alexander Kuzmenkov
parent 472a68726c
commit 0ab2d39f25
5 changed files with 116 additions and 6 deletions

@ -111,7 +111,7 @@ def macos_config(overrides):
"tsdb_build_args": "-DASSERTIONS=ON -DOPENSSL_ROOT_DIR=/usr/local/opt/openssl",
"llvm_config": "/usr/local/opt/llvm/bin/llvm-config",
"coverage": False,
"installcheck_args": "IGNORES='bgw_db_scheduler bgw_launcher pg_dump remote_connection'",
"installcheck_args": "IGNORES='bgw_db_scheduler bgw_launcher pg_dump remote_connection compressed_collation'",
"extra_packages": "",
})
base_config.update(overrides)

@ -270,6 +270,9 @@ compresscolinfo_init(CompressColInfo *cc, Oid srctbl_relid, List *segmentby_cols
for (attno = 0; attno < tupdesc->natts; attno++)
{
Oid attroid = InvalidOid;
int32 typmod = -1;
Oid collid = 0;
Form_pg_attribute attr = TupleDescAttr(tupdesc, attno);
ColumnDef *coldef;
if (attr->attisdropped)
@ -287,6 +290,8 @@ compresscolinfo_init(CompressColInfo *cc, Oid srctbl_relid, List *segmentby_cols
if (segorder_colindex[attno] <= seg_attnolen)
{
attroid = attr->atttypid; /*segment by columns have original type */
typmod = attr->atttypmod;
collid = attr->attcollation;
cc->col_meta[colno].segmentby_column_index = segorder_colindex[attno];
}
else
@ -307,7 +312,7 @@ compresscolinfo_init(CompressColInfo *cc, Oid srctbl_relid, List *segmentby_cols
{
cc->col_meta[colno].algo_id = 0; // invalid algo number
}
coldef = makeColumnDef(NameStr(attr->attname), attroid, -1 /*typmod*/, 0 /*collation*/);
coldef = makeColumnDef(NameStr(attr->attname), attroid, typmod, collid);
cc->coldeflist = lappend(cc->coldeflist, coldef);
colno++;
}

@ -0,0 +1,58 @@
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
\c :TEST_DBNAME :ROLE_SUPERUSER
-- We have different collation names such as en_US, en-US-x-icu and so on,
-- that are available on different platforms.
select * from (
select 3 priority, 'en_US' "COLLATION"
union all (select 2, collname from pg_collation where collname ilike 'en_us%' order by collname limit 1)
union all (select 1, collname from pg_collation where collname ilike 'en_us_utf%8%' order by collname limit 1)
) c
order by priority limit 1 \gset
create table compressed_collation_ht(time timestamp, name text collate :"COLLATION",
value float);
select create_hypertable('compressed_collation_ht', 'time');
NOTICE: adding not-null constraint to column "time"
create_hypertable
--------------------------------------
(1,public,compressed_collation_ht,t)
(1 row)
alter table compressed_collation_ht set (timescaledb.compress,
timescaledb.compress_segmentby = 'name', timescaledb.compress_orderby = 'time');
insert into compressed_collation_ht values ('2021-01-01 01:01:01', 'á', '1'),
('2021-01-01 01:01:02', 'b', '2'), ('2021-01-01 01:01:03', 'ç', '2');
select 1 from (
select compress_chunk(chunk_schema || '.' || chunk_name)
from timescaledb_information.chunks
where hypertable_name = 'compressed_collation_ht'
) t;
?column?
----------
1
(1 row)
select ht.schema_name || '.' || ht.table_name as "CHUNK"
from _timescaledb_catalog.hypertable ht
inner join _timescaledb_catalog.hypertable ht2
on ht.id = ht2.compressed_hypertable_id
and ht2.table_name = 'compressed_collation_ht' \gset
create index on :CHUNK (name);
set enable_seqscan to off;
explain (costs off)
select * from compressed_collation_ht order by name;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------
Custom Scan (DecompressChunk) on _hyper_1_1_chunk
-> Index Scan using compress_hyper_2_2_chunk__compressed_hypertable_2_name_idx on compress_hyper_2_2_chunk
(2 rows)
select * from compressed_collation_ht order by name;
time | name | value
--------------------------+------+-------
Fri Jan 01 01:01:01 2021 | á | 1
Fri Jan 01 01:01:02 2021 | b | 2
Fri Jan 01 01:01:03 2021 | ç | 2
(3 rows)

@ -10,18 +10,19 @@ set(TEST_CONFIGURATIONS postgresql max_bgw_8)
set(TEST_FILES_postgresql
bgw_custom.sql
bgw_policy.sql
compression_bgw.sql
compression_permissions.sql
compression_qualpushdown.sql
cagg_errors.sql
cagg_invalidation.sql
cagg_permissions.sql
cagg_policy.sql
cagg_refresh.sql
cagg_watermark.sql
compressed_collation.sql
compression_bgw.sql
compression_permissions.sql
compression_qualpushdown.sql
dist_views.sql
exp_cagg_next_gen.sql
exp_cagg_monthly.sql
exp_cagg_next_gen.sql
exp_cagg_origin.sql
exp_cagg_timezone.sql
move.sql

@ -0,0 +1,46 @@
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
-- Regression test: a segmentby column with a non-default collation must
-- still sort correctly when the query is answered through an index on the
-- compressed data.
-- Reconnect to the test database as the superuser role.
\c :TEST_DBNAME :ROLE_SUPERUSER
-- We have different collation names such as en_US, en-US-x-icu and so on,
-- that are available on different platforms.
-- Pick one collation name and store it in the psql variable "COLLATION"
-- (via \gset). The lowest priority number wins: 1 = first en_us_utf*8*
-- match in pg_collation, 2 = first en_us* match, 3 = the literal 'en_US'
-- as a fallback when pg_collation has no match.
select * from (
select 3 priority, 'en_US' "COLLATION"
union all (select 2, collname from pg_collation where collname ilike 'en_us%' order by collname limit 1)
union all (select 1, collname from pg_collation where collname ilike 'en_us_utf%8%' order by collname limit 1)
) c
order by priority limit 1 \gset
-- Hypertable whose "name" column carries the collation chosen above.
create table compressed_collation_ht(time timestamp, name text collate :"COLLATION",
value float);
select create_hypertable('compressed_collation_ht', 'time');
-- Enable compression with "name" as the segmentby column, i.e. the column
-- whose collation is under test.
alter table compressed_collation_ht set (timescaledb.compress,
timescaledb.compress_segmentby = 'name', timescaledb.compress_orderby = 'time');
-- Accented letters are used because their position relative to 'b' differs
-- between a linguistic collation and plain byte order.
insert into compressed_collation_ht values ('2021-01-01 01:01:01', 'á', '1'),
('2021-01-01 01:01:02', 'b', '2'), ('2021-01-01 01:01:03', 'ç', '2');
-- Compress every chunk of the hypertable. The outer "select 1" emits a
-- constant instead of the compress_chunk() result -- presumably to keep the
-- test output independent of generated chunk names.
select 1 from (
select compress_chunk(chunk_schema || '.' || chunk_name)
from timescaledb_information.chunks
where hypertable_name = 'compressed_collation_ht'
) t;
-- Look up the schema-qualified name of the internal compressed hypertable
-- that backs compressed_collation_ht and store it in the psql variable
-- "CHUNK". NOTE: despite the variable name, this is the compressed
-- hypertable (ht.id = ht2.compressed_hypertable_id), not a single chunk.
select ht.schema_name || '.' || ht.table_name as "CHUNK"
from _timescaledb_catalog.hypertable ht
inner join _timescaledb_catalog.hypertable ht2
on ht.id = ht2.compressed_hypertable_id
and ht2.table_name = 'compressed_collation_ht' \gset
-- Index the segmentby column on the compressed side.
create index on :CHUNK (name);
-- Discourage sequential scans so that the planner uses the new index.
set enable_seqscan to off;
-- First show the plan, then run the same query to check that rows come
-- back ordered by "name".
explain (costs off)
select * from compressed_collation_ht order by name;
select * from compressed_collation_ht order by name;