From 1d092560f45bbec887a8cb098f338a789250196d Mon Sep 17 00:00:00 2001
From: Zoltan Haindrich
Date: Fri, 14 Apr 2023 14:24:42 +0000
Subject: [PATCH] Fix on-insert decompression after schema changes

On compressed hypertables, 3 schema levels are in use simultaneously:
 * main - hypertable level
 * chunk - inheritance level
 * compressed chunk

All of them appear in the build_scankeys method: the slot passed in has
its fields laid out as a row of the main hypertable. Accessing the slot
by the attribute numbers of the chunk may therefore index the wrong
field when the schemas differ (for example, after a column has been
dropped).

Fixes: #5577
---
 CHANGELOG.md                             |  1 +
 tsl/src/compression/compression.c        | 16 ++---
 tsl/test/expected/compression_errors.out | 86 ++++++++++++++++++++++--
 tsl/test/sql/compression_errors.sql      | 53 ++++++++++++++-
 4 files changed, 139 insertions(+), 17 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3fd9b2ef6..71a375628 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -28,6 +28,7 @@ accidentally triggering the load of a previous DB version.**
 * #5573 Fix unique constraint on compressed tables
 * #5615 Add permission checks to run_job()
 * #5614 Enable run_job() for telemetry job
+* #5578 Fix on-insert decompression after schema changes

 **Thanks**
 * @kovetskiy and @DZDomi for reporting peformance regression in Realtime Continuous Aggregates

diff --git a/tsl/src/compression/compression.c b/tsl/src/compression/compression.c
index a6a14025b..f749f741f 100644
--- a/tsl/src/compression/compression.c
+++ b/tsl/src/compression/compression.c
@@ -1805,8 +1805,9 @@ compression_get_toast_storage(CompressionAlgorithms algorithm)
  * columns of the uncompressed chunk. */
 static ScanKeyData *
-build_scankeys(int32 hypertable_id, RowDecompressor decompressor, Bitmapset *key_columns,
-	       Bitmapset **null_columns, TupleTableSlot *slot, int *num_scankeys)
+build_scankeys(int32 hypertable_id, Oid hypertable_relid, RowDecompressor decompressor,
+	       Bitmapset *key_columns, Bitmapset **null_columns, TupleTableSlot *slot,
+	       int *num_scankeys)
 {
 	int key_index = 0;
 	ScanKeyData *scankeys = NULL;
@@ -1821,7 +1822,9 @@ build_scankeys(int32 hypertable_id, RowDecompressor decompressor, Bitmapset *key
 		char *attname = get_attname(decompressor.out_rel->rd_id, attno, false);
 		FormData_hypertable_compression *fd =
 			ts_hypertable_compression_get_by_pkey(hypertable_id, attname);
-
+		bool isnull;
+		AttrNumber ht_attno = get_attnum(hypertable_relid, attname);
+		Datum value = slot_getattr(slot, ht_attno, &isnull);
 		/*
 		 * There are 3 possible scenarios we have to consider
 		 * when dealing with columns which are part of unique
@@ -1838,11 +1841,8 @@ build_scankeys(int32 hypertable_id, RowDecompressor decompressor, Bitmapset *key
 		 * batch filtering as the values are compressed and
 		 * we have no metadata.
*/ - if (COMPRESSIONCOL_IS_SEGMENT_BY(fd)) { - bool isnull; - Datum value = slot_getattr(slot, attno, &isnull); key_index = create_segment_filter_scankey(&decompressor, attname, BTEqualStrategyNumber, @@ -1854,9 +1854,6 @@ build_scankeys(int32 hypertable_id, RowDecompressor decompressor, Bitmapset *key } if (COMPRESSIONCOL_IS_ORDER_BY(fd)) { - bool isnull; - Datum value = slot_getattr(slot, attno, &isnull); - /* Cannot optimize orderby columns with NULL values since those * are not visible in metadata */ @@ -1967,6 +1964,7 @@ decompress_batches_for_insert(ChunkInsertState *cis, Chunk *chunk, TupleTableSlo int num_scankeys; ScanKeyData *scankeys = build_scankeys(chunk->fd.hypertable_id, + chunk->hypertable_relid, decompressor, key_columns, &null_columns, diff --git a/tsl/test/expected/compression_errors.out b/tsl/test/expected/compression_errors.out index 836129804..3d1c4cf97 100644 --- a/tsl/test/expected/compression_errors.out +++ b/tsl/test/expected/compression_errors.out @@ -631,7 +631,7 @@ INSERT INTO ts_table SELECT * FROM data_table; --cleanup tables DROP TABLE data_table cascade; DROP TABLE ts_table cascade; ---invalid reads for row expressions after column dropped on compressed tables #5458 +-- #5458 invalid reads for row expressions after column dropped on compressed tables CREATE TABLE readings( "time" TIMESTAMPTZ NOT NULL, battery_status TEXT, @@ -645,6 +645,7 @@ NOTICE: migrating data to chunks (35,public,readings,t) (1 row) +create unique index readings_uniq_idx on readings("time",battery_temperature); ALTER TABLE readings SET (timescaledb.compress,timescaledb.compress_segmentby = 'battery_temperature'); SELECT compress_chunk(show_chunks('readings')); compress_chunk @@ -661,7 +662,50 @@ SELECT readings FROM readings; ("Fri Nov 11 11:11:11 2022 PST",0.2) (2 rows) --- Unique constraints are not always respected on compressed tables #5553 +-- #5577 On-insert decompression after schema changes may not work properly +SELECT decompress_chunk(show_chunks('readings'),true); +NOTICE: chunk "_hyper_35_24_chunk" is not compressed + decompress_chunk +------------------------------------------ + _timescaledb_internal._hyper_35_22_chunk + +(2 rows) + +SELECT compress_chunk(show_chunks('readings'),true); + compress_chunk +------------------------------------------ + _timescaledb_internal._hyper_35_22_chunk + _timescaledb_internal._hyper_35_24_chunk +(2 rows) + +\set ON_ERROR_STOP 0 +INSERT INTO readings ("time", battery_temperature) VALUES + ('2022-11-11 11:11:11',0.2) -- same record as inserted +; +ERROR: duplicate key value violates unique constraint "_hyper_35_24_chunk_readings_uniq_idx" +\set ON_ERROR_STOP 1 +SELECT * from readings; + time | battery_temperature +------------------------------+--------------------- + Fri Nov 11 03:11:11 2022 PST | + Fri Nov 11 11:11:11 2022 PST | 0.2 +(2 rows) + +SELECT assert_equal(count(1), 2::bigint) FROM readings; + assert_equal +-------------- + +(1 row) + +-- no unique check failure during decompression +SELECT decompress_chunk(show_chunks('readings'),true); + decompress_chunk +------------------------------------------ + _timescaledb_internal._hyper_35_22_chunk + _timescaledb_internal._hyper_35_24_chunk +(2 rows) + +-- #5553 Unique constraints are not always respected on compressed tables CREATE TABLE main_table AS SELECT '2011-11-11 11:11:11'::timestamptz AS time, 'foo' AS device_id; CREATE UNIQUE INDEX xm ON main_table(time, device_id); @@ -680,19 +724,19 @@ ALTER TABLE main_table SET ( SELECT compress_chunk(show_chunks('main_table')); 
compress_chunk ------------------------------------------ - _timescaledb_internal._hyper_37_25_chunk + _timescaledb_internal._hyper_37_27_chunk (1 row) -- insert rejected \set ON_ERROR_STOP 0 INSERT INTO main_table VALUES ('2011-11-11 11:11:11', 'foo'); -ERROR: duplicate key value violates unique constraint "_hyper_37_25_chunk_xm" +ERROR: duplicate key value violates unique constraint "_hyper_37_27_chunk_xm" -- insert rejected in case 1st row doesn't violate constraint with different segmentby INSERT INTO main_table VALUES ('2011-11-11 11:12:11', 'bar'), ('2011-11-11 11:11:11', 'foo'); -ERROR: duplicate key value violates unique constraint "_hyper_37_25_chunk_xm" +ERROR: duplicate key value violates unique constraint "_hyper_37_27_chunk_xm" \set ON_ERROR_STOP 1 SELECT assert_equal(count(1), 1::bigint) FROM main_table; assert_equal @@ -704,6 +748,36 @@ SELECT assert_equal(count(1), 1::bigint) FROM main_table; SELECT decompress_chunk(show_chunks('main_table'), TRUE); decompress_chunk ------------------------------------------ - _timescaledb_internal._hyper_37_25_chunk + _timescaledb_internal._hyper_37_27_chunk (1 row) +DROP TABLE IF EXISTS readings; +CREATE TABLE readings( + "time" timestamptz NOT NULL, + battery_status text, + candy integer, + battery_status2 text, + battery_temperature text +); +SELECT create_hypertable('readings', 'time', chunk_time_interval => interval '12 hour'); + create_hypertable +------------------------ + (39,public,readings,t) +(1 row) + +CREATE UNIQUE INDEX readings_uniq_idx ON readings("time", battery_temperature); +ALTER TABLE readings SET (timescaledb.compress, timescaledb.compress_segmentby = 'battery_temperature'); +ALTER TABLE readings DROP COLUMN battery_status; +ALTER TABLE readings DROP COLUMN battery_status2; +INSERT INTO readings("time", candy, battery_temperature) + VALUES ('2022-11-11 11:11:11', 88, '0.2'); +SELECT compress_chunk(show_chunks('readings'), TRUE); + compress_chunk +------------------------------------------ + _timescaledb_internal._hyper_39_29_chunk +(1 row) + +-- no error happens +INSERT INTO readings("time", candy, battery_temperature) + VALUES ('2022-11-11 11:11:11', 33, 0.3) +; diff --git a/tsl/test/sql/compression_errors.sql b/tsl/test/sql/compression_errors.sql index 0dc65b642..b685e96cc 100644 --- a/tsl/test/sql/compression_errors.sql +++ b/tsl/test/sql/compression_errors.sql @@ -371,7 +371,7 @@ INSERT INTO ts_table SELECT * FROM data_table; DROP TABLE data_table cascade; DROP TABLE ts_table cascade; ---invalid reads for row expressions after column dropped on compressed tables #5458 +-- #5458 invalid reads for row expressions after column dropped on compressed tables CREATE TABLE readings( "time" TIMESTAMPTZ NOT NULL, battery_status TEXT, @@ -382,6 +382,7 @@ INSERT INTO readings ("time") VALUES ('2022-11-11 11:11:11-00'); SELECT create_hypertable('readings', 'time', chunk_time_interval => interval '12 hour', migrate_data=>true); +create unique index readings_uniq_idx on readings("time",battery_temperature); ALTER TABLE readings SET (timescaledb.compress,timescaledb.compress_segmentby = 'battery_temperature'); SELECT compress_chunk(show_chunks('readings')); @@ -389,7 +390,25 @@ ALTER TABLE readings DROP COLUMN battery_status; INSERT INTO readings ("time", battery_temperature) VALUES ('2022-11-11 11:11:11', 0.2); SELECT readings FROM readings; --- Unique constraints are not always respected on compressed tables #5553 +-- #5577 On-insert decompression after schema changes may not work properly + +SELECT 
decompress_chunk(show_chunks('readings'),true); +SELECT compress_chunk(show_chunks('readings'),true); + +\set ON_ERROR_STOP 0 +INSERT INTO readings ("time", battery_temperature) VALUES + ('2022-11-11 11:11:11',0.2) -- same record as inserted +; + +\set ON_ERROR_STOP 1 + +SELECT * from readings; +SELECT assert_equal(count(1), 2::bigint) FROM readings; + +-- no unique check failure during decompression +SELECT decompress_chunk(show_chunks('readings'),true); + +-- #5553 Unique constraints are not always respected on compressed tables CREATE TABLE main_table AS SELECT '2011-11-11 11:11:11'::timestamptz AS time, 'foo' AS device_id; @@ -419,3 +438,33 @@ SELECT assert_equal(count(1), 1::bigint) FROM main_table; -- no unique check failure during decompression SELECT decompress_chunk(show_chunks('main_table'), TRUE); + + +DROP TABLE IF EXISTS readings; + +CREATE TABLE readings( + "time" timestamptz NOT NULL, + battery_status text, + candy integer, + battery_status2 text, + battery_temperature text +); + +SELECT create_hypertable('readings', 'time', chunk_time_interval => interval '12 hour'); + +CREATE UNIQUE INDEX readings_uniq_idx ON readings("time", battery_temperature); + +ALTER TABLE readings SET (timescaledb.compress, timescaledb.compress_segmentby = 'battery_temperature'); + +ALTER TABLE readings DROP COLUMN battery_status; +ALTER TABLE readings DROP COLUMN battery_status2; + +INSERT INTO readings("time", candy, battery_temperature) + VALUES ('2022-11-11 11:11:11', 88, '0.2'); + +SELECT compress_chunk(show_chunks('readings'), TRUE); + +-- no error happens +INSERT INTO readings("time", candy, battery_temperature) + VALUES ('2022-11-11 11:11:11', 33, 0.3) +;
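
Note (illustrative sketch, not part of the patch): the attribute-number
mismatch this patch fixes can be observed directly in the last test above.
battery_status and battery_status2 are dropped from the hypertable before
the chunk exists, so the hypertable keeps attisdropped placeholders at
their positions, while the chunk, created after the drops, gets a compact
column layout. battery_temperature thus sits at a different attnum on each
relation, and indexing the hypertable-shaped slot with the chunk's attnum
would read a different column entirely. A query to inspect both layouts
(the chunk name is taken from the expected output above and will vary
between runs):

-- Compare column numbering on the hypertable vs. one of its chunks.
-- Dropped columns remain as attisdropped placeholders on the hypertable,
-- shifting its attnums relative to the freshly created chunk.
SELECT attrelid::regclass AS rel, attnum, attname, attisdropped
FROM pg_attribute
WHERE attrelid IN ('readings'::regclass,
                   '_timescaledb_internal._hyper_39_29_chunk'::regclass)
  AND attnum > 0
ORDER BY attrelid, attnum;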