From 6782beb1504bc421f973269f029c9854e17c7650 Mon Sep 17 00:00:00 2001 From: Ante Kresic Date: Thu, 4 May 2023 11:42:13 +0200 Subject: [PATCH] Fix index scan handling in DML decompression We need to use the correct qualifiers for index scans since the generic scan qualifiers are not populated in this case. --- tsl/src/compression/compression.c | 57 +++++-- .../expected/compression_update_delete.out | 148 ++++++++++++++++++ tsl/test/sql/compression_update_delete.sql | 74 +++++++++ 3 files changed, 262 insertions(+), 17 deletions(-) diff --git a/tsl/src/compression/compression.c b/tsl/src/compression/compression.c index 1486cd1bb..b8b70b16f 100644 --- a/tsl/src/compression/compression.c +++ b/tsl/src/compression/compression.c @@ -2421,40 +2421,63 @@ static bool decompress_chunk_walker(PlanState *ps, List *relids) { RangeTblEntry *rte = NULL; + bool needs_decompression = false; + List *predicates = NIL; Chunk *current_chunk; if (ps == NULL) return false; switch (nodeTag(ps)) { + /* Note: IndexOnlyScans will never be selected for target + * tables because system columns are necessary in order to modify the + * data and those columns cannot be a part of the index + */ + case T_IndexScanState: + { + /* Get the index quals on the original table and also include + * any filters that are used for filtering heap tuples + */ + predicates = list_union(((IndexScan *) ps->plan)->indexqualorig, ps->plan->qual); + needs_decompression = true; + break; + } case T_SeqScanState: case T_SampleScanState: - case T_IndexScanState: - case T_IndexOnlyScanState: case T_BitmapHeapScanState: case T_TidScanState: case T_TidRangeScanState: { - /* - * We are only interested in chunk scans of chunks that are the - * target of the DML statement not chunk scan on joined hypertables - * even when it is a self join - */ - int scanrelid = ((Scan *) ps->plan)->scanrelid; - if (list_member_int(relids, scanrelid)) - { - rte = rt_fetch(scanrelid, ps->state->es_range_table); - current_chunk =
ts_chunk_get_by_relid(rte->relid, false); - if (current_chunk && ts_chunk_is_compressed(current_chunk)) - { - decompress_batches_for_update_delete(current_chunk, ps->plan->qual); - } - } + /* We copy so we can always just free the predicates */ + predicates = list_copy(ps->plan->qual); + needs_decompression = true; break; } default: break; } + if (needs_decompression) + { + /* + * We are only interested in chunk scans of chunks that are the + * target of the DML statement not chunk scan on joined hypertables + * even when it is a self join + */ + int scanrelid = ((Scan *) ps->plan)->scanrelid; + if (list_member_int(relids, scanrelid)) + { + rte = rt_fetch(scanrelid, ps->state->es_range_table); + current_chunk = ts_chunk_get_by_relid(rte->relid, false); + if (current_chunk && ts_chunk_is_compressed(current_chunk)) + { + decompress_batches_for_update_delete(current_chunk, predicates); + } + } + } + + if (predicates) + list_free(predicates); /* releases the list cells only; the qual nodes still belong to the plan */ + return planstate_tree_walker(ps, decompress_chunk_walker, relids); } diff --git a/tsl/test/expected/compression_update_delete.out b/tsl/test/expected/compression_update_delete.out index 3934476be..57655603f 100644 --- a/tsl/test/expected/compression_update_delete.out +++ b/tsl/test/expected/compression_update_delete.out @@ -1974,3 +1974,151 @@ SELECT * FROM chunk_status; (4 rows) ROLLBACK; +DROP TABLE join_test1; +DROP TABLE join_test2; +-- test if index scan qualifiers are properly used +CREATE TABLE index_scan_test(time timestamptz NOT NULL, device_id int, value float); +SELECT create_hypertable('index_scan_test','time',create_default_indexes:=false); + create_hypertable +------------------------------- + (29,public,index_scan_test,t) +(1 row) + +INSERT INTO index_scan_test(time,device_id,value) SELECT time, device_id, device_id + 0.5 FROM generate_series('2000-01-01 0:00:00+0'::timestamptz,'2000-01-01 23:55:00+0','1m') gtime(time), generate_series(1,5,1) gdevice(device_id); +-- compress chunks +ALTER TABLE index_scan_test SET
(timescaledb.compress, timescaledb.compress_orderby='time DESC', timescaledb.compress_segmentby='device_id'); +SELECT compress_chunk(show_chunks('index_scan_test')); + compress_chunk +------------------------------------------ + _timescaledb_internal._hyper_29_59_chunk +(1 row) + +ANALYZE index_scan_test; +SELECT ch1.schema_name|| '.' || ch1.table_name AS "CHUNK_1" +FROM _timescaledb_catalog.chunk ch1, _timescaledb_catalog.hypertable ht +WHERE ht.table_name = 'index_scan_test' +AND ch1.hypertable_id = ht.id +AND ch1.table_name LIKE '_hyper%' +ORDER BY ch1.id LIMIT 1 \gset +SELECT ch2.schema_name|| '.' || ch2.table_name AS "COMP_CHUNK_1" +FROM _timescaledb_catalog.chunk ch1, _timescaledb_catalog.chunk ch2, _timescaledb_catalog.hypertable ht +WHERE ht.table_name = 'index_scan_test' +AND ch1.hypertable_id = ht.id +AND ch1.compressed_chunk_id = ch2.id +ORDER BY ch2.id LIMIT 1 \gset +INSERT INTO index_scan_test(time,device_id,value) SELECT time, device_id, device_id + 0.5 FROM generate_series('2000-01-01 0:00:00+0'::timestamptz,'2000-01-05 23:55:00+0','1m') gtime(time), generate_series(1,5,1) gdevice(device_id); +-- test index on single column +BEGIN; +SELECT count(*) as "UNCOMP_LEFTOVER" FROM ONLY :CHUNK_1 WHERE device_id != 2 \gset +CREATE INDEX ON index_scan_test(device_id); +EXPLAIN (costs off, verbose) DELETE FROM index_scan_test WHERE device_id = 2; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------- + Custom Scan (HypertableModify) + -> Delete on public.index_scan_test + Delete on _timescaledb_internal._hyper_29_59_chunk index_scan_test_1 + -> Index Scan using _hyper_29_59_chunk_index_scan_test_device_id_idx on _timescaledb_internal._hyper_29_59_chunk index_scan_test_1 + Output: index_scan_test_1.tableoid, index_scan_test_1.ctid + Index Cond: (index_scan_test_1.device_id = 2) +(6 rows) + +DELETE FROM index_scan_test WHERE device_id = 2; +-- everything should be 
deleted +SELECT count(*) FROM index_scan_test where device_id = 2; + count +------- + 0 +(1 row) + +-- there shouldn't be anything in the uncompressed chunk where device_id = 2 +SELECT count(*) = :UNCOMP_LEFTOVER FROM ONLY :CHUNK_1; + ?column? +---------- + t +(1 row) + +-- there shouldn't be anything in the compressed chunk from device_id = 2 +SELECT count(*) FROM :COMP_CHUNK_1 where device_id = 2; + count +------- + 0 +(1 row) + +ROLLBACK; +-- test multi column index +BEGIN; +SELECT count(*) as "UNCOMP_LEFTOVER" FROM ONLY :CHUNK_1 WHERE device_id != 2 OR time <= '2000-01-02'::timestamptz \gset +CREATE INDEX ON index_scan_test(device_id, time); +EXPLAIN (costs off, verbose) DELETE FROM index_scan_test WHERE device_id = 2 AND time > '2000-01-02'::timestamptz; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------------------- + Custom Scan (HypertableModify) + -> Delete on public.index_scan_test + Delete on _timescaledb_internal._hyper_29_59_chunk index_scan_test_1 + -> Index Scan using _hyper_29_59_chunk_index_scan_test_device_id_time_idx on _timescaledb_internal._hyper_29_59_chunk index_scan_test_1 + Output: index_scan_test_1.tableoid, index_scan_test_1.ctid + Index Cond: ((index_scan_test_1.device_id = 2) AND (index_scan_test_1."time" > 'Sun Jan 02 00:00:00 2000 PST'::timestamp with time zone)) +(6 rows) + +DELETE FROM index_scan_test WHERE device_id = 2 AND time > '2000-01-02'::timestamptz; +-- everything should be deleted +SELECT count(*) FROM index_scan_test WHERE device_id = 2 AND time > '2000-01-02'::timestamptz; + count +------- + 0 +(1 row) + +-- there shouldn't be anything in the uncompressed chunk that matches predicates +SELECT count(*) = :UNCOMP_LEFTOVER FROM ONLY :CHUNK_1; + ?column? 
+---------- + t +(1 row) + +-- there shouldn't be anything in the compressed chunk that matches predicates +SELECT count(*) FROM :COMP_CHUNK_1 WHERE device_id = 2 AND _ts_meta_max_1 >= '2000-01-02'::timestamptz; + count +------- + 0 +(1 row) + +ROLLBACK; +-- test index with filter condition +BEGIN; +SELECT count(*) as "UNCOMP_LEFTOVER" FROM ONLY :CHUNK_1 WHERE device_id != 2 OR time <= '2000-01-02'::timestamptz \gset +CREATE INDEX ON index_scan_test(device_id); +EXPLAIN (costs off, verbose) DELETE FROM index_scan_test WHERE device_id = 2 AND time > '2000-01-02'::timestamptz; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------- + Custom Scan (HypertableModify) + -> Delete on public.index_scan_test + Delete on _timescaledb_internal._hyper_29_59_chunk index_scan_test_1 + -> Index Scan using _hyper_29_59_chunk_index_scan_test_device_id_idx on _timescaledb_internal._hyper_29_59_chunk index_scan_test_1 + Output: index_scan_test_1.tableoid, index_scan_test_1.ctid + Index Cond: (index_scan_test_1.device_id = 2) + Filter: (index_scan_test_1."time" > 'Sun Jan 02 00:00:00 2000 PST'::timestamp with time zone) +(7 rows) + +DELETE FROM index_scan_test WHERE device_id = 2 AND time > '2000-01-02'::timestamptz; +-- everything should be deleted +SELECT count(*) FROM index_scan_test WHERE device_id = 2 AND time > '2000-01-02'::timestamptz; + count +------- + 0 +(1 row) + +-- there shouldn't be anything in the uncompressed chunk that matches predicates +SELECT count(*) = :UNCOMP_LEFTOVER FROM ONLY :CHUNK_1; + ?column? 
+---------- + t +(1 row) + +-- there shouldn't be anything in the compressed chunk that matches predicates +SELECT count(*) FROM :COMP_CHUNK_1 WHERE device_id = 2 AND _ts_meta_max_1 >= '2000-01-02'::timestamptz; + count +------- + 0 +(1 row) + +ROLLBACK; diff --git a/tsl/test/sql/compression_update_delete.sql b/tsl/test/sql/compression_update_delete.sql index 05fb732da..6b1f62b7c 100644 --- a/tsl/test/sql/compression_update_delete.sql +++ b/tsl/test/sql/compression_update_delete.sql @@ -1068,3 +1068,77 @@ UPDATE join_test1 t1 SET value = t1.value + 1 FROM join_test1 t2 WHERE t2.time = SELECT * FROM chunk_status; ROLLBACK; +DROP TABLE join_test1; +DROP TABLE join_test2; + +-- test if index scan qualifiers are properly used +CREATE TABLE index_scan_test(time timestamptz NOT NULL, device_id int, value float); +SELECT create_hypertable('index_scan_test','time',create_default_indexes:=false); +INSERT INTO index_scan_test(time,device_id,value) SELECT time, device_id, device_id + 0.5 FROM generate_series('2000-01-01 0:00:00+0'::timestamptz,'2000-01-01 23:55:00+0','1m') gtime(time), generate_series(1,5,1) gdevice(device_id); + +-- compress chunks +ALTER TABLE index_scan_test SET (timescaledb.compress, timescaledb.compress_orderby='time DESC', timescaledb.compress_segmentby='device_id'); +SELECT compress_chunk(show_chunks('index_scan_test')); +ANALYZE index_scan_test; + +SELECT ch1.schema_name|| '.' || ch1.table_name AS "CHUNK_1" +FROM _timescaledb_catalog.chunk ch1, _timescaledb_catalog.hypertable ht +WHERE ht.table_name = 'index_scan_test' +AND ch1.hypertable_id = ht.id +AND ch1.table_name LIKE '_hyper%' +ORDER BY ch1.id LIMIT 1 \gset + +SELECT ch2.schema_name|| '.' 
|| ch2.table_name AS "COMP_CHUNK_1" +FROM _timescaledb_catalog.chunk ch1, _timescaledb_catalog.chunk ch2, _timescaledb_catalog.hypertable ht +WHERE ht.table_name = 'index_scan_test' +AND ch1.hypertable_id = ht.id +AND ch1.compressed_chunk_id = ch2.id +ORDER BY ch2.id LIMIT 1 \gset + +INSERT INTO index_scan_test(time,device_id,value) SELECT time, device_id, device_id + 0.5 FROM generate_series('2000-01-01 0:00:00+0'::timestamptz,'2000-01-05 23:55:00+0','1m') gtime(time), generate_series(1,5,1) gdevice(device_id); + +-- test index on single column +BEGIN; +SELECT count(*) as "UNCOMP_LEFTOVER" FROM ONLY :CHUNK_1 WHERE device_id != 2 \gset +CREATE INDEX ON index_scan_test(device_id); +EXPLAIN (costs off, verbose) DELETE FROM index_scan_test WHERE device_id = 2; +DELETE FROM index_scan_test WHERE device_id = 2; +-- everything should be deleted +SELECT count(*) FROM index_scan_test where device_id = 2; + +-- there shouldn't be anything in the uncompressed chunk where device_id = 2 +SELECT count(*) = :UNCOMP_LEFTOVER FROM ONLY :CHUNK_1; +-- there shouldn't be anything in the compressed chunk from device_id = 2 +SELECT count(*) FROM :COMP_CHUNK_1 where device_id = 2; +ROLLBACK; + +-- test multi column index +BEGIN; +SELECT count(*) as "UNCOMP_LEFTOVER" FROM ONLY :CHUNK_1 WHERE device_id != 2 OR time <= '2000-01-02'::timestamptz \gset +CREATE INDEX ON index_scan_test(device_id, time); +EXPLAIN (costs off, verbose) DELETE FROM index_scan_test WHERE device_id = 2 AND time > '2000-01-02'::timestamptz; +DELETE FROM index_scan_test WHERE device_id = 2 AND time > '2000-01-02'::timestamptz; +-- everything should be deleted +SELECT count(*) FROM index_scan_test WHERE device_id = 2 AND time > '2000-01-02'::timestamptz; + +-- there shouldn't be anything in the uncompressed chunk that matches predicates +SELECT count(*) = :UNCOMP_LEFTOVER FROM ONLY :CHUNK_1; +-- there shouldn't be anything in the compressed chunk that matches predicates +SELECT count(*) FROM :COMP_CHUNK_1 WHERE 
device_id = 2 AND _ts_meta_max_1 >= '2000-01-02'::timestamptz; +ROLLBACK; + +-- test index with filter condition +BEGIN; +SELECT count(*) as "UNCOMP_LEFTOVER" FROM ONLY :CHUNK_1 WHERE device_id != 2 OR time <= '2000-01-02'::timestamptz \gset +CREATE INDEX ON index_scan_test(device_id); +EXPLAIN (costs off, verbose) DELETE FROM index_scan_test WHERE device_id = 2 AND time > '2000-01-02'::timestamptz; +DELETE FROM index_scan_test WHERE device_id = 2 AND time > '2000-01-02'::timestamptz; +-- everything should be deleted +SELECT count(*) FROM index_scan_test WHERE device_id = 2 AND time > '2000-01-02'::timestamptz; + +-- there shouldn't be anything in the uncompressed chunk that matches predicates +SELECT count(*) = :UNCOMP_LEFTOVER FROM ONLY :CHUNK_1; +-- there shouldn't be anything in the compressed chunk that matches predicates +SELECT count(*) FROM :COMP_CHUNK_1 WHERE device_id = 2 AND _ts_meta_max_1 >= '2000-01-02'::timestamptz; +ROLLBACK; +