diff --git a/CHANGELOG.md b/CHANGELOG.md
index beae22bb0..efdcf5a44 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,6 +17,7 @@ accidentally triggering the load of a previous DB version.**
 * #5454 Add support for ON CONFLICT DO UPDATE for compressed hypertables
 * #5547 Skip Ordered Append when only 1 child node is present
 * #5510 Propagate vacuum/analyze to compressed chunks
+* #5584 Reduce decompression during constraint checking
 
 **Bugfixes**
 * #5396 Fix SEGMENTBY columns predicates to be pushed down
diff --git a/tsl/src/compression/compression.c b/tsl/src/compression/compression.c
index 19b45ae7e..468ed1097 100644
--- a/tsl/src/compression/compression.c
+++ b/tsl/src/compression/compression.c
@@ -90,6 +90,10 @@ static void row_compressor_append_row(RowCompressor *row_compressor, TupleTableS
 static void row_compressor_flush(RowCompressor *row_compressor, CommandId mycid,
                                  bool changed_groups);
+static int create_segment_metadata_scankey(RowDecompressor *decompressor,
+                                           char *segment_meta_col_name, AttrNumber in_attno,
+                                           StrategyNumber strategy, ScanKeyData *scankeys,
+                                           int num_scankeys, Datum value);
 static void run_analyze_on_chunk(Oid chunk_relid);
 
 /********************
@@ -1898,6 +1902,32 @@ build_scankeys(int32 hypertable_id, RowDecompressor decompressor, Bitmapset *key
 				key_index++;
 			}
 		}
+		if (COMPRESSIONCOL_IS_ORDER_BY(fd))
+		{
+			bool isnull;
+			Datum value = slot_getattr(slot, attno, &isnull);
+
+			/* Cannot optimize orderby columns with NULL values since those
+			 * are not visible in metadata
+			 */
+			if (isnull)
+				continue;
+
+			key_index = create_segment_metadata_scankey(&decompressor,
+														compression_column_segment_min_name(fd),
+														attno,
+														BTLessEqualStrategyNumber,
+														scankeys,
+														key_index,
+														value);
+			key_index = create_segment_metadata_scankey(&decompressor,
+														compression_column_segment_max_name(fd),
+														attno,
+														BTGreaterEqualStrategyNumber,
+														scankeys,
+														key_index,
+														value);
+		}
 	}
 }
 
@@ -1905,6 +1935,57 @@ build_scankeys(int32 hypertable_id, RowDecompressor decompressor, Bitmapset *key
 	return scankeys;
 }
 
+static int
+create_segment_metadata_scankey(RowDecompressor *decompressor, char *segment_meta_col_name,
+								AttrNumber in_attno, StrategyNumber strategy, ScanKeyData *scankeys,
+								int num_scankeys, Datum value)
+{
+	AttrNumber segment_meta_attr_number =
+		get_attnum(decompressor->in_rel->rd_id, segment_meta_col_name);
+	Assert(segment_meta_attr_number != InvalidAttrNumber);
+
+	/* This should never happen, but if it does, we can't generate a scan key for
+	 * the orderby column, so just skip it */
+	if (segment_meta_attr_number == InvalidAttrNumber)
+		return num_scankeys;
+
+	Oid atttypid = decompressor->out_desc->attrs[AttrNumberGetAttrOffset(in_attno)].atttypid;
+
+	/* Orderby column type should match in compressed metadata columns and uncompressed
+	 * chunk attribute */
+	Assert(
+		atttypid ==
+		decompressor->in_desc->attrs[AttrNumberGetAttrOffset(segment_meta_attr_number)].atttypid);
+
+	TypeCacheEntry *tce = lookup_type_cache(atttypid, TYPECACHE_BTREE_OPFAMILY);
+	if (!OidIsValid(tce->btree_opf))
+		elog(ERROR, "no btree opfamily for type \"%s\"", format_type_be(atttypid));
+
+	Oid opr = get_opfamily_member(tce->btree_opf, atttypid, atttypid, strategy);
+	Assert(OidIsValid(opr));
+	/* We should never end up here but: no operator, no optimization */
+	if (!OidIsValid(opr))
+		return num_scankeys;
+
+	opr = get_opcode(opr);
+	Assert(OidIsValid(opr));
+	/* We should never end up here but: no opcode, no optimization */
+	if (!OidIsValid(opr))
+		return num_scankeys;
+
+	ScanKeyEntryInitialize(&scankeys[num_scankeys++],
+						   0, /* flags */
+						   segment_meta_attr_number,
+						   strategy,
+						   InvalidOid, /* No strategy subtype. */
+						   decompressor->out_desc->attrs[AttrNumberGetAttrOffset(in_attno)]
+							   .attcollation,
+						   opr,
+						   value);
+
+	return num_scankeys;
+}
+
 void
 decompress_batches_for_insert(ChunkInsertState *cis, Chunk *chunk, TupleTableSlot *slot)
 {
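Conceptually, the pair of scankeys built for each orderby column turns the uniqueness probe into a range check against the per-batch min/max metadata stored on the compressed chunk, so only batches whose range covers the incoming value need to be decompressed for the conflict check. A rough SQL sketch of the predicate the two scankeys express (the `_ts_meta_min_1`/`_ts_meta_max_1` names follow TimescaleDB's convention for the first orderby column's metadata; the chunk name is hypothetical):

```sql
-- Sketch only: batches surviving this filter are the only candidates for
-- decompression. The <= and >= correspond to BTLessEqualStrategyNumber and
-- BTGreaterEqualStrategyNumber in create_segment_metadata_scankey().
SELECT ctid
FROM _timescaledb_internal.compress_hyper_1_1_chunk  -- hypothetical chunk name
WHERE _ts_meta_min_1 <= '2020-01-01 0:00:01'::timestamptz
  AND _ts_meta_max_1 >= '2020-01-01 0:00:01'::timestamptz;
```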
diff --git a/tsl/test/expected/compression_conflicts.out b/tsl/test/expected/compression_conflicts.out
index 4e39e01f3..e5de2145c 100644
--- a/tsl/test/expected/compression_conflicts.out
+++ b/tsl/test/expected/compression_conflicts.out
@@ -64,11 +64,12 @@ BEGIN;
   ('2020-01-01 0:00:01','d1',0.1),
   ('2020-01-01 0:00:02','d2',0.2),
   ('2020-01-01 0:00:03','d3',0.3);
-  -- data should have move into uncompressed chunk for conflict check
+  -- no data should have moved into uncompressed chunk for conflict check
+  -- since we used metadata optimization to guarantee uniqueness
   SELECT count(*) FROM ONLY :CHUNK;
  count
 -------
-     4
+     3
 (1 row)
 
 ROLLBACK;
@@ -139,11 +140,12 @@ BEGIN;
   ('2020-01-01 0:00:01','d1',0.1),
   ('2020-01-01 0:00:01','d2',0.2),
   ('2020-01-01 0:00:01','d3',0.3);
-  -- data should have move into uncompressed chunk for conflict check
+  -- no data should have moved into uncompressed chunk for conflict check
+  -- since we used metadata optimization to guarantee uniqueness
   SELECT count(*) FROM ONLY :CHUNK;
  count
 -------
-     5
+     3
 (1 row)
 
 ROLLBACK;
@@ -213,6 +215,7 @@ SELECT count(*) FROM ONLY :CHUNK;
 BEGIN;
   INSERT INTO comp_conflicts_3 VALUES ('2020-01-01',NULL,0.3);
   -- data for 1 segment (count = 1 value + 1 inserted) should be present in uncompressed chunk
+  -- we treat NULLs as NOT DISTINCT and let the constraint configuration handle the check
   SELECT count(*) FROM ONLY :CHUNK;
  count
 -------
@@ -223,11 +226,12 @@ ROLLBACK;
 -- should succeed since there are no conflicts in the values
 BEGIN;
   INSERT INTO comp_conflicts_3 VALUES ('2020-01-01 0:00:01','d1',0.1);
-  -- data for 1 segment (count = 1 value + 1 inserted) should have move into uncompressed chunk for conflict check
+  -- no data should have moved into uncompressed chunk for conflict check
+  -- since we used metadata optimization to guarantee uniqueness
   SELECT count(*) FROM ONLY :CHUNK;
  count
 -------
-     2
+     1
 (1 row)
 
 ROLLBACK;
@@ -236,11 +240,12 @@ BEGIN;
   ('2020-01-01 0:00:01','d1',0.1),
   ('2020-01-01 0:00:01','d2',0.2),
   ('2020-01-01 0:00:01','d3',0.3);
-  -- data for 2 segment (count = 2 value + 2 inserted) should have move into uncompressed chunk for conflict check
+  -- no data should have moved into uncompressed chunk for conflict check
+  -- since we used metadata optimization to guarantee uniqueness
   SELECT count(*) FROM ONLY :CHUNK;
  count
 -------
-     4
+     3
 (1 row)
 
 ROLLBACK;
@@ -274,6 +279,106 @@ SELECT count(*) FROM ONLY :CHUNK;
      1
 (1 row)
 
+-- test 4: multi-column unique constraint with multi-column orderby compression
+CREATE TABLE comp_conflicts_4(time timestamptz NOT NULL, device text, value float, UNIQUE(time, device));
+SELECT table_name FROM create_hypertable('comp_conflicts_4','time');
+    table_name
+------------------
+ comp_conflicts_4
+(1 row)
+
+ALTER TABLE comp_conflicts_4 SET (timescaledb.compress,timescaledb.compress_orderby='time,device');
+-- implicitly create chunk
+INSERT INTO comp_conflicts_4 SELECT generate_series('2020-01-01'::timestamp, '2020-01-01 2:00:00', '1s'), 'd1',0.1;
+INSERT INTO comp_conflicts_4 VALUES ('2020-01-01','d2',0.2);
+INSERT INTO comp_conflicts_4 VALUES ('2020-01-01',NULL,0.3);
+SELECT compress_chunk(c) AS "CHUNK" FROM show_chunks('comp_conflicts_4') c
+\gset
+-- after compression no data should be in uncompressed chunk
+SELECT count(*) FROM ONLY :CHUNK;
+ count
+-------
+     0
+(1 row)
+
+-- NULL is considered distinct from other NULLs, so even though the next INSERT looks
+-- like a conflict it is not a constraint violation (PG15 makes NULL behaviour configurable)
+BEGIN;
+  INSERT INTO comp_conflicts_4 VALUES ('2020-01-01',NULL,0.3);
+  -- data for 1 segment (count = 1000 values + 1 inserted) should be present in uncompressed chunk
+  -- we treat NULLs as NOT DISTINCT and let the constraint configuration handle the check
+  SELECT count(*) FROM ONLY :CHUNK;
+ count
+-------
+  1001
+(1 row)
+
+ROLLBACK;
+-- should succeed since there are no conflicts in the values
+BEGIN;
+  INSERT INTO comp_conflicts_4 VALUES ('2020-01-01 2:00:01','d1',0.1);
+  -- no data should have moved into uncompressed chunk for conflict check
+  -- since we used metadata optimization to guarantee uniqueness
+  SELECT count(*) FROM ONLY :CHUNK;
+ count
+-------
+     1
+(1 row)
+
+ROLLBACK;
+BEGIN;
+  INSERT INTO comp_conflicts_4 VALUES
+  ('2020-01-01 2:00:01','d1',0.1),
+  ('2020-01-01 2:00:01','d2',0.2),
+  ('2020-01-01 2:00:01','d3',0.3);
+  -- no data should have moved into uncompressed chunk for conflict check
+  -- since we used metadata optimization to guarantee uniqueness
+  SELECT count(*) FROM ONLY :CHUNK;
+ count
+-------
+     3
+(1 row)
+
+ROLLBACK;
+BEGIN;
+  INSERT INTO comp_conflicts_4 VALUES ('2020-01-01 0:00:01','d3',0.2);
+  -- count = 1 since no data should have moved into the uncompressed chunk for conflict check (d3 is a new segment)
+  SELECT count(*) FROM ONLY :CHUNK;
+ count
+-------
+     1
+(1 row)
+
+ROLLBACK;
+-- no data should be in uncompressed chunk since we did rollback
+SELECT count(*) FROM ONLY :CHUNK;
+ count
+-------
+     0
+(1 row)
+
+-- should fail since it conflicts with existing row
+\set ON_ERROR_STOP 0
+INSERT INTO comp_conflicts_4 VALUES ('2020-01-01','d1',0.1);
+ERROR: duplicate key value violates unique constraint "7_4_comp_conflicts_4_time_device_key"
+\set ON_ERROR_STOP 1
+-- no data should have moved into uncompressed chunk for conflict check
+SELECT count(*) FROM ONLY :CHUNK;
+ count
+-------
+     0
+(1 row)
+
+INSERT INTO comp_conflicts_4 VALUES ('2020-01-01 0:00:01','d1',0.1) ON CONFLICT DO NOTHING;
+INSERT INTO comp_conflicts_4 VALUES ('2020-01-01 0:30:00','d1',0.1) ON CONFLICT DO NOTHING;
+-- data should have moved into uncompressed chunk for conflict check
+-- 2 segments (count = 2000)
+SELECT count(*) FROM ONLY :CHUNK;
+ count
+-------
+  2000
+(1 row)
+
 CREATE OR REPLACE VIEW compressed_chunk_info_view AS
 SELECT
   h.schema_name AS hypertable_schema,
@@ -301,7 +406,7 @@ SELECT * FROM create_hypertable('compressed_ht', 'time',
 WARNING: column type "character varying" used for "name" does not follow best practices
  hypertable_id | schema_name |  table_name   | created
 ---------------+-------------+---------------+---------
-             7 | public      | compressed_ht | t
+             9 | public      | compressed_ht | t
 (1 row)
 
 -- create chunk 1
@@ -320,11 +425,11 @@ ALTER TABLE compressed_ht SET (
    timescaledb.compress_segmentby = 'sensor_id'
 );
 SELECT COMPRESS_CHUNK(SHOW_CHUNKS('compressed_ht'));
-             compress_chunk
-----------------------------------------
- _timescaledb_internal._hyper_7_7_chunk
- _timescaledb_internal._hyper_7_8_chunk
- _timescaledb_internal._hyper_7_9_chunk
+             compress_chunk
+-----------------------------------------
+ _timescaledb_internal._hyper_9_9_chunk
+ _timescaledb_internal._hyper_9_10_chunk
+ _timescaledb_internal._hyper_9_11_chunk
 (3 rows)
 
 -- check compression status
@@ -332,11 +437,11 @@ SELECT chunk_status,
        chunk_name as "CHUNK_NAME"
 FROM compressed_chunk_info_view
 WHERE hypertable_name = 'compressed_ht'
 ORDER BY chunk_name;
- chunk_status |    CHUNK_NAME
---------------+------------------
-            1 | _hyper_7_7_chunk
-            1 | _hyper_7_8_chunk
-            1 | _hyper_7_9_chunk
+ chunk_status |    CHUNK_NAME
+--------------+-------------------
+            1 | _hyper_9_10_chunk
+            1 | _hyper_9_11_chunk
+            1 | _hyper_9_9_chunk
 (3 rows)
 
 -- should report 0 row
@@ -361,11 +466,11 @@ SELECT chunk_status,
        chunk_name as "CHUNK_NAME"
 FROM compressed_chunk_info_view
 WHERE hypertable_name = 'compressed_ht'
 ORDER BY chunk_name;
- chunk_status |    CHUNK_NAME
---------------+------------------
-            9 | _hyper_7_7_chunk
-            1 | _hyper_7_8_chunk
-            1 | _hyper_7_9_chunk
+ chunk_status |    CHUNK_NAME
+--------------+-------------------
+            1 | _hyper_9_10_chunk
+            1 | _hyper_9_11_chunk
+            9 | _hyper_9_9_chunk
 (3 rows)
 
 INSERT INTO compressed_ht VALUES ('2022-01-24 01:10:28.192199+05:30', '6', 0.876, 4.123, 'new insert row')
@@ -381,10 +486,10 @@ SELECT chunk_status,
        chunk_name as "CHUNK_NAME"
 FROM compressed_chunk_info_view
 WHERE hypertable_name = 'compressed_ht'
 ORDER BY chunk_name;
- chunk_status |    CHUNK_NAME
---------------+------------------
-            9 | _hyper_7_7_chunk
-            1 | _hyper_7_8_chunk
-            9 | _hyper_7_9_chunk
+ chunk_status |    CHUNK_NAME
+--------------+-------------------
+            1 | _hyper_9_10_chunk
+            9 | _hyper_9_11_chunk
+            9 | _hyper_9_9_chunk
 (3 rows)
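The NULL-related comments above lean on PostgreSQL's default treatment of NULLs in unique constraints: NULLs are distinct from each other, so the metadata optimization must skip NULL orderby values, decompress the matching segment, and leave the final decision to the constraint itself. Since PG15 this behaviour is configurable via NULLS NOT DISTINCT. A minimal standalone illustration (table names are made up for the example):

```sql
-- Default behaviour: NULLs are distinct, so duplicate (time, NULL) rows coexist.
CREATE TABLE nulls_demo(time timestamptz NOT NULL, device text, UNIQUE(time, device));
INSERT INTO nulls_demo VALUES ('2020-01-01', NULL);
INSERT INTO nulls_demo VALUES ('2020-01-01', NULL); -- succeeds, no conflict

-- PG15+: NULLS NOT DISTINCT makes the second insert a unique violation.
CREATE TABLE nulls_nd_demo(time timestamptz NOT NULL, device text,
                           UNIQUE NULLS NOT DISTINCT (time, device));
INSERT INTO nulls_nd_demo VALUES ('2020-01-01', NULL);
INSERT INTO nulls_nd_demo VALUES ('2020-01-01', NULL); -- ERROR: duplicate key
```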
diff --git a/tsl/test/sql/compression_conflicts.sql b/tsl/test/sql/compression_conflicts.sql
index 63f757bf6..a40651576 100644
--- a/tsl/test/sql/compression_conflicts.sql
+++ b/tsl/test/sql/compression_conflicts.sql
@@ -58,7 +58,8 @@ BEGIN;
   ('2020-01-01 0:00:02','d2',0.2),
   ('2020-01-01 0:00:03','d3',0.3);
 
-  -- data should have move into uncompressed chunk for conflict check
+  -- no data should have moved into uncompressed chunk for conflict check
+  -- since we used metadata optimization to guarantee uniqueness
   SELECT count(*) FROM ONLY :CHUNK;
 ROLLBACK;
 
@@ -113,7 +114,8 @@ BEGIN;
   ('2020-01-01 0:00:01','d2',0.2),
   ('2020-01-01 0:00:01','d3',0.3);
 
-  -- data should have move into uncompressed chunk for conflict check
+  -- no data should have moved into uncompressed chunk for conflict check
+  -- since we used metadata optimization to guarantee uniqueness
   SELECT count(*) FROM ONLY :CHUNK;
 ROLLBACK;
 
@@ -167,6 +169,7 @@ BEGIN;
   INSERT INTO comp_conflicts_3 VALUES ('2020-01-01',NULL,0.3);
 
   -- data for 1 segment (count = 1 value + 1 inserted) should be present in uncompressed chunk
+  -- we treat NULLs as NOT DISTINCT and let the constraint configuration handle the check
   SELECT count(*) FROM ONLY :CHUNK;
 ROLLBACK;
 
@@ -175,7 +178,8 @@ BEGIN;
 
   INSERT INTO comp_conflicts_3 VALUES ('2020-01-01 0:00:01','d1',0.1);
 
-  -- data for 1 segment (count = 1 value + 1 inserted) should have move into uncompressed chunk for conflict check
+  -- no data should have moved into uncompressed chunk for conflict check
+  -- since we used metadata optimization to guarantee uniqueness
   SELECT count(*) FROM ONLY :CHUNK;
 ROLLBACK;
 
@@ -186,7 +190,8 @@ BEGIN;
   ('2020-01-01 0:00:01','d2',0.2),
   ('2020-01-01 0:00:01','d3',0.3);
 
-  -- data for 2 segment (count = 2 value + 2 inserted) should have move into uncompressed chunk for conflict check
+  -- no data should have moved into uncompressed chunk for conflict check
+  -- since we used metadata optimization to guarantee uniqueness
   SELECT count(*) FROM ONLY :CHUNK;
 ROLLBACK;
 
@@ -210,6 +215,80 @@ INSERT INTO comp_conflicts_3 VALUES
 ('2020-01-01','d1',0.1) ON CONFLICT DO NOTHING;
 
 -- data should have move into uncompressed chunk for conflict check
 SELECT count(*) FROM ONLY :CHUNK;
 
+-- test 4: multi-column unique constraint with multi-column orderby compression
+CREATE TABLE comp_conflicts_4(time timestamptz NOT NULL, device text, value float, UNIQUE(time, device));
+
+SELECT table_name FROM create_hypertable('comp_conflicts_4','time');
+ALTER TABLE comp_conflicts_4 SET (timescaledb.compress,timescaledb.compress_orderby='time,device');
+
+-- implicitly create chunk
+INSERT INTO comp_conflicts_4 SELECT generate_series('2020-01-01'::timestamp, '2020-01-01 2:00:00', '1s'), 'd1',0.1;
+INSERT INTO comp_conflicts_4 VALUES ('2020-01-01','d2',0.2);
+INSERT INTO comp_conflicts_4 VALUES ('2020-01-01',NULL,0.3);
+
+SELECT compress_chunk(c) AS "CHUNK" FROM show_chunks('comp_conflicts_4') c
+\gset
+
+-- after compression no data should be in uncompressed chunk
+SELECT count(*) FROM ONLY :CHUNK;
+
+-- NULL is considered distinct from other NULLs, so even though the next INSERT looks
+-- like a conflict it is not a constraint violation (PG15 makes NULL behaviour configurable)
+BEGIN;
+  INSERT INTO comp_conflicts_4 VALUES ('2020-01-01',NULL,0.3);
+
+  -- data for 1 segment (count = 1000 values + 1 inserted) should be present in uncompressed chunk
+  -- we treat NULLs as NOT DISTINCT and let the constraint configuration handle the check
+  SELECT count(*) FROM ONLY :CHUNK;
+ROLLBACK;
+
+-- should succeed since there are no conflicts in the values
+BEGIN;
+
+  INSERT INTO comp_conflicts_4 VALUES ('2020-01-01 2:00:01','d1',0.1);
+
+  -- no data should have moved into uncompressed chunk for conflict check
+  -- since we used metadata optimization to guarantee uniqueness
+  SELECT count(*) FROM ONLY :CHUNK;
+
+ROLLBACK;
+
+BEGIN;
+  INSERT INTO comp_conflicts_4 VALUES
+  ('2020-01-01 2:00:01','d1',0.1),
+  ('2020-01-01 2:00:01','d2',0.2),
+  ('2020-01-01 2:00:01','d3',0.3);
+
+  -- no data should have moved into uncompressed chunk for conflict check
+  -- since we used metadata optimization to guarantee uniqueness
+  SELECT count(*) FROM ONLY :CHUNK;
+ROLLBACK;
+
+BEGIN;
+  INSERT INTO comp_conflicts_4 VALUES ('2020-01-01 0:00:01','d3',0.2);
+
+  -- count = 1 since no data should have moved into the uncompressed chunk for conflict check (d3 is a new segment)
+  SELECT count(*) FROM ONLY :CHUNK;
+ROLLBACK;
+
+-- no data should be in uncompressed chunk since we did rollback
+SELECT count(*) FROM ONLY :CHUNK;
+
+-- should fail since it conflicts with existing row
+\set ON_ERROR_STOP 0
+INSERT INTO comp_conflicts_4 VALUES ('2020-01-01','d1',0.1);
+\set ON_ERROR_STOP 1
+
+-- no data should have moved into uncompressed chunk for conflict check
+SELECT count(*) FROM ONLY :CHUNK;
+
+INSERT INTO comp_conflicts_4 VALUES ('2020-01-01 0:00:01','d1',0.1) ON CONFLICT DO NOTHING;
+INSERT INTO comp_conflicts_4 VALUES ('2020-01-01 0:30:00','d1',0.1) ON CONFLICT DO NOTHING;
+
+-- data should have moved into uncompressed chunk for conflict check
+-- 2 segments (count = 2000)
+SELECT count(*) FROM ONLY :CHUNK;
+
 CREATE OR REPLACE VIEW compressed_chunk_info_view AS
 SELECT
   h.schema_name AS hypertable_schema,
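Throughout these tests, `SELECT count(*) FROM ONLY :CHUNK` is the observable: `ONLY` excludes inheritance children, so the count reflects exactly the rows moved back into the uncompressed chunk for the conflict check. To eyeball the per-batch metadata that drives the optimization, one can query the internal compressed chunk directly; a sketch, assuming TimescaleDB's `_ts_meta_*` naming convention and a made-up chunk name:

```sql
-- Per-batch row count and min/max metadata for the first orderby column.
-- Chunk name is hypothetical; look it up via show_chunks() or the chunk catalog.
SELECT _ts_meta_count, _ts_meta_min_1, _ts_meta_max_1
FROM _timescaledb_internal.compress_hyper_8_8_chunk
ORDER BY _ts_meta_min_1;
```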