Reduce decompressions for compressed UPDATE/DELETE

Only decompress batches for compressed UPDATE/DELETE when the batch
actually contains tuples that match the query constraints. This works
even for columns for which we have no metadata.
Sven Klemm 2024-07-04 21:08:02 +02:00 committed by Sven Klemm
parent c10fae76dd
commit 1e04331615
9 changed files with 267 additions and 27 deletions
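
A minimal usage sketch of the new behavior (the GUC name and the EXPLAIN
output lines are taken from this commit's regression tests; the table
metrics and its data are hypothetical):

SET timescaledb.enable_dml_decompression_tuple_filtering TO on;  -- the default
BEGIN;
-- predicate matches no tuple in any compressed batch
EXPLAIN (ANALYZE, COSTS OFF) UPDATE metrics SET value = 3.14 WHERE value = 0;
-- expected: no "Batches decompressed" line in the plan output
ROLLBACK;
SET timescaledb.enable_dml_decompression_tuple_filtering TO off;
BEGIN;
EXPLAIN (ANALYZE, COSTS OFF) UPDATE metrics SET value = 3.14 WHERE value = 0;
-- expected: "Batches decompressed"/"Tuples decompressed" reported for every
-- candidate batch, i.e. the behavior before this commit
ROLLBACK;
RESET timescaledb.enable_dml_decompression_tuple_filtering;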

.unreleased/pr_7101 Normal file

@@ -0,0 +1 @@
Implements: #7101 Reduce decompressions for compressed UPDATE/DELETE


@@ -69,6 +69,7 @@ TSDLLEXPORT bool ts_guc_enable_cagg_watermark_constify = true;
TSDLLEXPORT int ts_guc_cagg_max_individual_materializations = 10;
bool ts_guc_enable_osm_reads = true;
TSDLLEXPORT bool ts_guc_enable_dml_decompression = true;
TSDLLEXPORT bool ts_guc_enable_dml_decompression_tuple_filtering = true;
TSDLLEXPORT int ts_guc_max_tuples_decompressed_per_dml = 100000;
TSDLLEXPORT bool ts_guc_enable_transparent_decompression = true;
TSDLLEXPORT bool ts_guc_enable_compression_wal_markers = false;
@@ -437,6 +438,18 @@ _guc_init(void)
NULL,
NULL);
DefineCustomBoolVariable(MAKE_EXTOPTION("enable_dml_decompression_tuple_filtering"),
"Enable DML decompression tuple filtering",
"Recheck tuples during DML decompression to only decompress batches "
"with matching tuples",
&ts_guc_enable_dml_decompression_tuple_filtering,
true,
PGC_USERSET,
0,
NULL,
NULL,
NULL);
DefineCustomIntVariable(MAKE_EXTOPTION("max_tuples_decompressed_per_dml_transaction"),
"The max number of tuples that can be decompressed during an "
"INSERT, UPDATE, or DELETE.",
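
Since the new boolean is registered with PGC_USERSET, it can be inspected
and toggled per session with the standard GUC commands; a trivial sketch:

SHOW timescaledb.enable_dml_decompression_tuple_filtering;       -- 'on' by default
SET timescaledb.enable_dml_decompression_tuple_filtering TO off; -- session scope only
RESET timescaledb.enable_dml_decompression_tuple_filtering;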


@@ -29,6 +29,7 @@ extern bool ts_guc_enable_now_constify;
extern TSDLLEXPORT bool ts_guc_enable_cagg_watermark_constify;
extern bool ts_guc_enable_osm_reads;
extern TSDLLEXPORT bool ts_guc_enable_dml_decompression;
extern TSDLLEXPORT bool ts_guc_enable_dml_decompression_tuple_filtering;
extern TSDLLEXPORT int ts_guc_max_tuples_decompressed_per_dml;
extern TSDLLEXPORT bool ts_guc_enable_transparent_decompression;
extern TSDLLEXPORT bool ts_guc_enable_compression_wal_markers;


@@ -42,6 +42,7 @@ decompress_batches_seqscan(Relation in_rel, Relation out_rel, Snapshot snapshot,
static bool batch_matches(RowDecompressor *decompressor, ScanKeyData *scankeys, int num_scankeys);
static void process_predicates(Chunk *ch, CompressionSettings *settings, List *predicates,
ScanKeyData **mem_scankeys, int *num_mem_scankeys,
List **heap_filters, List **index_filters, List **is_null);
static Relation find_matching_index(Relation comp_chunk_rel, List **index_filters,
List **heap_filters);
@@ -89,13 +90,17 @@ decompress_batches_for_insert(const ChunkInsertState *cis, TupleTableSlot *slot)
struct decompress_batches_stats stats;
/* the scan keys used for in-memory tests of the decompressed tuples */
int num_mem_scankeys;
ScanKeyData *mem_scankeys = build_scankeys_for_uncompressed(cis->hypertable_relid,
settings,
out_rel,
key_columns,
slot,
&num_mem_scankeys);
int num_mem_scankeys = 0;
ScanKeyData *mem_scankeys = NULL;
if (ts_guc_enable_dml_decompression_tuple_filtering)
{
mem_scankeys = build_mem_scankeys_from_slot(cis->hypertable_relid,
settings,
out_rel,
key_columns,
slot,
&num_mem_scankeys);
}
int num_index_scankeys;
Relation index_rel = NULL;
@@ -211,11 +216,20 @@ decompress_batches_for_update_delete(HypertableModifyState *ht_state, Chunk *chu
ScanKeyData *index_scankeys = NULL;
int num_index_scankeys = 0;
struct decompress_batches_stats stats;
int num_mem_scankeys = 0;
ScanKeyData *mem_scankeys = NULL;
comp_chunk = ts_chunk_get_by_id(chunk->fd.compressed_chunk_id, true);
CompressionSettings *settings = ts_compression_settings_get(comp_chunk->table_id);
process_predicates(chunk, settings, predicates, &heap_filters, &index_filters, &is_null);
process_predicates(chunk,
settings,
predicates,
&mem_scankeys,
&num_mem_scankeys,
&heap_filters,
&index_filters,
&is_null);
chunk_rel = table_open(chunk->table_id, RowExclusiveLock);
comp_chunk_rel = table_open(comp_chunk->table_id, RowExclusiveLock);
@@ -244,8 +258,8 @@ decompress_batches_for_update_delete(HypertableModifyState *ht_state, Chunk *chu
num_index_scankeys,
scankeys,
num_scankeys,
NULL,
0,
mem_scankeys,
num_mem_scankeys,
null_columns,
is_null);
/* close the selected index */
@@ -258,8 +272,8 @@ decompress_batches_for_update_delete(HypertableModifyState *ht_state, Chunk *chu
GetTransactionSnapshot(),
scankeys,
num_scankeys,
NULL,
0,
mem_scankeys,
num_mem_scankeys,
null_columns,
is_null);
}
@@ -390,6 +404,12 @@ decompress_batches_indexscan(Relation in_rel, Relation out_rel, Relation index_r
decompressor.compressed_datums,
decompressor.compressed_is_nulls);
if (num_mem_scankeys && !batch_matches(&decompressor, mem_scankeys, num_mem_scankeys))
{
row_decompressor_reset(&decompressor);
continue;
}
write_logical_replication_msg_decompression_start();
result = delete_compressed_tuple(&decompressor, snapshot, compressed_tuple);
/* skip reporting error if isolation level is < Repeatable Read
@@ -791,10 +811,17 @@ compressed_insert_key_columns(Relation relation)
* filters are put into heap_filters list.
*/
static void
process_predicates(Chunk *ch, CompressionSettings *settings, List *predicates, List **heap_filters,
process_predicates(Chunk *ch, CompressionSettings *settings, List *predicates,
ScanKeyData **mem_scankeys, int *num_mem_scankeys, List **heap_filters,
List **index_filters, List **is_null)
{
ListCell *lc;
if (ts_guc_enable_dml_decompression_tuple_filtering)
{
*mem_scankeys = palloc0(sizeof(ScanKeyData) * list_length(predicates));
}
*num_mem_scankeys = 0;
/*
* We don't want to forward boundParams from the execution state here
* as we don't want to constify join params in the predicates.
@@ -837,6 +864,7 @@ process_predicates(Chunk *ch, CompressionSettings *settings, List *predicates, L
column_name = get_attname(ch->table_id, var->varattno, false);
TypeCacheEntry *tce = lookup_type_cache(var->vartype, TYPECACHE_BTREE_OPFAMILY);
int op_strategy = get_op_opfamily_strategy(opno, tce->btree_opf);
if (ts_array_is_member(settings->fd.segmentby, column_name))
{
switch (op_strategy)
@@ -864,6 +892,21 @@ process_predicates(Chunk *ch, CompressionSettings *settings, List *predicates, L
continue;
}
/*
* Segmentby columns are checked as part of the batch scan, so there is no need to redo the check.
*/
if (ts_guc_enable_dml_decompression_tuple_filtering)
{
ScanKeyEntryInitialize(&(*mem_scankeys)[(*num_mem_scankeys)++],
arg_value->constisnull ? SK_ISNULL : 0,
var->varattno,
op_strategy,
arg_value->consttype,
arg_value->constcollid,
opcode,
arg_value->constisnull ? 0 : arg_value->constvalue);
}
int min_attno = compressed_column_metadata_attno(settings,
ch->table_id,
var->varattno,
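
To illustrate the "no metadata" case from the commit message: with tuple
filtering enabled, a predicate on a plain value column (neither segmentby
nor orderby, so no min/max batch metadata is available) only materializes
batches that actually contain a match. A sketch with a hypothetical table,
whose expected counts mirror this commit's lazy_decompress tests:

BEGIN;
EXPLAIN (ANALYZE, COSTS OFF) DELETE FROM metrics WHERE value = 2300;
-- expected: "Batches decompressed: 1" even though no batch metadata exists for value
ROLLBACK;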


@@ -10,9 +10,9 @@
#include "ts_catalog/compression_settings.h"
ScanKeyData *build_scankeys_for_uncompressed(Oid ht_relid, CompressionSettings *settings,
Relation out_rel, Bitmapset *key_columns,
TupleTableSlot *slot, int *num_scankeys);
ScanKeyData *build_mem_scankeys_from_slot(Oid ht_relid, CompressionSettings *settings,
Relation out_rel, Bitmapset *key_columns,
TupleTableSlot *slot, int *num_scankeys);
ScanKeyData *build_index_scankeys(Relation index_rel, List *index_filters, int *num_scankeys);
ScanKeyData *build_index_scankeys_using_slot(Oid hypertable_relid, Relation in_rel,
Relation out_rel, Bitmapset *key_columns,
@@ -23,7 +23,3 @@ ScanKeyData *build_heap_scankeys(Oid hypertable_relid, Relation in_rel, Relation
Bitmapset **null_columns, TupleTableSlot *slot, int *num_scankeys);
ScanKeyData *build_update_delete_scankeys(Relation in_rel, List *heap_filters, int *num_scankeys,
Bitmapset **null_columns);
int create_segment_filter_scankey(Relation in_rel, char *segment_filter_col_name,
StrategyNumber strategy, Oid subtype, ScanKeyData *scankeys,
int num_scankeys, Bitmapset **null_columns, Datum value,
bool is_null_check, bool is_array_op);


@@ -16,6 +16,11 @@
#include "ts_catalog/array_utils.h"
static Oid deduce_filter_subtype(BatchFilter *filter, Oid att_typoid);
static int create_segment_filter_scankey(Relation in_rel, char *segment_filter_col_name,
StrategyNumber strategy, Oid subtype,
ScanKeyData *scankeys, int num_scankeys,
Bitmapset **null_columns, Datum value, bool is_null_check,
bool is_array_op);
/*
* Build scankeys for decompressed tuple to check if it is part of the batch.
@@ -23,12 +28,13 @@ static Oid deduce_filter_subtype(BatchFilter *filter, Oid att_typoid);
* The key_columns are the columns of the uncompressed chunk.
*/
ScanKeyData *
build_scankeys_for_uncompressed(Oid ht_relid, CompressionSettings *settings, Relation out_rel,
Bitmapset *key_columns, TupleTableSlot *slot, int *num_scankeys)
build_mem_scankeys_from_slot(Oid ht_relid, CompressionSettings *settings, Relation out_rel,
Bitmapset *key_columns, TupleTableSlot *slot, int *num_scankeys)
{
ScanKeyData *scankeys = NULL;
int key_index = 0;
TupleDesc out_desc = RelationGetDescr(out_rel);
TupleDesc in_desc = slot->tts_tupleDescriptor;
if (bms_is_empty(key_columns))
{
@@ -93,8 +99,8 @@ build_scankeys_for_uncompressed(Oid ht_relid, CompressionSettings *settings, Rel
isnull ? SK_ISNULL | SK_SEARCHNULL : 0,
attno,
BTEqualStrategyNumber,
InvalidOid,
out_desc->attrs[AttrNumberGetAttrOffset(attno)].attcollation,
in_desc->attrs[AttrNumberGetAttrOffset(ht_attno)].atttypid,
in_desc->attrs[AttrNumberGetAttrOffset(ht_attno)].attcollation,
get_opcode(opr),
isnull ? 0 : value);
}
@@ -437,7 +443,7 @@ build_update_delete_scankeys(Relation in_rel, List *heap_filters, int *num_scank
return scankeys;
}
int
static int
create_segment_filter_scankey(Relation in_rel, char *segment_filter_col_name,
StrategyNumber strategy, Oid subtype, ScanKeyData *scankeys,
int num_scankeys, Bitmapset **null_columns, Datum value,


@@ -122,12 +122,13 @@ FROM compressed_chunk_info_view
WHERE hypertable_name = 'sample_table' ORDER BY chunk_name;
chunk_status | CHUNK_NAME
--------------+------------------
9 | _hyper_1_1_chunk
1 | _hyper_1_1_chunk
9 | _hyper_1_2_chunk
(2 rows)
-- recompress the partial chunks
SELECT compress_chunk('_timescaledb_internal._hyper_1_1_chunk');
NOTICE: chunk "_hyper_1_1_chunk" is already compressed
compress_chunk
----------------------------------------
_timescaledb_internal._hyper_1_1_chunk
@@ -173,12 +174,13 @@ FROM compressed_chunk_info_view
WHERE hypertable_name = 'sample_table' ORDER BY chunk_name;
chunk_status | CHUNK_NAME
--------------+------------------
9 | _hyper_1_1_chunk
1 | _hyper_1_1_chunk
9 | _hyper_1_2_chunk
(2 rows)
-- recompress the partial chunks
SELECT compress_chunk('_timescaledb_internal._hyper_1_1_chunk');
NOTICE: chunk "_hyper_1_1_chunk" is already compressed
compress_chunk
----------------------------------------
_timescaledb_internal._hyper_1_1_chunk


@@ -258,3 +258,152 @@ QUERY PLAN
-> Result (actual rows=1 loops=1)
(10 rows)
-- no decompression because there is no match in the batch
BEGIN; :ANALYZE UPDATE lazy_decompress SET value = 3.14 WHERE value = 0; ROLLBACK;
QUERY PLAN
Custom Scan (HypertableModify) (actual rows=0 loops=1)
-> Update on lazy_decompress (actual rows=0 loops=1)
Update on _hyper_X_X_chunk lazy_decompress_1
-> Result (actual rows=0 loops=1)
-> Seq Scan on _hyper_X_X_chunk lazy_decompress_1 (actual rows=0 loops=1)
Filter: (value = '0'::double precision)
(6 rows)
BEGIN; :ANALYZE UPDATE lazy_decompress SET value = 3.14 WHERE value = 0 AND device='d1'; ROLLBACK;
QUERY PLAN
Custom Scan (HypertableModify) (actual rows=0 loops=1)
-> Update on lazy_decompress (actual rows=0 loops=1)
Update on _hyper_X_X_chunk lazy_decompress_1
-> Result (actual rows=0 loops=1)
-> Seq Scan on _hyper_X_X_chunk lazy_decompress_1 (actual rows=0 loops=1)
Filter: ((value = '0'::double precision) AND (device = 'd1'::text))
(6 rows)
-- 1 batch decompression
BEGIN; :ANALYZE UPDATE lazy_decompress SET value = 3.14 WHERE value = 2300; ROLLBACK;
QUERY PLAN
Custom Scan (HypertableModify) (actual rows=0 loops=1)
Batches decompressed: 1
Tuples decompressed: 1000
-> Update on lazy_decompress (actual rows=0 loops=1)
Update on _hyper_X_X_chunk lazy_decompress_1
-> Result (actual rows=1 loops=1)
-> Seq Scan on _hyper_X_X_chunk lazy_decompress_1 (actual rows=1 loops=1)
Filter: (value = '2300'::double precision)
Rows Removed by Filter: 999
(9 rows)
BEGIN; :ANALYZE UPDATE lazy_decompress SET value = 3.14 WHERE value > 3100 AND value < 3200; ROLLBACK;
QUERY PLAN
Custom Scan (HypertableModify) (actual rows=0 loops=1)
Batches decompressed: 1
Tuples decompressed: 1000
-> Update on lazy_decompress (actual rows=0 loops=1)
Update on _hyper_X_X_chunk lazy_decompress_1
-> Result (actual rows=99 loops=1)
-> Seq Scan on _hyper_X_X_chunk lazy_decompress_1 (actual rows=99 loops=1)
Filter: ((value > '3100'::double precision) AND (value < '3200'::double precision))
Rows Removed by Filter: 901
(9 rows)
BEGIN; :ANALYZE UPDATE lazy_decompress SET value = 3.14 WHERE value BETWEEN 3100 AND 3200; ROLLBACK;
QUERY PLAN
Custom Scan (HypertableModify) (actual rows=0 loops=1)
Batches decompressed: 1
Tuples decompressed: 1000
-> Update on lazy_decompress (actual rows=0 loops=1)
Update on _hyper_X_X_chunk lazy_decompress_1
-> Result (actual rows=101 loops=1)
-> Seq Scan on _hyper_X_X_chunk lazy_decompress_1 (actual rows=101 loops=1)
Filter: ((value >= '3100'::double precision) AND (value <= '3200'::double precision))
Rows Removed by Filter: 899
(9 rows)
-- check GUC is working, should be 6 batches and 6000 tuples decompressed
SET timescaledb.enable_dml_decompression_tuple_filtering TO off;
BEGIN; :ANALYZE UPDATE lazy_decompress SET value = 3.14 WHERE value = 0 AND device='d1'; ROLLBACK;
QUERY PLAN
Custom Scan (HypertableModify) (actual rows=0 loops=1)
Batches decompressed: 6
Tuples decompressed: 6000
-> Update on lazy_decompress (actual rows=0 loops=1)
Update on _hyper_X_X_chunk lazy_decompress_1
-> Result (actual rows=0 loops=1)
-> Seq Scan on _hyper_X_X_chunk lazy_decompress_1 (actual rows=0 loops=1)
Filter: ((value = '0'::double precision) AND (device = 'd1'::text))
Rows Removed by Filter: 6000
(9 rows)
RESET timescaledb.enable_dml_decompression_tuple_filtering;
-- no decompression because there is no match in the batch
BEGIN; :ANALYZE DELETE FROM lazy_decompress WHERE value = 0; ROLLBACK;
QUERY PLAN
Custom Scan (HypertableModify) (actual rows=0 loops=1)
-> Delete on lazy_decompress (actual rows=0 loops=1)
Delete on _hyper_X_X_chunk lazy_decompress_1
-> Seq Scan on _hyper_X_X_chunk lazy_decompress_1 (actual rows=0 loops=1)
Filter: (value = '0'::double precision)
(5 rows)
BEGIN; :ANALYZE DELETE FROM lazy_decompress WHERE value = 0 AND device='d1'; ROLLBACK;
QUERY PLAN
Custom Scan (HypertableModify) (actual rows=0 loops=1)
-> Delete on lazy_decompress (actual rows=0 loops=1)
Delete on _hyper_X_X_chunk lazy_decompress_1
-> Seq Scan on _hyper_X_X_chunk lazy_decompress_1 (actual rows=0 loops=1)
Filter: ((value = '0'::double precision) AND (device = 'd1'::text))
(5 rows)
-- 1 batch decompression
BEGIN; :ANALYZE DELETE FROM lazy_decompress WHERE value = 2300; ROLLBACK;
QUERY PLAN
Custom Scan (HypertableModify) (actual rows=0 loops=1)
Batches decompressed: 1
Tuples decompressed: 1000
-> Delete on lazy_decompress (actual rows=0 loops=1)
Delete on _hyper_X_X_chunk lazy_decompress_1
-> Seq Scan on _hyper_X_X_chunk lazy_decompress_1 (actual rows=1 loops=1)
Filter: (value = '2300'::double precision)
Rows Removed by Filter: 999
(8 rows)
BEGIN; :ANALYZE DELETE FROM lazy_decompress WHERE value > 3100 AND value < 3200; ROLLBACK;
QUERY PLAN
Custom Scan (HypertableModify) (actual rows=0 loops=1)
Batches decompressed: 1
Tuples decompressed: 1000
-> Delete on lazy_decompress (actual rows=0 loops=1)
Delete on _hyper_X_X_chunk lazy_decompress_1
-> Seq Scan on _hyper_X_X_chunk lazy_decompress_1 (actual rows=99 loops=1)
Filter: ((value > '3100'::double precision) AND (value < '3200'::double precision))
Rows Removed by Filter: 901
(8 rows)
BEGIN; :ANALYZE DELETE FROM lazy_decompress WHERE value BETWEEN 3100 AND 3200; ROLLBACK;
QUERY PLAN
Custom Scan (HypertableModify) (actual rows=0 loops=1)
Batches decompressed: 1
Tuples decompressed: 1000
-> Delete on lazy_decompress (actual rows=0 loops=1)
Delete on _hyper_X_X_chunk lazy_decompress_1
-> Seq Scan on _hyper_X_X_chunk lazy_decompress_1 (actual rows=101 loops=1)
Filter: ((value >= '3100'::double precision) AND (value <= '3200'::double precision))
Rows Removed by Filter: 899
(8 rows)
-- check GUC is working, should be 6 batches and 6000 tuples decompressed
SET timescaledb.enable_dml_decompression_tuple_filtering TO off;
BEGIN; :ANALYZE DELETE FROM lazy_decompress WHERE value = 0 AND device='d1'; ROLLBACK;
QUERY PLAN
Custom Scan (HypertableModify) (actual rows=0 loops=1)
Batches decompressed: 6
Tuples decompressed: 6000
-> Delete on lazy_decompress (actual rows=0 loops=1)
Delete on _hyper_X_X_chunk lazy_decompress_1
-> Seq Scan on _hyper_X_X_chunk lazy_decompress_1 (actual rows=0 loops=1)
Filter: ((value = '0'::double precision) AND (device = 'd1'::text))
Rows Removed by Filter: 6000
(8 rows)
RESET timescaledb.enable_dml_decompression_tuple_filtering;
DROP TABLE lazy_decompress;


@@ -159,3 +159,32 @@ BEGIN; :ANALYZE INSERT INTO lazy_decompress SELECT '2024-01-01 0:00:00.5','d1',r
BEGIN; :ANALYZE INSERT INTO lazy_decompress SELECT '2024-01-01 0:00:00.5','d1',random() ON CONFLICT(time,device) DO UPDATE SET value=EXCLUDED.value; ROLLBACK;
-- should decompress 1 batch because there is a match
BEGIN; :ANALYZE INSERT INTO lazy_decompress SELECT '2024-01-01 0:00:01','d1',random() ON CONFLICT DO NOTHING; ROLLBACK;
-- no decompression because there is no match in the batch
BEGIN; :ANALYZE UPDATE lazy_decompress SET value = 3.14 WHERE value = 0; ROLLBACK;
BEGIN; :ANALYZE UPDATE lazy_decompress SET value = 3.14 WHERE value = 0 AND device='d1'; ROLLBACK;
-- 1 batch decompression
BEGIN; :ANALYZE UPDATE lazy_decompress SET value = 3.14 WHERE value = 2300; ROLLBACK;
BEGIN; :ANALYZE UPDATE lazy_decompress SET value = 3.14 WHERE value > 3100 AND value < 3200; ROLLBACK;
BEGIN; :ANALYZE UPDATE lazy_decompress SET value = 3.14 WHERE value BETWEEN 3100 AND 3200; ROLLBACK;
-- check GUC is working, should be 6 batches and 6000 tuples decompressed
SET timescaledb.enable_dml_decompression_tuple_filtering TO off;
BEGIN; :ANALYZE UPDATE lazy_decompress SET value = 3.14 WHERE value = 0 AND device='d1'; ROLLBACK;
RESET timescaledb.enable_dml_decompression_tuple_filtering;
-- no decompression because there is no match in the batch
BEGIN; :ANALYZE DELETE FROM lazy_decompress WHERE value = 0; ROLLBACK;
BEGIN; :ANALYZE DELETE FROM lazy_decompress WHERE value = 0 AND device='d1'; ROLLBACK;
-- 1 batch decompression
BEGIN; :ANALYZE DELETE FROM lazy_decompress WHERE value = 2300; ROLLBACK;
BEGIN; :ANALYZE DELETE FROM lazy_decompress WHERE value > 3100 AND value < 3200; ROLLBACK;
BEGIN; :ANALYZE DELETE FROM lazy_decompress WHERE value BETWEEN 3100 AND 3200; ROLLBACK;
-- check GUC is working, should be 6 batches and 6000 tuples decompressed
SET timescaledb.enable_dml_decompression_tuple_filtering TO off;
BEGIN; :ANALYZE DELETE FROM lazy_decompress WHERE value = 0 AND device='d1'; ROLLBACK;
RESET timescaledb.enable_dml_decompression_tuple_filtering;
DROP TABLE lazy_decompress;