
Fix duplicates on partially compressed chunk reads

When the uncompressed part of a partially compressed chunk is read by a
non-partial path and the compressed part by a partial path, the append
node on top could process the uncompressed part multiple times because
the path was declared as a partial path and the append node assumed it
could be executed in all workers in parallel without producing
duplicates.

This PR fixes the declaration of the path.
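
For intuition, here is why the partial/non-partial declaration matters: under a parallel-aware Append, PostgreSQL hands each non-partial child to exactly one process, while partial children may be scanned by every worker because they divide their rows internally. Below is a minimal model of that dispatch rule in C (a hypothetical simplification for illustration, not the actual PostgreSQL executor code; the struct and function names are invented):

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical, simplified model of Parallel Append child dispatch. */
    typedef struct AppendChild
    {
        bool is_partial; /* safe to execute in every worker? */
        bool claimed;    /* already taken by a worker (would live in shared memory) */
    } AppendChild;

    /* Return the index of the next child this worker may execute, or -1. */
    static int
    next_child_for_worker(AppendChild *children, int nchildren)
    {
        /* First, claim a non-partial child nobody has taken yet:
         * it must be executed by exactly one process. */
        for (int i = 0; i < nchildren; i++)
            if (!children[i].is_partial && !children[i].claimed)
            {
                children[i].claimed = true; /* first worker to arrive owns it */
                return i;
            }
        /* Otherwise, help out on a partial child: all workers may join in. */
        for (int i = 0; i < nchildren; i++)
            if (children[i].is_partial)
                return i;
        return -1; /* nothing left for this worker */
    }

    int
    main(void)
    {
        /* Two children, as in the fixed plan shape: the uncompressed part
         * (non-partial) and the compressed part (partial). */
        AppendChild children[] = { { .is_partial = false }, { .is_partial = true } };

        /* Two workers ask for work: only one of them gets child 0. */
        printf("worker A: child %d\n", next_child_for_worker(children, 2)); /* 0 */
        printf("worker B: child %d\n", next_child_for_worker(children, 2)); /* 1 */
        return 0;
    }

Before this fix, the uncompressed part was placed in the partial list of a non-parallel-aware append that was itself advertised as a partial path, so every worker executed the full child and the uncompressed rows were returned once per worker.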
Jan Nidzwetzki 2023-07-12 22:29:25 +02:00 committed by Jan Nidzwetzki
parent 1bd527375d
commit 36e7100013
4 changed files with 97 additions and 3 deletions
.unreleased/bugfix_5872 Normal file

@@ -0,0 +1 @@
+Fixes: #5872 Fix duplicates on partially compressed chunk reads

@@ -874,6 +874,7 @@ ts_decompress_chunk_generate_paths(PlannerInfo *root, RelOptInfo *chunk_rel, Hyp
 		{
 			Bitmapset *req_outer = PATH_REQ_OUTER(path);
 			Path *uncompressed_path = NULL;
+			bool uncompressed_path_is_partial = true;
 
 			if (initial_partial_pathlist)
 				uncompressed_path = get_cheapest_path_for_pathkeys(initial_partial_pathlist,
@@ -883,11 +884,14 @@ ts_decompress_chunk_generate_paths(PlannerInfo *root, RelOptInfo *chunk_rel, Hyp
 															   true);
 
 			if (!uncompressed_path)
+			{
 				uncompressed_path = get_cheapest_path_for_pathkeys(initial_pathlist,
 																   NIL,
 																   req_outer,
 																   TOTAL_COST,
 																   true);
+				uncompressed_path_is_partial = false;
+			}
 
 			/*
 			 * All children of an append path are required to have the same parameterization
@@ -903,15 +907,26 @@ ts_decompress_chunk_generate_paths(PlannerInfo *root, RelOptInfo *chunk_rel, Hyp
 				continue;
 			}
 
+			/* uncompressed_path can be a partial or a non-partial path. Categorize the path
+			 * and add it to the proper list of the append path. */
+			List *partial_path_list = list_make1(path);
+			List *path_list = NIL;
+
+			if (uncompressed_path_is_partial)
+				partial_path_list = lappend(partial_path_list, uncompressed_path);
+			else
+				path_list = list_make1(uncompressed_path);
+
+			/* Use a parallel aware append to handle non-partial paths properly */
 			path = (Path *) create_append_path_compat(root,
 													  chunk_rel,
-													  NIL,
-													  list_make2(path, uncompressed_path),
+													  path_list,
+													  partial_path_list,
 													  NIL /* pathkeys */,
 													  req_outer,
 													  Max(path->parallel_workers,
 														  uncompressed_path->parallel_workers),
-													  false,
+													  true, /* parallel aware */
 													  NIL,
 													  path->rows + uncompressed_path->rows);
 		}
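
For reference, an annotated reading of the corrected call (condensed from the hunk above; the argument roles are inferred from PostgreSQL's create_append_path, which the create_append_path_compat wrapper follows, so treat the comments as an informed reading rather than the authoritative signature):

    path = (Path *) create_append_path_compat(root,
                                              chunk_rel,
                                              path_list,         /* non-partial children: run exactly once */
                                              partial_path_list, /* partial children: shared by all workers */
                                              NIL,               /* pathkeys: output is unordered */
                                              req_outer,         /* required outer parameterization */
                                              Max(path->parallel_workers,
                                                  uncompressed_path->parallel_workers),
                                              true,              /* parallel aware: plan as Parallel Append */
                                              NIL,               /* partition info, unused here */
                                              path->rows + uncompressed_path->rows);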

@@ -1892,3 +1892,53 @@ SELECT * FROM f_sensor_data WHERE sensor_id > 100;
    Index Cond: (_hyper_37_71_chunk.sensor_id > 100)
 (13 rows)
 
+-- Test non-partial paths below append are not executed multiple times
+CREATE TABLE ts_device_table(time INTEGER, device INTEGER, location INTEGER, value INTEGER);
+CREATE UNIQUE INDEX device_time_idx on ts_device_table(time, device);
+SELECT create_hypertable('ts_device_table', 'time', chunk_time_interval => 1000);
+NOTICE:  adding not-null constraint to column "time"
+       create_hypertable       
+-------------------------------
+ (39,public,ts_device_table,t)
+(1 row)
+
+INSERT INTO ts_device_table SELECT generate_series(0,999,1), 1, 100, 20;
+ALTER TABLE ts_device_table set(timescaledb.compress, timescaledb.compress_segmentby='location', timescaledb.compress_orderby='time');
+SELECT compress_chunk(i) AS chunk_name FROM show_chunks('ts_device_table') i \gset
+SELECT count(*) FROM ts_device_table;
+ count 
+-------
+  1000
+(1 row)
+
+SELECT count(*) FROM :chunk_name;
+ count 
+-------
+  1000
+(1 row)
+
+INSERT INTO ts_device_table VALUES (1, 1, 100, 100) ON CONFLICT DO NOTHING;
+SELECT count(*) FROM :chunk_name;
+ count 
+-------
+  1000
+(1 row)
+
+SET parallel_setup_cost TO '0';
+SET parallel_tuple_cost TO '0';
+SET min_parallel_table_scan_size TO '8';
+SET min_parallel_index_scan_size TO '8';
+SET random_page_cost TO '0';
+SELECT count(*) FROM :chunk_name;
+ count 
+-------
+  1000
+(1 row)
+
+ANALYZE :chunk_name;
+SELECT count(*) FROM :chunk_name;
+ count 
+-------
+  1000
+(1 row)
+

@@ -860,3 +860,31 @@ SELECT sum(cpu) FROM f_sensor_data;
 
 :explain
 SELECT * FROM f_sensor_data WHERE sensor_id > 100;
+
+-- Test non-partial paths below append are not executed multiple times
+CREATE TABLE ts_device_table(time INTEGER, device INTEGER, location INTEGER, value INTEGER);
+CREATE UNIQUE INDEX device_time_idx on ts_device_table(time, device);
+SELECT create_hypertable('ts_device_table', 'time', chunk_time_interval => 1000);
+
+INSERT INTO ts_device_table SELECT generate_series(0,999,1), 1, 100, 20;
+
+ALTER TABLE ts_device_table set(timescaledb.compress, timescaledb.compress_segmentby='location', timescaledb.compress_orderby='time');
+SELECT compress_chunk(i) AS chunk_name FROM show_chunks('ts_device_table') i \gset
+
+SELECT count(*) FROM ts_device_table;
+SELECT count(*) FROM :chunk_name;
+
+INSERT INTO ts_device_table VALUES (1, 1, 100, 100) ON CONFLICT DO NOTHING;
+SELECT count(*) FROM :chunk_name;
+
+SET parallel_setup_cost TO '0';
+SET parallel_tuple_cost TO '0';
+SET min_parallel_table_scan_size TO '8';
+SET min_parallel_index_scan_size TO '8';
+SET random_page_cost TO '0';
+
+SELECT count(*) FROM :chunk_name;
+
+ANALYZE :chunk_name;
+
+SELECT count(*) FROM :chunk_name;