mirror of
https://github.com/timescale/timescaledb.git
synced 2025-05-17 19:13:16 +08:00
Fix duplicates on partially compressed chunk reads
When the uncompressed part of a partially compressed chunk is read by a non-partial path and the compressed part by a partial path, the append node on top could process the uncompressed part multiple times: the combined path was declared as a partial path, so the append node assumed it could be executed in all workers in parallel without producing duplicates. This PR fixes the declaration of the path.
This commit is contained in:
parent
1bd527375d
commit
36e7100013
1
.unreleased/bugfix_5872
Normal file
1
.unreleased/bugfix_5872
Normal file
@@ -0,0 +1 @@
|
||||
Fixes: #5872 Fix duplicates on partially compressed chunk reads
|
@@ -874,6 +874,7 @@ ts_decompress_chunk_generate_paths(PlannerInfo *root, RelOptInfo *chunk_rel, Hyp
|
||||
{
|
||||
Bitmapset *req_outer = PATH_REQ_OUTER(path);
|
||||
Path *uncompressed_path = NULL;
|
||||
bool uncompressed_path_is_partial = true;
|
||||
|
||||
if (initial_partial_pathlist)
|
||||
uncompressed_path = get_cheapest_path_for_pathkeys(initial_partial_pathlist,
|
||||
@@ -883,11 +884,14 @@ ts_decompress_chunk_generate_paths(PlannerInfo *root, RelOptInfo *chunk_rel, Hyp
|
||||
true);
|
||||
|
||||
if (!uncompressed_path)
|
||||
{
|
||||
uncompressed_path = get_cheapest_path_for_pathkeys(initial_pathlist,
|
||||
NIL,
|
||||
req_outer,
|
||||
TOTAL_COST,
|
||||
true);
|
||||
uncompressed_path_is_partial = false;
|
||||
}
|
||||
|
||||
/*
|
||||
* All children of an append path are required to have the same parameterization
|
||||
@@ -903,15 +907,26 @@ ts_decompress_chunk_generate_paths(PlannerInfo *root, RelOptInfo *chunk_rel, Hyp
|
||||
continue;
|
||||
}
|
||||
|
||||
/* uncompressed_path can be a partial or a non-partial path. Categorize the path
|
||||
* and add it to the proper list of the append path. */
|
||||
List *partial_path_list = list_make1(path);
|
||||
List *path_list = NIL;
|
||||
|
||||
if (uncompressed_path_is_partial)
|
||||
partial_path_list = lappend(partial_path_list, uncompressed_path);
|
||||
else
|
||||
path_list = list_make1(uncompressed_path);
|
||||
|
||||
/* Use a parallel aware append to handle non-partial paths properly */
|
||||
path = (Path *) create_append_path_compat(root,
|
||||
chunk_rel,
|
||||
NIL,
|
||||
list_make2(path, uncompressed_path),
|
||||
path_list,
|
||||
partial_path_list,
|
||||
NIL /* pathkeys */,
|
||||
req_outer,
|
||||
Max(path->parallel_workers,
|
||||
uncompressed_path->parallel_workers),
|
||||
false,
|
||||
true, /* parallel aware */
|
||||
NIL,
|
||||
path->rows + uncompressed_path->rows);
|
||||
}
|
||||
|
@@ -1892,3 +1892,53 @@ SELECT * FROM f_sensor_data WHERE sensor_id > 100;
|
||||
Index Cond: (_hyper_37_71_chunk.sensor_id > 100)
|
||||
(13 rows)
|
||||
|
||||
-- Test non-partial paths below append are not executed multiple times
|
||||
CREATE TABLE ts_device_table(time INTEGER, device INTEGER, location INTEGER, value INTEGER);
|
||||
CREATE UNIQUE INDEX device_time_idx on ts_device_table(time, device);
|
||||
SELECT create_hypertable('ts_device_table', 'time', chunk_time_interval => 1000);
|
||||
NOTICE: adding not-null constraint to column "time"
|
||||
create_hypertable
|
||||
-------------------------------
|
||||
(39,public,ts_device_table,t)
|
||||
(1 row)
|
||||
|
||||
INSERT INTO ts_device_table SELECT generate_series(0,999,1), 1, 100, 20;
|
||||
ALTER TABLE ts_device_table set(timescaledb.compress, timescaledb.compress_segmentby='location', timescaledb.compress_orderby='time');
|
||||
SELECT compress_chunk(i) AS chunk_name FROM show_chunks('ts_device_table') i \gset
|
||||
SELECT count(*) FROM ts_device_table;
|
||||
count
|
||||
-------
|
||||
1000
|
||||
(1 row)
|
||||
|
||||
SELECT count(*) FROM :chunk_name;
|
||||
count
|
||||
-------
|
||||
1000
|
||||
(1 row)
|
||||
|
||||
INSERT INTO ts_device_table VALUES (1, 1, 100, 100) ON CONFLICT DO NOTHING;
|
||||
SELECT count(*) FROM :chunk_name;
|
||||
count
|
||||
-------
|
||||
1000
|
||||
(1 row)
|
||||
|
||||
SET parallel_setup_cost TO '0';
|
||||
SET parallel_tuple_cost TO '0';
|
||||
SET min_parallel_table_scan_size TO '8';
|
||||
SET min_parallel_index_scan_size TO '8';
|
||||
SET random_page_cost TO '0';
|
||||
SELECT count(*) FROM :chunk_name;
|
||||
count
|
||||
-------
|
||||
1000
|
||||
(1 row)
|
||||
|
||||
ANALYZE :chunk_name;
|
||||
SELECT count(*) FROM :chunk_name;
|
||||
count
|
||||
-------
|
||||
1000
|
||||
(1 row)
|
||||
|
||||
|
@@ -860,3 +860,31 @@ SELECT sum(cpu) FROM f_sensor_data;
|
||||
|
||||
:explain
|
||||
SELECT * FROM f_sensor_data WHERE sensor_id > 100;
|
||||
|
||||
|
||||
-- Test non-partial paths below append are not executed multiple times
|
||||
CREATE TABLE ts_device_table(time INTEGER, device INTEGER, location INTEGER, value INTEGER);
|
||||
CREATE UNIQUE INDEX device_time_idx on ts_device_table(time, device);
|
||||
SELECT create_hypertable('ts_device_table', 'time', chunk_time_interval => 1000);
|
||||
INSERT INTO ts_device_table SELECT generate_series(0,999,1), 1, 100, 20;
|
||||
ALTER TABLE ts_device_table set(timescaledb.compress, timescaledb.compress_segmentby='location', timescaledb.compress_orderby='time');
|
||||
SELECT compress_chunk(i) AS chunk_name FROM show_chunks('ts_device_table') i \gset
|
||||
|
||||
SELECT count(*) FROM ts_device_table;
|
||||
SELECT count(*) FROM :chunk_name;
|
||||
|
||||
INSERT INTO ts_device_table VALUES (1, 1, 100, 100) ON CONFLICT DO NOTHING;
|
||||
|
||||
SELECT count(*) FROM :chunk_name;
|
||||
|
||||
SET parallel_setup_cost TO '0';
|
||||
SET parallel_tuple_cost TO '0';
|
||||
SET min_parallel_table_scan_size TO '8';
|
||||
SET min_parallel_index_scan_size TO '8';
|
||||
SET random_page_cost TO '0';
|
||||
|
||||
SELECT count(*) FROM :chunk_name;
|
||||
|
||||
ANALYZE :chunk_name;
|
||||
|
||||
SELECT count(*) FROM :chunk_name;
|
||||
|
Loading…
x
Reference in New Issue
Block a user