
Fix duplicates on partially compressed chunk reads

When the uncompressed part of a partially compressed chunk is read by a
non-partial path and the compressed part by a partial path, the append
node on top could process the uncompressed part multiple times because
the path was declared as a partial path and the append node assumed it
could be executed in all workers in parallel without producing
duplicates.

This PR fixes the declaration of the path.
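
For intuition, here is why the partial/non-partial declaration matters: under a parallel-aware Append, PostgreSQL hands each non-partial child to exactly one process, while partial children may be scanned by every worker because they divide their rows internally. Below is a minimal model of that dispatch rule in C (a hypothetical simplification for illustration, not the actual PostgreSQL executor code; the struct and function names are invented):

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical, simplified model of Parallel Append child dispatch. */
    typedef struct AppendChild
    {
        bool is_partial; /* safe to execute in every worker? */
        bool claimed;    /* already taken by a worker (would live in shared memory) */
    } AppendChild;

    /* Return the index of the next child this worker may execute, or -1. */
    static int
    next_child_for_worker(AppendChild *children, int nchildren)
    {
        /* First, claim a non-partial child nobody has taken yet:
         * it must be executed by exactly one process. */
        for (int i = 0; i < nchildren; i++)
            if (!children[i].is_partial && !children[i].claimed)
            {
                children[i].claimed = true; /* first worker to arrive owns it */
                return i;
            }
        /* Otherwise, help out on a partial child: all workers may join in. */
        for (int i = 0; i < nchildren; i++)
            if (children[i].is_partial)
                return i;
        return -1; /* nothing left for this worker */
    }

    int
    main(void)
    {
        /* Two children, as in the fixed plan shape: the uncompressed part
         * (non-partial) and the compressed part (partial). */
        AppendChild children[] = { { .is_partial = false }, { .is_partial = true } };

        /* Two workers ask for work: only one of them gets child 0. */
        printf("worker A: child %d\n", next_child_for_worker(children, 2)); /* 0 */
        printf("worker B: child %d\n", next_child_for_worker(children, 2)); /* 1 */
        return 0;
    }

Before this fix, the uncompressed part was placed in the partial list of a non-parallel-aware append that was itself advertised as a partial path, so every worker executed the full child and the uncompressed rows were returned once per worker.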
Jan Nidzwetzki 2023-07-12 22:29:25 +02:00 committed by Jan Nidzwetzki
parent 1bd527375d
commit 36e7100013
4 changed files with 97 additions and 3 deletions
.unreleased/bugfix_5872 Normal file

@@ -0,0 +1 @@
+Fixes: #5872 Fix duplicates on partially compressed chunk reads

@@ -874,6 +874,7 @@ ts_decompress_chunk_generate_paths(PlannerInfo *root, RelOptInfo *chunk_rel, Hyp
 		{
 			Bitmapset *req_outer = PATH_REQ_OUTER(path);
 			Path *uncompressed_path = NULL;
+			bool uncompressed_path_is_partial = true;
 
 			if (initial_partial_pathlist)
 				uncompressed_path = get_cheapest_path_for_pathkeys(initial_partial_pathlist,
@@ -883,11 +884,14 @@ ts_decompress_chunk_generate_paths(PlannerInfo *root, RelOptInfo *chunk_rel, Hyp
 															   true);
 
 			if (!uncompressed_path)
+			{
 				uncompressed_path = get_cheapest_path_for_pathkeys(initial_pathlist,
 																   NIL,
 																   req_outer,
 																   TOTAL_COST,
 																   true);
+				uncompressed_path_is_partial = false;
+			}
 
 			/*
 			 * All children of an append path are required to have the same parameterization
@@ -903,15 +907,26 @@ ts_decompress_chunk_generate_paths(PlannerInfo *root, RelOptInfo *chunk_rel, Hyp
 				continue;
 			}
 
+			/* uncompressed_path can be a partial or a non-partial path. Categorize the path
+			 * and add it to the proper list of the append path. */
+			List *partial_path_list = list_make1(path);
+			List *path_list = NIL;
+
+			if (uncompressed_path_is_partial)
+				partial_path_list = lappend(partial_path_list, uncompressed_path);
+			else
+				path_list = list_make1(uncompressed_path);
+
+			/* Use a parallel aware append to handle non-partial paths properly */
 			path = (Path *) create_append_path_compat(root,
 													  chunk_rel,
-													  NIL,
-													  list_make2(path, uncompressed_path),
+													  path_list,
+													  partial_path_list,
 													  NIL /* pathkeys */,
 													  req_outer,
 													  Max(path->parallel_workers,
 														  uncompressed_path->parallel_workers),
-													  false,
+													  true, /* parallel aware */
 													  NIL,
 													  path->rows + uncompressed_path->rows);
 		}
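
For reference, an annotated reading of the corrected call (condensed from the hunk above; the argument roles are inferred from PostgreSQL's create_append_path, which the create_append_path_compat wrapper follows, so treat the comments as an informed reading rather than the authoritative signature):

    path = (Path *) create_append_path_compat(root,
                                              chunk_rel,
                                              path_list,         /* non-partial children: run exactly once */
                                              partial_path_list, /* partial children: shared by all workers */
                                              NIL,               /* pathkeys: output is unordered */
                                              req_outer,         /* required outer parameterization */
                                              Max(path->parallel_workers,
                                                  uncompressed_path->parallel_workers),
                                              true,              /* parallel aware: plan as Parallel Append */
                                              NIL,               /* partition info, unused here */
                                              path->rows + uncompressed_path->rows);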

@@ -1892,3 +1892,53 @@ SELECT * FROM f_sensor_data WHERE sensor_id > 100;
    Index Cond: (_hyper_37_71_chunk.sensor_id > 100)
 (13 rows)
 
+-- Test non-partial paths below append are not executed multiple times
+CREATE TABLE ts_device_table(time INTEGER, device INTEGER, location INTEGER, value INTEGER);
+CREATE UNIQUE INDEX device_time_idx on ts_device_table(time, device);
+SELECT create_hypertable('ts_device_table', 'time', chunk_time_interval => 1000);
+NOTICE:  adding not-null constraint to column "time"
+       create_hypertable       
+-------------------------------
+ (39,public,ts_device_table,t)
+(1 row)
+
+INSERT INTO ts_device_table SELECT generate_series(0,999,1), 1, 100, 20;
+ALTER TABLE ts_device_table set(timescaledb.compress, timescaledb.compress_segmentby='location', timescaledb.compress_orderby='time');
+SELECT compress_chunk(i) AS chunk_name FROM show_chunks('ts_device_table') i \gset
+SELECT count(*) FROM ts_device_table;
+ count 
+-------
+  1000
+(1 row)
+
+SELECT count(*) FROM :chunk_name;
+ count 
+-------
+  1000
+(1 row)
+
+INSERT INTO ts_device_table VALUES (1, 1, 100, 100) ON CONFLICT DO NOTHING;
+SELECT count(*) FROM :chunk_name;
+ count 
+-------
+  1000
+(1 row)
+
+SET parallel_setup_cost TO '0';
+SET parallel_tuple_cost TO '0';
+SET min_parallel_table_scan_size TO '8';
+SET min_parallel_index_scan_size TO '8';
+SET random_page_cost TO '0';
+SELECT count(*) FROM :chunk_name;
+ count 
+-------
+  1000
+(1 row)
+
+ANALYZE :chunk_name;
+SELECT count(*) FROM :chunk_name;
+ count 
+-------
+  1000
+(1 row)
+

@@ -860,3 +860,31 @@ SELECT sum(cpu) FROM f_sensor_data;
 
 :explain
 SELECT * FROM f_sensor_data WHERE sensor_id > 100;
+
+-- Test non-partial paths below append are not executed multiple times
+CREATE TABLE ts_device_table(time INTEGER, device INTEGER, location INTEGER, value INTEGER);
+CREATE UNIQUE INDEX device_time_idx on ts_device_table(time, device);
+SELECT create_hypertable('ts_device_table', 'time', chunk_time_interval => 1000);
+
+INSERT INTO ts_device_table SELECT generate_series(0,999,1), 1, 100, 20;
+
+ALTER TABLE ts_device_table set(timescaledb.compress, timescaledb.compress_segmentby='location', timescaledb.compress_orderby='time');
+SELECT compress_chunk(i) AS chunk_name FROM show_chunks('ts_device_table') i \gset
+
+SELECT count(*) FROM ts_device_table;
+SELECT count(*) FROM :chunk_name;
+
+INSERT INTO ts_device_table VALUES (1, 1, 100, 100) ON CONFLICT DO NOTHING;
+SELECT count(*) FROM :chunk_name;
+
+SET parallel_setup_cost TO '0';
+SET parallel_tuple_cost TO '0';
+SET min_parallel_table_scan_size TO '8';
+SET min_parallel_index_scan_size TO '8';
+SET random_page_cost TO '0';
+
+SELECT count(*) FROM :chunk_name;
+
+ANALYZE :chunk_name;
+
+SELECT count(*) FROM :chunk_name;