Enable ChunkAppend for space partitioned partial chunks

This is a follow-up patch for timescale#5599, which handles space-partitioned
hypertables.
This commit is contained in:
Konstantina Skovola 2023-06-01 12:30:34 +03:00 committed by Konstantina Skovola
parent c48f905f78
commit 1eb7e38d2d
4 changed files with 304 additions and 37 deletions

View File

@ -319,21 +319,40 @@ ts_chunk_append_path_create(PlannerInfo *root, RelOptInfo *rel, Hypertable *ht,
if (flat == NULL)
break;
/*
* For each lc_oid, there will be 0, 1, or 2 matches in flat_list: 0 matches
* if child was pruned, 1 match if the chunk is uncompressed or fully compressed,
* 2 matches if the chunk is partially compressed.
* If there are 2 matches they will also be consecutive (see assumption above)
*/
foreach (lc_oid, current_oids)
{
/* postgres may have pruned away some children already */
Path *child = (Path *) lfirst(flat);
Oid parent_relid = child->parent->relid;
bool is_not_pruned =
lfirst_oid(lc_oid) == root->simple_rte_array[parent_relid]->relid;
if (is_not_pruned)
bool is_not_pruned = true;
#ifdef USE_ASSERT_CHECKING
int nmatches = 0;
#endif
do
{
merge_childs = lappend(merge_childs, child);
flat = lnext_compat(children, flat);
if (flat == NULL)
break;
}
Path *child = (Path *) lfirst(flat);
Oid parent_relid = child->parent->relid;
is_not_pruned =
lfirst_oid(lc_oid) == root->simple_rte_array[parent_relid]->relid;
/* postgres may have pruned away some children already */
if (is_not_pruned)
{
#ifdef USE_ASSERT_CHECKING
nmatches++;
#endif
merge_childs = lappend(merge_childs, child);
flat = lnext_compat(children, flat);
if (flat == NULL)
break;
}
/* if current one matched then need to check next one for match */
} while (is_not_pruned);
#ifdef USE_ASSERT_CHECKING
Assert(nmatches <= 2);
#endif
}
if (list_length(merge_childs) > 1)

View File

@ -918,31 +918,6 @@ should_chunk_append(Hypertable *ht, PlannerInfo *root, RelOptInfo *rel, Path *pa
return false;
}
/*
* Check for partially compressed chunks with space partitioning.
*
* When partially compressed chunks are present on a hypertable with
* more than 1 dimension, we can not do 1-level ordered append.
* We instead need nested Appends to correctly preserve
* ordering. For now we skip ordered append optimization when we encounter
* partial chunks on space-partitioned hypertables.
* When there is no space partitioning, we move the check for partial chunks
* to the place where we do chunk append for space partitioned hypertables.
*/
foreach (lc, merge->subpaths)
{
Path *child = lfirst(lc);
RelOptInfo *chunk_rel = child->parent;
if (chunk_rel->fdw_private)
{
TimescaleDBPrivate *private = chunk_rel->fdw_private;
/* for all partially compressed chunks in the plan */
if (private->chunk && ts_chunk_is_partial(private->chunk) &&
ht->space->num_dimensions > 1)
return false;
}
}
pk = linitial_node(PathKey, path->pathkeys);
/*

View File

@ -2132,3 +2132,223 @@ SELECT * FROM test_partials ORDER BY time;
Mon Jan 01 00:02:00 2024 PST | 1 | 2
(13 rows)
-- add test for space partitioning with partial chunks
CREATE TABLE space_part (time timestamptz, a int, b int, c int);
SELECT create_hypertable('space_part', 'time', chunk_time_interval => INTERVAL '1 day');
NOTICE: adding not-null constraint to column "time"
create_hypertable
--------------------------
(36,public,space_part,t)
(1 row)
INSERT INTO space_part VALUES
-- chunk1
('2020-01-01 00:00', 1, 1, 1),
('2020-01-01 00:00', 2, 1, 1),
('2020-01-01 00:03', 1, 1, 1),
('2020-01-01 00:03', 2, 1, 1);
INSERT INTO space_part values
-- chunk2
('2021-01-01 00:00', 1, 1, 1),
('2021-01-01 00:00', 2, 1, 1),
('2021-01-01 00:03', 1, 1, 1),
('2021-01-01 00:03', 2, 1, 1);
-- compress them
ALTER TABLE space_part SET (timescaledb.compress);
SELECT compress_chunk(show_chunks('space_part'));
compress_chunk
-------------------------------------------
_timescaledb_internal._hyper_36_130_chunk
_timescaledb_internal._hyper_36_131_chunk
(2 rows)
-- make first chunk partial
INSERT INTO space_part VALUES
-- chunk1
('2020-01-01 00:01', 1, 1, 1),
('2020-01-01 00:01', 2, 1, 1);
-------- now enable the space partitioning, this will take effect for chunks created subsequently
SELECT add_dimension('space_part', 'a', number_partitions => 5);
add_dimension
----------------------------
(19,public,space_part,a,t)
(1 row)
-- plan is still the same
EXPLAIN (COSTS OFF) SELECT * FROM space_part ORDER BY time;
QUERY PLAN
--------------------------------------------------------------------------------------
Custom Scan (ChunkAppend) on space_part
Order: space_part."time"
-> Merge Append
Sort Key: _hyper_36_130_chunk."time"
-> Custom Scan (DecompressChunk) on _hyper_36_130_chunk
-> Sort
Sort Key: compress_hyper_37_132_chunk._ts_meta_sequence_num DESC
-> Seq Scan on compress_hyper_37_132_chunk
-> Sort
Sort Key: _hyper_36_130_chunk."time"
-> Seq Scan on _hyper_36_130_chunk
-> Custom Scan (DecompressChunk) on _hyper_36_131_chunk
-> Sort
Sort Key: compress_hyper_37_133_chunk._ts_meta_sequence_num DESC
-> Seq Scan on compress_hyper_37_133_chunk
(15 rows)
-- now add more chunks that do adhere to the new space partitioning
-- chunks 3,4
INSERT INTO space_part VALUES
('2022-01-01 00:00', 1, 1, 1),
('2022-01-01 00:00', 2, 1, 1),
('2022-01-01 00:03', 1, 1, 1),
('2022-01-01 00:03', 2, 1, 1);
-- plan still ok
EXPLAIN (COSTS OFF) SELECT * FROM space_part ORDER BY time;
QUERY PLAN
------------------------------------------------------------------------------------------------------
Custom Scan (ChunkAppend) on space_part
Order: space_part."time"
-> Merge Append
Sort Key: _hyper_36_130_chunk."time"
-> Custom Scan (DecompressChunk) on _hyper_36_130_chunk
-> Sort
Sort Key: compress_hyper_37_132_chunk._ts_meta_sequence_num DESC
-> Seq Scan on compress_hyper_37_132_chunk
-> Sort
Sort Key: _hyper_36_130_chunk."time"
-> Seq Scan on _hyper_36_130_chunk
-> Custom Scan (DecompressChunk) on _hyper_36_131_chunk
-> Sort
Sort Key: compress_hyper_37_133_chunk._ts_meta_sequence_num DESC
-> Seq Scan on compress_hyper_37_133_chunk
-> Merge Append
Sort Key: _hyper_36_134_chunk."time"
-> Index Scan Backward using _hyper_36_134_chunk_space_part_time_idx on _hyper_36_134_chunk
-> Index Scan Backward using _hyper_36_135_chunk_space_part_time_idx on _hyper_36_135_chunk
(19 rows)
-- compress them
SELECT compress_chunk(c, if_not_compressed=>true) FROM show_chunks('space_part') c;
NOTICE: chunk "_hyper_36_130_chunk" is already compressed
NOTICE: chunk "_hyper_36_131_chunk" is already compressed
compress_chunk
-------------------------------------------
_timescaledb_internal._hyper_36_130_chunk
_timescaledb_internal._hyper_36_131_chunk
_timescaledb_internal._hyper_36_134_chunk
_timescaledb_internal._hyper_36_135_chunk
(4 rows)
-- plan still ok
EXPLAIN (COSTS OFF) SELECT * FROM space_part ORDER BY time;
QUERY PLAN
--------------------------------------------------------------------------------------
Custom Scan (ChunkAppend) on space_part
Order: space_part."time"
-> Merge Append
Sort Key: _hyper_36_130_chunk."time"
-> Custom Scan (DecompressChunk) on _hyper_36_130_chunk
-> Sort
Sort Key: compress_hyper_37_132_chunk._ts_meta_sequence_num DESC
-> Seq Scan on compress_hyper_37_132_chunk
-> Sort
Sort Key: _hyper_36_130_chunk."time"
-> Seq Scan on _hyper_36_130_chunk
-> Custom Scan (DecompressChunk) on _hyper_36_131_chunk
-> Sort
Sort Key: compress_hyper_37_133_chunk._ts_meta_sequence_num DESC
-> Seq Scan on compress_hyper_37_133_chunk
-> Merge Append
Sort Key: _hyper_36_134_chunk."time"
-> Custom Scan (DecompressChunk) on _hyper_36_134_chunk
-> Sort
Sort Key: compress_hyper_37_136_chunk._ts_meta_sequence_num DESC
-> Seq Scan on compress_hyper_37_136_chunk
-> Custom Scan (DecompressChunk) on _hyper_36_135_chunk
-> Sort
Sort Key: compress_hyper_37_137_chunk._ts_meta_sequence_num DESC
-> Seq Scan on compress_hyper_37_137_chunk
(25 rows)
-- make second one of them partial
insert into space_part values
('2022-01-01 00:02', 2, 1, 1),
('2022-01-01 00:02', 2, 1, 1);
EXPLAIN (COSTS OFF) SELECT * FROM space_part ORDER BY time;
QUERY PLAN
--------------------------------------------------------------------------------------
Custom Scan (ChunkAppend) on space_part
Order: space_part."time"
-> Merge Append
Sort Key: _hyper_36_130_chunk."time"
-> Custom Scan (DecompressChunk) on _hyper_36_130_chunk
-> Sort
Sort Key: compress_hyper_37_132_chunk._ts_meta_sequence_num DESC
-> Seq Scan on compress_hyper_37_132_chunk
-> Sort
Sort Key: _hyper_36_130_chunk."time"
-> Seq Scan on _hyper_36_130_chunk
-> Custom Scan (DecompressChunk) on _hyper_36_131_chunk
-> Sort
Sort Key: compress_hyper_37_133_chunk._ts_meta_sequence_num DESC
-> Seq Scan on compress_hyper_37_133_chunk
-> Merge Append
Sort Key: _hyper_36_134_chunk."time"
-> Custom Scan (DecompressChunk) on _hyper_36_134_chunk
-> Sort
Sort Key: compress_hyper_37_136_chunk._ts_meta_sequence_num DESC
-> Seq Scan on compress_hyper_37_136_chunk
-> Custom Scan (DecompressChunk) on _hyper_36_135_chunk
-> Sort
Sort Key: compress_hyper_37_137_chunk._ts_meta_sequence_num DESC
-> Seq Scan on compress_hyper_37_137_chunk
-> Sort
Sort Key: _hyper_36_135_chunk."time"
-> Sort
Sort Key: _hyper_36_135_chunk."time"
-> Seq Scan on _hyper_36_135_chunk
(30 rows)
-- make other one partial too
INSERT INTO space_part VALUES
('2022-01-01 00:02', 1, 1, 1);
EXPLAIN (COSTS OFF) SELECT * FROM space_part ORDER BY time;
QUERY PLAN
--------------------------------------------------------------------------------------
Custom Scan (ChunkAppend) on space_part
Order: space_part."time"
-> Merge Append
Sort Key: _hyper_36_130_chunk."time"
-> Custom Scan (DecompressChunk) on _hyper_36_130_chunk
-> Sort
Sort Key: compress_hyper_37_132_chunk._ts_meta_sequence_num DESC
-> Seq Scan on compress_hyper_37_132_chunk
-> Sort
Sort Key: _hyper_36_130_chunk."time"
-> Seq Scan on _hyper_36_130_chunk
-> Custom Scan (DecompressChunk) on _hyper_36_131_chunk
-> Sort
Sort Key: compress_hyper_37_133_chunk._ts_meta_sequence_num DESC
-> Seq Scan on compress_hyper_37_133_chunk
-> Merge Append
Sort Key: _hyper_36_134_chunk."time"
-> Custom Scan (DecompressChunk) on _hyper_36_134_chunk
-> Sort
Sort Key: compress_hyper_37_136_chunk._ts_meta_sequence_num DESC
-> Seq Scan on compress_hyper_37_136_chunk
-> Sort
Sort Key: _hyper_36_134_chunk."time"
-> Sort
Sort Key: _hyper_36_134_chunk."time"
-> Seq Scan on _hyper_36_134_chunk
-> Custom Scan (DecompressChunk) on _hyper_36_135_chunk
-> Sort
Sort Key: compress_hyper_37_137_chunk._ts_meta_sequence_num DESC
-> Seq Scan on compress_hyper_37_137_chunk
-> Sort
Sort Key: _hyper_36_135_chunk."time"
-> Sort
Sort Key: _hyper_36_135_chunk."time"
-> Seq Scan on _hyper_36_135_chunk
(35 rows)

View File

@ -912,3 +912,56 @@ SELECT compress_chunk(c) FROM show_chunks('test_partials', newer_than => '2022-0
EXPLAIN (costs off) SELECT * FROM test_partials ORDER BY time;
-- verify result correctness
SELECT * FROM test_partials ORDER BY time;
-- Test ChunkAppend planning for space partitioning with partial chunks:
-- verify ordered append still produces correct (nested Merge Append) plans
-- when partially compressed chunks exist on a space-partitioned hypertable.
CREATE TABLE space_part (time timestamptz, a int, b int, c int);
SELECT create_hypertable('space_part', 'time', chunk_time_interval => INTERVAL '1 day');
INSERT INTO space_part VALUES
-- chunk1
('2020-01-01 00:00', 1, 1, 1),
('2020-01-01 00:00', 2, 1, 1),
('2020-01-01 00:03', 1, 1, 1),
('2020-01-01 00:03', 2, 1, 1);
INSERT INTO space_part values
-- chunk2
('2021-01-01 00:00', 1, 1, 1),
('2021-01-01 00:00', 2, 1, 1),
('2021-01-01 00:03', 1, 1, 1),
('2021-01-01 00:03', 2, 1, 1);
-- compress them
ALTER TABLE space_part SET (timescaledb.compress);
SELECT compress_chunk(show_chunks('space_part'));
-- make first chunk partial
-- (inserting into an already-compressed chunk leaves it partially compressed)
INSERT INTO space_part VALUES
-- chunk1
('2020-01-01 00:01', 1, 1, 1),
('2020-01-01 00:01', 2, 1, 1);
-------- now enable the space partitioning, this will take effect for chunks created subsequently
SELECT add_dimension('space_part', 'a', number_partitions => 5);
-- plan is still the same
-- (existing chunks predate the space dimension, so they are unaffected)
EXPLAIN (COSTS OFF) SELECT * FROM space_part ORDER BY time;
-- now add more chunks that do adhere to the new space partitioning
-- chunks 3,4
INSERT INTO space_part VALUES
('2022-01-01 00:00', 1, 1, 1),
('2022-01-01 00:00', 2, 1, 1),
('2022-01-01 00:03', 1, 1, 1),
('2022-01-01 00:03', 2, 1, 1);
-- plan still ok
EXPLAIN (COSTS OFF) SELECT * FROM space_part ORDER BY time;
-- compress them
SELECT compress_chunk(c, if_not_compressed=>true) FROM show_chunks('space_part') c;
-- plan still ok
EXPLAIN (COSTS OFF) SELECT * FROM space_part ORDER BY time;
-- make second one of them partial
insert into space_part values
('2022-01-01 00:02', 2, 1, 1),
('2022-01-01 00:02', 2, 1, 1);
EXPLAIN (COSTS OFF) SELECT * FROM space_part ORDER BY time;
-- make other one partial too
-- (both space-partitioned chunks of the same time slice are now partial)
INSERT INTO space_part VALUES
('2022-01-01 00:02', 1, 1, 1);
EXPLAIN (COSTS OFF) SELECT * FROM space_part ORDER BY time;