From 2a808cabada4a8e099e89c3e320a1b775fbbc540 Mon Sep 17 00:00:00 2001
From: Jan Nidzwetzki <jan@timescale.com>
Date: Fri, 22 Dec 2023 15:04:35 +0100
Subject: [PATCH] Fix use of freed path in decompression sort logic

In the function add_chunk_sorted_paths, we create sorted versions of the
decompress paths. We construct a sort node and place it on top of the
decompressed chunk to do this. However, the decompress chunk path will
be also added to the relation via add_path. This function can recycle
the provided path if better paths are already known. Therefore, we need
our own private copy for the sorted paths.
---
 .unreleased/fix_6465                          |  1 +
 .../nodes/decompress_chunk/decompress_chunk.c | 15 ++++-
 .../transparent_decompression_queries.out     | 56 +++++++++++++++++++
 .../transparent_decompress_chunk-16.out       |  2 +-
 .../sql/transparent_decompression_queries.sql | 45 +++++++++++++++
 5 files changed, 115 insertions(+), 4 deletions(-)
 create mode 100644 .unreleased/fix_6465

diff --git a/.unreleased/fix_6465 b/.unreleased/fix_6465
new file mode 100644
index 000000000..3d677dd1c
--- /dev/null
+++ b/.unreleased/fix_6465
@@ -0,0 +1 @@
+Fixes: #6465 Fix use of freed path in decompression sort logic
diff --git a/tsl/src/nodes/decompress_chunk/decompress_chunk.c b/tsl/src/nodes/decompress_chunk/decompress_chunk.c
index 326c2a92b..9063fcad7 100644
--- a/tsl/src/nodes/decompress_chunk/decompress_chunk.c
+++ b/tsl/src/nodes/decompress_chunk/decompress_chunk.c
@@ -627,7 +627,7 @@ can_batch_sorted_merge(PlannerInfo *root, CompressionInfo *info, Chunk *chunk)
  */
 static void
 add_chunk_sorted_paths(PlannerInfo *root, RelOptInfo *chunk_rel, Hypertable *ht, Index ht_relid,
-					   Path *decompress_chunk_path, Path *compressed_path)
+					   Path *path, Path *compressed_path)
 {
 	if (root->query_pathkeys == NIL)
 		return;
@@ -636,6 +636,14 @@ add_chunk_sorted_paths(PlannerInfo *root, RelOptInfo *chunk_rel, Hypertable *ht,
 	if (!IsA(compressed_path, Path))
 		return;
 
+	/* Copy the decompress chunk path because the original can be recycled in add_path, and our
+	 * sorted path must be independent. */
+	if (!ts_is_decompress_chunk_path(path))
+		return;
+
+	DecompressChunkPath *decompress_chunk_path =
+		copy_decompress_chunk_path((DecompressChunkPath *) path);
+
 	/* Iterate over the sort_pathkeys and generate all possible useful sortings */
 	List *useful_pathkeys = NIL;
 	ListCell *lc;
@@ -660,12 +668,13 @@ add_chunk_sorted_paths(PlannerInfo *root, RelOptInfo *chunk_rel, Hypertable *ht,
 		useful_pathkeys = lappend(useful_pathkeys, pathkey);
 
 		/* Create the sorted path for these useful_pathkeys */
-		if (!pathkeys_contained_in(useful_pathkeys, decompress_chunk_path->pathkeys))
+		if (!pathkeys_contained_in(useful_pathkeys,
+								   decompress_chunk_path->custom_path.path.pathkeys))
 		{
 			Path *sorted_path =
 				(Path *) create_sort_path(root,
 										  chunk_rel,
-										  decompress_chunk_path,
+										  &decompress_chunk_path->custom_path.path,
 										  list_copy(useful_pathkeys), /* useful_pathkeys is modified
 																		 in each iteration */
 										  root->limit_tuples);
diff --git a/tsl/test/expected/transparent_decompression_queries.out b/tsl/test/expected/transparent_decompression_queries.out
index bb52d8b06..625a758ee 100644
--- a/tsl/test/expected/transparent_decompression_queries.out
+++ b/tsl/test/expected/transparent_decompression_queries.out
@@ -219,3 +219,59 @@ SELECT * FROM pseudo WHERE now() IS NOT NULL;
  Sat Jan 01 00:00:00 2000 PST
 (1 row)
 
+-- ensure needed decompression paths underneath a sort node
+-- are not recycled by PostgreSQL
+SET random_page_cost = 4.0;
+DROP TABLE IF EXISTS options_data;
+NOTICE:  table "options_data" does not exist, skipping
+CREATE TABLE IF NOT EXISTS options_data (
+    date            date NOT NULL,
+    contract_code   text NOT NULL,
+    expiry_code     text NOT NULL,
+    option_type     character(1) NOT NULL,
+    strike          real NOT NULL,
+    price           double precision,
+    source          text NOT NULL,
+    meta            text
+);
+SELECT create_hypertable('options_data', 'date', chunk_time_interval => interval '1 day');
+     create_hypertable     
+---------------------------
+ (7,public,options_data,t)
+(1 row)
+
+INSERT INTO options_data (date, contract_code, expiry_code, option_type, strike, price, source, meta)
+SELECT
+    date_series,
+    'CONTRACT' || date_series::text,
+    'EXPIRY' || date_series::text,
+    CASE WHEN random() < 0.5 THEN 'C' ELSE 'P' END, -- Randomly choose 'C' or 'P' for option_type
+   random() * 100, -- Random strike value between 0 and 100
+   random() * 100, -- Random price value between 0 and 100
+   'SOURCE' || date_series::text,
+   'META' || date_series::text
+FROM generate_series(
+    '2023-12-01 00:00:00',
+    '2023-12-05 00:00:00',
+    INTERVAL '10 minute'
+) AS date_series;
+ALTER TABLE options_data SET (timescaledb.compress,
+    timescaledb.compress_segmentby = 'contract_code, expiry_code, option_type, strike, source',
+    timescaledb.compress_orderby = 'date DESC');
+SELECT compress_chunk(ch) FROM show_chunks('options_data', older_than=> '2023-12-05') ch;
+             compress_chunk              
+-----------------------------------------
+ _timescaledb_internal._hyper_7_13_chunk
+ _timescaledb_internal._hyper_7_14_chunk
+ _timescaledb_internal._hyper_7_15_chunk
+ _timescaledb_internal._hyper_7_16_chunk
+(4 rows)
+
+ANALYZE options_data;
+SELECT max(date) AS date FROM options_data WHERE contract_code='CONTRACT2023-12-03 04:00:00+01';
+ date 
+------
+ 
+(1 row)
+
+RESET random_page_cost;
diff --git a/tsl/test/shared/expected/transparent_decompress_chunk-16.out b/tsl/test/shared/expected/transparent_decompress_chunk-16.out
index dffc57c63..de0a06d7a 100644
--- a/tsl/test/shared/expected/transparent_decompress_chunk-16.out
+++ b/tsl/test/shared/expected/transparent_decompress_chunk-16.out
@@ -713,7 +713,7 @@ QUERY PLAN
                      ->  Sort
                            Sort Key: m3."time"
                            ->  Custom Scan (DecompressChunk) on _hyper_X_X_chunk m3
-                                 ->  Parallel Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_2
+                                 ->  Seq Scan on compress_hyper_X_X_chunk compress_hyper_X_X_chunk_2
                                        Filter: (device_id = 3)
                      ->  Custom Scan (DecompressChunk) on _hyper_X_X_chunk m1
                            ->  Seq Scan on compress_hyper_X_X_chunk
diff --git a/tsl/test/sql/transparent_decompression_queries.sql b/tsl/test/sql/transparent_decompression_queries.sql
index ee091c0ba..ca9a2ddc3 100644
--- a/tsl/test/sql/transparent_decompression_queries.sql
+++ b/tsl/test/sql/transparent_decompression_queries.sql
@@ -93,3 +93,48 @@ SELECT compress_chunk(show_chunks('pseudo'));
 
 SELECT * FROM pseudo WHERE now() IS NOT NULL;
 
+-- ensure needed decompression paths underneath a sort node
+-- are not recycled by PostgreSQL
+SET random_page_cost = 4.0;
+DROP TABLE IF EXISTS options_data;
+
+CREATE TABLE IF NOT EXISTS options_data (
+    date            date NOT NULL,
+    contract_code   text NOT NULL,
+    expiry_code     text NOT NULL,
+    option_type     character(1) NOT NULL,
+    strike          real NOT NULL,
+    price           double precision,
+    source          text NOT NULL,
+    meta            text
+);
+
+SELECT create_hypertable('options_data', 'date', chunk_time_interval => interval '1 day');
+
+INSERT INTO options_data (date, contract_code, expiry_code, option_type, strike, price, source, meta)
+SELECT
+    date_series,
+    'CONTRACT' || date_series::text,
+    'EXPIRY' || date_series::text,
+    CASE WHEN random() < 0.5 THEN 'C' ELSE 'P' END, -- Randomly choose 'C' or 'P' for option_type
+   random() * 100, -- Random strike value between 0 and 100
+   random() * 100, -- Random price value between 0 and 100
+   'SOURCE' || date_series::text,
+   'META' || date_series::text
+FROM generate_series(
+    '2023-12-01 00:00:00',
+    '2023-12-05 00:00:00',
+    INTERVAL '10 minute'
+) AS date_series;
+
+ALTER TABLE options_data SET (timescaledb.compress,
+    timescaledb.compress_segmentby = 'contract_code, expiry_code, option_type, strike, source',
+    timescaledb.compress_orderby = 'date DESC');
+
+SELECT compress_chunk(ch) FROM show_chunks('options_data', older_than=> '2023-12-05') ch;
+
+ANALYZE options_data;
+
+SELECT max(date) AS date FROM options_data WHERE contract_code='CONTRACT2023-12-03 04:00:00+01';
+
+RESET random_page_cost;