From ef783c4b559e3a10f1274d0aac46fb995230080a Mon Sep 17 00:00:00 2001 From: Dipesh Pandit <70561403+pdipesh02@users.noreply.github.com> Date: Fri, 8 Sep 2023 14:37:36 +0530 Subject: [PATCH] Server crash when using duplicate segmentby column (#6044) Server crash when using duplicate segmentby column The segmentby column info array is populated by using the column attribute number as an array index. This is done as part of validating and creating segmentby column info in function `compresscolinfo_init`. Since the column is duplicated, the attribute number for both segmentby columns is the same. When this attribute number is used as an index, only one of the array elements is populated correctly with the detailed column info, whereas the other element of the array remains NULL. This segmentby column info is updated in the catalog as part of processing compression options (ALTER TABLE ...). When the chunk is being compressed, this segmentby column information is retrieved from the catalog to create the scan key in order to identify any existing index on the table that matches the segmentby column. Out of the two keys, one key gets updated correctly whereas the second key contains NULL values. This results in a crash during the index scan to identify any existing index on the table. The proposed change avoids this crash by raising an error if the user has specified duplicate columns as part of the compress_segmentby or compress_orderby options. Also, added the postgresql-client package to the linux-32bit build dependencies to avoid failures when uploading the regression results. 
--- .../workflows/linux-32bit-build-and-test.yaml | 2 +- .unreleased/bugfix_6044 | 1 + tsl/src/compression/create.c | 23 ++++++++++++++++++- tsl/test/expected/compression_errors.out | 6 +++++ tsl/test/sql/compression_errors.sql | 2 ++ 5 files changed, 32 insertions(+), 2 deletions(-) create mode 100644 .unreleased/bugfix_6044 diff --git a/.github/workflows/linux-32bit-build-and-test.yaml b/.github/workflows/linux-32bit-build-and-test.yaml index 920e9fd3d..abb0626e1 100644 --- a/.github/workflows/linux-32bit-build-and-test.yaml +++ b/.github/workflows/linux-32bit-build-and-test.yaml @@ -68,7 +68,7 @@ jobs: echo '/tmp/core.%h.%e.%t' > /proc/sys/kernel/core_pattern apt-get install -y gcc make cmake libssl-dev libkrb5-dev libipc-run-perl \ libtest-most-perl sudo gdb git wget gawk lbzip2 flex bison lcov base-files \ - locales clang-14 llvm-14 llvm-14-dev llvm-14-tools + locales clang-14 llvm-14 llvm-14-dev llvm-14-tools postgresql-client - name: Checkout TimescaleDB uses: actions/checkout@v3 diff --git a/.unreleased/bugfix_6044 b/.unreleased/bugfix_6044 new file mode 100644 index 000000000..f9270ad46 --- /dev/null +++ b/.unreleased/bugfix_6044 @@ -0,0 +1 @@ +Fixes: #6044 Server crash when using duplicate segmentby column diff --git a/tsl/src/compression/create.c b/tsl/src/compression/create.c index 0e7da55ff..0e7415161 100644 --- a/tsl/src/compression/create.c +++ b/tsl/src/compression/create.c @@ -222,7 +222,7 @@ compresscolinfo_init(CompressColInfo *cc, Oid srctbl_relid, List *segmentby_cols Relation rel; TupleDesc tupdesc; int i, colno, attno; - int16 *segorder_colindex; + int16 *segorder_colindex, *colindex; int seg_attnolen = 0; ListCell *lc; Oid compresseddata_oid = ts_custom_type_cache_get(CUSTOM_TYPE_COMPRESSED_DATA)->type_oid; @@ -230,6 +230,8 @@ compresscolinfo_init(CompressColInfo *cc, Oid srctbl_relid, List *segmentby_cols seg_attnolen = list_length(segmentby_cols); rel = table_open(srctbl_relid, AccessShareLock); segorder_colindex = palloc0(sizeof(int32) * 
(rel->rd_att->natts)); + /* To check duplicates in segmentby/orderby column list. */ + colindex = palloc0(sizeof(int16) * (rel->rd_att->natts)); tupdesc = rel->rd_att; i = 1; @@ -245,11 +247,21 @@ compresscolinfo_init(CompressColInfo *cc, Oid srctbl_relid, List *segmentby_cols errhint("The timescaledb.compress_segmentby option must reference a valid " "column."))); } + + /* check if segmentby columns are distinct. */ + if (colindex[col_attno - 1] != 0) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("duplicate column name \"%s\"", NameStr(col->colname)), + errhint("The timescaledb.compress_segmentby option must reference distinct " + "column."))); + colindex[col_attno - 1] = 1; segorder_colindex[col_attno - 1] = i++; } /* the column indexes are numbered as seg_attnolen + */ Assert(seg_attnolen == (i - 1)); + memset(colindex, 0, sizeof(int16) * (rel->rd_att->natts)); foreach (lc, orderby_cols) { CompressedParsedCol *col = (CompressedParsedCol *) lfirst(lc); @@ -262,6 +274,14 @@ compresscolinfo_init(CompressColInfo *cc, Oid srctbl_relid, List *segmentby_cols errhint("The timescaledb.compress_orderby option must reference a valid " "column."))); + /* check if orderby columns are distinct. 
*/ + if (colindex[col_attno - 1] != 0) + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("duplicate column name \"%s\"", NameStr(col->colname)), + errhint("The timescaledb.compress_orderby option must reference distinct " + "column."))); + /* check if orderby_cols and segmentby_cols are distinct */ if (segorder_colindex[col_attno - 1] != 0) ereport(ERROR, @@ -271,6 +291,7 @@ compresscolinfo_init(CompressColInfo *cc, Oid srctbl_relid, List *segmentby_cols errhint("Use separate columns for the timescaledb.compress_orderby and" " timescaledb.compress_segmentby options."))); + colindex[col_attno - 1] = 1; segorder_colindex[col_attno - 1] = i++; } diff --git a/tsl/test/expected/compression_errors.out b/tsl/test/expected/compression_errors.out index a3bf686d4..daef6218f 100644 --- a/tsl/test/expected/compression_errors.out +++ b/tsl/test/expected/compression_errors.out @@ -195,6 +195,12 @@ HINT: The option timescaledb.compress_segmentby must be a set of columns separa ALTER TABLE foo set (timescaledb.compress, timescaledb.compress_orderby = 'a, p'); ERROR: invalid ordering column type point DETAIL: Could not identify a less-than operator for the type. +ALTER TABLE foo set (timescaledb.compress, timescaledb.compress_segmentby = 'b, b'); +ERROR: duplicate column name "b" +HINT: The timescaledb.compress_segmentby option must reference distinct column. +ALTER TABLE foo set (timescaledb.compress, timescaledb.compress_orderby = 'b, b'); +ERROR: duplicate column name "b" +HINT: The timescaledb.compress_orderby option must reference distinct column. 
--should succeed ALTER TABLE foo set (timescaledb.compress, timescaledb.compress_orderby = 'a, b'); --ddl on ht with compression diff --git a/tsl/test/sql/compression_errors.sql b/tsl/test/sql/compression_errors.sql index bd75af50c..2df0a6723 100644 --- a/tsl/test/sql/compression_errors.sql +++ b/tsl/test/sql/compression_errors.sql @@ -99,6 +99,8 @@ ALTER TABLE foo set (timescaledb.compress, timescaledb.compress_segmentby = 'ran ALTER TABLE foo set (timescaledb.compress, timescaledb.compress_segmentby = 'c LIMIT 1'); ALTER TABLE foo set (timescaledb.compress, timescaledb.compress_segmentby = 'c + b'); ALTER TABLE foo set (timescaledb.compress, timescaledb.compress_orderby = 'a, p'); +ALTER TABLE foo set (timescaledb.compress, timescaledb.compress_segmentby = 'b, b'); +ALTER TABLE foo set (timescaledb.compress, timescaledb.compress_orderby = 'b, b'); --should succeed ALTER TABLE foo set (timescaledb.compress, timescaledb.compress_orderby = 'a, b');