From fb14771308e928c02ffe3ca1938d2bd25bd9ba11 Mon Sep 17 00:00:00 2001
From: Sven Klemm <sven@timescale.com>
Date: Tue, 25 Jun 2024 07:43:20 +0200
Subject: [PATCH] Fix handling of multiple unique indexes in compressed INSERT

Adjust compressed_insert_key_columns to correctly handle multiple
unique indexes. This patch changes the function to no longer combine
the columns from multiple indexes but instead return only the columns
shared by all the unique indexes. This patch also fixes a couple of
comments in that function.
---
 .unreleased/pr_7061                      |  1 +
 tsl/src/compression/compression.c        | 54 ++++++++++++------------
 tsl/test/expected/compression_insert.out | 31 ++++++++++++++
 tsl/test/sql/compression_insert.sql      | 19 +++++++++
 4 files changed, 78 insertions(+), 27 deletions(-)
 create mode 100644 .unreleased/pr_7061

diff --git a/.unreleased/pr_7061 b/.unreleased/pr_7061
new file mode 100644
index 000000000..82e9e1c1d
--- /dev/null
+++ b/.unreleased/pr_7061
@@ -0,0 +1 @@
+Fixes: #7061 Fix handling of multiple unique indexes in compressed INSERT
diff --git a/tsl/src/compression/compression.c b/tsl/src/compression/compression.c
index b2c56dfc3..bfdc9a456 100644
--- a/tsl/src/compression/compression.c
+++ b/tsl/src/compression/compression.c
@@ -2177,15 +2177,17 @@ create_segment_filter_scankey(Relation in_rel, char *segment_filter_col_name,
 
 /*
  * For insert into compressed chunks with unique index determine the
- * columns which are safe to use for batch filtering.
+ * columns which can be used for INSERT batch filtering.
+ * The passed-in relation is the uncompressed chunk.
  *
- * This is based on RelationGetIndexAttrBitmap from postgres with changes
- * to also track unique expression indexes.
+ * In case of multiple unique indexes we have to return the shared columns.
+ * For expression indexes we ignore the columns with expressions; for partial
+ * indexes we ignore the predicate.
  */
 static Bitmapset *
 compressed_insert_key_columns(Relation relation)
 {
-	Bitmapset *indexattrs = NULL; /* indexed columns */
+	Bitmapset *shared_attrs = NULL; /* columns shared by all unique indexes */
 	ListCell *l;
 
 	/* Fast path if definitely no indexes */
@@ -2198,47 +2200,45 @@ compressed_insert_key_columns(Relation relation)
 	if (indexoidlist == NIL)
 		return NULL;
 
-	/*
-	 * For each index, add referenced attributes to indexattrs.
-	 *
-	 * Note: we consider all indexes returned by RelationGetIndexList, even if
-	 * they are not indisready or indisvalid. This is important because an
-	 * index for which CREATE INDEX CONCURRENTLY has just started must be
-	 * included in HOT-safety decisions (see README.HOT). If a DROP INDEX
-	 * CONCURRENTLY is far enough along that we should ignore the index, it
-	 * won't be returned at all by RelationGetIndexList.
-	 */
 	foreach (l, indexoidlist)
 	{
 		Oid indexOid = lfirst_oid(l);
 		Relation indexDesc = index_open(indexOid, AccessShareLock);
 
-		if (!indexDesc->rd_index->indisunique)
+		/*
+		 * We are only interested in unique indexes. PRIMARY KEY indexes also have
+		 * indisunique set to true, so we do not need to check for them separately.
+		 */
+		if (!indexDesc->rd_index->indislive || !indexDesc->rd_index->indisvalid ||
+			!indexDesc->rd_index->indisunique)
 		{
 			index_close(indexDesc, AccessShareLock);
 			continue;
 		}
 
-		/* Collect simple attribute references.
-		 * For covering indexes we only need to collect the key attributes.
-		 * Unlike RelationGetIndexAttrBitmap we allow expression indexes
-		 * but we do not extract attributes from the expressions as that
-		 * would not be a safe filter as the expression can alter attributes
-		 * which would not make them sufficient for batch filtering.
+		Bitmapset *idx_attrs = NULL;
+		/*
+		 * Collect the key attributes of the current index.
+		 * For covering indexes we need to ignore the included columns.
 		 */
 		for (int i = 0; i < indexDesc->rd_index->indnkeyatts; i++)
 		{
 			int attrnum = indexDesc->rd_index->indkey.values[i];
-			if (attrnum != 0)
-			{
-				indexattrs =
-					bms_add_member(indexattrs, attrnum - FirstLowInvalidHeapAttributeNumber);
-			}
+			/* We are not interested in expression columns, which have attrnum = 0 */
+			if (!attrnum)
+				continue;
+
+			idx_attrs = bms_add_member(idx_attrs, attrnum - FirstLowInvalidHeapAttributeNumber);
 		}
 		index_close(indexDesc, AccessShareLock);
+
+		shared_attrs = shared_attrs ? bms_intersect(idx_attrs, shared_attrs) : idx_attrs;
+
+		if (!shared_attrs)
+			return NULL;
 	}
 
-	return indexattrs;
+	return shared_attrs;
 }
 
 /* This method is used to find matching index on compressed chunk
diff --git a/tsl/test/expected/compression_insert.out b/tsl/test/expected/compression_insert.out
index 2cf41aa0b..6003d8be0 100644
--- a/tsl/test/expected/compression_insert.out
+++ b/tsl/test/expected/compression_insert.out
@@ -1097,3 +1097,34 @@ SET timescaledb.max_tuples_decompressed_per_dml_transaction = 0;
 INSERT INTO test_limit SELECT t, 11 FROM generate_series(1,6000,1000) t;
 \set ON_ERROR_STOP 1
 DROP TABLE test_limit;
+RESET timescaledb.max_tuples_decompressed_per_dml_transaction;
+-- test multiple unique constraints
+CREATE TABLE multi_unique (time timestamptz NOT NULL, u1 int, u2 int, value float, unique(time, u1), unique(time, u2));
+SELECT table_name FROM create_hypertable('multi_unique', 'time');
+  table_name
+--------------
+ multi_unique
+(1 row)
+
+ALTER TABLE multi_unique SET (timescaledb.compress, timescaledb.compress_segmentby = 'u1, u2');
+NOTICE:  default order by for hypertable "multi_unique" is set to ""time" DESC"
+INSERT INTO multi_unique VALUES('2024-01-01', 0, 0, 1.0);
+SELECT count(compress_chunk(c)) FROM show_chunks('multi_unique') c;
+ count
+-------
+     1
+(1 row)
+
+\set ON_ERROR_STOP 0
+-- all INSERTs should fail with constraint violation
+BEGIN; INSERT INTO multi_unique VALUES('2024-01-01', 0, 0, 1.0); ROLLBACK;
+ERROR:  duplicate key value violates unique constraint "76_1_multi_unique_time_u1_key"
+DETAIL:  Key ("time", u1)=(Mon Jan 01 00:00:00 2024 PST, 0) already exists.
+BEGIN; INSERT INTO multi_unique VALUES('2024-01-01', 0, 1, 1.0); ROLLBACK;
+ERROR:  duplicate key value violates unique constraint "76_1_multi_unique_time_u1_key"
+DETAIL:  Key ("time", u1)=(Mon Jan 01 00:00:00 2024 PST, 0) already exists.
+BEGIN; INSERT INTO multi_unique VALUES('2024-01-01', 1, 0, 1.0); ROLLBACK;
+ERROR:  duplicate key value violates unique constraint "76_2_multi_unique_time_u2_key"
+DETAIL:  Key ("time", u2)=(Mon Jan 01 00:00:00 2024 PST, 0) already exists.
+\set ON_ERROR_STOP 1
+DROP TABLE multi_unique;
diff --git a/tsl/test/sql/compression_insert.sql b/tsl/test/sql/compression_insert.sql
index e3793d1e7..e1bfa6820 100644
--- a/tsl/test/sql/compression_insert.sql
+++ b/tsl/test/sql/compression_insert.sql
@@ -729,3 +729,22 @@ INSERT INTO test_limit SELECT t, 11 FROM generate_series(1,6000,1000) t;
 \set ON_ERROR_STOP 1
 
 DROP TABLE test_limit;
+RESET timescaledb.max_tuples_decompressed_per_dml_transaction;
+
+-- test multiple unique constraints
+CREATE TABLE multi_unique (time timestamptz NOT NULL, u1 int, u2 int, value float, unique(time, u1), unique(time, u2));
+SELECT table_name FROM create_hypertable('multi_unique', 'time');
+ALTER TABLE multi_unique SET (timescaledb.compress, timescaledb.compress_segmentby = 'u1, u2');
+
+INSERT INTO multi_unique VALUES('2024-01-01', 0, 0, 1.0);
+SELECT count(compress_chunk(c)) FROM show_chunks('multi_unique') c;
+
+\set ON_ERROR_STOP 0
+-- all INSERTs should fail with constraint violation
+BEGIN; INSERT INTO multi_unique VALUES('2024-01-01', 0, 0, 1.0); ROLLBACK;
+BEGIN; INSERT INTO multi_unique VALUES('2024-01-01', 0, 1, 1.0); ROLLBACK;
+BEGIN; INSERT INTO multi_unique VALUES('2024-01-01', 1, 0, 1.0); ROLLBACK;
+\set ON_ERROR_STOP 1
+
+DROP TABLE multi_unique;
+
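Note (illustration, not part of the patch): the core idea of the fix is that a column is only safe for batch filtering if it appears in every unique index. With the old union behavior, filtering decompressed batches on u1 alone could skip a batch that still conflicts on unique(time, u2). The standalone C sketch below models the intersection logic using plain uint64 bitmasks in place of PostgreSQL's Bitmapset (bms_add_member/bms_intersect); all names and types in it are illustrative stand-ins, not the TimescaleDB API.

/*
 * Standalone model of the patched compressed_insert_key_columns() logic:
 * attribute sets are uint64 bitmasks (bit n = column n) and unique indexes
 * are arrays of column numbers. Illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

typedef struct
{
	const int *key_cols; /* key column numbers; 0 marks an expression column */
	int nkeyatts;        /* number of key attributes (excludes INCLUDE columns) */
	int is_unique;
} MockIndex;

static uint64_t
shared_key_columns(const MockIndex *indexes, int nindexes)
{
	uint64_t shared = 0;
	int seen_unique = 0;

	for (int i = 0; i < nindexes; i++)
	{
		if (!indexes[i].is_unique)
			continue; /* only unique indexes constrain INSERTs */

		uint64_t idx_attrs = 0;
		for (int j = 0; j < indexes[i].nkeyatts; j++)
		{
			int attrnum = indexes[i].key_cols[j];
			if (attrnum == 0)
				continue; /* expression column: not usable for filtering */
			idx_attrs |= UINT64_C(1) << attrnum;
		}

		/* Intersect, don't union: a filter column must appear in EVERY
		 * unique index, otherwise a conflict on another index could hide
		 * in a batch the filter skipped. */
		shared = seen_unique ? (shared & idx_attrs) : idx_attrs;
		seen_unique = 1;

		if (shared == 0)
			return 0; /* no common column: no batch filtering possible */
	}
	return shared;
}

int
main(void)
{
	/* Mirrors the regression test: unique(time, u1) and unique(time, u2).
	 * Columns: time = 1, u1 = 2, u2 = 3. */
	const int idx1_cols[] = { 1, 2 };
	const int idx2_cols[] = { 1, 3 };
	const MockIndex indexes[] = {
		{ idx1_cols, 2, 1 },
		{ idx2_cols, 2, 1 },
	};

	uint64_t shared = shared_key_columns(indexes, 2);
	printf("shared columns bitmask: 0x%llx\n", (unsigned long long) shared);
	/* Prints 0x2: only "time" (column 1) is shared by both unique indexes,
	 * so only it may be used to filter compressed batches; the buggy union
	 * would also have included u1 and u2. */
	return 0;
}

Run against the two indexes from the regression test, the sketch reports only the shared "time" column, which matches what compressed_insert_key_columns now returns after this patch.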