From e30699101bccd0871143482d39aaaf42da311978 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Tue, 12 Mar 2024 13:59:38 +0300 Subject: [PATCH] Add minmax sparse indexes when compressing columns with btree indexes (#6705) The decision to add a minmax sparse index is made each time the compressed chunk is created (full decompression followed by compression), based on the indexes currently present on the hypertable. No new chunk compression settings are added. No action is required on upgrade, but the feature is not enabled on existing chunks. The minmax index will be added when the chunk is fully decompressed and compressed. No action is required on downgrade; we ignore the unknown metadata columns. They will be removed when the chunk is fully decompressed and compressed. The potential drawback of this feature is increasing the storage requirements for the compressed chunk table, but it is normally only a few percent of the total compressed data size. It can be disabled with the GUC `timescaledb.auto_sparse_indexes`. Here's a small example of this feature in action: https://gist.github.com/akuzm/84d4b3b609e3581768173bd21001dfbf Note that the number of hit buffers is reduced almost 4x. 
--- .unreleased/auto_sparse_indexes | 1 + src/guc.c | 14 + src/guc.h | 1 + tsl/src/compression/create.c | 241 ++++++++++++++---- tsl/src/compression/create.h | 4 +- .../expected/compress_auto_sparse_index.out | 157 ++++++++++++ tsl/test/sql/CMakeLists.txt | 1 + tsl/test/sql/compress_auto_sparse_index.sql | 66 +++++ 8 files changed, 436 insertions(+), 49 deletions(-) create mode 100644 .unreleased/auto_sparse_indexes create mode 100644 tsl/test/expected/compress_auto_sparse_index.out create mode 100644 tsl/test/sql/compress_auto_sparse_index.sql diff --git a/.unreleased/auto_sparse_indexes b/.unreleased/auto_sparse_indexes new file mode 100644 index 000000000..99b093272 --- /dev/null +++ b/.unreleased/auto_sparse_indexes @@ -0,0 +1 @@ +Implements: #6705 Add sparse minmax indexes for compressed columns that have uncompressed btree indexes diff --git a/src/guc.c b/src/guc.c index 7af4cf61b..a8fff8098 100644 --- a/src/guc.c +++ b/src/guc.c @@ -72,6 +72,7 @@ bool ts_guc_enable_chunkwise_aggregation = true; bool ts_guc_enable_vectorized_aggregation = true; TSDLLEXPORT bool ts_guc_enable_compression_indexscan = false; TSDLLEXPORT bool ts_guc_enable_bulk_decompression = true; +TSDLLEXPORT bool ts_guc_auto_sparse_indexes = true; TSDLLEXPORT int ts_guc_bgw_log_level = WARNING; TSDLLEXPORT bool ts_guc_enable_skip_scan = true; /* default value of ts_guc_max_open_chunks_per_insert and ts_guc_max_cached_chunks_per_hypertable @@ -494,6 +495,19 @@ _guc_init(void) NULL, NULL); + DefineCustomBoolVariable(MAKE_EXTOPTION("auto_sparse_indexes"), + "Create sparse indexes on compressed chunks", + "The hypertable columns that are used as index keys will have " + "suitable sparse indexes when compressed. Must be set at the moment " + "of chunk compression, e.g. 
when the `compress_chunk()` is called.", + &ts_guc_auto_sparse_indexes, + true, + PGC_USERSET, + 0, + NULL, + NULL, + NULL); + DefineCustomIntVariable(MAKE_EXTOPTION("max_open_chunks_per_insert"), "Maximum open chunks per insert", "Maximum number of open chunk tables per insert", diff --git a/src/guc.h b/src/guc.h index 44c859e31..e8844fb17 100644 --- a/src/guc.h +++ b/src/guc.h @@ -57,6 +57,7 @@ extern char *ts_last_tune_version; extern TSDLLEXPORT bool ts_guc_enable_2pc; extern TSDLLEXPORT bool ts_guc_enable_compression_indexscan; extern TSDLLEXPORT bool ts_guc_enable_bulk_decompression; +extern TSDLLEXPORT bool ts_guc_auto_sparse_indexes; extern TSDLLEXPORT int ts_guc_bgw_log_level; #ifdef TS_DEBUG diff --git a/tsl/src/compression/create.c b/tsl/src/compression/create.c index c4060897c..3c722066f 100644 --- a/tsl/src/compression/create.c +++ b/tsl/src/compression/create.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -19,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -47,6 +49,22 @@ #include "utils.h" #include "guc.h" +static const char *sparse_index_types[] = { "min", "max" }; + +static bool +is_sparse_index_type(const char *type) +{ + for (size_t i = 0; i < sizeof(sparse_index_types) / sizeof(sparse_index_types[0]); i++) + { + if (strcmp(sparse_index_types[i], type) == 0) + { + return true; + } + } + + return false; +} + static void validate_hypertable_for_compression(Hypertable *ht); static List *build_columndefs(CompressionSettings *settings, Oid src_relid); static ColumnDef *build_columndef_singlecolumn(const char *colname, Oid typid); @@ -54,13 +72,15 @@ static void compression_settings_update(Hypertable *ht, CompressionSettings *set WithClauseResult *with_clause_options); static char * -compression_column_segment_metadata_name(int16 column_index, const char *type) +compression_column_segment_metadata_name(const char *type, int16 column_index) { + Assert(is_sparse_index_type(type)); + 
char *buf = palloc(sizeof(char) * NAMEDATALEN); - int ret; Assert(column_index > 0); - ret = snprintf(buf, NAMEDATALEN, COMPRESSION_COLUMN_METADATA_PATTERN_V1, type, column_index); + int ret = + snprintf(buf, NAMEDATALEN, COMPRESSION_COLUMN_METADATA_PATTERN_V1, type, column_index); if (ret < 0 || ret > NAMEDATALEN) { ereport(ERROR, @@ -72,33 +92,70 @@ compression_column_segment_metadata_name(int16 column_index, const char *type) char * column_segment_min_name(int16 column_index) { - return compression_column_segment_metadata_name(column_index, - COMPRESSION_COLUMN_METADATA_MIN_COLUMN_NAME); + return compression_column_segment_metadata_name("min", column_index); } char * column_segment_max_name(int16 column_index) { - return compression_column_segment_metadata_name(column_index, - COMPRESSION_COLUMN_METADATA_MAX_COLUMN_NAME); + return compression_column_segment_metadata_name("max", column_index); +} + +/* + * Get metadata name for a given column name and metadata type, format version 2. + * We can't reference the attribute numbers, because they can change after + * drop/restore if we had any dropped columns. + * We might have to truncate the column names to fit into the NAMEDATALEN here, + * in this case we disambiguate them with their md5 hash. + */ +char * +compressed_column_metadata_name_v2(const char *metadata_type, const char *column_name) +{ + Assert(is_sparse_index_type(metadata_type)); + Assert(strlen(metadata_type) <= 6); + + const int len = strlen(column_name); + Assert(len < NAMEDATALEN); + + /* + * We have to fit the name into NAMEDATALEN - 1 which is 63 bytes: + * 12 (_ts_meta_v2_) + 6 (metadata_type) + 1 (_) + x (column_name) + 1 (_) + 4 (hash) = 63; + * x = 63 - 24 = 39. 
+ */ + char *result; + if (len > 39) + { + const char *errstr = NULL; + char hash[33]; + Ensure(pg_md5_hash_compat(column_name, len, hash, &errstr), "md5 computation failure"); + + result = psprintf("_ts_meta_v2_%.6s_%.4s_%.39s", metadata_type, hash, column_name); + } + else + { + result = psprintf("_ts_meta_v2_%.6s_%.39s", metadata_type, column_name); + } + Assert(strlen(result) < NAMEDATALEN); + return result; } int compressed_column_metadata_attno(CompressionSettings *settings, Oid chunk_reloid, AttrNumber chunk_attno, Oid compressed_reloid, char *metadata_type) { - Assert(strcmp(metadata_type, "min") == 0 || strcmp(metadata_type, "max") == 0); + Assert(is_sparse_index_type(metadata_type)); char *attname = get_attname(chunk_reloid, chunk_attno, /* missing_ok = */ false); int16 orderby_pos = ts_array_position(settings->fd.orderby, attname); if (orderby_pos != 0) { - char *metadata_name = compression_column_segment_metadata_name(orderby_pos, metadata_type); + char *metadata_name = compression_column_segment_metadata_name(metadata_type, orderby_pos); return get_attnum(compressed_reloid, metadata_name); } - return InvalidAttrNumber; + char *metadata_name = compressed_column_metadata_name_v2(metadata_type, attname); + return get_attnum(compressed_reloid, metadata_name); } /* @@ -118,16 +175,54 @@ build_columndefs(CompressionSettings *settings, Oid src_relid) List *segmentby_column_defs = NIL; Relation rel = table_open(src_relid, AccessShareLock); + + Bitmapset *btree_columns = NULL; + if (ts_guc_auto_sparse_indexes) + { + /* + * Check which columns have btree indexes. We will create sparse minmax + * indexes for them in compressed chunk. 
+ */ + ListCell *lc; + List *index_oids = RelationGetIndexList(rel); + foreach (lc, index_oids) + { + Oid index_oid = lfirst_oid(lc); + Relation index_rel = index_open(index_oid, AccessShareLock); + IndexInfo *index_info = BuildIndexInfo(index_rel); + index_close(index_rel, NoLock); + + /* + * We want to create the sparse minmax index, if it can satisfy the same + * kinds of queries as the uncompressed index. The simplest case is btree + * which can satisfy equality and comparison tests, same as sparse minmax. + * + * We can be smarter here, e.g. for 'BRIN', sparse minmax can be similar + * to 'BRIN' with range opclass, but not for bloom filter opclass. For GIN, + * sparse minmax is useless because it doesn't help satisfy text search + * queries, and so on. Currently we check only the simplest btree case. + */ + if (index_info->ii_Am != BTREE_AM_OID) + { + continue; + } + + for (int i = 0; i < index_info->ii_NumIndexKeyAttrs; i++) + { + AttrNumber attno = index_info->ii_IndexAttrNumbers[i]; + if (attno != InvalidAttrNumber) + { + btree_columns = bms_add_member(btree_columns, attno); + } + } + } + } + TupleDesc tupdesc = rel->rd_att; - for (int attno = 0; attno < tupdesc->natts; attno++) + for (int attoffset = 0; attoffset < tupdesc->natts; attoffset++) { - Oid attroid = InvalidOid; - int32 typmod = -1; - Oid collid = 0; - - Form_pg_attribute attr = TupleDescAttr(tupdesc, attno); - ColumnDef *coldef; + Form_pg_attribute attr = TupleDescAttr(tupdesc, attoffset); if (attr->attisdropped) continue; if (strncmp(NameStr(attr->attname), @@ -138,31 +233,32 @@ build_columndefs(CompressionSettings *settings, Oid src_relid) COMPRESSION_COLUMN_METADATA_PREFIX); bool is_segmentby = ts_array_is_member(segmentby, NameStr(attr->attname)); - bool is_orderby = ts_array_is_member(settings->fd.orderby, NameStr(attr->attname)); - if (is_segmentby) { - attroid = attr->atttypid; /*segment by columns have original type */ - typmod = attr->atttypmod; - collid = attr->attcollation; + 
segmentby_column_defs = lappend(segmentby_column_defs, + makeColumnDef(NameStr(attr->attname), + attr->atttypid, + attr->atttypmod, + attr->attcollation)); + continue; } - if (!OidIsValid(attroid)) - { - attroid = compresseddata_oid; /* default type for column */ - } - - coldef = makeColumnDef(NameStr(attr->attname), attroid, typmod, collid); - /* - * Put the metadata columns before the compressed columns, because they - * are accessed before decompression. + * This is either an orderby or a normal compressed column. We want to + * have metadata for some of them. Put the metadata columns before the + * respective compressed column, because they are accessed before + * decompression. */ + bool is_orderby = ts_array_is_member(settings->fd.orderby, NameStr(attr->attname)); if (is_orderby) { int index = ts_array_position(settings->fd.orderby, NameStr(attr->attname)); TypeCacheEntry *type = lookup_type_cache(attr->atttypid, TYPECACHE_LT_OPR); + /* + * We must be able to create the metadata for the orderby columns, + * because it is required for sorting. + */ if (!OidIsValid(type->lt_opr)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_FUNCTION), @@ -181,15 +277,46 @@ build_columndefs(CompressionSettings *settings, Oid src_relid) attr->atttypmod, attr->attcollation)); } + else if (bms_is_member(attr->attnum, btree_columns)) + { + TypeCacheEntry *type = lookup_type_cache(attr->atttypid, TYPECACHE_LT_OPR); - if (is_segmentby) - { - segmentby_column_defs = lappend(segmentby_column_defs, coldef); - } - else - { - compressed_column_defs = lappend(compressed_column_defs, coldef); + if (OidIsValid(type->lt_opr)) + { + /* + * Here we create minmax metadata for the columns for which + * we have btree indexes. Not sure it is technically possible + * to have a btree index for a column and at the same time + * not have a "less" operator for it. 
Still, we can have + * various unusual user-defined types, and the minmax metadata + * for the rest of the columns are not required for correctness, + * so play it safe and just don't create the metadata if we don't + * have an operator. + */ + compressed_column_defs = + lappend(compressed_column_defs, + makeColumnDef(compressed_column_metadata_name_v2("min", + NameStr( + attr->attname)), + attr->atttypid, + attr->atttypmod, + attr->attcollation)); + compressed_column_defs = + lappend(compressed_column_defs, + makeColumnDef(compressed_column_metadata_name_v2("max", + NameStr( + attr->attname)), + attr->atttypid, + attr->atttypmod, + attr->attcollation)); + } } + + compressed_column_defs = lappend(compressed_column_defs, + makeColumnDef(NameStr(attr->attname), + compresseddata_oid, + /* typmod = */ -1, + /* collOid = */ InvalidOid)); } /* @@ -873,17 +1000,37 @@ tsl_process_compress_table_rename_column(Hypertable *ht, const RenameStmt *stmt) "cannot compress tables with reserved column prefix '%s'", COMPRESSION_COLUMN_METADATA_PREFIX); - if (TS_HYPERTABLE_HAS_COMPRESSION_TABLE(ht)) + if (!TS_HYPERTABLE_HAS_COMPRESSION_TABLE(ht)) { - List *chunks = ts_chunk_get_by_hypertable_id(ht->fd.compressed_hypertable_id); - ListCell *lc; - foreach (lc, chunks) + return; + } + + RenameStmt *compressed_col_stmt = (RenameStmt *) copyObject(stmt); + RenameStmt *compressed_index_stmt = (RenameStmt *) copyObject(stmt); + List *chunks = ts_chunk_get_by_hypertable_id(ht->fd.compressed_hypertable_id); + ListCell *lc; + foreach (lc, chunks) + { + Chunk *chunk = lfirst(lc); + compressed_col_stmt->relation = + makeRangeVar(NameStr(chunk->fd.schema_name), NameStr(chunk->fd.table_name), -1); + ExecRenameStmt(compressed_col_stmt); + + compressed_index_stmt->relation = compressed_col_stmt->relation; + for (size_t i = 0; i < sizeof(sparse_index_types) / sizeof(sparse_index_types[0]); i++) { - Chunk *chunk = lfirst(lc); - RenameStmt *compress_col_stmt = (RenameStmt *) copyObject(stmt); - 
compress_col_stmt->relation = - makeRangeVar(NameStr(chunk->fd.schema_name), NameStr(chunk->fd.table_name), -1); - ExecRenameStmt(compress_col_stmt); + char *old_index_name = + compressed_column_metadata_name_v2(sparse_index_types[i], stmt->subname); + if (get_attnum(chunk->table_id, old_index_name) == InvalidAttrNumber) + { + continue; + } + + char *new_index_name = + compressed_column_metadata_name_v2(sparse_index_types[i], stmt->newname); + compressed_index_stmt->subname = old_index_name; + compressed_index_stmt->newname = new_index_name; + ExecRenameStmt(compressed_index_stmt); } } } diff --git a/tsl/src/compression/create.h b/tsl/src/compression/create.h index 9b53d4b28..944b2aa7d 100644 --- a/tsl/src/compression/create.h +++ b/tsl/src/compression/create.h @@ -15,8 +15,7 @@ #define COMPRESSION_COLUMN_METADATA_COUNT_NAME COMPRESSION_COLUMN_METADATA_PREFIX "count" #define COMPRESSION_COLUMN_METADATA_SEQUENCE_NUM_NAME \ COMPRESSION_COLUMN_METADATA_PREFIX "sequence_num" -#define COMPRESSION_COLUMN_METADATA_MIN_COLUMN_NAME "min" -#define COMPRESSION_COLUMN_METADATA_MAX_COLUMN_NAME "max" + #define COMPRESSION_COLUMN_METADATA_PATTERN_V1 "_ts_meta_%s_%d" bool tsl_process_compress_table(AlterTableCmd *cmd, Hypertable *ht, @@ -28,6 +27,7 @@ Chunk *create_compress_chunk(Hypertable *compress_ht, Chunk *src_chunk, Oid tabl char *column_segment_min_name(int16 column_index); char *column_segment_max_name(int16 column_index); +char *compressed_column_metadata_name_v2(const char *metadata_type, const char *column_name); typedef struct CompressionSettings CompressionSettings; int compressed_column_metadata_attno(CompressionSettings *settings, Oid chunk_reloid, diff --git a/tsl/test/expected/compress_auto_sparse_index.out b/tsl/test/expected/compress_auto_sparse_index.out new file mode 100644 index 000000000..a4424d6e2 --- /dev/null +++ b/tsl/test/expected/compress_auto_sparse_index.out @@ -0,0 +1,157 @@ +-- This file and its contents are licensed under the Timescale License. 
+-- Please see the included NOTICE for copyright information and +-- LICENSE-TIMESCALE for a copy of the license. +create table sparse(ts int, value float); +select create_hypertable('sparse', 'ts'); +NOTICE: adding not-null constraint to column "ts" + create_hypertable +--------------------- + (1,public,sparse,t) +(1 row) + +insert into sparse select x, x from generate_series(1, 10000) x; +alter table sparse set (timescaledb.compress); +-- When the chunks are compressed, minmax metadata are created for columns that +-- have btree indexes. +create index ii on sparse(value); +select count(compress_chunk(x)) from show_chunks('sparse') x; + count +------- + 1 +(1 row) + +explain select * from sparse where value = 1; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------- + Custom Scan (DecompressChunk) on _hyper_1_1_chunk (cost=0.27..19.75 rows=72000 width=12) + Vectorized Filter: (value = '1'::double precision) + -> Seq Scan on compress_hyper_2_2_chunk (cost=0.00..19.75 rows=72 width=76) + Filter: ((_ts_meta_v2_min_value <= '1'::double precision) AND (_ts_meta_v2_max_value >= '1'::double precision)) +(4 rows) + +-- Should be disabled with the GUC +set timescaledb.auto_sparse_indexes to off; +select count(compress_chunk(decompress_chunk(x))) from show_chunks('sparse') x; + count +------- + 1 +(1 row) + +explain select * from sparse where value = 1; + QUERY PLAN +-------------------------------------------------------------------------------------------- + Custom Scan (DecompressChunk) on _hyper_1_1_chunk (cost=0.02..17.50 rows=750000 width=12) + Vectorized Filter: (value = '1'::double precision) + -> Seq Scan on compress_hyper_2_3_chunk (cost=0.00..17.50 rows=750 width=76) +(3 rows) + +reset timescaledb.auto_sparse_indexes; +select count(compress_chunk(decompress_chunk(x))) from show_chunks('sparse') x; + count +------- + 1 +(1 row) + +explain select * from sparse where value = 1; + QUERY 
PLAN +------------------------------------------------------------------------------------------------------------------------- + Custom Scan (DecompressChunk) on _hyper_1_1_chunk (cost=0.27..19.75 rows=72000 width=12) + Vectorized Filter: (value = '1'::double precision) + -> Seq Scan on compress_hyper_2_4_chunk (cost=0.00..19.75 rows=72 width=76) + Filter: ((_ts_meta_v2_min_value <= '1'::double precision) AND (_ts_meta_v2_max_value >= '1'::double precision)) +(4 rows) + +-- Should survive renames. +alter table sparse rename column value to wert; +explain select * from sparse where wert = 1; + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------- + Custom Scan (DecompressChunk) on _hyper_1_1_chunk (cost=0.27..19.75 rows=72000 width=12) + Vectorized Filter: (wert = '1'::double precision) + -> Seq Scan on compress_hyper_2_4_chunk (cost=0.00..19.75 rows=72 width=76) + Filter: ((_ts_meta_v2_min_wert <= '1'::double precision) AND (_ts_meta_v2_max_wert >= '1'::double precision)) +(4 rows) + +alter table sparse rename column wert to value; +explain select * from sparse where value = 1; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------- + Custom Scan (DecompressChunk) on _hyper_1_1_chunk (cost=0.27..19.75 rows=72000 width=12) + Vectorized Filter: (value = '1'::double precision) + -> Seq Scan on compress_hyper_2_4_chunk (cost=0.00..19.75 rows=72 width=76) + Filter: ((_ts_meta_v2_min_value <= '1'::double precision) AND (_ts_meta_v2_max_value >= '1'::double precision)) +(4 rows) + +-- Not for expression indexes. 
+drop index ii; +create index ii on sparse((value + 1)); +select count(compress_chunk(decompress_chunk(x))) from show_chunks('sparse') x; + count +------- + 1 +(1 row) + +explain select * from sparse where value = 1; + QUERY PLAN +-------------------------------------------------------------------------------------------- + Custom Scan (DecompressChunk) on _hyper_1_1_chunk (cost=0.02..17.50 rows=750000 width=12) + Vectorized Filter: (value = '1'::double precision) + -> Seq Scan on compress_hyper_2_5_chunk (cost=0.00..17.50 rows=750 width=76) +(3 rows) + +-- Not for other index types. +drop index ii; +create index ii on sparse using hash(value); +select count(compress_chunk(decompress_chunk(x))) from show_chunks('sparse') x; + count +------- + 1 +(1 row) + +explain select * from sparse where value = 1; + QUERY PLAN +-------------------------------------------------------------------------------------------- + Custom Scan (DecompressChunk) on _hyper_1_1_chunk (cost=0.02..17.50 rows=750000 width=12) + Vectorized Filter: (value = '1'::double precision) + -> Seq Scan on compress_hyper_2_6_chunk (cost=0.00..17.50 rows=750 width=76) +(3 rows) + +-- When the chunk is recompressed without index, no sparse index is created. +drop index ii; +select count(compress_chunk(decompress_chunk(x))) from show_chunks('sparse') x; + count +------- + 1 +(1 row) + +explain select * from sparse where value = 1; + QUERY PLAN +-------------------------------------------------------------------------------------------- + Custom Scan (DecompressChunk) on _hyper_1_1_chunk (cost=0.02..17.50 rows=750000 width=12) + Vectorized Filter: (value = '1'::double precision) + -> Seq Scan on compress_hyper_2_7_chunk (cost=0.00..17.50 rows=750 width=76) +(3 rows) + +-- Long column names. 
+select count(decompress_chunk(x)) from show_chunks('sparse') x; + count +------- + 1 +(1 row) + +\set ECHO none +select count(compress_chunk(x)) from show_chunks('sparse') x; + count +------- + 1 +(1 row) + +explain select * from sparse where Abcdef012345678_Bbcdef012345678_Cbcdef012345678_Dbcdef0 = 1; + QUERY PLAN +--------------------------------------------------------------------------------------------------------------------------------------------------------------- + Custom Scan (DecompressChunk) on _hyper_1_1_chunk (cost=3.48..10.45 rows=3000 width=264) + Vectorized Filter: (abcdef012345678_bbcdef012345678_cbcdef012345678_dbcdef0 = 1) + -> Seq Scan on compress_hyper_2_8_chunk (cost=0.00..10.45 rows=3 width=2092) + Filter: ((_ts_meta_v2_min_9218_abcdef012345678_bbcdef012345678_cbcdef0 <= 1) AND (_ts_meta_v2_max_9218_abcdef012345678_bbcdef012345678_cbcdef0 >= 1)) +(4 rows) + diff --git a/tsl/test/sql/CMakeLists.txt b/tsl/test/sql/CMakeLists.txt index 9547e4826..0a7bc9079 100644 --- a/tsl/test/sql/CMakeLists.txt +++ b/tsl/test/sql/CMakeLists.txt @@ -13,6 +13,7 @@ set(TEST_FILES cagg_query.sql cagg_refresh.sql cagg_utils.sql + compress_auto_sparse_index.sql compress_default.sql compress_float8_corrupt.sql compressed_detoaster.sql diff --git a/tsl/test/sql/compress_auto_sparse_index.sql b/tsl/test/sql/compress_auto_sparse_index.sql new file mode 100644 index 000000000..6f3b6768b --- /dev/null +++ b/tsl/test/sql/compress_auto_sparse_index.sql @@ -0,0 +1,66 @@ +-- This file and its contents are licensed under the Timescale License. +-- Please see the included NOTICE for copyright information and +-- LICENSE-TIMESCALE for a copy of the license. + +create table sparse(ts int, value float); +select create_hypertable('sparse', 'ts'); +insert into sparse select x, x from generate_series(1, 10000) x; +alter table sparse set (timescaledb.compress); + +-- When the chunks are compressed, minmax metadata are created for columns that +-- have btree indexes. 
+create index ii on sparse(value); +select count(compress_chunk(x)) from show_chunks('sparse') x; +explain select * from sparse where value = 1; + + +-- Should be disabled with the GUC +set timescaledb.auto_sparse_indexes to off; +select count(compress_chunk(decompress_chunk(x))) from show_chunks('sparse') x; +explain select * from sparse where value = 1; +reset timescaledb.auto_sparse_indexes; +select count(compress_chunk(decompress_chunk(x))) from show_chunks('sparse') x; +explain select * from sparse where value = 1; + + +-- Should survive renames. +alter table sparse rename column value to wert; +explain select * from sparse where wert = 1; +alter table sparse rename column wert to value; +explain select * from sparse where value = 1; + + +-- Not for expression indexes. +drop index ii; +create index ii on sparse((value + 1)); +select count(compress_chunk(decompress_chunk(x))) from show_chunks('sparse') x; +explain select * from sparse where value = 1; + + +-- Not for other index types. +drop index ii; +create index ii on sparse using hash(value); +select count(compress_chunk(decompress_chunk(x))) from show_chunks('sparse') x; +explain select * from sparse where value = 1; + + +-- When the chunk is recompressed without index, no sparse index is created. +drop index ii; +select count(compress_chunk(decompress_chunk(x))) from show_chunks('sparse') x; +explain select * from sparse where value = 1; + + +-- Long column names. +select count(decompress_chunk(x)) from show_chunks('sparse') x; + +\set ECHO none +select format('alter table sparse add column %1$s int; create index on sparse(%1$s);', + substr('Abcdef012345678_Bbcdef012345678_Cbcdef012345678_Dbcdef012345678_', 1, x)) +from generate_series(1, 63) x +\gexec +\set ECHO queries + +select count(compress_chunk(x)) from show_chunks('sparse') x; + +explain select * from sparse where Abcdef012345678_Bbcdef012345678_Cbcdef012345678_Dbcdef0 = 1; +