Fix GROUP BY error when setting compress_segmentby with an enum column

Using a custom ENUM data type as the segmentby column of a compressed
hypertable caused queries that GROUP BY that column to raise an error.

Fixed by checking, while generating scan paths for the query, whether the
SEGMENT BY column is an ENUM type: if it is, the sort operator is looked up
via ANYENUMOID, and otherwise a descriptive error message is reported.

Fixes #3481
Bharathy 2022-08-29 10:01:56 +05:30
parent 1d4f90b1d3
commit ed212b4442
4 changed files with 120 additions and 1 deletion
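
Root cause, for context: PostgreSQL registers the btree comparison operators
for enum types once, under the anyenum pseudo-type in the enum_ops operator
family, rather than once per concrete enum type, so a sort-operator lookup
keyed on the concrete enum type's OID comes back empty. A minimal catalog
query (a sketch, runnable in any psql session) makes this visible:

-- The btree operators for enums are keyed on anyenum, not on each
-- concrete enum type, so a per-type lookup finds no entries.
SELECT ao.amopstrategy, ao.amopopr::regoperator
FROM pg_amop ao
JOIN pg_opfamily f ON f.oid = ao.amopfamily
JOIN pg_am am ON am.oid = f.opfmethod
WHERE f.opfname = 'enum_ops' AND am.amname = 'btree'
ORDER BY ao.amopstrategy;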


@@ -2395,6 +2395,7 @@ complete, depending on the size of your database**
**Thanks**
* @yadid for reporting a segfault (fixed in 50c8c4c)
* @ryan-shaw for reporting tuples not being correctly converted to a chunk's rowtype (fixed in 645b530)
* @yuezhihan for reporting GROUP BY error when setting compress_segmentby with an enum column
## 0.4.0 (2017-08-21)
@@ -2553,3 +2554,6 @@ the next release.
* [72f754a] use PostgreSQL's own `hash_any` function as default partfunc (thanks @robin900)
* [39f4c0f] Remove sample data instructions and point to docs site
* [9015314] Revised the `get_general_index_definition` function to handle cases where indexes have definitions other than just `CREATE INDEX` (thanks @bricklen)
**Bugfixes**
* #4619 Improve handling of enum columns in compressed hypertables


@@ -177,6 +177,8 @@ build_compressed_scan_pathkeys(SortInfo *sort_info, PlannerInfo *root, List *chu
    ListCell *lc;
    char *column_name;
    Oid sortop;
    Oid opfamily, opcintype;
    int16 strategy;

    for (lc = list_head(chunk_pathkeys);
         lc != NULL && bms_num_members(segmentby_columns) < info->num_segmentby_columns;
@@ -210,6 +212,20 @@
        sortop =
            get_opfamily_member(pk->pk_opfamily, var->vartype, var->vartype, pk->pk_strategy);
        if (!get_ordering_op_properties(sortop, &opfamily, &opcintype, &strategy))
        {
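            /* For an enum column the lookup above comes back empty: the btree
             * operators for enums are registered under ANYENUMOID rather than
             * under each concrete enum type, so retry with ANYENUMOID. */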
            if (type_is_enum(var->vartype))
            {
                sortop = get_opfamily_member(pk->pk_opfamily,
                                             ANYENUMOID,
                                             ANYENUMOID,
                                             pk->pk_strategy);
            }
            else
            {
                elog(ERROR, "sort operator lookup failed for column \"%s\"", column_name);
            }
        }
        pk = make_pathkey_from_compressed(root,
                                          info->compressed_rel->relid,
                                          (Expr *) var,
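
The diff above retries the get_opfamily_member() lookup with ANYENUMOID when
the per-type lookup fails for an enum column. The pair of catalog probes
below emulates both lookups in SQL (a sketch; it assumes the an_enum_type
created by the regression test below exists in the session):

-- Keyed on the concrete enum type, as before the fix: returns zero rows.
SELECT ao.amopopr::regoperator
FROM pg_amop ao
JOIN pg_opfamily f ON f.oid = ao.amopfamily
JOIN pg_am am ON am.oid = f.opfmethod
WHERE f.opfname = 'enum_ops' AND am.amname = 'btree'
  AND ao.amoplefttype = 'an_enum_type'::regtype AND ao.amopstrategy = 1;

-- Keyed on anyenum, as the fallback does: returns <(anyenum,anyenum).
SELECT ao.amopopr::regoperator
FROM pg_amop ao
JOIN pg_opfamily f ON f.oid = ao.amopfamily
JOIN pg_am am ON am.oid = f.opfmethod
WHERE f.opfname = 'enum_ops' AND am.amname = 'btree'
  AND ao.amoplefttype = 'anyenum'::regtype AND ao.amopstrategy = 1;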


@@ -545,3 +545,67 @@ NOTICE: column "medium" of relation "metric" already exists, skipping
ALTER TABLE metric ADD COLUMN "medium_1" VARCHAR ;
ALTER TABLE metric ADD COLUMN "medium_1" VARCHAR ;
ERROR: column "medium_1" of relation "metric" already exists
--github issue 3481
--GROUP BY error when setting compress_segmentby with an enum column
CREATE TYPE an_enum_type AS ENUM ('home', 'school');
CREATE TABLE test (
    time timestamp NOT NULL,
    enum_col an_enum_type NOT NULL
);
SELECT create_hypertable(
    'test', 'time'
);
 create_hypertable
--------------------
 (29,public,test,t)
(1 row)

INSERT INTO test VALUES ('2001-01-01 00:00', 'home'),
                        ('2001-01-01 01:00', 'school'),
                        ('2001-01-01 02:00', 'home');
--enable compression on enum_col
ALTER TABLE test SET (
    timescaledb.compress,
    timescaledb.compress_segmentby = 'enum_col',
    timescaledb.compress_orderby = 'time'
);
--below queries will pass before chunks are compressed
SELECT 1 FROM test GROUP BY enum_col;
 ?column?
----------
        1
        1
(2 rows)

EXPLAIN SELECT DISTINCT 1 FROM test;
                                    QUERY PLAN
----------------------------------------------------------------------------------
 Unique  (cost=0.00..50.80 rows=1 width=4)
   ->  Result  (cost=0.00..50.80 rows=2040 width=4)
         ->  Seq Scan on _hyper_29_19_chunk  (cost=0.00..30.40 rows=2040 width=0)
(3 rows)

--compress chunks
SELECT COMPRESS_CHUNK(X) FROM SHOW_CHUNKS('test') X;
              compress_chunk
------------------------------------------
 _timescaledb_internal._hyper_29_19_chunk
(1 row)

--below queries should pass after chunks are compressed
SELECT 1 FROM test GROUP BY enum_col;
 ?column?
----------
        1
        1
(2 rows)

EXPLAIN SELECT DISTINCT 1 FROM test;
                                               QUERY PLAN
------------------------------------------------------------------------------------------------------
 Unique  (cost=0.51..21.02 rows=1 width=4)
   ->  Result  (cost=0.51..21.02 rows=2000 width=4)
         ->  Custom Scan (DecompressChunk) on _hyper_29_19_chunk  (cost=0.51..1.02 rows=2000 width=0)
               ->  Seq Scan on compress_hyper_30_20_chunk  (cost=0.00..1.02 rows=2 width=4)
(4 rows)
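
Note the plan change: after compression, the same queries are answered through
a DecompressChunk custom scan over the compressed chunk
(compress_hyper_30_20_chunk); before this fix, planning them on the compressed
chunk raised the error reported in #3481 instead of producing this plan.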


@@ -312,4 +312,39 @@ ALTER TABLE metric ADD COLUMN IF NOT EXISTS "medium" VARCHAR ;
ALTER TABLE metric ADD COLUMN IF NOT EXISTS "medium" VARCHAR ;
-- also add one without IF NOT EXISTS
ALTER TABLE metric ADD COLUMN "medium_1" VARCHAR ;
ALTER TABLE metric ADD COLUMN "medium_1" VARCHAR ;
--github issue 3481
--GROUP BY error when setting compress_segmentby with an enum column
CREATE TYPE an_enum_type AS ENUM ('home', 'school');
CREATE TABLE test (
    time timestamp NOT NULL,
    enum_col an_enum_type NOT NULL
);
SELECT create_hypertable(
    'test', 'time'
);
INSERT INTO test VALUES ('2001-01-01 00:00', 'home'),
                        ('2001-01-01 01:00', 'school'),
                        ('2001-01-01 02:00', 'home');
--enable compression on enum_col
ALTER TABLE test SET (
    timescaledb.compress,
    timescaledb.compress_segmentby = 'enum_col',
    timescaledb.compress_orderby = 'time'
);
--below queries will pass before chunks are compressed
SELECT 1 FROM test GROUP BY enum_col;
EXPLAIN SELECT DISTINCT 1 FROM test;
--compress chunks
SELECT COMPRESS_CHUNK(X) FROM SHOW_CHUNKS('test') X;
--below queries should pass after chunks are compressed
SELECT 1 FROM test GROUP BY enum_col;
EXPLAIN SELECT DISTINCT 1 FROM test;