From 038b5757ac9645d5daeada6da725618c6a0f1987 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabr=C3=ADzio=20de=20Royes=20Mello?= Date: Mon, 1 Jul 2024 14:15:10 -0300 Subject: [PATCH] Use processed group clause in PG16 (take 2) In #6377 we fixed an `ORDER BY/GROUP BY expression not found in targetlist` error by using the `root->processed_groupClause` instead of `parse->groupClause` due to an optimization introduced in PG16 that removes redundant grouping and distinct columns. But it looks like we didn't change all the necessary places, especially our HashAggregate optimization. --- src/planner/add_hashagg.c | 12 +++++++++ tsl/test/expected/cagg_joins.out | 43 ++++++++++++++++++++++++++++++++ tsl/test/sql/cagg_joins.sql | 40 +++++++++++++++++++++++++++++ 3 files changed, 95 insertions(+) diff --git a/src/planner/add_hashagg.c b/src/planner/add_hashagg.c index 5701b6516..34c2a5b2d 100644 --- a/src/planner/add_hashagg.c +++ b/src/planner/add_hashagg.c @@ -88,7 +88,11 @@ plan_add_parallel_hashagg(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo * partial_grouping_target, AGG_HASHED, AGGSPLIT_INITIAL_SERIAL, +#if PG16_LT parse->groupClause, +#else + root->processed_groupClause, +#endif NIL, &agg_partial_costs, d_num_partial_groups)); @@ -113,7 +117,11 @@ plan_add_parallel_hashagg(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo * target, AGG_HASHED, AGGSPLIT_FINAL_DESERIAL, +#if PG16_LT parse->groupClause, +#else + root->processed_groupClause, +#endif (List *) parse->havingQual, &agg_final_costs, d_num_groups)); @@ -197,7 +205,11 @@ ts_plan_add_hashagg(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo *output target, AGG_HASHED, AGGSPLIT_SIMPLE, +#if PG16_LT parse->groupClause, +#else + root->processed_groupClause, +#endif (List *) parse->havingQual, &agg_costs, d_num_groups)); diff --git a/tsl/test/expected/cagg_joins.out b/tsl/test/expected/cagg_joins.out index d40dad84b..5f097fbcb 100644 --- a/tsl/test/expected/cagg_joins.out +++ b/tsl/test/expected/cagg_joins.out @@ -1140,3 
+1140,46 @@ DROP TABLE devices CASCADE; NOTICE: drop cascades to view devices_view DROP TABLE conditions_dup CASCADE; DROP TABLE devices_dup CASCADE; +\set VERBOSITY default +-- SDC #1859 +CREATE TABLE conditions( + time TIMESTAMPTZ NOT NULL, + value FLOAT8 NOT NULL, + device_id int NOT NULL +); +SELECT create_hypertable('conditions', 'time', chunk_time_interval => INTERVAL '1 day'); + create_hypertable +-------------------------- + (24,public,conditions,t) +(1 row) + +INSERT INTO conditions (time, value, device_id) +SELECT t, 1, 1 FROM generate_series('2024-01-01 00:00:00-00'::timestamptz, '2024-12-31 00:00:00-00'::timestamptz, '1 hour'::interval) AS t; +CREATE TABLE devices (device_id int not null, name text, location text); +INSERT INTO devices values (1, 'thermo_1', 'Moscow'), (2, 'thermo_2', 'Berlin'),(3, 'thermo_3', 'London'),(4, 'thermo_4', 'Stockholm'); +CREATE MATERIALIZED VIEW cagg_realtime +WITH (timescaledb.continuous, timescaledb.materialized_only = FALSE) AS +SELECT time_bucket(INTERVAL '1 day', time) AS bucket, + MAX(value), + MIN(value), + AVG(value), + devices.name, + devices.location +FROM conditions +JOIN devices ON conditions.device_id = devices.device_id +GROUP BY name, location, bucket +WITH NO DATA; +\c :TEST_DBNAME :ROLE_CLUSTER_SUPERUSER +VACUUM ANALYZE; +\c :TEST_DBNAME :ROLE_DEFAULT_PERM_USER +SELECT a.* FROM cagg_realtime a WHERE a.location = 'Moscow' ORDER BY bucket LIMIT 2; + bucket | max | min | avg | name | location +------------------------------+-----+-----+-----+----------+---------- + Sun Dec 31 16:00:00 2023 PST | 1 | 1 | 1 | thermo_1 | Moscow + Mon Jan 01 16:00:00 2024 PST | 1 | 1 | 1 | thermo_1 | Moscow +(2 rows) + +\set VERBOSITY terse +DROP TABLE conditions CASCADE; +NOTICE: drop cascades to 3 other objects +DROP TABLE devices CASCADE; diff --git a/tsl/test/sql/cagg_joins.sql b/tsl/test/sql/cagg_joins.sql index c34a7f95c..405985280 100644 --- a/tsl/test/sql/cagg_joins.sql +++ b/tsl/test/sql/cagg_joins.sql @@ -552,3 +552,43 
@@ DROP TABLE conditions CASCADE; DROP TABLE devices CASCADE; DROP TABLE conditions_dup CASCADE; DROP TABLE devices_dup CASCADE; + +\set VERBOSITY default + +-- SDC #1859 +CREATE TABLE conditions( + time TIMESTAMPTZ NOT NULL, + value FLOAT8 NOT NULL, + device_id int NOT NULL +); + +SELECT create_hypertable('conditions', 'time', chunk_time_interval => INTERVAL '1 day'); + +INSERT INTO conditions (time, value, device_id) +SELECT t, 1, 1 FROM generate_series('2024-01-01 00:00:00-00'::timestamptz, '2024-12-31 00:00:00-00'::timestamptz, '1 hour'::interval) AS t; + +CREATE TABLE devices (device_id int not null, name text, location text); +INSERT INTO devices values (1, 'thermo_1', 'Moscow'), (2, 'thermo_2', 'Berlin'),(3, 'thermo_3', 'London'),(4, 'thermo_4', 'Stockholm'); + +CREATE MATERIALIZED VIEW cagg_realtime +WITH (timescaledb.continuous, timescaledb.materialized_only = FALSE) AS +SELECT time_bucket(INTERVAL '1 day', time) AS bucket, + MAX(value), + MIN(value), + AVG(value), + devices.name, + devices.location +FROM conditions +JOIN devices ON conditions.device_id = devices.device_id +GROUP BY name, location, bucket +WITH NO DATA; + +\c :TEST_DBNAME :ROLE_CLUSTER_SUPERUSER +VACUUM ANALYZE; +\c :TEST_DBNAME :ROLE_DEFAULT_PERM_USER + +SELECT a.* FROM cagg_realtime a WHERE a.location = 'Moscow' ORDER BY bucket LIMIT 2; + +\set VERBOSITY terse +DROP TABLE conditions CASCADE; +DROP TABLE devices CASCADE;