From df0acb161e322443ea59c5ccd0283b85432b1e88 Mon Sep 17 00:00:00 2001
From: Joshua Lockerman
Date: Tue, 4 Feb 2020 12:39:42 -0500
Subject: [PATCH] Fix GapFill with ReScan

The GapFill node was not fully reset on a ReScan, so when a GapFill
appeared within a NestedLoop, only the first iteration would return
results. This commit fixes this issue.
---
 CHANGELOG.md                  |  9 +++++----
 tsl/src/nodes/gapfill/exec.c  |  1 +
 tsl/test/expected/gapfill.out | 21 +++++++++++++++++++++
 tsl/test/sql/gapfill.sql      | 10 ++++++++++
 4 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 436c757bd..8b09f339a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,7 @@ accidentally triggering the load of a previous DB version.**
 **Bugfixes**
 * #1648 Drop chunks for materialized hypertable
 * #1665 Add ignore_invalidation_older_than to timescaledb_information.continuous_aggregates view
+* #1674 Fix time_bucket_gapfill's interaction with GROUP BY
 
 **Thanks**
 * @RJPhillips01 for reporting an issue with drop chunks.
@@ -26,17 +27,17 @@ The major new feature in this release allows users to keep the aggregated
 data in a continuous aggregate while dropping the raw data with drop_chunks.
 This allows users to save storage by keeping only the aggregates.
 
-The semantics of the refresh_lag parameter for continuous aggregates has 
+The semantics of the refresh_lag parameter for continuous aggregates has
 been changed to be relative to the current timestamp instead of the maximum
 value in the table. This change requires that an integer_now func be set on
 hypertables with integer-based time columns to use continuous aggregates on
 this table.
 
-We added a timescaledb.ignore_invalidation_older_than parameter for continuous 
-aggregates. This parameter accept a time-interval (e.g. 1 month). If set, 
+We added a timescaledb.ignore_invalidation_older_than parameter for continuous
+aggregates. This parameter accept a time-interval (e.g. 1 month). If set,
 it limits the amount of time for which to process invalidation. Thus, if
 timescaledb.ignore_invalidation_older_than = '1 month', then any modifications
-for data older than 1 month from the current timestamp at modification time may 
+for data older than 1 month from the current timestamp at modification time may
 not cause continuous aggregate to be updated. This limits the amount of work
 that a backfill can trigger. By default, all invalidations are processed.
 
diff --git a/tsl/src/nodes/gapfill/exec.c b/tsl/src/nodes/gapfill/exec.c
index bd45854ee..2d0a89ecc 100644
--- a/tsl/src/nodes/gapfill/exec.c
+++ b/tsl/src/nodes/gapfill/exec.c
@@ -728,6 +728,7 @@ gapfill_rescan(CustomScanState *node)
 	{
 		ExecReScan(linitial(node->custom_ps));
 	}
+	((GapFillState *) node)->state = FETCHED_NONE;
 }
 
 static void
diff --git a/tsl/test/expected/gapfill.out b/tsl/test/expected/gapfill.out
index 4d3b7d2b1..d55a19e28 100644
--- a/tsl/test/expected/gapfill.out
+++ b/tsl/test/expected/gapfill.out
@@ -2197,6 +2197,27 @@ GROUP BY 3,4;
   5 | 2 | red | 4
 (10 rows)
 
+-- test with Nested Loop
+SELECT l.id, bucket, data_value FROM
+ (VALUES (1), (2), (3), (4)) a(id)
+ INNER JOIN LATERAL (
+   SELECT b.id id, time_bucket_gapfill('1'::int, time, start=>'1'::int, finish=> '5'::int) bucket, locf(last(data, time)) data_value
+   FROM (VALUES (1, 1, 1), (1, 4, 4), (2, 1, -1), (2, 4, -4)) b(id, time, data)
+   WHERE a.id = b.id
+   GROUP BY b.id, bucket
+ ) as l on (true);
+ id | bucket | data_value 
+----+--------+------------
+  1 |      1 |          1
+  1 |      2 |          1
+  1 |      3 |          1
+  1 |      4 |          4
+  2 |      1 |         -1
+  2 |      2 |         -1
+  2 |      3 |         -1
+  2 |      4 |         -4
+(8 rows)
+
 -- test prepared statement
 PREPARE prep_gapfill AS
 SELECT
diff --git a/tsl/test/sql/gapfill.sql b/tsl/test/sql/gapfill.sql
index c6354000f..d881cf461 100644
--- a/tsl/test/sql/gapfill.sql
+++ b/tsl/test/sql/gapfill.sql
@@ -1323,6 +1323,16 @@ SELECT
 FROM (VALUES (1,'blue',1),(2,'red',2)) v(time,color,value)
 GROUP BY 3,4;
 
+-- test with Nested Loop
+SELECT l.id, bucket, data_value FROM
+ (VALUES (1), (2), (3), (4)) a(id)
+ INNER JOIN LATERAL (
+   SELECT b.id id, time_bucket_gapfill('1'::int, time, start=>'1'::int, finish=> '5'::int) bucket, locf(last(data, time)) data_value
+   FROM (VALUES (1, 1, 1), (1, 4, 4), (2, 1, -1), (2, 4, -4)) b(id, time, data)
+   WHERE a.id = b.id
+   GROUP BY b.id, bucket
+ ) as l on (true);
+
 -- test prepared statement
 PREPARE prep_gapfill AS
 SELECT
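
Note (not part of the patch): the one-line fix in exec.c is easier to read with the whole
rescan callback in one piece. The sketch below reassembles gapfill_rescan() as it should
look after the patch. It is a minimal sketch: the include paths (in particular the gapfill
header), the NIL guard around ExecReScan, and the static qualifier are assumptions inferred
from the hunk context rather than copied from exec.c; only the added state reset is verbatim
from the diff.

/*
 * Illustrative sketch of the patched rescan callback, not the exact source.
 * The includes and the NIL guard are assumptions; the FETCHED_NONE
 * assignment is the line added by this patch.
 */
#include "postgres.h"
#include "executor/executor.h"  /* ExecReScan() */
#include "nodes/execnodes.h"    /* CustomScanState */
#include "nodes/pg_list.h"      /* NIL, linitial() */

#include "nodes/gapfill/exec.h" /* GapFillState, FETCHED_NONE (assumed header path) */

static void
gapfill_rescan(CustomScanState *node)
{
	/* Propagate the rescan to the child plan, if one is attached. */
	if (node->custom_ps != NIL)
	{
		ExecReScan(linitial(node->custom_ps));
	}

	/*
	 * Reset the gapfill state machine. Without this, the node keeps the
	 * state left over from the previous scan, so the second and later
	 * iterations of an enclosing NestedLoop return no rows.
	 */
	((GapFillState *) node)->state = FETCHED_NONE;
}

With the state back at FETCHED_NONE, the next execution fetches from the freshly rescanned
child instead of assuming it has already finished, which is exactly what the new
NestedLoop/LATERAL test above exercises.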