From df0acb161e322443ea59c5ccd0283b85432b1e88 Mon Sep 17 00:00:00 2001
From: Joshua Lockerman
Date: Tue, 4 Feb 2020 12:39:42 -0500
Subject: [PATCH] Fix GapFill with ReScan

The GapFill node was not fully reset on a ReScan, so when a GapFill
appeared within a NestedLoop, only the first iteration would return
results. This commit fixes this issue.
---
 CHANGELOG.md                  |  9 +++++----
 tsl/src/nodes/gapfill/exec.c  |  1 +
 tsl/test/expected/gapfill.out | 21 +++++++++++++++++++++
 tsl/test/sql/gapfill.sql      | 10 ++++++++++
 4 files changed, 37 insertions(+), 4 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 436c757bd..8b09f339a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,7 @@ accidentally triggering the load of a previous DB version.**
 **Bugfixes**
 * #1648 Drop chunks for materialized hypertable
 * #1665 Add ignore_invalidation_older_than to timescaledb_information.continuous_aggregates view
+* #1674 Fix time_bucket_gapfill's interaction with GROUP BY
 
 **Thanks**
 * @RJPhillips01 for reporting an issue with drop chunks.
@@ -26,17 +27,17 @@ The major new feature in this release allows users to keep the aggregated
 data in a continuous aggregate while dropping the raw data with drop_chunks.
 This allows users to save storage by keeping only the aggregates.
 
-The semantics of the refresh_lag parameter for continuous aggregates has 
+The semantics of the refresh_lag parameter for continuous aggregates has
 been changed to be relative to the current timestamp instead of the maximum
 value in the table. This change requires that an integer_now func be set on
 hypertables with integer-based time columns to use continuous aggregates on
 this table.
 
-We added a timescaledb.ignore_invalidation_older_than parameter for continuous 
-aggregates. This parameter accept a time-interval (e.g. 1 month). If set, 
+We added a timescaledb.ignore_invalidation_older_than parameter for continuous
+aggregates. This parameter accept a time-interval (e.g. 1 month). If set,
 it limits the amount of time for which to process invalidation. Thus, if
 timescaledb.ignore_invalidation_older_than = '1 month', then any modifications
-for data older than 1 month from the current timestamp at modification time may 
+for data older than 1 month from the current timestamp at modification time may
 not cause continuous aggregate to be updated. This limits the amount of work
 that a backfill can trigger. By default, all invalidations are processed.
 
diff --git a/tsl/src/nodes/gapfill/exec.c b/tsl/src/nodes/gapfill/exec.c
index bd45854ee..2d0a89ecc 100644
--- a/tsl/src/nodes/gapfill/exec.c
+++ b/tsl/src/nodes/gapfill/exec.c
@@ -728,6 +728,7 @@ gapfill_rescan(CustomScanState *node)
 	{
 		ExecReScan(linitial(node->custom_ps));
 	}
+	((GapFillState *) node)->state = FETCHED_NONE;
 }
 
 static void
diff --git a/tsl/test/expected/gapfill.out b/tsl/test/expected/gapfill.out
index 4d3b7d2b1..d55a19e28 100644
--- a/tsl/test/expected/gapfill.out
+++ b/tsl/test/expected/gapfill.out
@@ -2197,6 +2197,27 @@ GROUP BY 3,4;
   5 | 2 | red | 4
 (10 rows)
 
+-- test with Nested Loop
+SELECT l.id, bucket, data_value FROM
+ (VALUES (1), (2), (3), (4)) a(id)
+ INNER JOIN LATERAL (
+   SELECT b.id id, time_bucket_gapfill('1'::int, time, start=>'1'::int, finish=> '5'::int) bucket, locf(last(data, time)) data_value
+   FROM (VALUES (1, 1, 1), (1, 4, 4), (2, 1, -1), (2, 4, -4)) b(id, time, data)
+   WHERE a.id = b.id
+   GROUP BY b.id, bucket
+ ) as l on (true);
+ id | bucket | data_value 
+----+--------+------------
+  1 |      1 |          1
+  1 |      2 |          1
+  1 |      3 |          1
+  1 |      4 |          4
+  2 |      1 |         -1
+  2 |      2 |         -1
+  2 |      3 |         -1
+  2 |      4 |         -4
+(8 rows)
+
 -- test prepared statement
 PREPARE prep_gapfill AS
 SELECT
diff --git a/tsl/test/sql/gapfill.sql b/tsl/test/sql/gapfill.sql
index c6354000f..d881cf461 100644
--- a/tsl/test/sql/gapfill.sql
+++ b/tsl/test/sql/gapfill.sql
@@ -1323,6 +1323,16 @@ SELECT
 FROM (VALUES (1,'blue',1),(2,'red',2)) v(time,color,value)
 GROUP BY 3,4;
 
+-- test with Nested Loop
+SELECT l.id, bucket, data_value FROM
+ (VALUES (1), (2), (3), (4)) a(id)
+ INNER JOIN LATERAL (
+   SELECT b.id id, time_bucket_gapfill('1'::int, time, start=>'1'::int, finish=> '5'::int) bucket, locf(last(data, time)) data_value
+   FROM (VALUES (1, 1, 1), (1, 4, 4), (2, 1, -1), (2, 4, -4)) b(id, time, data)
+   WHERE a.id = b.id
+   GROUP BY b.id, bucket
+ ) as l on (true);
+
 -- test prepared statement
 PREPARE prep_gapfill AS
 SELECT
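
Note (not part of the patch): the one-line fix in exec.c is easier to read with the whole
rescan callback in one piece. The sketch below reassembles gapfill_rescan() as it should
look after the patch. It is a minimal sketch: the include paths (in particular the gapfill
header), the NIL guard around ExecReScan, and the static qualifier are assumptions inferred
from the hunk context rather than copied from exec.c; only the added state reset is verbatim
from the diff.

/*
 * Illustrative sketch of the patched rescan callback, not the exact source.
 * The includes and the NIL guard are assumptions; the FETCHED_NONE
 * assignment is the line added by this patch.
 */
#include "postgres.h"
#include "executor/executor.h"  /* ExecReScan() */
#include "nodes/execnodes.h"    /* CustomScanState */
#include "nodes/pg_list.h"      /* NIL, linitial() */

#include "nodes/gapfill/exec.h" /* GapFillState, FETCHED_NONE (assumed header path) */

static void
gapfill_rescan(CustomScanState *node)
{
	/* Propagate the rescan to the child plan, if one is attached. */
	if (node->custom_ps != NIL)
	{
		ExecReScan(linitial(node->custom_ps));
	}

	/*
	 * Reset the gapfill state machine. Without this, the node keeps the
	 * state left over from the previous scan, so the second and later
	 * iterations of an enclosing NestedLoop return no rows.
	 */
	((GapFillState *) node)->state = FETCHED_NONE;
}

With the state back at FETCHED_NONE, the next execution fetches from the freshly rescanned
child instead of assuming it has already finished, which is exactly what the new
NestedLoop/LATERAL test above exercises.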