From eaf3a38fe9553659e515fac72aaad86cf1a06d1e Mon Sep 17 00:00:00 2001 From: Rafia Sabih Date: Fri, 28 Jan 2022 10:00:11 +0100 Subject: [PATCH] Pushdown of gapfill to data nodes Allow the calls of time_bucket_gapfill to be executed at the data nodes for improved query performance. With this, time_bucket_gapfill is pushed to data nodes in the following conditions, 1. when only one data node has all the chunks 2. when space dimension does not overlap across data nodes 3. when group-by matches space dimension --- src/hypertable.h | 1 + tsl/src/fdw/data_node_scan_plan.c | 23 +- tsl/src/fdw/deparse.c | 15 +- tsl/src/fdw/fdw.c | 10 +- tsl/src/fdw/relinfo.c | 4 +- tsl/src/fdw/relinfo.h | 4 +- tsl/src/fdw/scan_plan.c | 20 ++ tsl/src/nodes/gapfill/planner.c | 95 +++++ tsl/src/nodes/gapfill/planner.h | 18 + tsl/src/planner.c | 6 +- tsl/test/shared/expected/dist_gapfill.out | 6 +- .../expected/dist_gapfill_pushdown-12.out | 327 ++++++++++++++++++ .../expected/dist_gapfill_pushdown-13.out | 308 +++++++++++++++++ .../expected/dist_gapfill_pushdown-14.out | 308 +++++++++++++++++ tsl/test/shared/sql/CMakeLists.txt | 3 +- .../shared/sql/dist_gapfill_pushdown.sql.in | 62 ++++ .../shared/sql/include/dist_gapfill_query.sql | 6 +- tsl/test/shared/sql/include/shared_setup.sql | 35 ++ 18 files changed, 1232 insertions(+), 19 deletions(-) create mode 100644 tsl/test/shared/expected/dist_gapfill_pushdown-12.out create mode 100644 tsl/test/shared/expected/dist_gapfill_pushdown-13.out create mode 100644 tsl/test/shared/expected/dist_gapfill_pushdown-14.out create mode 100644 tsl/test/shared/sql/dist_gapfill_pushdown.sql.in diff --git a/src/hypertable.h b/src/hypertable.h index 8213a0538..d3937518e 100644 --- a/src/hypertable.h +++ b/src/hypertable.h @@ -52,6 +52,7 @@ typedef struct Hypertable Oid chunk_sizing_func; Hyperspace *space; SubspaceStore *chunk_cache; + bool push_gapfill; /* * Allows restricting the data nodes to use for the hypertable. 
Default is to * use all available data nodes. diff --git a/tsl/src/fdw/data_node_scan_plan.c b/tsl/src/fdw/data_node_scan_plan.c index 1c0ac86e8..ec0b6e27f 100644 --- a/tsl/src/fdw/data_node_scan_plan.c +++ b/tsl/src/fdw/data_node_scan_plan.c @@ -40,6 +40,8 @@ #include "data_node_scan_exec.h" #include "fdw_utils.h" +#include "nodes/gapfill/planner.h" + /* * DataNodeScan is a custom scan implementation for scanning hypertables on * remote data nodes instead of scanning individual remote chunks. @@ -373,7 +375,7 @@ force_group_by_push_down(PlannerInfo *root, RelOptInfo *hyper_rel) */ static void push_down_group_bys(PlannerInfo *root, RelOptInfo *hyper_rel, Hyperspace *hs, - DataNodeChunkAssignments *scas) + DataNodeChunkAssignments *scas, bool gapfill_safe) { const Dimension *dim; bool overlaps; @@ -414,6 +416,9 @@ push_down_group_bys(PlannerInfo *root, RelOptInfo *hyper_rel, Hyperspace *hs, Assert(NULL != dim); hyper_rel->partexprs[0] = ts_dimension_get_partexprs(dim, hyper_rel->relid); hyper_rel->part_scheme->partnatts = 1; + + if (gapfill_safe) + force_group_by_push_down(root, hyper_rel); } } @@ -440,6 +445,7 @@ data_node_scan_add_node_paths(PlannerInfo *root, RelOptInfo *hyper_rel) int ndata_node_rels; DataNodeChunkAssignments scas; int i; + bool gapfill_safe = false; Assert(NULL != ht); @@ -459,8 +465,11 @@ data_node_scan_add_node_paths(PlannerInfo *root, RelOptInfo *hyper_rel) /* Assign chunks to data nodes */ data_node_chunk_assignment_assign_chunks(&scas, chunk_rels, nchunk_rels); + /* Check if we can push down gapfill to data nodes */ + gapfill_safe = pushdown_gapfill(root, hyper_rel, ht->space, &scas); + /* Try to push down GROUP BY expressions and bucketing, if possible */ - push_down_group_bys(root, hyper_rel, ht->space, &scas); + push_down_group_bys(root, hyper_rel, ht->space, &scas, gapfill_safe); /* * Create estimates and paths for each data node rel based on data node chunk @@ -487,10 +496,18 @@ data_node_scan_add_node_paths(PlannerInfo *root, 
RelOptInfo *hyper_rel) data_node_rel, data_node_rel->serverid, hyper_rte->relid, - TS_FDW_RELINFO_HYPERTABLE_DATA_NODE); + TS_FDW_RELINFO_HYPERTABLE_DATA_NODE, + gapfill_safe); fpinfo->sca = sca; + /* + * Since pushdown_gapfill cannot be called everywhere (the scas are not + * always available), remember here whether gapfill is safe to push down + * for this relation, for later use e.g. in add_foreign_grouping_paths. + */ + ht->push_gapfill = gapfill_safe; + if (!bms_is_empty(sca->chunk_relids)) { add_data_node_scan_paths(root, data_node_rel); diff --git a/tsl/src/fdw/deparse.c b/tsl/src/fdw/deparse.c index 3b1704467..9acb04cd1 100644 --- a/tsl/src/fdw/deparse.c +++ b/tsl/src/fdw/deparse.c @@ -69,6 +69,7 @@ #include #include +#include #include #include "relinfo.h" @@ -105,6 +106,7 @@ typedef struct deparse_expr_cxt StringInfo buf; /* output buffer to append to */ List **params_list; /* exprs that will become remote Params */ DataNodeChunkAssignment *sca; + bool has_gapfill; } deparse_expr_cxt; #define REL_ALIAS_PREFIX "r" @@ -407,10 +409,8 @@ is_foreign_expr(PlannerInfo *root, RelOptInfo *baserel, Expr *expr) if (!foreign_expr_walker((Node *) expr, &glob_cxt)) return false; - /* - * It is not supported to execute time_bucket_gapfill on data node. - */ - if (gapfill_in_expression(expr)) + /* It is safe to push down gapfill only in limited cases */ + if (gapfill_in_expression(expr) && !fpinfo->pushdown_gapfill) return false; /* @@ -808,6 +808,7 @@ deparseSelectStmtForRel(StringInfo buf, PlannerInfo *root, RelOptInfo *rel, List context.scanrel = IS_UPPER_REL(rel) ?
fpinfo->outerrel : rel; context.params_list = params_list; context.sca = sca; + context.has_gapfill = false; /* Construct SELECT clause */ deparseSelectSql(tlist, is_subquery, retrieved_attrs, &context, pathkeys); @@ -2091,6 +2092,8 @@ deparseExpr(Expr *node, deparse_expr_cxt *context) deparseSubscriptingRef(castNode(SubscriptingRef, node), context); break; case T_FuncExpr: + if (gapfill_in_expression(node)) + context->has_gapfill = true; deparseFuncExpr(castNode(FuncExpr, node), context); break; case T_OpExpr: @@ -2707,7 +2710,7 @@ deparseAggref(Aggref *node, deparse_expr_cxt *context) use_variadic = node->aggvariadic; /* Find aggregate name from aggfnoid which is a pg_proc entry */ - if (partial_agg) + if (!context->has_gapfill && partial_agg) appendStringInfoString(buf, INTERNAL_SCHEMA_NAME "." PARTIALIZE_FUNC_NAME "("); appendFunctionName(node->aggfnoid, context); @@ -2783,7 +2786,7 @@ deparseAggref(Aggref *node, deparse_expr_cxt *context) deparseExpr((Expr *) node->aggfilter, context); } - appendStringInfoString(buf, partial_agg ? "))" : ")"); + appendStringInfoString(buf, !context->has_gapfill && partial_agg ? "))" : ")"); } /* diff --git a/tsl/src/fdw/fdw.c b/tsl/src/fdw/fdw.c index 5cf8c4a9c..0dc7e2842 100644 --- a/tsl/src/fdw/fdw.c +++ b/tsl/src/fdw/fdw.c @@ -65,7 +65,12 @@ get_foreign_rel_size(PlannerInfo *root, RelOptInfo *baserel, Oid foreigntableid) * kind of regular table that will ever have this callback called on it. 
*/ if (RELKIND_RELATION == rte->relkind) { - fdw_relinfo_create(root, baserel, InvalidOid, foreigntableid, TS_FDW_RELINFO_HYPERTABLE); + fdw_relinfo_create(root, + baserel, + InvalidOid, + foreigntableid, + TS_FDW_RELINFO_HYPERTABLE, + false); } else { @@ -75,7 +80,8 @@ get_foreign_rel_size(PlannerInfo *root, RelOptInfo *baserel, Oid foreigntableid) baserel, table->serverid, foreigntableid, - TS_FDW_RELINFO_FOREIGN_TABLE); + TS_FDW_RELINFO_FOREIGN_TABLE, + false); apply_table_options(table, fdw_relinfo_get(baserel)); } diff --git a/tsl/src/fdw/relinfo.c b/tsl/src/fdw/relinfo.c index bcaacafce..c5c52bff1 100644 --- a/tsl/src/fdw/relinfo.c +++ b/tsl/src/fdw/relinfo.c @@ -373,7 +373,7 @@ estimate_chunk_size(PlannerInfo *root, RelOptInfo *chunk_rel) TsFdwRelInfo * fdw_relinfo_create(PlannerInfo *root, RelOptInfo *rel, Oid server_oid, Oid local_table_id, - TsFdwRelInfoType type) + TsFdwRelInfoType type, bool gapfill_safe) { TsFdwRelInfo *fpinfo; ListCell *lc; @@ -406,6 +406,8 @@ fdw_relinfo_create(PlannerInfo *root, RelOptInfo *rel, Oid server_oid, Oid local if (*refname && strcmp(refname, get_rel_name(rte->relid)) != 0) appendStringInfo(fpinfo->relation_name, " %s", quote_identifier(rte->eref->aliasname)); + fpinfo->pushdown_gapfill = gapfill_safe; + if (type == TS_FDW_RELINFO_HYPERTABLE) { /* nothing more to do for hypertables */ diff --git a/tsl/src/fdw/relinfo.h b/tsl/src/fdw/relinfo.h index 2d8544e8a..bd848e3d8 100644 --- a/tsl/src/fdw/relinfo.h +++ b/tsl/src/fdw/relinfo.h @@ -49,6 +49,7 @@ typedef struct TsFdwRelInfo * foreign scan. */ bool pushdown_safe; + bool pushdown_gapfill; /* * Restriction clauses, divided into safe and unsafe to pushdown subsets. 
@@ -146,7 +147,8 @@ typedef struct TsFdwRelInfo } TsFdwRelInfo; extern TsFdwRelInfo *fdw_relinfo_create(PlannerInfo *root, RelOptInfo *rel, Oid server_oid, - Oid local_table_id, TsFdwRelInfoType type); + Oid local_table_id, TsFdwRelInfoType type, + bool gapfill_safe); extern TsFdwRelInfo *fdw_relinfo_alloc_or_get(RelOptInfo *rel); extern TsFdwRelInfo *fdw_relinfo_get(RelOptInfo *rel); diff --git a/tsl/src/fdw/scan_plan.c b/tsl/src/fdw/scan_plan.c index 2fb073229..a2a690c7c 100644 --- a/tsl/src/fdw/scan_plan.c +++ b/tsl/src/fdw/scan_plan.c @@ -905,6 +905,9 @@ add_foreign_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo fpinfo->sca = ifpinfo->sca; merge_fdw_options(fpinfo, ifpinfo, NULL); + if (ifpinfo->pushdown_gapfill) + fpinfo->pushdown_gapfill = true; + /* * Assess if it is safe to push down aggregation and grouping. * @@ -923,6 +926,23 @@ add_foreign_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo fpinfo->startup_cost = startup_cost; fpinfo->total_cost = total_cost; + if (ifpinfo->pushdown_gapfill) + { + /* + * If pushing down gapfill is possible, also check whether it is + * actually beneficial: gapfill can create additional tuples, and + * all of them have to be transferred back to the access node. + * Pushing gapfill down can still make sense because aggregation + * over the gapfilled data can then also happen on the data nodes + * themselves, so only skip the pushdown when it would produce a + * substantially larger number of tuples. + */ + if (10 * ifpinfo->rows > fpinfo->rows) + { + fpinfo->pushdown_gapfill = false; + ifpinfo->pushdown_gapfill = false; + } + } /* Create and add path to the grouping relation. 
*/ grouppath = (Path *) create_path(root, grouped_rel, diff --git a/tsl/src/nodes/gapfill/planner.c b/tsl/src/nodes/gapfill/planner.c index 1d4345a15..cc866aebe 100644 --- a/tsl/src/nodes/gapfill/planner.c +++ b/tsl/src/nodes/gapfill/planner.c @@ -21,6 +21,8 @@ #include "nodes/gapfill/gapfill.h" #include "nodes/gapfill/planner.h" #include "nodes/gapfill/exec.h" +#include "func_cache.h" +#include "estimate.h" static CustomScanMethods gapfill_plan_methods = { .CustomName = "GapFill", @@ -573,3 +575,96 @@ gapfill_adjust_window_targetlist(PlannerInfo *root, RelOptInfo *input_rel, RelOp } } } + +/* + * Check if it is safe to push down gapfill to data nodes. + * Currently, we allow only in the following cases, + * + * 1. when only one data node has all the chunks + * 2. when relation has at least one closed dimension and chunks + * do not overlap across data nodes. + * 3. when group by matches space dimension + * and is not an expression of space dimension. + */ +bool +pushdown_gapfill(PlannerInfo *root, RelOptInfo *hyper_rel, Hyperspace *hs, + DataNodeChunkAssignments *scas) +{ + const Dimension *dim; + ListCell *lc; + TargetEntry *tle; + bool space_dim_in_group_by = false; + + Query *parse = root->parse; + gapfill_walker_context context = { .call.node = NULL, .count = 0 }; + + if (CMD_SELECT != parse->commandType || parse->groupClause == NIL) + return false; + + if (!enable_partitionwise_aggregate) + return false; + /* + * Only check for queries with gapfill call. + */ + gapfill_function_walker((Node *) parse->targetList, &context); + + if (context.count == 0) + return false; + + if (context.count > 1) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("multiple time_bucket_gapfill calls not allowed"))); + + Assert(hs->num_dimensions >= 1); + + /* Avoid push down of gapfill when window funcs are present */ + if (parse->hasWindowFuncs) + return false; + + /* + * Check for special case when there is only one data node with chunks. 
This + * can always be safely pushed down irrespective of partitioning + */ + if (scas->num_nodes_with_chunks == 1) + return true; + + /* + * Get first closed dimension that we use for assigning chunks to + * data nodes. If there is no closed dimension, then pushing gapfill + * to data nodes is not possible. + */ + dim = hyperspace_get_closed_dimension(hs, 0); + + if (dim == NULL) + return false; + else + { + if (parse->groupClause) + { + foreach (lc, parse->groupClause) + { + /* + * Check if the group by matches dimension and + * group by clause has exact dimension and not + * an expression of that attribute. + */ + SortGroupClause *sort = (SortGroupClause *) lfirst(lc); + tle = get_sortgroupref_tle(sort->tleSortGroupRef, parse->targetList); + + if (tle->resno == dim->column_attno) + { + space_dim_in_group_by = true; + + if (IsA(tle->expr, Var)) + break; + else + return false; + } + } + } + if (!space_dim_in_group_by) + return false; + } + return !data_node_chunk_assignments_are_overlapping(scas, dim->fd.id); +} diff --git a/tsl/src/nodes/gapfill/planner.h b/tsl/src/nodes/gapfill/planner.h index 22176e604..a5bccc918 100644 --- a/tsl/src/nodes/gapfill/planner.h +++ b/tsl/src/nodes/gapfill/planner.h @@ -8,10 +8,28 @@ #include +#include "fdw/data_node_scan_plan.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "fdw/data_node_chunk_assignment.h" +#include "fdw/scan_plan.h" +#include "fdw/data_node_scan_plan.h" +#include "fdw/data_node_scan_exec.h" + bool gapfill_in_expression(Expr *node); void plan_add_gapfill(PlannerInfo *root, RelOptInfo *group_rel); void gapfill_adjust_window_targetlist(PlannerInfo *root, RelOptInfo *input_rel, RelOptInfo *output_rel); +bool pushdown_gapfill(PlannerInfo *root, RelOptInfo *hyper_rel, Hyperspace *hs, + DataNodeChunkAssignments *scas); typedef struct GapFillPath { diff --git a/tsl/src/planner.c b/tsl/src/planner.c index fbf2285ec..e53095059 100644 --- a/tsl/src/planner.c +++ 
b/tsl/src/planner.c @@ -71,7 +71,11 @@ tsl_create_upper_paths_hook(PlannerInfo *root, UpperRelationKind stage, RelOptIn { case UPPERREL_GROUP_AGG: if (input_reltype != TS_REL_HYPERTABLE_CHILD) - plan_add_gapfill(root, output_rel); + { + /* Avoid adding gapfill node to the access node if it is pushed to data node */ + if (!dist_ht || !ht->push_gapfill) + plan_add_gapfill(root, output_rel); + } break; case UPPERREL_WINDOW: if (IsA(linitial(input_rel->pathlist), CustomPath)) diff --git a/tsl/test/shared/expected/dist_gapfill.out b/tsl/test/shared/expected/dist_gapfill.out index b0447c6b4..a75946729 100644 --- a/tsl/test/shared/expected/dist_gapfill.out +++ b/tsl/test/shared/expected/dist_gapfill.out @@ -36,7 +36,8 @@ SELECT time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-01 18:0 first(value, time), avg(value) FROM :CONDITIONS -GROUP BY 1,2; +GROUP BY 1,2 +ORDER BY 2,1; time_bucket_gapfill | device | first | avg ------------------------------+--------+----------+---------- Sun Jan 01 04:00:00 2017 PST | 1 | 1.2 | 1.2 @@ -86,7 +87,8 @@ SELECT time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-01 18:0 first(value, time), avg(value) FROM :CONDITIONS -GROUP BY 2,1; +GROUP BY 2,1 +ORDER BY 2,1; time_bucket_gapfill | device | first | avg ------------------------------+--------+----------+---------- Sun Jan 01 04:00:00 2017 PST | 1 | 1.2 | 1.2 diff --git a/tsl/test/shared/expected/dist_gapfill_pushdown-12.out b/tsl/test/shared/expected/dist_gapfill_pushdown-12.out new file mode 100644 index 000000000..c8b47678e --- /dev/null +++ b/tsl/test/shared/expected/dist_gapfill_pushdown-12.out @@ -0,0 +1,327 @@ +-- This file and its contents are licensed under the Timescale License. +-- Please see the included NOTICE for copyright information and +-- LICENSE-TIMESCALE for a copy of the license. 
+\set ON_ERROR_STOP 0 +SET enable_partitionwise_aggregate = 'on'; +SET timescaledb.enable_remote_explain = true; +-- Cases where gapfill is pushed down to data-nodes +EXPLAIN (VERBOSE, COSTS OFF) SELECT time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-01 18:00'), + name, + first(value, time), + avg(value) +FROM test_gapfill +GROUP BY 1,2; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Custom Scan (AsyncAppend) + Output: (time_bucket_gapfill('@ 3 hours'::interval, "time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), name, (first(value, "time")), (avg(value)) + -> Append + -> Custom Scan (DataNodeScan) + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), test_gapfill.name, (first(test_gapfill.value, test_gapfill."time")), (avg(test_gapfill.value)) + Relations: Aggregate on (public.test_gapfill) + Data node: data_node_1 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk + Remote SQL: SELECT public.time_bucket_gapfill('@ 3 hours'::interval, "time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone), name, public.first(value, "time"), avg(value) FROM public.test_gapfill WHERE _timescaledb_internal.chunks_in(public.test_gapfill.*, ARRAY[28]) GROUP BY 1, 2 + Remote EXPLAIN: + Custom Scan (GapFill) + Output: (public.time_bucket_gapfill('03:00:00'::interval, "time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 
18:00:00'::timestamp without time zone)), name, (public.first(value, "time")), (avg(value)) + -> Sort + Output: (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), _dist_hyper_X_X_chunk.name, (public.first(_dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time")), (avg(_dist_hyper_X_X_chunk.value)) + Sort Key: _dist_hyper_X_X_chunk.name, (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)) + -> HashAggregate + Output: (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), _dist_hyper_X_X_chunk.name, public.first(_dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time"), avg(_dist_hyper_X_X_chunk.value) + Group Key: public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone), _dist_hyper_X_X_chunk.name + -> Result + Output: public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone), _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time" + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + + -> Custom Scan (DataNodeScan) + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_1."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), test_gapfill_1.name, (first(test_gapfill_1.value, test_gapfill_1."time")), 
(avg(test_gapfill_1.value)) + Relations: Aggregate on (public.test_gapfill) + Data node: data_node_3 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk + Remote SQL: SELECT public.time_bucket_gapfill('@ 3 hours'::interval, "time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone), name, public.first(value, "time"), avg(value) FROM public.test_gapfill WHERE _timescaledb_internal.chunks_in(public.test_gapfill.*, ARRAY[23, 24, 25]) GROUP BY 1, 2 + Remote EXPLAIN: + Custom Scan (GapFill) + Output: (public.time_bucket_gapfill('03:00:00'::interval, "time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), name, (public.first(value, "time")), (avg(value)) + -> Sort + Output: (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), _dist_hyper_X_X_chunk.name, (public.first(_dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time")), (avg(_dist_hyper_X_X_chunk.value)) + Sort Key: _dist_hyper_X_X_chunk.name, (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)) + -> HashAggregate + Output: (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), _dist_hyper_X_X_chunk.name, public.first(_dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time"), avg(_dist_hyper_X_X_chunk.value) + Group Key: public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone), _dist_hyper_X_X_chunk.name + -> Result + Output: 
public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone), _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time" + -> Append + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + +(50 rows) + +EXPLAIN (VERBOSE, COSTS OFF) SELECT time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-01 18:00'), + name, + first(value, time), + avg(value) +FROM test_gapfill +GROUP BY 2,1; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Custom Scan (AsyncAppend) + Output: (time_bucket_gapfill('@ 3 hours'::interval, "time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), name, (first(value, "time")), (avg(value)) + -> Append + -> Custom Scan (DataNodeScan) + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), test_gapfill.name, (first(test_gapfill.value, test_gapfill."time")), (avg(test_gapfill.value)) + Relations: Aggregate on (public.test_gapfill) + Data node: data_node_1 
+ Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk + Remote SQL: SELECT public.time_bucket_gapfill('@ 3 hours'::interval, "time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone), name, public.first(value, "time"), avg(value) FROM public.test_gapfill WHERE _timescaledb_internal.chunks_in(public.test_gapfill.*, ARRAY[28]) GROUP BY 2, 1 + Remote EXPLAIN: + Custom Scan (GapFill) + Output: (public.time_bucket_gapfill('03:00:00'::interval, "time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), name, (public.first(value, "time")), (avg(value)) + -> Sort + Output: (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), _dist_hyper_X_X_chunk.name, (public.first(_dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time")), (avg(_dist_hyper_X_X_chunk.value)) + Sort Key: _dist_hyper_X_X_chunk.name, (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)) + -> HashAggregate + Output: (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), _dist_hyper_X_X_chunk.name, public.first(_dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time"), avg(_dist_hyper_X_X_chunk.value) + Group Key: _dist_hyper_X_X_chunk.name, public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone) + -> Result + Output: public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 
18:00:00'::timestamp without time zone), _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time" + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + + -> Custom Scan (DataNodeScan) + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_1."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), test_gapfill_1.name, (first(test_gapfill_1.value, test_gapfill_1."time")), (avg(test_gapfill_1.value)) + Relations: Aggregate on (public.test_gapfill) + Data node: data_node_3 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk + Remote SQL: SELECT public.time_bucket_gapfill('@ 3 hours'::interval, "time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone), name, public.first(value, "time"), avg(value) FROM public.test_gapfill WHERE _timescaledb_internal.chunks_in(public.test_gapfill.*, ARRAY[23, 24, 25]) GROUP BY 2, 1 + Remote EXPLAIN: + Custom Scan (GapFill) + Output: (public.time_bucket_gapfill('03:00:00'::interval, "time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), name, (public.first(value, "time")), (avg(value)) + -> Sort + Output: (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), _dist_hyper_X_X_chunk.name, (public.first(_dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time")), (avg(_dist_hyper_X_X_chunk.value)) + Sort Key: _dist_hyper_X_X_chunk.name, (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)) + -> 
HashAggregate + Output: (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), _dist_hyper_X_X_chunk.name, public.first(_dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time"), avg(_dist_hyper_X_X_chunk.value) + Group Key: _dist_hyper_X_X_chunk.name, public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone) + -> Result + Output: public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone), _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time" + -> Append + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + +(50 rows) + +-- Check for multiple gapfill calls +SELECT time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-01 18:00'), + time_bucket_gapfill('6 hours', time, '2017-01-01 08:00', '2017-01-01 18:00'), + name, + first(value, time), + avg(value) +FROM test_gapfill +GROUP BY 2,1,3; +ERROR: multiple time_bucket_gapfill calls not allowed +-- Cases where gapfill is not pushed down to data-nodes +-- Space dimension is not in group by clause +EXPLAIN (VERBOSE, COSTS OFF) SELECT time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-02 18:00'), + first(value, time), + avg(value) +FROM test_gapfill +GROUP BY 1; + QUERY PLAN 
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Custom Scan (GapFill) + Output: (time_bucket_gapfill('@ 3 hours'::interval, "time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone)), (first(value, "time")), (avg(value)) + -> Finalize GroupAggregate + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone)), first(test_gapfill.value, test_gapfill."time"), avg(test_gapfill.value) + Group Key: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone)) + -> Sort + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone)), (PARTIAL first(test_gapfill.value, test_gapfill."time")), (PARTIAL avg(test_gapfill.value)) + Sort Key: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone)) + -> Append + -> Partial HashAggregate + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone)), PARTIAL first(test_gapfill.value, test_gapfill."time"), PARTIAL avg(test_gapfill.value) + Group Key: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 
2017'::timestamp without time zone) + -> Custom Scan (DataNodeScan) on public.test_gapfill + Output: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone), test_gapfill.value, test_gapfill."time" + Data node: data_node_1 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk + Remote SQL: SELECT "time", value FROM public.test_gapfill WHERE _timescaledb_internal.chunks_in(public.test_gapfill.*, ARRAY[28]) + Remote EXPLAIN: + Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.value + + -> Partial HashAggregate + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_1."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone)), PARTIAL first(test_gapfill_1.value, test_gapfill_1."time"), PARTIAL avg(test_gapfill_1.value) + Group Key: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_1."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone) + -> Custom Scan (DataNodeScan) on public.test_gapfill test_gapfill_1 + Output: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_1."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone), test_gapfill_1.value, test_gapfill_1."time" + Data node: data_node_3 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk + Remote SQL: SELECT "time", value FROM public.test_gapfill WHERE _timescaledb_internal.chunks_in(public.test_gapfill.*, ARRAY[23, 24, 25]) + Remote EXPLAIN: + Append + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: 
_dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.value + +(40 rows) + +-- Window functions +EXPLAIN (VERBOSE, COSTS OFF) SELECT + time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-01 18:00'), + lag(min(time)) OVER () +FROM test_gapfill +GROUP BY 1; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + WindowAgg + Output: (time_bucket_gapfill('@ 3 hours'::interval, "time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), lag((min("time"))) OVER (?) + -> Custom Scan (GapFill) + Output: (time_bucket_gapfill('@ 3 hours'::interval, "time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), (min("time")) + -> Finalize GroupAggregate + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), min(test_gapfill."time") + Group Key: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)) + -> Sort + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), (PARTIAL min(test_gapfill."time")) + Sort Key: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)) + -> Append + -> Partial 
HashAggregate + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), PARTIAL min(test_gapfill."time") + Group Key: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone) + -> Custom Scan (DataNodeScan) on public.test_gapfill + Output: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone), test_gapfill."time" + Data node: data_node_1 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk + Remote SQL: SELECT "time" FROM public.test_gapfill WHERE _timescaledb_internal.chunks_in(public.test_gapfill.*, ARRAY[28]) + Remote EXPLAIN: + Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time" + + -> Partial HashAggregate + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_1."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), PARTIAL min(test_gapfill_1."time") + Group Key: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_1."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone) + -> Custom Scan (DataNodeScan) on public.test_gapfill test_gapfill_1 + Output: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_1."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone), test_gapfill_1."time" + Data node: data_node_3 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk + Remote SQL: SELECT "time" FROM public.test_gapfill WHERE _timescaledb_internal.chunks_in(public.test_gapfill.*, 
ARRAY[23, 24, 25]) + Remote EXPLAIN: + Append + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time" + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time" + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time" + +(42 rows) + +-- Data nodes are overlapping +EXPLAIN (VERBOSE, COSTS OFF) SELECT time_bucket_gapfill('3 hours', time, '2018-01-01 06:00', '2018-01-01 18:00'), + name, + first(value, time), + avg(value) +FROM test_gapfill_overlap +GROUP BY 1,2; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Custom Scan (GapFill) + Output: (time_bucket_gapfill('@ 3 hours'::interval, "time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone)), name, (first(value, "time")), (avg(value)) + -> Sort + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone)), test_gapfill_overlap.name, (first(test_gapfill_overlap.value, test_gapfill_overlap."time")), (avg(test_gapfill_overlap.value)) + Sort Key: test_gapfill_overlap.name, (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone)) + -> Finalize GroupAggregate + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp 
without time zone)), test_gapfill_overlap.name, first(test_gapfill_overlap.value, test_gapfill_overlap."time"), avg(test_gapfill_overlap.value) + Group Key: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone)), test_gapfill_overlap.name + -> Merge Append + Sort Key: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone)), test_gapfill_overlap.name + -> Partial GroupAggregate + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone)), test_gapfill_overlap.name, PARTIAL first(test_gapfill_overlap.value, test_gapfill_overlap."time"), PARTIAL avg(test_gapfill_overlap.value) + Group Key: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone)), test_gapfill_overlap.name + -> Sort + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone)), test_gapfill_overlap.name, test_gapfill_overlap.value, test_gapfill_overlap."time" + Sort Key: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone)), test_gapfill_overlap.name + -> Custom Scan (DataNodeScan) on public.test_gapfill_overlap + Output: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone), 
test_gapfill_overlap.name, test_gapfill_overlap.value, test_gapfill_overlap."time" + Data node: data_node_1 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk + Remote SQL: SELECT "time", name, value FROM public.test_gapfill_overlap WHERE _timescaledb_internal.chunks_in(public.test_gapfill_overlap.*, ARRAY[29, 30, 31, 32]) + Remote EXPLAIN: + Append + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + + -> Partial GroupAggregate + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap_1."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone)), test_gapfill_overlap_1.name, PARTIAL first(test_gapfill_overlap_1.value, test_gapfill_overlap_1."time"), PARTIAL avg(test_gapfill_overlap_1.value) + Group Key: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap_1."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone)), test_gapfill_overlap_1.name + -> Sort + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap_1."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone)), test_gapfill_overlap_1.name, test_gapfill_overlap_1.value, test_gapfill_overlap_1."time" + Sort Key: 
(time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap_1."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone)), test_gapfill_overlap_1.name + -> Custom Scan (DataNodeScan) on public.test_gapfill_overlap test_gapfill_overlap_1 + Output: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap_1."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone), test_gapfill_overlap_1.name, test_gapfill_overlap_1.value, test_gapfill_overlap_1."time" + Data node: data_node_2 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk + Remote SQL: SELECT "time", name, value FROM public.test_gapfill_overlap WHERE _timescaledb_internal.chunks_in(public.test_gapfill_overlap.*, ARRAY[20]) + Remote EXPLAIN: + Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + + -> Partial GroupAggregate + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap_2."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone)), test_gapfill_overlap_2.name, PARTIAL first(test_gapfill_overlap_2.value, test_gapfill_overlap_2."time"), PARTIAL avg(test_gapfill_overlap_2.value) + Group Key: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap_2."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone)), test_gapfill_overlap_2.name + -> Sort + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap_2."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone)), test_gapfill_overlap_2.name, test_gapfill_overlap_2.value, test_gapfill_overlap_2."time" + Sort Key: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap_2."time", 
'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone)), test_gapfill_overlap_2.name + -> Custom Scan (DataNodeScan) on public.test_gapfill_overlap test_gapfill_overlap_2 + Output: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap_2."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone), test_gapfill_overlap_2.name, test_gapfill_overlap_2.value, test_gapfill_overlap_2."time" + Data node: data_node_3 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk + Remote SQL: SELECT "time", name, value FROM public.test_gapfill_overlap WHERE _timescaledb_internal.chunks_in(public.test_gapfill_overlap.*, ARRAY[26, 27, 28, 29, 30]) + Remote EXPLAIN: + Append + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + +(74 rows) + +SET timescaledb.enable_remote_explain = false; +DROP TABLE test_gapfill; +DROP TABLE test_gapfill_overlap; diff --git a/tsl/test/shared/expected/dist_gapfill_pushdown-13.out b/tsl/test/shared/expected/dist_gapfill_pushdown-13.out new file mode 100644 index 000000000..33e82aa5a --- /dev/null +++ 
b/tsl/test/shared/expected/dist_gapfill_pushdown-13.out @@ -0,0 +1,308 @@ +-- This file and its contents are licensed under the Timescale License. +-- Please see the included NOTICE for copyright information and +-- LICENSE-TIMESCALE for a copy of the license. +\set ON_ERROR_STOP 0 +SET enable_partitionwise_aggregate = 'on'; +SET timescaledb.enable_remote_explain = true; +-- Cases where gapfill is pushed down to data-nodes +EXPLAIN (VERBOSE, COSTS OFF) SELECT time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-01 18:00'), + name, + first(value, time), + avg(value) +FROM test_gapfill +GROUP BY 1,2; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Custom Scan (AsyncAppend) + Output: (time_bucket_gapfill('@ 3 hours'::interval, "time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), name, (first(value, "time")), (avg(value)) + -> Append + -> Custom Scan (DataNodeScan) + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), test_gapfill.name, (first(test_gapfill.value, test_gapfill."time")), (avg(test_gapfill.value)) + Relations: Aggregate on (public.test_gapfill) + Data node: data_node_1 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk + Remote SQL: SELECT public.time_bucket_gapfill('@ 3 hours'::interval, "time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone), name, public.first(value, "time"), avg(value) FROM public.test_gapfill WHERE 
_timescaledb_internal.chunks_in(public.test_gapfill.*, ARRAY[28]) GROUP BY 1, 2 + Remote EXPLAIN: + Custom Scan (GapFill) + Output: (public.time_bucket_gapfill('03:00:00'::interval, "time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), name, (public.first(value, "time")), (avg(value)) + -> Sort + Output: (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), _dist_hyper_X_X_chunk.name, (public.first(_dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time")), (avg(_dist_hyper_X_X_chunk.value)) + Sort Key: _dist_hyper_X_X_chunk.name, (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)) + -> HashAggregate + Output: (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), _dist_hyper_X_X_chunk.name, public.first(_dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time"), avg(_dist_hyper_X_X_chunk.value) + Group Key: public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone), _dist_hyper_X_X_chunk.name + -> Result + Output: public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone), _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time" + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + + -> Custom Scan (DataNodeScan) + Output: 
(time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_1."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), test_gapfill_1.name, (first(test_gapfill_1.value, test_gapfill_1."time")), (avg(test_gapfill_1.value)) + Relations: Aggregate on (public.test_gapfill) + Data node: data_node_3 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk + Remote SQL: SELECT public.time_bucket_gapfill('@ 3 hours'::interval, "time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone), name, public.first(value, "time"), avg(value) FROM public.test_gapfill WHERE _timescaledb_internal.chunks_in(public.test_gapfill.*, ARRAY[23, 24, 25]) GROUP BY 1, 2 + Remote EXPLAIN: + Custom Scan (GapFill) + Output: (public.time_bucket_gapfill('03:00:00'::interval, test_gapfill."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), test_gapfill.name, (public.first(test_gapfill.value, test_gapfill."time")), (avg(test_gapfill.value)) + -> Sort + Output: (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), _dist_hyper_X_X_chunk.name, (public.first(_dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time")), (avg(_dist_hyper_X_X_chunk.value)) + Sort Key: _dist_hyper_X_X_chunk.name, (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)) + -> HashAggregate + Output: (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), _dist_hyper_X_X_chunk.name, 
public.first(_dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time"), avg(_dist_hyper_X_X_chunk.value) + Group Key: public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone), _dist_hyper_X_X_chunk.name + -> Result + Output: public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone), _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time" + -> Append + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + +(50 rows) + +EXPLAIN (VERBOSE, COSTS OFF) SELECT time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-01 18:00'), + name, + first(value, time), + avg(value) +FROM test_gapfill +GROUP BY 2,1; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Custom Scan (AsyncAppend) + Output: (time_bucket_gapfill('@ 3 hours'::interval, "time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), name, (first(value, "time")), (avg(value)) + -> Append + -> Custom Scan (DataNodeScan) + 
Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), test_gapfill.name, (first(test_gapfill.value, test_gapfill."time")), (avg(test_gapfill.value)) + Relations: Aggregate on (public.test_gapfill) + Data node: data_node_1 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk + Remote SQL: SELECT public.time_bucket_gapfill('@ 3 hours'::interval, "time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone), name, public.first(value, "time"), avg(value) FROM public.test_gapfill WHERE _timescaledb_internal.chunks_in(public.test_gapfill.*, ARRAY[28]) GROUP BY 2, 1 + Remote EXPLAIN: + Custom Scan (GapFill) + Output: (public.time_bucket_gapfill('03:00:00'::interval, "time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), name, (public.first(value, "time")), (avg(value)) + -> Sort + Output: (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), _dist_hyper_X_X_chunk.name, (public.first(_dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time")), (avg(_dist_hyper_X_X_chunk.value)) + Sort Key: _dist_hyper_X_X_chunk.name, (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)) + -> HashAggregate + Output: (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), _dist_hyper_X_X_chunk.name, public.first(_dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time"), avg(_dist_hyper_X_X_chunk.value) + Group Key: _dist_hyper_X_X_chunk.name, 
public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone) + -> Result + Output: public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone), _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time" + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + + -> Custom Scan (DataNodeScan) + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_1."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), test_gapfill_1.name, (first(test_gapfill_1.value, test_gapfill_1."time")), (avg(test_gapfill_1.value)) + Relations: Aggregate on (public.test_gapfill) + Data node: data_node_3 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk + Remote SQL: SELECT public.time_bucket_gapfill('@ 3 hours'::interval, "time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone), name, public.first(value, "time"), avg(value) FROM public.test_gapfill WHERE _timescaledb_internal.chunks_in(public.test_gapfill.*, ARRAY[23, 24, 25]) GROUP BY 2, 1 + Remote EXPLAIN: + Custom Scan (GapFill) + Output: (public.time_bucket_gapfill('03:00:00'::interval, test_gapfill."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), test_gapfill.name, (public.first(test_gapfill.value, test_gapfill."time")), (avg(test_gapfill.value)) + -> Sort + Output: (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 
18:00:00'::timestamp without time zone)), _dist_hyper_X_X_chunk.name, (public.first(_dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time")), (avg(_dist_hyper_X_X_chunk.value)) + Sort Key: _dist_hyper_X_X_chunk.name, (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)) + -> HashAggregate + Output: (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), _dist_hyper_X_X_chunk.name, public.first(_dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time"), avg(_dist_hyper_X_X_chunk.value) + Group Key: _dist_hyper_X_X_chunk.name, public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone) + -> Result + Output: public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone), _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time" + -> Append + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + +(50 rows) + +-- Check for multiple gapfill calls +SELECT time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-01 18:00'), + time_bucket_gapfill('6 hours', time, '2017-01-01 08:00', '2017-01-01 18:00'), + name, + first(value, 
time), + avg(value) +FROM test_gapfill +GROUP BY 2,1,3; +ERROR: multiple time_bucket_gapfill calls not allowed +-- Cases where gapfill is not pushed down to data-nodes +-- Space dimension is not in group by clause +EXPLAIN (VERBOSE, COSTS OFF) SELECT time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-02 18:00'), + first(value, time), + avg(value) +FROM test_gapfill +GROUP BY 1; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Custom Scan (GapFill) + Output: (time_bucket_gapfill('@ 3 hours'::interval, "time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone)), (first(value, "time")), (avg(value)) + -> Finalize GroupAggregate + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone)), first(test_gapfill.value, test_gapfill."time"), avg(test_gapfill.value) + Group Key: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone)) + -> Sort + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone)), (PARTIAL first(test_gapfill.value, test_gapfill."time")), (PARTIAL avg(test_gapfill.value)) + Sort Key: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone)) + -> Append + -> Partial HashAggregate + Output: 
(time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone)), PARTIAL first(test_gapfill.value, test_gapfill."time"), PARTIAL avg(test_gapfill.value) + Group Key: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone) + -> Custom Scan (DataNodeScan) on public.test_gapfill + Output: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone), test_gapfill.value, test_gapfill."time" + Data node: data_node_1 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk + Remote SQL: SELECT "time", value FROM public.test_gapfill WHERE _timescaledb_internal.chunks_in(public.test_gapfill.*, ARRAY[28]) + Remote EXPLAIN: + Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.value + + -> Partial HashAggregate + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_1."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone)), PARTIAL first(test_gapfill_1.value, test_gapfill_1."time"), PARTIAL avg(test_gapfill_1.value) + Group Key: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_1."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone) + -> Custom Scan (DataNodeScan) on public.test_gapfill test_gapfill_1 + Output: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_1."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone), test_gapfill_1.value, test_gapfill_1."time" + Data node: data_node_3 + Fetcher Type: Row by row + Chunks: 
_dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk + Remote SQL: SELECT "time", value FROM public.test_gapfill WHERE _timescaledb_internal.chunks_in(public.test_gapfill.*, ARRAY[23, 24, 25]) + Remote EXPLAIN: + Append + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.value + +(40 rows) + +-- Window functions +EXPLAIN (VERBOSE, COSTS OFF) SELECT + time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-01 18:00'), + lag(min(time)) OVER () +FROM test_gapfill +GROUP BY 1; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + WindowAgg + Output: (time_bucket_gapfill('@ 3 hours'::interval, "time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), lag((min("time"))) OVER (?) 
+ -> Custom Scan (GapFill) + Output: (time_bucket_gapfill('@ 3 hours'::interval, "time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), (min("time")) + -> Finalize GroupAggregate + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), min(test_gapfill."time") + Group Key: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)) + -> Sort + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), (PARTIAL min(test_gapfill."time")) + Sort Key: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)) + -> Append + -> Partial HashAggregate + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), PARTIAL min(test_gapfill."time") + Group Key: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone) + -> Custom Scan (DataNodeScan) on public.test_gapfill + Output: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone), test_gapfill."time" + Data node: data_node_1 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk + Remote SQL: SELECT "time" FROM public.test_gapfill WHERE 
_timescaledb_internal.chunks_in(public.test_gapfill.*, ARRAY[28]) + Remote EXPLAIN: + Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time" + + -> Partial HashAggregate + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_1."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), PARTIAL min(test_gapfill_1."time") + Group Key: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_1."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone) + -> Custom Scan (DataNodeScan) on public.test_gapfill test_gapfill_1 + Output: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_1."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone), test_gapfill_1."time" + Data node: data_node_3 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk + Remote SQL: SELECT "time" FROM public.test_gapfill WHERE _timescaledb_internal.chunks_in(public.test_gapfill.*, ARRAY[23, 24, 25]) + Remote EXPLAIN: + Append + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time" + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time" + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time" + +(42 rows) + +-- Data nodes are overlapping +EXPLAIN (VERBOSE, COSTS OFF) SELECT time_bucket_gapfill('3 hours', time, '2018-01-01 06:00', '2018-01-01 18:00'), + name, + first(value, time), + avg(value) +FROM test_gapfill_overlap +GROUP BY 1,2; + QUERY PLAN 
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Custom Scan (GapFill) + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone)), test_gapfill_overlap.name, (first(test_gapfill_overlap.value, test_gapfill_overlap."time")), (avg(test_gapfill_overlap.value)) + -> Sort + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap_1."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone)), test_gapfill_overlap_1.name, (first(test_gapfill_overlap_1.value, test_gapfill_overlap_1."time")), (avg(test_gapfill_overlap_1.value)) + Sort Key: test_gapfill_overlap_1.name, (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap_1."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone)) + -> HashAggregate + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap_1."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone)), test_gapfill_overlap_1.name, first(test_gapfill_overlap_1.value, test_gapfill_overlap_1."time"), avg(test_gapfill_overlap_1.value) + Group Key: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap_1."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone)), test_gapfill_overlap_1.name + -> Append + -> Custom Scan (DataNodeScan) on public.test_gapfill_overlap test_gapfill_overlap_1 + Output: time_bucket_gapfill('@ 3 
hours'::interval, test_gapfill_overlap_1."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone), test_gapfill_overlap_1.name, test_gapfill_overlap_1.value, test_gapfill_overlap_1."time" + Data node: data_node_1 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk + Remote SQL: SELECT "time", name, value FROM public.test_gapfill_overlap WHERE _timescaledb_internal.chunks_in(public.test_gapfill_overlap.*, ARRAY[29, 30, 31, 32]) + Remote EXPLAIN: + Append + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + + -> Custom Scan (DataNodeScan) on public.test_gapfill_overlap test_gapfill_overlap_2 + Output: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap_2."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone), test_gapfill_overlap_2.name, test_gapfill_overlap_2.value, test_gapfill_overlap_2."time" + Data node: data_node_2 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk + Remote SQL: SELECT "time", name, value FROM public.test_gapfill_overlap WHERE _timescaledb_internal.chunks_in(public.test_gapfill_overlap.*, ARRAY[20]) + Remote EXPLAIN: + Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, 
_dist_hyper_X_X_chunk.value + + -> Custom Scan (DataNodeScan) on public.test_gapfill_overlap test_gapfill_overlap_3 + Output: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap_3."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone), test_gapfill_overlap_3.name, test_gapfill_overlap_3.value, test_gapfill_overlap_3."time" + Data node: data_node_3 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk + Remote SQL: SELECT "time", name, value FROM public.test_gapfill_overlap WHERE _timescaledb_internal.chunks_in(public.test_gapfill_overlap.*, ARRAY[26, 27, 28, 29, 30]) + Remote EXPLAIN: + Append + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + +(55 rows) + +SET timescaledb.enable_remote_explain = false; +DROP TABLE test_gapfill; +DROP TABLE test_gapfill_overlap; diff --git a/tsl/test/shared/expected/dist_gapfill_pushdown-14.out b/tsl/test/shared/expected/dist_gapfill_pushdown-14.out new file mode 100644 index 000000000..33e82aa5a --- /dev/null +++ b/tsl/test/shared/expected/dist_gapfill_pushdown-14.out @@ -0,0 +1,308 @@ +-- This file and its contents are licensed under 
the Timescale License. +-- Please see the included NOTICE for copyright information and +-- LICENSE-TIMESCALE for a copy of the license. +\set ON_ERROR_STOP 0 +SET enable_partitionwise_aggregate = 'on'; +SET timescaledb.enable_remote_explain = true; +-- Cases where gapfill is pushed down to data-nodes +EXPLAIN (VERBOSE, COSTS OFF) SELECT time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-01 18:00'), + name, + first(value, time), + avg(value) +FROM test_gapfill +GROUP BY 1,2; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Custom Scan (AsyncAppend) + Output: (time_bucket_gapfill('@ 3 hours'::interval, "time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), name, (first(value, "time")), (avg(value)) + -> Append + -> Custom Scan (DataNodeScan) + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), test_gapfill.name, (first(test_gapfill.value, test_gapfill."time")), (avg(test_gapfill.value)) + Relations: Aggregate on (public.test_gapfill) + Data node: data_node_1 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk + Remote SQL: SELECT public.time_bucket_gapfill('@ 3 hours'::interval, "time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone), name, public.first(value, "time"), avg(value) FROM public.test_gapfill WHERE _timescaledb_internal.chunks_in(public.test_gapfill.*, ARRAY[28]) GROUP BY 1, 2 + Remote EXPLAIN: + Custom Scan (GapFill) + Output: 
(public.time_bucket_gapfill('03:00:00'::interval, "time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), name, (public.first(value, "time")), (avg(value)) + -> Sort + Output: (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), _dist_hyper_X_X_chunk.name, (public.first(_dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time")), (avg(_dist_hyper_X_X_chunk.value)) + Sort Key: _dist_hyper_X_X_chunk.name, (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)) + -> HashAggregate + Output: (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), _dist_hyper_X_X_chunk.name, public.first(_dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time"), avg(_dist_hyper_X_X_chunk.value) + Group Key: public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone), _dist_hyper_X_X_chunk.name + -> Result + Output: public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone), _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time" + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + + -> Custom Scan (DataNodeScan) + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_1."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 
2017'::timestamp without time zone)), test_gapfill_1.name, (first(test_gapfill_1.value, test_gapfill_1."time")), (avg(test_gapfill_1.value)) + Relations: Aggregate on (public.test_gapfill) + Data node: data_node_3 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk + Remote SQL: SELECT public.time_bucket_gapfill('@ 3 hours'::interval, "time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone), name, public.first(value, "time"), avg(value) FROM public.test_gapfill WHERE _timescaledb_internal.chunks_in(public.test_gapfill.*, ARRAY[23, 24, 25]) GROUP BY 1, 2 + Remote EXPLAIN: + Custom Scan (GapFill) + Output: (public.time_bucket_gapfill('03:00:00'::interval, test_gapfill."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), test_gapfill.name, (public.first(test_gapfill.value, test_gapfill."time")), (avg(test_gapfill.value)) + -> Sort + Output: (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), _dist_hyper_X_X_chunk.name, (public.first(_dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time")), (avg(_dist_hyper_X_X_chunk.value)) + Sort Key: _dist_hyper_X_X_chunk.name, (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)) + -> HashAggregate + Output: (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), _dist_hyper_X_X_chunk.name, public.first(_dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time"), avg(_dist_hyper_X_X_chunk.value) + Group Key: public.time_bucket_gapfill('03:00:00'::interval, 
_dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone), _dist_hyper_X_X_chunk.name + -> Result + Output: public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone), _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time" + -> Append + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + +(50 rows) + +EXPLAIN (VERBOSE, COSTS OFF) SELECT time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-01 18:00'), + name, + first(value, time), + avg(value) +FROM test_gapfill +GROUP BY 2,1; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Custom Scan (AsyncAppend) + Output: (time_bucket_gapfill('@ 3 hours'::interval, "time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), name, (first(value, "time")), (avg(value)) + -> Append + -> Custom Scan (DataNodeScan) + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp 
without time zone)), test_gapfill.name, (first(test_gapfill.value, test_gapfill."time")), (avg(test_gapfill.value)) + Relations: Aggregate on (public.test_gapfill) + Data node: data_node_1 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk + Remote SQL: SELECT public.time_bucket_gapfill('@ 3 hours'::interval, "time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone), name, public.first(value, "time"), avg(value) FROM public.test_gapfill WHERE _timescaledb_internal.chunks_in(public.test_gapfill.*, ARRAY[28]) GROUP BY 2, 1 + Remote EXPLAIN: + Custom Scan (GapFill) + Output: (public.time_bucket_gapfill('03:00:00'::interval, "time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), name, (public.first(value, "time")), (avg(value)) + -> Sort + Output: (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), _dist_hyper_X_X_chunk.name, (public.first(_dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time")), (avg(_dist_hyper_X_X_chunk.value)) + Sort Key: _dist_hyper_X_X_chunk.name, (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)) + -> HashAggregate + Output: (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), _dist_hyper_X_X_chunk.name, public.first(_dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time"), avg(_dist_hyper_X_X_chunk.value) + Group Key: _dist_hyper_X_X_chunk.name, public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without 
time zone) + -> Result + Output: public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone), _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time" + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + + -> Custom Scan (DataNodeScan) + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_1."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), test_gapfill_1.name, (first(test_gapfill_1.value, test_gapfill_1."time")), (avg(test_gapfill_1.value)) + Relations: Aggregate on (public.test_gapfill) + Data node: data_node_3 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk + Remote SQL: SELECT public.time_bucket_gapfill('@ 3 hours'::interval, "time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone), name, public.first(value, "time"), avg(value) FROM public.test_gapfill WHERE _timescaledb_internal.chunks_in(public.test_gapfill.*, ARRAY[23, 24, 25]) GROUP BY 2, 1 + Remote EXPLAIN: + Custom Scan (GapFill) + Output: (public.time_bucket_gapfill('03:00:00'::interval, test_gapfill."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), test_gapfill.name, (public.first(test_gapfill.value, test_gapfill."time")), (avg(test_gapfill.value)) + -> Sort + Output: (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), _dist_hyper_X_X_chunk.name, (public.first(_dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time")), 
(avg(_dist_hyper_X_X_chunk.value)) + Sort Key: _dist_hyper_X_X_chunk.name, (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)) + -> HashAggregate + Output: (public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone)), _dist_hyper_X_X_chunk.name, public.first(_dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time"), avg(_dist_hyper_X_X_chunk.value) + Group Key: _dist_hyper_X_X_chunk.name, public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone) + -> Result + Output: public.time_bucket_gapfill('03:00:00'::interval, _dist_hyper_X_X_chunk."time", '2017-01-01 06:00:00'::timestamp without time zone, '2017-01-01 18:00:00'::timestamp without time zone), _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value, _dist_hyper_X_X_chunk."time" + -> Append + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + +(50 rows) + +-- Check for multiple gapfill calls +SELECT time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-01 18:00'), + time_bucket_gapfill('6 hours', time, '2017-01-01 08:00', '2017-01-01 18:00'), + name, + first(value, time), + avg(value) +FROM test_gapfill +GROUP BY 2,1,3; +ERROR: multiple time_bucket_gapfill calls not allowed +-- Cases where gapfill is not pushed 
down to data-nodes +-- Space dimension is not in group by clause +EXPLAIN (VERBOSE, COSTS OFF) SELECT time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-02 18:00'), + first(value, time), + avg(value) +FROM test_gapfill +GROUP BY 1; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ + Custom Scan (GapFill) + Output: (time_bucket_gapfill('@ 3 hours'::interval, "time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone)), (first(value, "time")), (avg(value)) + -> Finalize GroupAggregate + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone)), first(test_gapfill.value, test_gapfill."time"), avg(test_gapfill.value) + Group Key: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone)) + -> Sort + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone)), (PARTIAL first(test_gapfill.value, test_gapfill."time")), (PARTIAL avg(test_gapfill.value)) + Sort Key: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone)) + -> Append + -> Partial HashAggregate + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without 
time zone)), PARTIAL first(test_gapfill.value, test_gapfill."time"), PARTIAL avg(test_gapfill.value) + Group Key: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone) + -> Custom Scan (DataNodeScan) on public.test_gapfill + Output: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone), test_gapfill.value, test_gapfill."time" + Data node: data_node_1 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk + Remote SQL: SELECT "time", value FROM public.test_gapfill WHERE _timescaledb_internal.chunks_in(public.test_gapfill.*, ARRAY[28]) + Remote EXPLAIN: + Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.value + + -> Partial HashAggregate + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_1."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone)), PARTIAL first(test_gapfill_1.value, test_gapfill_1."time"), PARTIAL avg(test_gapfill_1.value) + Group Key: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_1."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone) + -> Custom Scan (DataNodeScan) on public.test_gapfill test_gapfill_1 + Output: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_1."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Mon Jan 02 18:00:00 2017'::timestamp without time zone), test_gapfill_1.value, test_gapfill_1."time" + Data node: data_node_3 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk + Remote SQL: SELECT "time", value FROM public.test_gapfill WHERE 
_timescaledb_internal.chunks_in(public.test_gapfill.*, ARRAY[23, 24, 25]) + Remote EXPLAIN: + Append + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.value + +(40 rows) + +-- Window functions +EXPLAIN (VERBOSE, COSTS OFF) SELECT + time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-01 18:00'), + lag(min(time)) OVER () +FROM test_gapfill +GROUP BY 1; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + WindowAgg + Output: (time_bucket_gapfill('@ 3 hours'::interval, "time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), lag((min("time"))) OVER (?) 
+ -> Custom Scan (GapFill) + Output: (time_bucket_gapfill('@ 3 hours'::interval, "time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), (min("time")) + -> Finalize GroupAggregate + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), min(test_gapfill."time") + Group Key: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)) + -> Sort + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), (PARTIAL min(test_gapfill."time")) + Sort Key: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)) + -> Append + -> Partial HashAggregate + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), PARTIAL min(test_gapfill."time") + Group Key: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone) + -> Custom Scan (DataNodeScan) on public.test_gapfill + Output: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone), test_gapfill."time" + Data node: data_node_1 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk + Remote SQL: SELECT "time" FROM public.test_gapfill WHERE 
_timescaledb_internal.chunks_in(public.test_gapfill.*, ARRAY[28]) + Remote EXPLAIN: + Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time" + + -> Partial HashAggregate + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_1."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone)), PARTIAL min(test_gapfill_1."time") + Group Key: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_1."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone) + -> Custom Scan (DataNodeScan) on public.test_gapfill test_gapfill_1 + Output: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_1."time", 'Sun Jan 01 06:00:00 2017'::timestamp without time zone, 'Sun Jan 01 18:00:00 2017'::timestamp without time zone), test_gapfill_1."time" + Data node: data_node_3 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk + Remote SQL: SELECT "time" FROM public.test_gapfill WHERE _timescaledb_internal.chunks_in(public.test_gapfill.*, ARRAY[23, 24, 25]) + Remote EXPLAIN: + Append + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time" + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time" + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time" + +(42 rows) + +-- Data nodes are overlapping +EXPLAIN (VERBOSE, COSTS OFF) SELECT time_bucket_gapfill('3 hours', time, '2018-01-01 06:00', '2018-01-01 18:00'), + name, + first(value, time), + avg(value) +FROM test_gapfill_overlap +GROUP BY 1,2; + QUERY PLAN 
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Custom Scan (GapFill) + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone)), test_gapfill_overlap.name, (first(test_gapfill_overlap.value, test_gapfill_overlap."time")), (avg(test_gapfill_overlap.value)) + -> Sort + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap_1."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone)), test_gapfill_overlap_1.name, (first(test_gapfill_overlap_1.value, test_gapfill_overlap_1."time")), (avg(test_gapfill_overlap_1.value)) + Sort Key: test_gapfill_overlap_1.name, (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap_1."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone)) + -> HashAggregate + Output: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap_1."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone)), test_gapfill_overlap_1.name, first(test_gapfill_overlap_1.value, test_gapfill_overlap_1."time"), avg(test_gapfill_overlap_1.value) + Group Key: (time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap_1."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone)), test_gapfill_overlap_1.name + -> Append + -> Custom Scan (DataNodeScan) on public.test_gapfill_overlap test_gapfill_overlap_1 + Output: time_bucket_gapfill('@ 3 
hours'::interval, test_gapfill_overlap_1."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone), test_gapfill_overlap_1.name, test_gapfill_overlap_1.value, test_gapfill_overlap_1."time" + Data node: data_node_1 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk + Remote SQL: SELECT "time", name, value FROM public.test_gapfill_overlap WHERE _timescaledb_internal.chunks_in(public.test_gapfill_overlap.*, ARRAY[29, 30, 31, 32]) + Remote EXPLAIN: + Append + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + + -> Custom Scan (DataNodeScan) on public.test_gapfill_overlap test_gapfill_overlap_2 + Output: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap_2."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone), test_gapfill_overlap_2.name, test_gapfill_overlap_2.value, test_gapfill_overlap_2."time" + Data node: data_node_2 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk + Remote SQL: SELECT "time", name, value FROM public.test_gapfill_overlap WHERE _timescaledb_internal.chunks_in(public.test_gapfill_overlap.*, ARRAY[20]) + Remote EXPLAIN: + Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, 
_dist_hyper_X_X_chunk.value + + -> Custom Scan (DataNodeScan) on public.test_gapfill_overlap test_gapfill_overlap_3 + Output: time_bucket_gapfill('@ 3 hours'::interval, test_gapfill_overlap_3."time", 'Mon Jan 01 06:00:00 2018'::timestamp without time zone, 'Mon Jan 01 18:00:00 2018'::timestamp without time zone), test_gapfill_overlap_3.name, test_gapfill_overlap_3.value, test_gapfill_overlap_3."time" + Data node: data_node_3 + Fetcher Type: Row by row + Chunks: _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk, _dist_hyper_X_X_chunk + Remote SQL: SELECT "time", name, value FROM public.test_gapfill_overlap WHERE _timescaledb_internal.chunks_in(public.test_gapfill_overlap.*, ARRAY[26, 27, 28, 29, 30]) + Remote EXPLAIN: + Append + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + -> Seq Scan on _timescaledb_internal._dist_hyper_X_X_chunk + Output: _dist_hyper_X_X_chunk."time", _dist_hyper_X_X_chunk.name, _dist_hyper_X_X_chunk.value + +(55 rows) + +SET timescaledb.enable_remote_explain = false; +DROP TABLE test_gapfill; +DROP TABLE test_gapfill_overlap; diff --git a/tsl/test/shared/sql/CMakeLists.txt b/tsl/test/shared/sql/CMakeLists.txt index 6c2382000..fabe158de 100644 --- a/tsl/test/shared/sql/CMakeLists.txt +++ b/tsl/test/shared/sql/CMakeLists.txt @@ -23,7 +23,8 @@ endif(CMAKE_BUILD_TYPE MATCHES Debug) set(TEST_TEMPLATES_SHARED gapfill.sql.in 
generated_columns.sql.in transparent_decompress_chunk.sql.in - ordered_append.sql.in ordered_append_join.sql.in) + ordered_append.sql.in ordered_append_join.sql.in + dist_gapfill_pushdown.sql.in) # Regression tests that vary with PostgreSQL version. Generated test files are # put in the original source directory since all tests must be in the same diff --git a/tsl/test/shared/sql/dist_gapfill_pushdown.sql.in b/tsl/test/shared/sql/dist_gapfill_pushdown.sql.in new file mode 100644 index 000000000..e48be8587 --- /dev/null +++ b/tsl/test/shared/sql/dist_gapfill_pushdown.sql.in @@ -0,0 +1,62 @@ +-- This file and its contents are licensed under the Timescale License. +-- Please see the included NOTICE for copyright information and +-- LICENSE-TIMESCALE for a copy of the license. + +\set ON_ERROR_STOP 0 + +SET enable_partitionwise_aggregate = 'on'; +SET timescaledb.enable_remote_explain = true; + +-- Cases where gapfill is pushed down to data-nodes +EXPLAIN (VERBOSE, COSTS OFF) SELECT time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-01 18:00'), + name, + first(value, time), + avg(value) +FROM test_gapfill +GROUP BY 1,2; + +EXPLAIN (VERBOSE, COSTS OFF) SELECT time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-01 18:00'), + name, + first(value, time), + avg(value) +FROM test_gapfill +GROUP BY 2,1; + +-- Check for multiple gapfill calls +SELECT time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-01 18:00'), + time_bucket_gapfill('6 hours', time, '2017-01-01 08:00', '2017-01-01 18:00'), + name, + first(value, time), + avg(value) +FROM test_gapfill +GROUP BY 2,1,3; + +-- Cases where gapfill is not pushed down to data-nodes + +-- Space dimension is not in group by clause +EXPLAIN (VERBOSE, COSTS OFF) SELECT time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-02 18:00'), + first(value, time), + avg(value) +FROM test_gapfill +GROUP BY 1; + +-- Window functions +EXPLAIN (VERBOSE, COSTS OFF) SELECT + time_bucket_gapfill('3 
hours', time, '2017-01-01 06:00', '2017-01-01 18:00'), + lag(min(time)) OVER () +FROM test_gapfill +GROUP BY 1; + +-- Data nodes are overlapping + +EXPLAIN (VERBOSE, COSTS OFF) SELECT time_bucket_gapfill('3 hours', time, '2018-01-01 06:00', '2018-01-01 18:00'), + name, + first(value, time), + avg(value) +FROM test_gapfill_overlap +GROUP BY 1,2; + +SET timescaledb.enable_remote_explain = false; + +DROP TABLE test_gapfill; +DROP TABLE test_gapfill_overlap; diff --git a/tsl/test/shared/sql/include/dist_gapfill_query.sql b/tsl/test/shared/sql/include/dist_gapfill_query.sql index 099d735c5..f64d08f5a 100644 --- a/tsl/test/shared/sql/include/dist_gapfill_query.sql +++ b/tsl/test/shared/sql/include/dist_gapfill_query.sql @@ -13,14 +13,16 @@ SELECT time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-01 18:0 first(value, time), avg(value) FROM :CONDITIONS -GROUP BY 1,2; +GROUP BY 1,2 +ORDER BY 2,1; SELECT time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-01 18:00'), device, first(value, time), avg(value) FROM :CONDITIONS -GROUP BY 2,1; +GROUP BY 2,1 +ORDER BY 2,1; SELECT time_bucket_gapfill('3 hours', time, '2017-01-01 06:00', '2017-01-01 18:00'), diff --git a/tsl/test/shared/sql/include/shared_setup.sql b/tsl/test/shared/sql/include/shared_setup.sql index 1a9b423e0..5ff205c7e 100644 --- a/tsl/test/shared/sql/include/shared_setup.sql +++ b/tsl/test/shared/sql/include/shared_setup.sql @@ -254,3 +254,38 @@ order by r; create table distinct_on_distributed(ts timestamp, id int, val numeric); select create_distributed_hypertable('distinct_on_distributed', 'ts'); insert into distinct_on_distributed select * from distinct_on_hypertable; + +-- Table with non-overlapping data across data-nodes to test gapfill pushdown to data nodes +CREATE TABLE test_gapfill(time timestamp, name text, value numeric); + +SELECT table_name from create_distributed_hypertable('test_gapfill', 'time', partitioning_column => 'name'); + +INSERT INTO test_gapfill VALUES + 
('2018-01-01 06:01', 'one', 1.2), + ('2018-01-02 09:11', 'two', 4.3), + ('2018-01-03 08:01', 'three', 7.3), + ('2018-01-04 08:01', 'one', 0.23), + ('2018-07-05 08:01', 'five', 0.0), + ('2018-07-06 06:01', 'forty', 3.1), + ('2018-07-07 09:11', 'eleven', 10303.12), + ('2018-07-08 08:01', 'ten', 64); + +-- Make table with data nodes overlapping + +CREATE TABLE test_gapfill_overlap(time timestamp, name text, value numeric); + +SELECT table_name from create_distributed_hypertable('test_gapfill_overlap', 'time', partitioning_column => 'name'); + +INSERT INTO test_gapfill_overlap SELECT * FROM test_gapfill; + +SELECT set_number_partitions('test_gapfill_overlap', 4); + +INSERT INTO test_gapfill_overlap VALUES +('2020-01-01 06:01', 'eleven', 1.2), +('2020-01-02 09:11', 'twenty-two', 4.3), +('2020-01-03 08:01', 'three', 7.3), +('2020-01-04 08:01', 'one', 0.23), +('2020-07-05 08:01', 'five', 0.0), +('2020-07-06 06:01', 'forty-six', 3.1), +('2020-07-07 09:11', 'eleven', 10303.12), +('2020-07-08 08:01', 'ten', 64);