From 665d56be472edc3c544100bd55e2dee7fcf1f1bb Mon Sep 17 00:00:00 2001 From: Sven Klemm Date: Sat, 4 Jul 2020 11:14:23 +0200 Subject: [PATCH] Improve infering start and stop arguments from gapfill query Infering start and stop parameter used to only work for top level constraints. However even when constraints are at the toplevel in the query they might not end up at the top-level of the jointree depending on the plan shape. This patch changes the gapfill code to traverse the jointree to find valid start and stop arguments. --- tsl/src/nodes/gapfill/exec.c | 112 +++++++++++++++++++++++++++++++--- tsl/test/expected/gapfill.out | 55 ++++++++++++----- tsl/test/sql/gapfill.sql | 21 ++++++- 3 files changed, 161 insertions(+), 27 deletions(-) diff --git a/tsl/src/nodes/gapfill/exec.c b/tsl/src/nodes/gapfill/exec.c index caa7ffcff..aaea86551 100644 --- a/tsl/src/nodes/gapfill/exec.c +++ b/tsl/src/nodes/gapfill/exec.c @@ -374,6 +374,99 @@ get_boundary_expr_value(GapFillState *state, GapFillBoundary boundary, Expr *exp return gapfill_datum_get_internal(arg_value, state->gapfill_typid); } +typedef struct CollectBoundaryContext +{ + List *quals; + Var *ts_var; +} CollectBoundaryContext; + +/* + * expression references our gapfill time column and could be + * a boundary expression, more thorough check is in + * infer_gapfill_boundary + */ +static bool +is_boundary_expr(Node *node, CollectBoundaryContext *context) +{ + OpExpr *op; + Node *left, *right; + + if (!IsA(node, OpExpr)) + return false; + + op = castNode(OpExpr, node); + + if (op->args->length != 2) + return false; + + left = linitial(op->args); + right = llast(op->args); + + /* Var OP Var is not useful here because we are not yet at a point + * where we could evaluate them */ + if (IsA(left, Var) && IsA(right, Var)) + return false; + + if (IsA(left, Var) && var_equal(castNode(Var, left), context->ts_var)) + return true; + + if (IsA(right, Var) && var_equal(castNode(Var, right), context->ts_var)) + return true; + + return false; +} + +static bool +collect_boundary_walker(Node *node, CollectBoundaryContext *context) +{ + Node *quals = NULL; + + if (node == NULL) + return false; + + if (IsA(node, FromExpr)) + { + quals = castNode(FromExpr, node)->quals; + } + else if (IsA(node, JoinExpr)) + { + JoinExpr *j = castNode(JoinExpr, node); + + /* don't descend into outer join */ + if (IS_OUTER_JOIN(j->jointype)) + return false; + + quals = j->quals; + } + + if (quals) + { + ListCell *lc; + + foreach (lc, castNode(List, quals)) + { + if (is_boundary_expr(lfirst(lc), context)) + context->quals = lappend(context->quals, lfirst(lc)); + } + } + + return expression_tree_walker(node, collect_boundary_walker, context); +} + +/* + * traverse jointree to look for expressions referencing + * the time column of our gapfill call + */ +static List * +collect_boundary_expressions(Node *node, Var *ts_var) +{ + CollectBoundaryContext context = { .quals = NIL, .ts_var = ts_var }; + + collect_boundary_walker(node, &context); + + return context.quals; +} + static int64 infer_gapfill_boundary(GapFillState *state, GapFillBoundary boundary) { @@ -385,6 +478,7 @@ infer_gapfill_boundary(GapFillState *state, GapFillBoundary boundary) TypeCacheEntry *tce = lookup_type_cache(state->gapfill_typid, TYPECACHE_BTREE_OPFAMILY); int strategy; Oid lefttype, righttype; + List *quals; int64 boundary_value = 0; bool boundary_found = false; @@ -403,19 +497,16 @@ infer_gapfill_boundary(GapFillState *state, GapFillBoundary boundary) ts_var = castNode(Var, lsecond(func->args)); - foreach (lc, (List *) jt->quals) + quals = collect_boundary_expressions((Node *) jt, ts_var); + + foreach (lc, quals) { - OpExpr *opexpr; + OpExpr *opexpr = lfirst_node(OpExpr, lc); Var *var; Expr *expr; Oid op; int64 value; - if (!IsA(lfirst(lc), OpExpr)) - continue; - - opexpr = lfirst(lc); - if (IsA(linitial(opexpr->args), Var)) { var = linitial(opexpr->args); @@ -429,7 +520,11 @@ infer_gapfill_boundary(GapFillState *state, GapFillBoundary boundary) op = get_commutator(opexpr->opno); } else + { + /* collect_boundary_expressions has filtered those out already */ + Assert(false); continue; + } if (!op_in_opfamily(op, tce->btree_opf)) continue; @@ -483,8 +578,7 @@ infer_gapfill_boundary(GapFillState *state, GapFillBoundary boundary) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("invalid time_bucket_gapfill argument: could not infer %s boundary from WHERE " - "clause", + errmsg("missing time_bucket_gapfill argument: could not infer %s from WHERE clause", boundary == GAPFILL_START ? "start" : "finish"), errhint("You can either pass start and finish as arguments or in the WHERE clause"))); pg_unreachable(); diff --git a/tsl/test/expected/gapfill.out b/tsl/test/expected/gapfill.out index 88920369b..da4f092af 100644 --- a/tsl/test/expected/gapfill.out +++ b/tsl/test/expected/gapfill.out @@ -217,12 +217,12 @@ SELECT time_bucket_gapfill(1,time,NULL,11) FROM (VALUES (1),(2)) v(time) GROUP BY 1; -ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause +ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause SELECT time_bucket_gapfill(1,time,1,NULL) FROM (VALUES (1),(2)) v(time) GROUP BY 1; -ERROR: invalid time_bucket_gapfill argument: could not infer finish boundary from WHERE clause +ERROR: missing time_bucket_gapfill argument: could not infer finish from WHERE clause -- test 0 bucket_width SELECT time_bucket_gapfill(0,time,1,11) @@ -1701,21 +1701,21 @@ SELECT FROM (VALUES (1),(2)) v(t) WHERE true AND true GROUP BY 1; -ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause +ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause -- NULL start/finish and no usable time constraints SELECT time_bucket_gapfill(1,t,NULL,NULL) FROM (VALUES (1),(2)) v(t) WHERE true AND true GROUP BY 1; -ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause +ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause -- no start and no usable time constraints SELECT time_bucket_gapfill(1,t,finish:=1) FROM (VALUES (1),(2)) v(t) WHERE true AND true GROUP BY 1; -ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause +ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause -- NULL start expression and no usable time constraints SELECT time_bucket_gapfill(1,t,CASE WHEN length(version())>0 THEN NULL::int ELSE NULL::int END,1) @@ -1736,7 +1736,7 @@ SELECT FROM (VALUES (1),(2)) v(t) WHERE true AND true GROUP BY 1; -ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause +ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause -- NULL finish expression and no usable time constraints SELECT time_bucket_gapfill(1,t,1,CASE WHEN length(version())>0 THEN NULL::int ELSE NULL::int END) @@ -1757,21 +1757,21 @@ SELECT FROM (VALUES (1),(2)) v(t) WHERE true AND true GROUP BY 1; -ERROR: invalid time_bucket_gapfill argument: could not infer finish boundary from WHERE clause +ERROR: missing time_bucket_gapfill argument: could not infer finish from WHERE clause -- NULL finish and no usable time constraints SELECT time_bucket_gapfill(1,t,1,NULL) FROM (VALUES (1),(2)) v(t) WHERE true AND true GROUP BY 1; -ERROR: invalid time_bucket_gapfill argument: could not infer finish boundary from WHERE clause +ERROR: missing time_bucket_gapfill argument: could not infer finish from WHERE clause -- expression with column reference on right side SELECT time_bucket_gapfill(1,t) FROM (VALUES (1),(2)) v(t) WHERE t > t AND t < 2 GROUP BY 1; -ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause +ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause -- expression with cast SELECT time_bucket_gapfill(1,t1::int8) @@ -1820,7 +1820,7 @@ SELECT FROM metrics_int WHERE time =0 AND time < 2 GROUP BY 1; -ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause +ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause -- time_bucket_gapfill with constraints ORed SELECT time_bucket_gapfill(1::int8,t::int8) @@ -1835,22 +1835,47 @@ SELECT FROM metrics_int m, metrics_int m2 WHERE m.time >=0 AND m.time < 2 GROUP BY 1; -ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause +ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause -- test inner join and where clause doesnt match gapfill argument SELECT time_bucket_gapfill(1,m2.time) FROM metrics_int m1 INNER JOIN metrics_int m2 ON m1.time=m2.time WHERE m1.time >=0 AND m1.time < 2 GROUP BY 1; -ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause +ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause +-- test outer join with constraints in join condition +-- not usable as start/stop +SELECT + time_bucket_gapfill(1,m1.time) +FROM metrics_int m1 LEFT OUTER JOIN metrics_int m2 ON m1.time=m2.time AND m1.time >=0 AND m1.time < 2 +GROUP BY 1; +ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause +\set ON_ERROR_STOP 1 +-- test subqueries +-- subqueries will alter the shape of the plan and top-level constraints +-- might not end up in top-level of jointree +SELECT + time_bucket_gapfill(1,m1.time) +FROM metrics_int m1 +WHERE m1.time >=0 AND m1.time < 2 AND device_id IN (SELECT device_id FROM metrics_int) +GROUP BY 1; + time_bucket_gapfill +--------------------- + 0 + 1 +(2 rows) + -- test inner join with constraints in join condition --- only toplevel where clause constraints are supported atm SELECT time_bucket_gapfill(1,m2.time) FROM metrics_int m1 INNER JOIN metrics_int m2 ON m1.time=m2.time AND m2.time >=0 AND m2.time < 2 GROUP BY 1; -ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause -\set ON_ERROR_STOP 1 + time_bucket_gapfill +--------------------- + 0 + 1 +(2 rows) + -- int32 time_bucket_gapfill with no start/finish SELECT time_bucket_gapfill(1,t) diff --git a/tsl/test/sql/gapfill.sql b/tsl/test/sql/gapfill.sql index 7105f5b76..9cbba6b75 100644 --- a/tsl/test/sql/gapfill.sql +++ b/tsl/test/sql/gapfill.sql @@ -1150,15 +1150,30 @@ FROM metrics_int m1 INNER JOIN metrics_int m2 ON m1.time=m2.time WHERE m1.time >=0 AND m1.time < 2 GROUP BY 1; +-- test outer join with constraints in join condition +-- not usable as start/stop +SELECT + time_bucket_gapfill(1,m1.time) +FROM metrics_int m1 LEFT OUTER JOIN metrics_int m2 ON m1.time=m2.time AND m1.time >=0 AND m1.time < 2 +GROUP BY 1; + +\set ON_ERROR_STOP 1 + +-- test subqueries +-- subqueries will alter the shape of the plan and top-level constraints +-- might not end up in top-level of jointree +SELECT + time_bucket_gapfill(1,m1.time) +FROM metrics_int m1 +WHERE m1.time >=0 AND m1.time < 2 AND device_id IN (SELECT device_id FROM metrics_int) +GROUP BY 1; + -- test inner join with constraints in join condition --- only toplevel where clause constraints are supported atm SELECT time_bucket_gapfill(1,m2.time) FROM metrics_int m1 INNER JOIN metrics_int m2 ON m1.time=m2.time AND m2.time >=0 AND m2.time < 2 GROUP BY 1; -\set ON_ERROR_STOP 1 - -- int32 time_bucket_gapfill with no start/finish SELECT time_bucket_gapfill(1,t)