Improve infering start and stop arguments from gapfill query

Infering start and stop parameter used to only work for top
level constraints. However even when constraints are at the
toplevel in the query they might not end up at the top-level
of the jointree depending on the plan shape. This patch
changes the gapfill code to traverse the jointree to find
valid start and stop arguments.
This commit is contained in:
Sven Klemm 2020-07-04 11:14:23 +02:00 committed by Sven Klemm
parent c4aa8cf7cf
commit 665d56be47
3 changed files with 161 additions and 27 deletions

View File

@ -374,6 +374,99 @@ get_boundary_expr_value(GapFillState *state, GapFillBoundary boundary, Expr *exp
return gapfill_datum_get_internal(arg_value, state->gapfill_typid);
}
typedef struct CollectBoundaryContext
{
List *quals;
Var *ts_var;
} CollectBoundaryContext;
/*
* expression references our gapfill time column and could be
* a boundary expression, more thorough check is in
* infer_gapfill_boundary
*/
static bool
is_boundary_expr(Node *node, CollectBoundaryContext *context)
{
OpExpr *op;
Node *left, *right;
if (!IsA(node, OpExpr))
return false;
op = castNode(OpExpr, node);
if (op->args->length != 2)
return false;
left = linitial(op->args);
right = llast(op->args);
/* Var OP Var is not useful here because we are not yet at a point
* where we could evaluate them */
if (IsA(left, Var) && IsA(right, Var))
return false;
if (IsA(left, Var) && var_equal(castNode(Var, left), context->ts_var))
return true;
if (IsA(right, Var) && var_equal(castNode(Var, right), context->ts_var))
return true;
return false;
}
static bool
collect_boundary_walker(Node *node, CollectBoundaryContext *context)
{
Node *quals = NULL;
if (node == NULL)
return false;
if (IsA(node, FromExpr))
{
quals = castNode(FromExpr, node)->quals;
}
else if (IsA(node, JoinExpr))
{
JoinExpr *j = castNode(JoinExpr, node);
/* don't descend into outer join */
if (IS_OUTER_JOIN(j->jointype))
return false;
quals = j->quals;
}
if (quals)
{
ListCell *lc;
foreach (lc, castNode(List, quals))
{
if (is_boundary_expr(lfirst(lc), context))
context->quals = lappend(context->quals, lfirst(lc));
}
}
return expression_tree_walker(node, collect_boundary_walker, context);
}
/*
* traverse jointree to look for expressions referencing
* the time column of our gapfill call
*/
static List *
collect_boundary_expressions(Node *node, Var *ts_var)
{
CollectBoundaryContext context = { .quals = NIL, .ts_var = ts_var };
collect_boundary_walker(node, &context);
return context.quals;
}
static int64
infer_gapfill_boundary(GapFillState *state, GapFillBoundary boundary)
{
@ -385,6 +478,7 @@ infer_gapfill_boundary(GapFillState *state, GapFillBoundary boundary)
TypeCacheEntry *tce = lookup_type_cache(state->gapfill_typid, TYPECACHE_BTREE_OPFAMILY);
int strategy;
Oid lefttype, righttype;
List *quals;
int64 boundary_value = 0;
bool boundary_found = false;
@ -403,19 +497,16 @@ infer_gapfill_boundary(GapFillState *state, GapFillBoundary boundary)
ts_var = castNode(Var, lsecond(func->args));
foreach (lc, (List *) jt->quals)
quals = collect_boundary_expressions((Node *) jt, ts_var);
foreach (lc, quals)
{
OpExpr *opexpr;
OpExpr *opexpr = lfirst_node(OpExpr, lc);
Var *var;
Expr *expr;
Oid op;
int64 value;
if (!IsA(lfirst(lc), OpExpr))
continue;
opexpr = lfirst(lc);
if (IsA(linitial(opexpr->args), Var))
{
var = linitial(opexpr->args);
@ -429,7 +520,11 @@ infer_gapfill_boundary(GapFillState *state, GapFillBoundary boundary)
op = get_commutator(opexpr->opno);
}
else
{
/* collect_boundary_expressions has filtered those out already */
Assert(false);
continue;
}
if (!op_in_opfamily(op, tce->btree_opf))
continue;
@ -483,8 +578,7 @@ infer_gapfill_boundary(GapFillState *state, GapFillBoundary boundary)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid time_bucket_gapfill argument: could not infer %s boundary from WHERE "
"clause",
errmsg("missing time_bucket_gapfill argument: could not infer %s from WHERE clause",
boundary == GAPFILL_START ? "start" : "finish"),
errhint("You can either pass start and finish as arguments or in the WHERE clause")));
pg_unreachable();

View File

@ -217,12 +217,12 @@ SELECT
time_bucket_gapfill(1,time,NULL,11)
FROM (VALUES (1),(2)) v(time)
GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause
ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause
SELECT
time_bucket_gapfill(1,time,1,NULL)
FROM (VALUES (1),(2)) v(time)
GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer finish boundary from WHERE clause
ERROR: missing time_bucket_gapfill argument: could not infer finish from WHERE clause
-- test 0 bucket_width
SELECT
time_bucket_gapfill(0,time,1,11)
@ -1701,21 +1701,21 @@ SELECT
FROM (VALUES (1),(2)) v(t)
WHERE true AND true
GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause
ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause
-- NULL start/finish and no usable time constraints
SELECT
time_bucket_gapfill(1,t,NULL,NULL)
FROM (VALUES (1),(2)) v(t)
WHERE true AND true
GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause
ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause
-- no start and no usable time constraints
SELECT
time_bucket_gapfill(1,t,finish:=1)
FROM (VALUES (1),(2)) v(t)
WHERE true AND true
GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause
ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause
-- NULL start expression and no usable time constraints
SELECT
time_bucket_gapfill(1,t,CASE WHEN length(version())>0 THEN NULL::int ELSE NULL::int END,1)
@ -1736,7 +1736,7 @@ SELECT
FROM (VALUES (1),(2)) v(t)
WHERE true AND true
GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause
ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause
-- NULL finish expression and no usable time constraints
SELECT
time_bucket_gapfill(1,t,1,CASE WHEN length(version())>0 THEN NULL::int ELSE NULL::int END)
@ -1757,21 +1757,21 @@ SELECT
FROM (VALUES (1),(2)) v(t)
WHERE true AND true
GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer finish boundary from WHERE clause
ERROR: missing time_bucket_gapfill argument: could not infer finish from WHERE clause
-- NULL finish and no usable time constraints
SELECT
time_bucket_gapfill(1,t,1,NULL)
FROM (VALUES (1),(2)) v(t)
WHERE true AND true
GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer finish boundary from WHERE clause
ERROR: missing time_bucket_gapfill argument: could not infer finish from WHERE clause
-- expression with column reference on right side
SELECT
time_bucket_gapfill(1,t)
FROM (VALUES (1),(2)) v(t)
WHERE t > t AND t < 2
GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause
ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause
-- expression with cast
SELECT
time_bucket_gapfill(1,t1::int8)
@ -1820,7 +1820,7 @@ SELECT
FROM metrics_int
WHERE time =0 AND time < 2
GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause
ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause
-- time_bucket_gapfill with constraints ORed
SELECT
time_bucket_gapfill(1::int8,t::int8)
@ -1835,22 +1835,47 @@ SELECT
FROM metrics_int m, metrics_int m2
WHERE m.time >=0 AND m.time < 2
GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause
ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause
-- test inner join and where clause doesnt match gapfill argument
SELECT
time_bucket_gapfill(1,m2.time)
FROM metrics_int m1 INNER JOIN metrics_int m2 ON m1.time=m2.time
WHERE m1.time >=0 AND m1.time < 2
GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause
ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause
-- test outer join with constraints in join condition
-- not usable as start/stop
SELECT
time_bucket_gapfill(1,m1.time)
FROM metrics_int m1 LEFT OUTER JOIN metrics_int m2 ON m1.time=m2.time AND m1.time >=0 AND m1.time < 2
GROUP BY 1;
ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause
\set ON_ERROR_STOP 1
-- test subqueries
-- subqueries will alter the shape of the plan and top-level constraints
-- might not end up in top-level of jointree
SELECT
time_bucket_gapfill(1,m1.time)
FROM metrics_int m1
WHERE m1.time >=0 AND m1.time < 2 AND device_id IN (SELECT device_id FROM metrics_int)
GROUP BY 1;
time_bucket_gapfill
---------------------
0
1
(2 rows)
-- test inner join with constraints in join condition
-- only toplevel where clause constraints are supported atm
SELECT
time_bucket_gapfill(1,m2.time)
FROM metrics_int m1 INNER JOIN metrics_int m2 ON m1.time=m2.time AND m2.time >=0 AND m2.time < 2
GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause
\set ON_ERROR_STOP 1
time_bucket_gapfill
---------------------
0
1
(2 rows)
-- int32 time_bucket_gapfill with no start/finish
SELECT
time_bucket_gapfill(1,t)

View File

@ -1150,15 +1150,30 @@ FROM metrics_int m1 INNER JOIN metrics_int m2 ON m1.time=m2.time
WHERE m1.time >=0 AND m1.time < 2
GROUP BY 1;
-- test outer join with constraints in join condition
-- not usable as start/stop
SELECT
time_bucket_gapfill(1,m1.time)
FROM metrics_int m1 LEFT OUTER JOIN metrics_int m2 ON m1.time=m2.time AND m1.time >=0 AND m1.time < 2
GROUP BY 1;
\set ON_ERROR_STOP 1
-- test subqueries
-- subqueries will alter the shape of the plan and top-level constraints
-- might not end up in top-level of jointree
SELECT
time_bucket_gapfill(1,m1.time)
FROM metrics_int m1
WHERE m1.time >=0 AND m1.time < 2 AND device_id IN (SELECT device_id FROM metrics_int)
GROUP BY 1;
-- test inner join with constraints in join condition
-- only toplevel where clause constraints are supported atm
SELECT
time_bucket_gapfill(1,m2.time)
FROM metrics_int m1 INNER JOIN metrics_int m2 ON m1.time=m2.time AND m2.time >=0 AND m2.time < 2
GROUP BY 1;
\set ON_ERROR_STOP 1
-- int32 time_bucket_gapfill with no start/finish
SELECT
time_bucket_gapfill(1,t)