Improve infering start and stop arguments from gapfill query

Infering start and stop parameter used to only work for top
level constraints. However even when constraints are at the
toplevel in the query they might not end up at the top-level
of the jointree depending on the plan shape. This patch
changes the gapfill code to traverse the jointree to find
valid start and stop arguments.
This commit is contained in:
Sven Klemm 2020-07-04 11:14:23 +02:00 committed by Sven Klemm
parent c4aa8cf7cf
commit 665d56be47
3 changed files with 161 additions and 27 deletions

View File

@ -374,6 +374,99 @@ get_boundary_expr_value(GapFillState *state, GapFillBoundary boundary, Expr *exp
return gapfill_datum_get_internal(arg_value, state->gapfill_typid); return gapfill_datum_get_internal(arg_value, state->gapfill_typid);
} }
typedef struct CollectBoundaryContext
{
List *quals;
Var *ts_var;
} CollectBoundaryContext;
/*
* expression references our gapfill time column and could be
* a boundary expression, more thorough check is in
* infer_gapfill_boundary
*/
static bool
is_boundary_expr(Node *node, CollectBoundaryContext *context)
{
OpExpr *op;
Node *left, *right;
if (!IsA(node, OpExpr))
return false;
op = castNode(OpExpr, node);
if (op->args->length != 2)
return false;
left = linitial(op->args);
right = llast(op->args);
/* Var OP Var is not useful here because we are not yet at a point
* where we could evaluate them */
if (IsA(left, Var) && IsA(right, Var))
return false;
if (IsA(left, Var) && var_equal(castNode(Var, left), context->ts_var))
return true;
if (IsA(right, Var) && var_equal(castNode(Var, right), context->ts_var))
return true;
return false;
}
static bool
collect_boundary_walker(Node *node, CollectBoundaryContext *context)
{
Node *quals = NULL;
if (node == NULL)
return false;
if (IsA(node, FromExpr))
{
quals = castNode(FromExpr, node)->quals;
}
else if (IsA(node, JoinExpr))
{
JoinExpr *j = castNode(JoinExpr, node);
/* don't descend into outer join */
if (IS_OUTER_JOIN(j->jointype))
return false;
quals = j->quals;
}
if (quals)
{
ListCell *lc;
foreach (lc, castNode(List, quals))
{
if (is_boundary_expr(lfirst(lc), context))
context->quals = lappend(context->quals, lfirst(lc));
}
}
return expression_tree_walker(node, collect_boundary_walker, context);
}
/*
* traverse jointree to look for expressions referencing
* the time column of our gapfill call
*/
static List *
collect_boundary_expressions(Node *node, Var *ts_var)
{
CollectBoundaryContext context = { .quals = NIL, .ts_var = ts_var };
collect_boundary_walker(node, &context);
return context.quals;
}
static int64 static int64
infer_gapfill_boundary(GapFillState *state, GapFillBoundary boundary) infer_gapfill_boundary(GapFillState *state, GapFillBoundary boundary)
{ {
@ -385,6 +478,7 @@ infer_gapfill_boundary(GapFillState *state, GapFillBoundary boundary)
TypeCacheEntry *tce = lookup_type_cache(state->gapfill_typid, TYPECACHE_BTREE_OPFAMILY); TypeCacheEntry *tce = lookup_type_cache(state->gapfill_typid, TYPECACHE_BTREE_OPFAMILY);
int strategy; int strategy;
Oid lefttype, righttype; Oid lefttype, righttype;
List *quals;
int64 boundary_value = 0; int64 boundary_value = 0;
bool boundary_found = false; bool boundary_found = false;
@ -403,19 +497,16 @@ infer_gapfill_boundary(GapFillState *state, GapFillBoundary boundary)
ts_var = castNode(Var, lsecond(func->args)); ts_var = castNode(Var, lsecond(func->args));
foreach (lc, (List *) jt->quals) quals = collect_boundary_expressions((Node *) jt, ts_var);
foreach (lc, quals)
{ {
OpExpr *opexpr; OpExpr *opexpr = lfirst_node(OpExpr, lc);
Var *var; Var *var;
Expr *expr; Expr *expr;
Oid op; Oid op;
int64 value; int64 value;
if (!IsA(lfirst(lc), OpExpr))
continue;
opexpr = lfirst(lc);
if (IsA(linitial(opexpr->args), Var)) if (IsA(linitial(opexpr->args), Var))
{ {
var = linitial(opexpr->args); var = linitial(opexpr->args);
@ -429,7 +520,11 @@ infer_gapfill_boundary(GapFillState *state, GapFillBoundary boundary)
op = get_commutator(opexpr->opno); op = get_commutator(opexpr->opno);
} }
else else
{
/* collect_boundary_expressions has filtered those out already */
Assert(false);
continue; continue;
}
if (!op_in_opfamily(op, tce->btree_opf)) if (!op_in_opfamily(op, tce->btree_opf))
continue; continue;
@ -483,8 +578,7 @@ infer_gapfill_boundary(GapFillState *state, GapFillBoundary boundary)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE), (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid time_bucket_gapfill argument: could not infer %s boundary from WHERE " errmsg("missing time_bucket_gapfill argument: could not infer %s from WHERE clause",
"clause",
boundary == GAPFILL_START ? "start" : "finish"), boundary == GAPFILL_START ? "start" : "finish"),
errhint("You can either pass start and finish as arguments or in the WHERE clause"))); errhint("You can either pass start and finish as arguments or in the WHERE clause")));
pg_unreachable(); pg_unreachable();

View File

@ -217,12 +217,12 @@ SELECT
time_bucket_gapfill(1,time,NULL,11) time_bucket_gapfill(1,time,NULL,11)
FROM (VALUES (1),(2)) v(time) FROM (VALUES (1),(2)) v(time)
GROUP BY 1; GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause
SELECT SELECT
time_bucket_gapfill(1,time,1,NULL) time_bucket_gapfill(1,time,1,NULL)
FROM (VALUES (1),(2)) v(time) FROM (VALUES (1),(2)) v(time)
GROUP BY 1; GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer finish boundary from WHERE clause ERROR: missing time_bucket_gapfill argument: could not infer finish from WHERE clause
-- test 0 bucket_width -- test 0 bucket_width
SELECT SELECT
time_bucket_gapfill(0,time,1,11) time_bucket_gapfill(0,time,1,11)
@ -1701,21 +1701,21 @@ SELECT
FROM (VALUES (1),(2)) v(t) FROM (VALUES (1),(2)) v(t)
WHERE true AND true WHERE true AND true
GROUP BY 1; GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause
-- NULL start/finish and no usable time constraints -- NULL start/finish and no usable time constraints
SELECT SELECT
time_bucket_gapfill(1,t,NULL,NULL) time_bucket_gapfill(1,t,NULL,NULL)
FROM (VALUES (1),(2)) v(t) FROM (VALUES (1),(2)) v(t)
WHERE true AND true WHERE true AND true
GROUP BY 1; GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause
-- no start and no usable time constraints -- no start and no usable time constraints
SELECT SELECT
time_bucket_gapfill(1,t,finish:=1) time_bucket_gapfill(1,t,finish:=1)
FROM (VALUES (1),(2)) v(t) FROM (VALUES (1),(2)) v(t)
WHERE true AND true WHERE true AND true
GROUP BY 1; GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause
-- NULL start expression and no usable time constraints -- NULL start expression and no usable time constraints
SELECT SELECT
time_bucket_gapfill(1,t,CASE WHEN length(version())>0 THEN NULL::int ELSE NULL::int END,1) time_bucket_gapfill(1,t,CASE WHEN length(version())>0 THEN NULL::int ELSE NULL::int END,1)
@ -1736,7 +1736,7 @@ SELECT
FROM (VALUES (1),(2)) v(t) FROM (VALUES (1),(2)) v(t)
WHERE true AND true WHERE true AND true
GROUP BY 1; GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause
-- NULL finish expression and no usable time constraints -- NULL finish expression and no usable time constraints
SELECT SELECT
time_bucket_gapfill(1,t,1,CASE WHEN length(version())>0 THEN NULL::int ELSE NULL::int END) time_bucket_gapfill(1,t,1,CASE WHEN length(version())>0 THEN NULL::int ELSE NULL::int END)
@ -1757,21 +1757,21 @@ SELECT
FROM (VALUES (1),(2)) v(t) FROM (VALUES (1),(2)) v(t)
WHERE true AND true WHERE true AND true
GROUP BY 1; GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer finish boundary from WHERE clause ERROR: missing time_bucket_gapfill argument: could not infer finish from WHERE clause
-- NULL finish and no usable time constraints -- NULL finish and no usable time constraints
SELECT SELECT
time_bucket_gapfill(1,t,1,NULL) time_bucket_gapfill(1,t,1,NULL)
FROM (VALUES (1),(2)) v(t) FROM (VALUES (1),(2)) v(t)
WHERE true AND true WHERE true AND true
GROUP BY 1; GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer finish boundary from WHERE clause ERROR: missing time_bucket_gapfill argument: could not infer finish from WHERE clause
-- expression with column reference on right side -- expression with column reference on right side
SELECT SELECT
time_bucket_gapfill(1,t) time_bucket_gapfill(1,t)
FROM (VALUES (1),(2)) v(t) FROM (VALUES (1),(2)) v(t)
WHERE t > t AND t < 2 WHERE t > t AND t < 2
GROUP BY 1; GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause
-- expression with cast -- expression with cast
SELECT SELECT
time_bucket_gapfill(1,t1::int8) time_bucket_gapfill(1,t1::int8)
@ -1820,7 +1820,7 @@ SELECT
FROM metrics_int FROM metrics_int
WHERE time =0 AND time < 2 WHERE time =0 AND time < 2
GROUP BY 1; GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause
-- time_bucket_gapfill with constraints ORed -- time_bucket_gapfill with constraints ORed
SELECT SELECT
time_bucket_gapfill(1::int8,t::int8) time_bucket_gapfill(1::int8,t::int8)
@ -1835,22 +1835,47 @@ SELECT
FROM metrics_int m, metrics_int m2 FROM metrics_int m, metrics_int m2
WHERE m.time >=0 AND m.time < 2 WHERE m.time >=0 AND m.time < 2
GROUP BY 1; GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause
-- test inner join and where clause doesnt match gapfill argument -- test inner join and where clause doesnt match gapfill argument
SELECT SELECT
time_bucket_gapfill(1,m2.time) time_bucket_gapfill(1,m2.time)
FROM metrics_int m1 INNER JOIN metrics_int m2 ON m1.time=m2.time FROM metrics_int m1 INNER JOIN metrics_int m2 ON m1.time=m2.time
WHERE m1.time >=0 AND m1.time < 2 WHERE m1.time >=0 AND m1.time < 2
GROUP BY 1; GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause
-- test outer join with constraints in join condition
-- not usable as start/stop
SELECT
time_bucket_gapfill(1,m1.time)
FROM metrics_int m1 LEFT OUTER JOIN metrics_int m2 ON m1.time=m2.time AND m1.time >=0 AND m1.time < 2
GROUP BY 1;
ERROR: missing time_bucket_gapfill argument: could not infer start from WHERE clause
\set ON_ERROR_STOP 1
-- test subqueries
-- subqueries will alter the shape of the plan and top-level constraints
-- might not end up in top-level of jointree
SELECT
time_bucket_gapfill(1,m1.time)
FROM metrics_int m1
WHERE m1.time >=0 AND m1.time < 2 AND device_id IN (SELECT device_id FROM metrics_int)
GROUP BY 1;
time_bucket_gapfill
---------------------
0
1
(2 rows)
-- test inner join with constraints in join condition -- test inner join with constraints in join condition
-- only toplevel where clause constraints are supported atm
SELECT SELECT
time_bucket_gapfill(1,m2.time) time_bucket_gapfill(1,m2.time)
FROM metrics_int m1 INNER JOIN metrics_int m2 ON m1.time=m2.time AND m2.time >=0 AND m2.time < 2 FROM metrics_int m1 INNER JOIN metrics_int m2 ON m1.time=m2.time AND m2.time >=0 AND m2.time < 2
GROUP BY 1; GROUP BY 1;
ERROR: invalid time_bucket_gapfill argument: could not infer start boundary from WHERE clause time_bucket_gapfill
\set ON_ERROR_STOP 1 ---------------------
0
1
(2 rows)
-- int32 time_bucket_gapfill with no start/finish -- int32 time_bucket_gapfill with no start/finish
SELECT SELECT
time_bucket_gapfill(1,t) time_bucket_gapfill(1,t)

View File

@ -1150,15 +1150,30 @@ FROM metrics_int m1 INNER JOIN metrics_int m2 ON m1.time=m2.time
WHERE m1.time >=0 AND m1.time < 2 WHERE m1.time >=0 AND m1.time < 2
GROUP BY 1; GROUP BY 1;
-- test outer join with constraints in join condition
-- not usable as start/stop
SELECT
time_bucket_gapfill(1,m1.time)
FROM metrics_int m1 LEFT OUTER JOIN metrics_int m2 ON m1.time=m2.time AND m1.time >=0 AND m1.time < 2
GROUP BY 1;
\set ON_ERROR_STOP 1
-- test subqueries
-- subqueries will alter the shape of the plan and top-level constraints
-- might not end up in top-level of jointree
SELECT
time_bucket_gapfill(1,m1.time)
FROM metrics_int m1
WHERE m1.time >=0 AND m1.time < 2 AND device_id IN (SELECT device_id FROM metrics_int)
GROUP BY 1;
-- test inner join with constraints in join condition -- test inner join with constraints in join condition
-- only toplevel where clause constraints are supported atm
SELECT SELECT
time_bucket_gapfill(1,m2.time) time_bucket_gapfill(1,m2.time)
FROM metrics_int m1 INNER JOIN metrics_int m2 ON m1.time=m2.time AND m2.time >=0 AND m2.time < 2 FROM metrics_int m1 INNER JOIN metrics_int m2 ON m1.time=m2.time AND m2.time >=0 AND m2.time < 2
GROUP BY 1; GROUP BY 1;
\set ON_ERROR_STOP 1
-- int32 time_bucket_gapfill with no start/finish -- int32 time_bucket_gapfill with no start/finish
SELECT SELECT
time_bucket_gapfill(1,t) time_bucket_gapfill(1,t)