mirror of
https://github.com/timescale/timescaledb.git
synced 2025-05-28 09:46:44 +08:00
A number of TimescaleDB query optimizations involve operations on functions. This refactor exposes a function cache that can be used to quickly identify important functions and get access to relevant auxiliary functionality and/or information. In particular, certain functions apply to some type of (time) bucketing expression, e.g., expressions involving our own `time_bucket` function or PostgreSQL's `date_trunc`. This change recognizes the importance of time bucketing and uses the function cache to access custom functionality around time bucketing used in query optimizations. For example, both grouping estimates for hash aggregates and sort transforms can be quickly accessed to make better use of indexes when bucketing on a time column. This refactor is also done with anticipation that this will be useful going forward when other types of optimizations are implemented on time bucketing expressions, or other functions that can benefit from this cache.
395 lines
11 KiB
C
395 lines
11 KiB
C
/*
|
|
* This file and its contents are licensed under the Apache License 2.0.
|
|
* Please see the included NOTICE for copyright information and
|
|
* LICENSE-APACHE for a copy of the license.
|
|
*/
|
|
#include <postgres.h>
|
|
#include <catalog/pg_type.h>
|
|
#include <nodes/makefuncs.h>
|
|
#include <nodes/nodeFuncs.h>
|
|
#include <nodes/plannodes.h>
|
|
#include <parser/parsetree.h>
|
|
#include <utils/guc.h>
|
|
#include <optimizer/planner.h>
|
|
#include <optimizer/paths.h>
|
|
#include <utils/lsyscache.h>
|
|
|
|
#include "func_cache.h"
|
|
#include "sort_transform.h"
|
|
|
|
/* This optimization allows GROUP BY clauses that transform time in
|
|
* order-preserving ways to use indexes on the time field. It works
|
|
* by transforming sorting clauses from their more complex versions
|
|
* to simplified ones that can use the plain index, if the transform
|
|
* is order preserving.
|
|
*
|
|
* For example, an ordering on date_trunc('minute', time) can be transformed
|
|
* to an ordering on time.
|
|
*/
|
|
|
|
/* Entry point of the sort transform optimization; the definition is further down in this file. */
extern void ts_sort_transform_optimization(PlannerInfo *root, RelOptInfo *rel);
|
|
|
|
static Expr *
|
|
transform_timestamp_cast(FuncExpr *func)
|
|
{
|
|
/*
|
|
* transform cast from timestamptz to timestamp
|
|
*
|
|
* timestamp(var) => var
|
|
*
|
|
* proof: timestamp(time1) >= timestamp(time2) iff time1 > time2
|
|
*
|
|
*/
|
|
|
|
Expr *first;
|
|
|
|
if (list_length(func->args) != 1)
|
|
return (Expr *) func;
|
|
|
|
first = ts_sort_transform_expr(linitial(func->args));
|
|
if (!IsA(first, Var))
|
|
return (Expr *) func;
|
|
|
|
return (Expr *) copyObject(first);
|
|
}
|
|
|
|
static Expr *
|
|
transform_timestamptz_cast(FuncExpr *func)
|
|
{
|
|
/*
|
|
* Transform cast from date to timestamptz, or timestamp to timestamptz,
|
|
* or abstime to timestamptz Handles only single-argument versions of the
|
|
* cast to avoid explicit timezone specifiers
|
|
*
|
|
*
|
|
* timestamptz(var) => var
|
|
*
|
|
* proof: timestamptz(time1) >= timestamptz(time2) iff time1 > time2
|
|
*
|
|
*/
|
|
|
|
Expr *first;
|
|
|
|
if (list_length(func->args) != 1)
|
|
return (Expr *) func;
|
|
|
|
first = ts_sort_transform_expr(linitial(func->args));
|
|
if (!IsA(first, Var))
|
|
return (Expr *) func;
|
|
|
|
return (Expr *) copyObject(first);
|
|
}
|
|
|
|
static inline Expr *
|
|
transform_time_op_const_interval(OpExpr *op)
|
|
{
|
|
/*
|
|
* optimize timestamp(tz) +/- const interval
|
|
*
|
|
* Sort of ts + 1 minute fulfilled by sort of ts
|
|
*/
|
|
if (list_length(op->args) == 2 && IsA(lsecond(op->args), Const))
|
|
{
|
|
Oid left = exprType((Node *) linitial(op->args));
|
|
Oid right = exprType((Node *) lsecond(op->args));
|
|
|
|
if ((left == TIMESTAMPOID && right == INTERVALOID) ||
|
|
(left == TIMESTAMPTZOID && right == INTERVALOID) ||
|
|
(left == DATEOID && right == INTERVALOID))
|
|
{
|
|
char *name = get_opname(op->opno);
|
|
|
|
if (strncmp(name, "-", NAMEDATALEN) == 0 || strncmp(name, "+", NAMEDATALEN) == 0)
|
|
{
|
|
Expr *first = ts_sort_transform_expr((Expr *) linitial(op->args));
|
|
|
|
if (IsA(first, Var))
|
|
return copyObject(first);
|
|
}
|
|
}
|
|
}
|
|
return (Expr *) op;
|
|
}
|
|
|
|
static inline Expr *
|
|
transform_int_op_const(OpExpr *op)
|
|
{
|
|
/*
|
|
* Optimize int op const (or const op int), whenever possible. e.g. sort
|
|
* of some_int + const fulfilled by sort of some_int same for the
|
|
* following operator: + - / *
|
|
*
|
|
* Note that / is not commutative and const / var does NOT work (namely it
|
|
* reverses sort order, which we don't handle yet)
|
|
*/
|
|
if (list_length(op->args) == 2 &&
|
|
(IsA(lsecond(op->args), Const) || IsA(linitial(op->args), Const)))
|
|
{
|
|
Oid left = exprType((Node *) linitial(op->args));
|
|
Oid right = exprType((Node *) lsecond(op->args));
|
|
|
|
if ((left == INT8OID && right == INT8OID) || (left == INT4OID && right == INT4OID) ||
|
|
(left == INT2OID && right == INT2OID))
|
|
{
|
|
char *name = get_opname(op->opno);
|
|
|
|
if (name[1] == '\0')
|
|
{
|
|
switch (name[0])
|
|
{
|
|
case '-':
|
|
case '+':
|
|
case '*':
|
|
/* commutative cases */
|
|
if (IsA(linitial(op->args), Const))
|
|
{
|
|
Expr *nonconst = ts_sort_transform_expr((Expr *) lsecond(op->args));
|
|
|
|
if (IsA(nonconst, Var))
|
|
return copyObject(nonconst);
|
|
}
|
|
else
|
|
{
|
|
Expr *nonconst = ts_sort_transform_expr((Expr *) linitial(op->args));
|
|
|
|
if (IsA(nonconst, Var))
|
|
return copyObject(nonconst);
|
|
}
|
|
break;
|
|
case '/':
|
|
/* only if second arg is const */
|
|
if (IsA(lsecond(op->args), Const))
|
|
{
|
|
Expr *nonconst = ts_sort_transform_expr((Expr *) linitial(op->args));
|
|
|
|
if (IsA(nonconst, Var))
|
|
return copyObject(nonconst);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return (Expr *) op;
|
|
}
|
|
|
|
/* sort_transforms_expr returns a simplified sort expression in a form
|
|
* more common for indexes. Must return same data type & collation too.
|
|
*
|
|
* Sort transforms have the following correctness condition:
|
|
* Any ordering provided by the returned expression is a valid
|
|
* ordering under the original expression. The reverse need not
|
|
* be true to apply the transformation to the last member of pathkeys
|
|
* but it would need to be true to apply the transformation to
|
|
* arbitrary members of pathkeys.
|
|
*
|
|
* Namely if orig_expr(X) > orig_expr(Y) then
|
|
* new_expr(X) > new_expr(Y).
|
|
*
|
|
* Note that if orig_expr(X) = orig_expr(Y) then
|
|
* the ordering under new_expr is unconstrained.
|
|
* */
|
|
Expr *
|
|
ts_sort_transform_expr(Expr *orig_expr)
|
|
{
|
|
if (IsA(orig_expr, FuncExpr))
|
|
{
|
|
FuncExpr *func = (FuncExpr *) orig_expr;
|
|
char *func_name = get_func_name(func->funcid);
|
|
FuncInfo *finfo = ts_func_cache_get_bucketing_func(func->funcid);
|
|
|
|
if (NULL != finfo)
|
|
{
|
|
if (NULL == finfo->sort_transform)
|
|
return orig_expr;
|
|
|
|
return finfo->sort_transform(func);
|
|
}
|
|
|
|
if (strncmp(func_name, "timestamp", NAMEDATALEN) == 0)
|
|
return transform_timestamp_cast(func);
|
|
if (strncmp(func_name, "timestamptz", NAMEDATALEN) == 0)
|
|
return transform_timestamptz_cast(func);
|
|
}
|
|
if (IsA(orig_expr, OpExpr))
|
|
{
|
|
OpExpr *op = (OpExpr *) orig_expr;
|
|
Oid type_first = exprType((Node *) linitial(op->args));
|
|
|
|
if (type_first == TIMESTAMPOID || type_first == TIMESTAMPTZOID || type_first == DATEOID)
|
|
{
|
|
return transform_time_op_const_interval(op);
|
|
}
|
|
if (type_first == INT2OID || type_first == INT4OID || type_first == INT8OID)
|
|
{
|
|
return transform_int_op_const(op);
|
|
}
|
|
}
|
|
return orig_expr;
|
|
}
|
|
|
|
/* sort_transform_ec creates a new EquivalenceClass with transformed
|
|
* expressions if any of the members of the original EC can be transformed for the sort.
|
|
*/
|
|
|
|
static EquivalenceClass *
|
|
sort_transform_ec(PlannerInfo *root, EquivalenceClass *orig)
|
|
{
|
|
ListCell *lc_member;
|
|
EquivalenceClass *newec = NULL;
|
|
|
|
/* check all members, adding only transformable members to new ec */
|
|
foreach (lc_member, orig->ec_members)
|
|
{
|
|
EquivalenceMember *ec_mem = (EquivalenceMember *) lfirst(lc_member);
|
|
Expr *transformed_expr = ts_sort_transform_expr(ec_mem->em_expr);
|
|
|
|
if (transformed_expr != ec_mem->em_expr)
|
|
{
|
|
EquivalenceMember *em;
|
|
Oid type_oid = exprType((Node *) transformed_expr);
|
|
List *opfamilies = list_copy(orig->ec_opfamilies);
|
|
|
|
/*
|
|
* if the transform already exists for even one member, assume
|
|
* exists for all
|
|
*/
|
|
EquivalenceClass *exist = get_eclass_for_sort_expr(root,
|
|
transformed_expr,
|
|
ec_mem->em_nullable_relids,
|
|
opfamilies,
|
|
type_oid,
|
|
orig->ec_collation,
|
|
orig->ec_sortref,
|
|
ec_mem->em_relids,
|
|
false);
|
|
|
|
if (exist != NULL)
|
|
{
|
|
return exist;
|
|
}
|
|
|
|
em = makeNode(EquivalenceMember);
|
|
|
|
em->em_expr = transformed_expr;
|
|
em->em_relids = bms_copy(ec_mem->em_relids);
|
|
em->em_nullable_relids = bms_copy(ec_mem->em_nullable_relids);
|
|
em->em_is_const = ec_mem->em_is_const;
|
|
em->em_is_child = ec_mem->em_is_child;
|
|
em->em_datatype = type_oid;
|
|
|
|
if (newec == NULL)
|
|
{
|
|
/* lazy create the ec. */
|
|
newec = makeNode(EquivalenceClass);
|
|
newec->ec_opfamilies = opfamilies;
|
|
newec->ec_collation = orig->ec_collation;
|
|
newec->ec_members = NIL;
|
|
newec->ec_sources = list_copy(orig->ec_sources);
|
|
newec->ec_derives = list_copy(orig->ec_derives);
|
|
newec->ec_relids = bms_copy(orig->ec_relids);
|
|
newec->ec_has_const = orig->ec_has_const;
|
|
newec->ec_has_volatile = orig->ec_has_volatile;
|
|
newec->ec_below_outer_join = orig->ec_below_outer_join;
|
|
newec->ec_broken = orig->ec_broken;
|
|
newec->ec_sortref = orig->ec_sortref;
|
|
newec->ec_merged = orig->ec_merged;
|
|
}
|
|
|
|
newec->ec_members = lappend(newec->ec_members, em);
|
|
}
|
|
}
|
|
/* if any transforms were found return new ec */
|
|
if (newec != NULL)
|
|
{
|
|
root->eq_classes = lappend(root->eq_classes, newec);
|
|
return newec;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
/*
|
|
* This optimization transforms between equivalent sort operations to try
|
|
* to find useful indexes.
|
|
*
|
|
* For example: an ORDER BY date_trunc('minute', time) can be implemented by
|
|
* an ordering of time.
|
|
*/
|
|
void
|
|
ts_sort_transform_optimization(PlannerInfo *root, RelOptInfo *rel)
|
|
{
|
|
/*
|
|
* We attack this problem in three steps:
|
|
*
|
|
* 1) Create a pathkey for the transformed (simplified) sort.
|
|
*
|
|
* 2) Use the transformed pathkey to find new useful index paths.
|
|
*
|
|
* 3) Transform the pathkey of the new paths back into the original form
|
|
* to make this transparent to upper levels in the planner.
|
|
*
|
|
*/
|
|
ListCell *lc;
|
|
List *transformed_query_pathkey = NIL;
|
|
List *orig_query_pathkeys = root->query_pathkeys;
|
|
PathKey *last_pk;
|
|
PathKey *new_pk;
|
|
EquivalenceClass *transformed;
|
|
|
|
/*
|
|
* nothing to do for empty pathkeys
|
|
*/
|
|
if (orig_query_pathkeys == NIL)
|
|
return;
|
|
|
|
/*
|
|
* These sort transformations are only safe for single member ORDER BY
|
|
* clauses or as last member of the ORDER BY clause.
|
|
* Using it for other ORDER BY clauses will result in wrong ordering.
|
|
*/
|
|
last_pk = llast(root->query_pathkeys);
|
|
transformed = sort_transform_ec(root, last_pk->pk_eclass);
|
|
|
|
if (transformed == NULL)
|
|
return;
|
|
|
|
new_pk = make_canonical_pathkey(root,
|
|
transformed,
|
|
last_pk->pk_opfamily,
|
|
last_pk->pk_strategy,
|
|
last_pk->pk_nulls_first);
|
|
|
|
/*
|
|
* create complete transformed pathkeys
|
|
*/
|
|
foreach (lc, root->query_pathkeys)
|
|
{
|
|
if (lfirst(lc) != last_pk)
|
|
transformed_query_pathkey = lappend(transformed_query_pathkey, lfirst(lc));
|
|
else
|
|
transformed_query_pathkey = lappend(transformed_query_pathkey, new_pk);
|
|
}
|
|
|
|
/* search for indexes on transformed pathkeys */
|
|
root->query_pathkeys = transformed_query_pathkey;
|
|
create_index_paths(root, rel);
|
|
root->query_pathkeys = orig_query_pathkeys;
|
|
|
|
/*
|
|
* change returned paths to use original pathkeys. have to go through
|
|
* all paths since create_index_paths might have modified existing
|
|
* pathkey. Always safe to do transform since ordering of
|
|
* transformed_query_pathkey implements ordering of
|
|
* orig_query_pathkeys.
|
|
*/
|
|
foreach (lc, rel->pathlist)
|
|
{
|
|
Path *path = lfirst(lc);
|
|
|
|
if (compare_pathkeys(path->pathkeys, transformed_query_pathkey) == PATHKEYS_EQUAL)
|
|
{
|
|
path->pathkeys = orig_query_pathkeys;
|
|
}
|
|
}
|
|
}
|