Merged in mat/groupby-mergeappend (pull request #137)

Optimize queries with date_trunc in ORDER BY and a LIMIT.

Approved-by: Erik Nordström <erik.nordstrom@gmail.com>
Approved-by: ci-vast
This commit is contained in:
Matvey Arye 2017-04-05 20:54:28 +00:00
commit 851a3d2c23
12 changed files with 734 additions and 76 deletions

View File

@ -34,6 +34,7 @@ SRCS = \
src/insert.c \
src/planner.c \
src/process_utility.c \
src/sort_transform.c \
src/xact.c
OBJS = $(SRCS:.c=.o)

View File

@ -13,6 +13,7 @@
#include <optimizer/planner.h>
#include <catalog/namespace.h>
#include <catalog/pg_type.h>
#include <optimizer/paths.h>
#include "hypertable_cache.h"
#include "partitioning.h"
@ -22,6 +23,7 @@ void _planner_init(void);
void _planner_fini(void);
static planner_hook_type prev_planner_hook;
static set_rel_pathlist_hook_type prev_set_rel_pathlist_hook;
typedef struct ChangeTableNameCtx
{
@ -306,15 +308,36 @@ timescaledb_planner(Query *parse, int cursorOptions, ParamListInfo boundParams)
return rv;
}
extern void sort_transform_optimization(PlannerInfo *root, RelOptInfo *rel);
/*
 * Per-relation planner hook: apply the sort transform optimization (see
 * sort_transform.c) unless the user disabled optimizations via the
 * timescaledb.disable_optimizations setting, then chain to any previously
 * installed set_rel_pathlist hook.
 */
static void timescaledb_set_rel_pathlist(PlannerInfo *root,
										 RelOptInfo *rel,
										 Index rti,
										 RangeTblEntry *rte)
{
	/* missing_ok = true: the GUC may be undefined, in which case NULL is returned */
	char	   *disable_optimizations =
	GetConfigOptionByName("timescaledb.disable_optimizations", NULL, true);

	/*
	 * Use an exact string comparison: the previous prefix match
	 * (strncmp(..., "true", 4)) also accepted values such as "truex".
	 */
	if (extension_is_loaded() &&
		(disable_optimizations == NULL || strcmp(disable_optimizations, "true") != 0))
	{
		sort_transform_optimization(root, rel);
	}

	if (prev_set_rel_pathlist_hook != NULL)
	{
		(void) (*prev_set_rel_pathlist_hook) (root, rel, rti, rte);
	}
}
void
_planner_init(void)
{
	/* Remember whatever hooks were installed before us so we can chain/restore. */
	prev_planner_hook = planner_hook;
	prev_set_rel_pathlist_hook = set_rel_pathlist_hook;

	/* Install our own planner entry points. */
	planner_hook = timescaledb_planner;
	set_rel_pathlist_hook = timescaledb_set_rel_pathlist;
}
void
_planner_fini(void)
{
	/* Restore the hooks that were in place before _planner_init() ran. */
	set_rel_pathlist_hook = prev_set_rel_pathlist_hook;
	planner_hook = prev_planner_hook;
}

211
src/sort_transform.c Normal file
View File

@ -0,0 +1,211 @@
#include <postgres.h>
#include <nodes/makefuncs.h>
#include <nodes/plannodes.h>
#include <parser/parsetree.h>
#include <utils/guc.h>
#include <optimizer/planner.h>
#include <optimizer/paths.h>
#include <utils/lsyscache.h>
/* This optimization allows GROUP BY clauses that transform time in
* order-preserving ways to use indexes on the time field. It works
* by transforming sorting clauses from their more complex versions
* to simplified ones that can use the plain index, if the transform
* is order preserving.
*
* For example, an ordering on date_trunc('minute', time) can be transformed
* to an ordering on time.
*/
extern void sort_transform_optimization(PlannerInfo *root, RelOptInfo *rel);
/* sort_transforms_expr returns a simplified sort expression in a form
* more common for indexes. Must return same data type & collation too.
*
* Sort transforms have the following correctness condition:
* Any ordering provided by the returned expression is a valid
* ordering under the original expression. The reverse need not
* be true.
*
* Namely if orig_expr(X) > orig_expr(Y) then
* new_expr(X) > new_expr(Y).
*
* Note that if orig_expr(X) = orig_expr(Y) then
* the ordering under new_expr is unconstrained.
* */
/*
 * Return a simplified, order-compatible version of orig_expr, or NULL if no
 * transform applies. See the correctness condition documented above: any
 * ordering of the returned expression must be a valid ordering of the
 * original expression.
 */
static Expr *
sort_transform_expr(Expr *orig_expr)
{
	/*
	 * date_trunc (const, var) => var
	 *
	 * proof: date_trunc(c, time1) > date_trunc(c,time2) iff time1 > time2
	 */
	if (IsA(orig_expr, FuncExpr))
	{
		FuncExpr   *func = (FuncExpr *) orig_expr;
		char	   *func_name = get_func_name(func->funcid);
		Var		   *v;

		/* get_func_name returns NULL for an invalid function OID */
		if (func_name == NULL || strncmp(func_name, "date_trunc", NAMEDATALEN) != 0)
			return NULL;

		/* only the two-argument form date_trunc(field, source) is handled */
		if (list_length(func->args) != 2)
			return NULL;

		if (!IsA(linitial(func->args), Const) || !IsA(lsecond(func->args), Var))
			return NULL;

		v = lsecond(func->args);
		return (Expr *) copyObject(v);
	}
	return NULL;
}
/* sort_transform_ec creates a new EquivalenceClass with transformed
 * expressions if any of the members of the original EC can be transformed for the sort.
 *
 * Returns NULL when no member could be transformed. If an EquivalenceClass
 * for a transformed expression already exists (e.g. the query also refers to
 * the simplified expression directly), that existing EC is returned instead
 * of building a new one.
 */
static EquivalenceClass *
sort_transform_ec(PlannerInfo *root, EquivalenceClass *orig)
{
ListCell *lc_member;
EquivalenceClass *newec = NULL;
/* check all members, adding only transformable members to new ec */
foreach(lc_member, orig->ec_members)
{
EquivalenceMember *ec_mem = (EquivalenceMember *) lfirst(lc_member);
Expr *transformed_expr = sort_transform_expr(ec_mem->em_expr);
if (transformed_expr != NULL)
{
EquivalenceMember *em;
/*
 * if the transform already exists for even one member, assume
 * exists for all
 */
EquivalenceClass *exist =
get_eclass_for_sort_expr(root, transformed_expr, ec_mem->em_nullable_relids,
orig->ec_opfamilies, ec_mem->em_datatype,
orig->ec_collation, orig->ec_sortref,
ec_mem->em_relids, false);
if (exist != NULL)
{
return exist;
}
/* build a new member mirroring the original, but with the simplified expr */
em = makeNode(EquivalenceMember);
em->em_expr = transformed_expr;
em->em_relids = bms_copy(ec_mem->em_relids);
em->em_nullable_relids = bms_copy(ec_mem->em_nullable_relids);
em->em_is_const = ec_mem->em_is_const;
em->em_is_child = ec_mem->em_is_child;
em->em_datatype = ec_mem->em_datatype;
if (newec == NULL)
{
/* lazy create the ec. copies all EC-level attributes from the original */
newec = makeNode(EquivalenceClass);
newec->ec_opfamilies = list_copy(orig->ec_opfamilies);
newec->ec_collation = orig->ec_collation;
newec->ec_members = NIL;
newec->ec_sources = list_copy(orig->ec_sources);
newec->ec_derives = list_copy(orig->ec_derives);
newec->ec_relids = bms_copy(orig->ec_relids);
newec->ec_has_const = orig->ec_has_const;
newec->ec_has_volatile = orig->ec_has_volatile;
newec->ec_below_outer_join = orig->ec_below_outer_join;
newec->ec_broken = orig->ec_broken;
newec->ec_sortref = orig->ec_sortref;
newec->ec_merged = orig->ec_merged;
}
newec->ec_members = lappend(newec->ec_members, em);
}
}
/* if any transforms were found return new ec */
if (newec != NULL)
{
/* register the new EC with the planner so later pathkey lookups can find it */
root->eq_classes = lappend(root->eq_classes, newec);
return newec;
}
return NULL;
}
/*
 * This optimization transforms between equivalent sort operations to try
 * to find useful indexes.
 *
 * For example: an ORDER BY date_trunc('minute', time) can be implemented by
 * an ordering of time.
 */
void
sort_transform_optimization(PlannerInfo *root, RelOptInfo *rel)
{
/*
 * We attack this problem in three steps:
 *
 * 1) Create a pathkey for the transformed (simplified) sort.
 *
 * 2) Use the transformed pathkey to find new useful index paths.
 *
 * 3) Transform the pathkey of the new paths back into the original form
 * to make this transparent to upper levels in the planner.
 *
 * */
ListCell *lc_pathkey;
List *transformed_query_pathkey = NIL;
bool was_transformed = false;
/* build transformed query pathkeys; untransformable keys are kept as-is */
foreach(lc_pathkey, root->query_pathkeys)
{
PathKey *pk = lfirst(lc_pathkey);
EquivalenceClass *transformed = sort_transform_ec(root, pk->pk_eclass);
if (transformed != NULL)
{
/* same opfamily/strategy/nulls ordering, only the eclass is replaced */
PathKey *newpk = make_canonical_pathkey(root,
transformed, pk->pk_opfamily, pk->pk_strategy, pk->pk_nulls_first);
was_transformed = true;
transformed_query_pathkey = lappend(transformed_query_pathkey, newpk);
}
else
{
transformed_query_pathkey = lappend(transformed_query_pathkey, pk);
}
}
if (was_transformed)
{
ListCell *lc_plan;
/*
 * search for indexes on transformed pathkeys: temporarily swap in the
 * transformed pathkeys so create_index_paths sees them, then restore
 */
List *orig_query_pathkeys = root->query_pathkeys;
root->query_pathkeys = transformed_query_pathkey;
create_index_paths(root, rel);
root->query_pathkeys = orig_query_pathkeys;
/*
 * change returned paths to use original pathkeys. have to go through
 * all paths since create_index_paths might have modified existing
 * pathkey. Always safe to do transform since ordering of transformed_query_pathkey
 * implements ordering of orig_query_pathkeys.
 */
foreach(lc_plan, rel->pathlist)
{
Path *path = lfirst(lc_plan);
if (compare_pathkeys(path->pathkeys, transformed_query_pathkey) == PATHKEYS_EQUAL)
{
path->pathkeys = orig_query_pathkeys;
}
}
}
}

View File

@ -326,63 +326,3 @@ EXPLAIN (verbose ON, costs off)SELECT "timeCustom"%10 t, min(series_0) FROM PUBL
Output: _hyper_1_2_0_3_data."timeCustom", _hyper_1_2_0_3_data.series_0
(23 rows)
--make table with timestamp. Test timestamp instead of int time.
CREATE TABLE PUBLIC.hyper_1 (
time TIMESTAMPTZ NOT NULL,
series_0 DOUBLE PRECISION NULL,
series_1 DOUBLE PRECISION NULL,
series_2 DOUBLE PRECISION NULL
);
CREATE INDEX ON PUBLIC.hyper_1 (time DESC, series_0);
SELECT * FROM create_hypertable('"public"."hyper_1"'::regclass, 'time'::name, number_partitions => 1, chunk_size_bytes=>100000);
create_hypertable
-------------------
(1 row)
INSERT INTO hyper_1 SELECT to_timestamp(generate_series(0,10000)), random(), random(), random();
--non-aggregated uses MergeAppend correctly
EXPLAIN (verbose ON, costs off)SELECT * FROM hyper_1 ORDER BY "time" DESC limit 2;
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit
Output: _hyper_2_0_replica."time", _hyper_2_0_replica.series_0, _hyper_2_0_replica.series_1, _hyper_2_0_replica.series_2
-> Merge Append
Sort Key: _hyper_2_0_replica."time" DESC
-> Sort
Output: _hyper_2_0_replica."time", _hyper_2_0_replica.series_0, _hyper_2_0_replica.series_1, _hyper_2_0_replica.series_2
Sort Key: _hyper_2_0_replica."time" DESC
-> Seq Scan on _timescaledb_internal._hyper_2_0_replica
Output: _hyper_2_0_replica."time", _hyper_2_0_replica.series_0, _hyper_2_0_replica.series_1, _hyper_2_0_replica.series_2
-> Sort
Output: _hyper_2_3_0_partition."time", _hyper_2_3_0_partition.series_0, _hyper_2_3_0_partition.series_1, _hyper_2_3_0_partition.series_2
Sort Key: _hyper_2_3_0_partition."time" DESC
-> Seq Scan on _timescaledb_internal._hyper_2_3_0_partition
Output: _hyper_2_3_0_partition."time", _hyper_2_3_0_partition.series_0, _hyper_2_3_0_partition.series_1, _hyper_2_3_0_partition.series_2
-> Index Scan using "19-hyper_1_time_series_0_idx" on _timescaledb_internal._hyper_2_3_0_4_data
Output: _hyper_2_3_0_4_data."time", _hyper_2_3_0_4_data.series_0, _hyper_2_3_0_4_data.series_1, _hyper_2_3_0_4_data.series_2
(16 rows)
--TODO: aggregated with date_trunc doesn't work
EXPLAIN (verbose ON, costs off)SELECT date_trunc('minute', time) t, min(series_0) FROM hyper_1 GROUP BY t ORDER BY t DESC limit 2;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------
Limit
Output: (date_trunc('minute'::text, _hyper_2_0_replica."time")), (min(_hyper_2_0_replica.series_0))
-> Sort
Output: (date_trunc('minute'::text, _hyper_2_0_replica."time")), (min(_hyper_2_0_replica.series_0))
Sort Key: (date_trunc('minute'::text, _hyper_2_0_replica."time")) DESC
-> HashAggregate
Output: (date_trunc('minute'::text, _hyper_2_0_replica."time")), min(_hyper_2_0_replica.series_0)
Group Key: date_trunc('minute'::text, _hyper_2_0_replica."time")
-> Result
Output: date_trunc('minute'::text, _hyper_2_0_replica."time"), _hyper_2_0_replica.series_0
-> Append
-> Seq Scan on _timescaledb_internal._hyper_2_0_replica
Output: _hyper_2_0_replica."time", _hyper_2_0_replica.series_0
-> Seq Scan on _timescaledb_internal._hyper_2_3_0_partition
Output: _hyper_2_3_0_partition."time", _hyper_2_3_0_partition.series_0
-> Seq Scan on _timescaledb_internal._hyper_2_3_0_4_data
Output: _hyper_2_3_0_4_data."time", _hyper_2_3_0_4_data.series_0
(17 rows)

View File

@ -0,0 +1,187 @@
\o /dev/null
\ir include/create_single_db.sql
SET client_min_messages = WARNING;
DROP DATABASE IF EXISTS single;
SET client_min_messages = NOTICE;
CREATE DATABASE single;
\c single
CREATE EXTENSION IF NOT EXISTS timescaledb CASCADE;
psql:include/create_single_db.sql:7: NOTICE: installing required extension "dblink"
psql:include/create_single_db.sql:7: NOTICE: installing required extension "postgres_fdw"
psql:include/create_single_db.sql:7: NOTICE: installing required extension "hstore"
SELECT setup_timescaledb(hostname => 'fakehost'); -- fakehost makes sure there is no network connection
\o
\ir include/sql_query_results.sql
CREATE TABLE PUBLIC.hyper_1 (
time TIMESTAMP NOT NULL,
series_0 DOUBLE PRECISION NULL,
series_1 DOUBLE PRECISION NULL,
series_2 DOUBLE PRECISION NULL
);
CREATE INDEX "time_plain" ON PUBLIC.hyper_1 (time DESC, series_0);
SELECT * FROM create_hypertable('"public"."hyper_1"'::regclass, 'time'::name, number_partitions => 1, chunk_size_bytes=>10000);
create_hypertable
-------------------
(1 row)
INSERT INTO hyper_1 SELECT to_timestamp(ser), ser, ser+10000, sqrt(ser::numeric) FROM generate_series(0,10000) ser;
INSERT INTO hyper_1 SELECT to_timestamp(ser), ser, ser+10000, sqrt(ser::numeric) FROM generate_series(10001,20000) ser;
--non-aggregates use MergeAppend in both optimized and non-optimized
EXPLAIN (costs off) SELECT * FROM hyper_1 ORDER BY "time" DESC limit 2;
QUERY PLAN
--------------------------------------------------------------------
Limit
-> Merge Append
Sort Key: _hyper_1_0_replica."time" DESC
-> Sort
Sort Key: _hyper_1_0_replica."time" DESC
-> Seq Scan on _hyper_1_0_replica
-> Sort
Sort Key: _hyper_1_1_0_partition."time" DESC
-> Seq Scan on _hyper_1_1_0_partition
-> Index Scan using "1-time_plain" on _hyper_1_1_0_1_data
-> Index Scan using "2-time_plain" on _hyper_1_1_0_2_data
(11 rows)
SELECT * FROM hyper_1 ORDER BY "time" DESC limit 2;
time | series_0 | series_1 | series_2
--------------------------+----------+----------+------------------
Wed Dec 31 21:33:20 1969 | 20000 | 30000 | 141.42135623731
Wed Dec 31 21:33:19 1969 | 19999 | 29999 | 141.417820659208
(2 rows)
--aggregates use MergeAppend only in optimized
EXPLAIN (costs off) SELECT date_trunc('minute', time) t, avg(series_0), min(series_1), avg(series_2) FROM hyper_1 GROUP BY t ORDER BY t DESC limit 2;
QUERY PLAN
------------------------------------------------------------------------------------------------------
Limit
-> GroupAggregate
Group Key: (date_trunc('minute'::text, _hyper_1_0_replica."time"))
-> Result
-> Merge Append
Sort Key: (date_trunc('minute'::text, _hyper_1_0_replica."time")) DESC
-> Sort
Sort Key: (date_trunc('minute'::text, _hyper_1_0_replica."time")) DESC
-> Seq Scan on _hyper_1_0_replica
-> Sort
Sort Key: (date_trunc('minute'::text, _hyper_1_1_0_partition."time")) DESC
-> Seq Scan on _hyper_1_1_0_partition
-> Index Scan using "1-time_plain" on _hyper_1_1_0_1_data
-> Index Scan using "2-time_plain" on _hyper_1_1_0_2_data
(14 rows)
--the minute and second results should be diff
SELECT date_trunc('minute', time) t, avg(series_0), min(series_1), avg(series_2) FROM hyper_1 GROUP BY t ORDER BY t DESC limit 2;
t | avg | min | avg
--------------------------+---------+-------+------------------
Wed Dec 31 21:33:00 1969 | 19990 | 29980 | 141.385994856058
Wed Dec 31 21:32:00 1969 | 19949.5 | 29920 | 141.242685621416
(2 rows)
SELECT date_trunc('second', time) t, avg(series_0), min(series_1), avg(series_2) FROM hyper_1 GROUP BY t ORDER BY t DESC limit 2;
t | avg | min | avg
--------------------------+-------+-------+------------------
Wed Dec 31 21:33:20 1969 | 20000 | 30000 | 141.42135623731
Wed Dec 31 21:33:19 1969 | 19999 | 29999 | 141.417820659208
(2 rows)
--test that when index on time used by constraint, still works correctly
EXPLAIN (costs off)
SELECT date_trunc('minute', time) t, avg(series_0), min(series_1), avg(series_2)
FROM hyper_1
WHERE time < to_timestamp(900)
GROUP BY t
ORDER BY t DESC
LIMIT 2;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------
Limit
-> GroupAggregate
Group Key: (date_trunc('minute'::text, _hyper_1_0_replica."time"))
-> Result
-> Merge Append
Sort Key: (date_trunc('minute'::text, _hyper_1_0_replica."time")) DESC
-> Sort
Sort Key: (date_trunc('minute'::text, _hyper_1_0_replica."time")) DESC
-> Seq Scan on _hyper_1_0_replica
Filter: ("time" < 'Wed Dec 31 16:15:00 1969 PST'::timestamp with time zone)
-> Sort
Sort Key: (date_trunc('minute'::text, _hyper_1_1_0_partition."time")) DESC
-> Seq Scan on _hyper_1_1_0_partition
Filter: ("time" < 'Wed Dec 31 16:15:00 1969 PST'::timestamp with time zone)
-> Index Scan using "1-time_plain" on _hyper_1_1_0_1_data
Index Cond: ("time" < 'Wed Dec 31 16:15:00 1969 PST'::timestamp with time zone)
-> Index Scan using "2-time_plain" on _hyper_1_1_0_2_data
Index Cond: ("time" < 'Wed Dec 31 16:15:00 1969 PST'::timestamp with time zone)
(18 rows)
SELECT date_trunc('minute', time) t, avg(series_0), min(series_1), avg(series_2)
FROM hyper_1
WHERE time < to_timestamp(900)
GROUP BY t
ORDER BY t DESC
LIMIT 2;
t | avg | min | avg
--------------------------+-------+-------+------------------
Wed Dec 31 16:14:00 1969 | 869.5 | 10840 | 29.4858228711055
Wed Dec 31 16:13:00 1969 | 809.5 | 10780 | 28.4500853206775
(2 rows)
--test that still works with an expression index on date_trunc.
DROP INDEX "time_plain";
CREATE INDEX "time_trunc" ON PUBLIC.hyper_1 (date_trunc('minute', time));
EXPLAIN (costs off) SELECT date_trunc('minute', time) t, avg(series_0), min(series_1), avg(series_2) FROM hyper_1 GROUP BY t ORDER BY t DESC limit 2;
QUERY PLAN
------------------------------------------------------------------------------------------------------
Limit
-> GroupAggregate
Group Key: (date_trunc('minute'::text, _hyper_1_0_replica."time"))
-> Result
-> Merge Append
Sort Key: (date_trunc('minute'::text, _hyper_1_0_replica."time")) DESC
-> Sort
Sort Key: (date_trunc('minute'::text, _hyper_1_0_replica."time")) DESC
-> Seq Scan on _hyper_1_0_replica
-> Sort
Sort Key: (date_trunc('minute'::text, _hyper_1_1_0_partition."time")) DESC
-> Seq Scan on _hyper_1_1_0_partition
-> Index Scan Backward using "3-time_trunc" on _hyper_1_1_0_1_data
-> Index Scan Backward using "4-time_trunc" on _hyper_1_1_0_2_data
(14 rows)
SELECT date_trunc('minute', time) t, avg(series_0), min(series_1), avg(series_2) FROM hyper_1 GROUP BY t ORDER BY t DESC limit 2;
t | avg | min | avg
--------------------------+---------+-------+------------------
Wed Dec 31 21:33:00 1969 | 19990 | 29980 | 141.385994856058
Wed Dec 31 21:32:00 1969 | 19949.5 | 29920 | 141.242685621416
(2 rows)
--test that works with both indexes
CREATE INDEX "time_plain" ON PUBLIC.hyper_1 (time DESC, series_0);
EXPLAIN (costs off) SELECT date_trunc('minute', time) t, avg(series_0), min(series_1), avg(series_2) FROM hyper_1 GROUP BY t ORDER BY t DESC limit 2;
QUERY PLAN
------------------------------------------------------------------------------------------------------
Limit
-> GroupAggregate
Group Key: (date_trunc('minute'::text, _hyper_1_0_replica."time"))
-> Result
-> Merge Append
Sort Key: (date_trunc('minute'::text, _hyper_1_0_replica."time")) DESC
-> Sort
Sort Key: (date_trunc('minute'::text, _hyper_1_0_replica."time")) DESC
-> Seq Scan on _hyper_1_0_replica
-> Sort
Sort Key: (date_trunc('minute'::text, _hyper_1_1_0_partition."time")) DESC
-> Seq Scan on _hyper_1_1_0_partition
-> Index Scan Backward using "3-time_trunc" on _hyper_1_1_0_1_data
-> Index Scan Backward using "4-time_trunc" on _hyper_1_1_0_2_data
(14 rows)
SELECT date_trunc('minute', time) t, avg(series_0), min(series_1), avg(series_2) FROM hyper_1 GROUP BY t ORDER BY t DESC limit 2;
t | avg | min | avg
--------------------------+---------+-------+------------------
Wed Dec 31 21:33:00 1969 | 19990 | 29980 | 141.385994856058
Wed Dec 31 21:32:00 1969 | 19949.5 | 29920 | 141.242685621416
(2 rows)

View File

@ -0,0 +1,182 @@
\o /dev/null
\ir include/create_single_db.sql
SET client_min_messages = WARNING;
DROP DATABASE IF EXISTS single;
SET client_min_messages = NOTICE;
CREATE DATABASE single;
\c single
CREATE EXTENSION IF NOT EXISTS timescaledb CASCADE;
psql:include/create_single_db.sql:7: NOTICE: installing required extension "dblink"
psql:include/create_single_db.sql:7: NOTICE: installing required extension "postgres_fdw"
psql:include/create_single_db.sql:7: NOTICE: installing required extension "hstore"
SELECT setup_timescaledb(hostname => 'fakehost'); -- fakehost makes sure there is no network connection
\o
SET timescaledb.disable_optimizations= 'true';
\ir include/sql_query_results.sql
CREATE TABLE PUBLIC.hyper_1 (
time TIMESTAMP NOT NULL,
series_0 DOUBLE PRECISION NULL,
series_1 DOUBLE PRECISION NULL,
series_2 DOUBLE PRECISION NULL
);
CREATE INDEX "time_plain" ON PUBLIC.hyper_1 (time DESC, series_0);
SELECT * FROM create_hypertable('"public"."hyper_1"'::regclass, 'time'::name, number_partitions => 1, chunk_size_bytes=>10000);
create_hypertable
-------------------
(1 row)
INSERT INTO hyper_1 SELECT to_timestamp(ser), ser, ser+10000, sqrt(ser::numeric) FROM generate_series(0,10000) ser;
INSERT INTO hyper_1 SELECT to_timestamp(ser), ser, ser+10000, sqrt(ser::numeric) FROM generate_series(10001,20000) ser;
--non-aggregates use MergeAppend in both optimized and non-optimized
EXPLAIN (costs off) SELECT * FROM hyper_1 ORDER BY "time" DESC limit 2;
QUERY PLAN
--------------------------------------------------------------------
Limit
-> Merge Append
Sort Key: _hyper_1_0_replica."time" DESC
-> Sort
Sort Key: _hyper_1_0_replica."time" DESC
-> Seq Scan on _hyper_1_0_replica
-> Sort
Sort Key: _hyper_1_1_0_partition."time" DESC
-> Seq Scan on _hyper_1_1_0_partition
-> Index Scan using "1-time_plain" on _hyper_1_1_0_1_data
-> Index Scan using "2-time_plain" on _hyper_1_1_0_2_data
(11 rows)
SELECT * FROM hyper_1 ORDER BY "time" DESC limit 2;
time | series_0 | series_1 | series_2
--------------------------+----------+----------+------------------
Wed Dec 31 21:33:20 1969 | 20000 | 30000 | 141.42135623731
Wed Dec 31 21:33:19 1969 | 19999 | 29999 | 141.417820659208
(2 rows)
--aggregates use MergeAppend only in optimized
EXPLAIN (costs off) SELECT date_trunc('minute', time) t, avg(series_0), min(series_1), avg(series_2) FROM hyper_1 GROUP BY t ORDER BY t DESC limit 2;
QUERY PLAN
--------------------------------------------------------------------------------
Limit
-> Sort
Sort Key: (date_trunc('minute'::text, _hyper_1_0_replica."time")) DESC
-> HashAggregate
Group Key: date_trunc('minute'::text, _hyper_1_0_replica."time")
-> Result
-> Append
-> Seq Scan on _hyper_1_0_replica
-> Seq Scan on _hyper_1_1_0_partition
-> Seq Scan on _hyper_1_1_0_1_data
-> Seq Scan on _hyper_1_1_0_2_data
(11 rows)
--the minute and second results should be diff
SELECT date_trunc('minute', time) t, avg(series_0), min(series_1), avg(series_2) FROM hyper_1 GROUP BY t ORDER BY t DESC limit 2;
t | avg | min | avg
--------------------------+---------+-------+------------------
Wed Dec 31 21:33:00 1969 | 19990 | 29980 | 141.385994856058
Wed Dec 31 21:32:00 1969 | 19949.5 | 29920 | 141.242685621416
(2 rows)
SELECT date_trunc('second', time) t, avg(series_0), min(series_1), avg(series_2) FROM hyper_1 GROUP BY t ORDER BY t DESC limit 2;
t | avg | min | avg
--------------------------+-------+-------+------------------
Wed Dec 31 21:33:20 1969 | 20000 | 30000 | 141.42135623731
Wed Dec 31 21:33:19 1969 | 19999 | 29999 | 141.417820659208
(2 rows)
--test that when index on time used by constraint, still works correctly
EXPLAIN (costs off)
SELECT date_trunc('minute', time) t, avg(series_0), min(series_1), avg(series_2)
FROM hyper_1
WHERE time < to_timestamp(900)
GROUP BY t
ORDER BY t DESC
LIMIT 2;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------
Limit
-> Sort
Sort Key: (date_trunc('minute'::text, _hyper_1_0_replica."time")) DESC
-> HashAggregate
Group Key: date_trunc('minute'::text, _hyper_1_0_replica."time")
-> Result
-> Append
-> Seq Scan on _hyper_1_0_replica
Filter: ("time" < 'Wed Dec 31 16:15:00 1969 PST'::timestamp with time zone)
-> Seq Scan on _hyper_1_1_0_partition
Filter: ("time" < 'Wed Dec 31 16:15:00 1969 PST'::timestamp with time zone)
-> Seq Scan on _hyper_1_1_0_1_data
Filter: ("time" < 'Wed Dec 31 16:15:00 1969 PST'::timestamp with time zone)
-> Seq Scan on _hyper_1_1_0_2_data
Filter: ("time" < 'Wed Dec 31 16:15:00 1969 PST'::timestamp with time zone)
(15 rows)
SELECT date_trunc('minute', time) t, avg(series_0), min(series_1), avg(series_2)
FROM hyper_1
WHERE time < to_timestamp(900)
GROUP BY t
ORDER BY t DESC
LIMIT 2;
t | avg | min | avg
--------------------------+-------+-------+------------------
Wed Dec 31 16:14:00 1969 | 869.5 | 10840 | 29.4858228711055
Wed Dec 31 16:13:00 1969 | 809.5 | 10780 | 28.4500853206775
(2 rows)
--test that still works with an expression index on date_trunc.
DROP INDEX "time_plain";
CREATE INDEX "time_trunc" ON PUBLIC.hyper_1 (date_trunc('minute', time));
EXPLAIN (costs off) SELECT date_trunc('minute', time) t, avg(series_0), min(series_1), avg(series_2) FROM hyper_1 GROUP BY t ORDER BY t DESC limit 2;
QUERY PLAN
------------------------------------------------------------------------------------------------------
Limit
-> GroupAggregate
Group Key: (date_trunc('minute'::text, _hyper_1_0_replica."time"))
-> Result
-> Merge Append
Sort Key: (date_trunc('minute'::text, _hyper_1_0_replica."time")) DESC
-> Sort
Sort Key: (date_trunc('minute'::text, _hyper_1_0_replica."time")) DESC
-> Seq Scan on _hyper_1_0_replica
-> Sort
Sort Key: (date_trunc('minute'::text, _hyper_1_1_0_partition."time")) DESC
-> Seq Scan on _hyper_1_1_0_partition
-> Index Scan Backward using "3-time_trunc" on _hyper_1_1_0_1_data
-> Index Scan Backward using "4-time_trunc" on _hyper_1_1_0_2_data
(14 rows)
SELECT date_trunc('minute', time) t, avg(series_0), min(series_1), avg(series_2) FROM hyper_1 GROUP BY t ORDER BY t DESC limit 2;
t | avg | min | avg
--------------------------+---------+-------+------------------
Wed Dec 31 21:33:00 1969 | 19990 | 29980 | 141.385994856058
Wed Dec 31 21:32:00 1969 | 19949.5 | 29920 | 141.242685621416
(2 rows)
--test that works with both indexes
CREATE INDEX "time_plain" ON PUBLIC.hyper_1 (time DESC, series_0);
EXPLAIN (costs off) SELECT date_trunc('minute', time) t, avg(series_0), min(series_1), avg(series_2) FROM hyper_1 GROUP BY t ORDER BY t DESC limit 2;
QUERY PLAN
------------------------------------------------------------------------------------------------------
Limit
-> GroupAggregate
Group Key: (date_trunc('minute'::text, _hyper_1_0_replica."time"))
-> Result
-> Merge Append
Sort Key: (date_trunc('minute'::text, _hyper_1_0_replica."time")) DESC
-> Sort
Sort Key: (date_trunc('minute'::text, _hyper_1_0_replica."time")) DESC
-> Seq Scan on _hyper_1_0_replica
-> Sort
Sort Key: (date_trunc('minute'::text, _hyper_1_1_0_partition."time")) DESC
-> Seq Scan on _hyper_1_1_0_partition
-> Index Scan Backward using "3-time_trunc" on _hyper_1_1_0_1_data
-> Index Scan Backward using "4-time_trunc" on _hyper_1_1_0_2_data
(14 rows)
SELECT date_trunc('minute', time) t, avg(series_0), min(series_1), avg(series_2) FROM hyper_1 GROUP BY t ORDER BY t DESC limit 2;
t | avg | min | avg
--------------------------+---------+-------+------------------
Wed Dec 31 21:33:00 1969 | 19990 | 29980 | 141.385994856058
Wed Dec 31 21:32:00 1969 | 19949.5 | 29920 | 141.242685621416
(2 rows)

View File

@ -0,0 +1,66 @@
--make sure diff only has explain output not result output
\! diff ../results/sql_query_results_optimized.out ../results/sql_query_results_unoptimized.out
13a14
> SET timescaledb.disable_optimizations= 'true';
56,57c57,58
< QUERY PLAN
< ------------------------------------------------------------------------------------------------------
---
> QUERY PLAN
> --------------------------------------------------------------------------------
59,65c60,65
< -> GroupAggregate
< Group Key: (date_trunc('minute'::text, _hyper_1_0_replica."time"))
< -> Result
< -> Merge Append
< Sort Key: (date_trunc('minute'::text, _hyper_1_0_replica."time")) DESC
< -> Sort
< Sort Key: (date_trunc('minute'::text, _hyper_1_0_replica."time")) DESC
---
> -> Sort
> Sort Key: (date_trunc('minute'::text, _hyper_1_0_replica."time")) DESC
> -> HashAggregate
> Group Key: date_trunc('minute'::text, _hyper_1_0_replica."time")
> -> Result
> -> Append
67,68d66
< -> Sort
< Sort Key: (date_trunc('minute'::text, _hyper_1_1_0_partition."time")) DESC
70,72c68,70
< -> Index Scan using "1-time_plain" on _hyper_1_1_0_1_data
< -> Index Scan using "2-time_plain" on _hyper_1_1_0_2_data
< (14 rows)
---
> -> Seq Scan on _hyper_1_1_0_1_data
> -> Seq Scan on _hyper_1_1_0_2_data
> (11 rows)
100,106c98,103
< -> GroupAggregate
< Group Key: (date_trunc('minute'::text, _hyper_1_0_replica."time"))
< -> Result
< -> Merge Append
< Sort Key: (date_trunc('minute'::text, _hyper_1_0_replica."time")) DESC
< -> Sort
< Sort Key: (date_trunc('minute'::text, _hyper_1_0_replica."time")) DESC
---
> -> Sort
> Sort Key: (date_trunc('minute'::text, _hyper_1_0_replica."time")) DESC
> -> HashAggregate
> Group Key: date_trunc('minute'::text, _hyper_1_0_replica."time")
> -> Result
> -> Append
109,110d105
< -> Sort
< Sort Key: (date_trunc('minute'::text, _hyper_1_1_0_partition."time")) DESC
113,117c108,112
< -> Index Scan using "1-time_plain" on _hyper_1_1_0_1_data
< Index Cond: ("time" < 'Wed Dec 31 16:15:00 1969 PST'::timestamp with time zone)
< -> Index Scan using "2-time_plain" on _hyper_1_1_0_2_data
< Index Cond: ("time" < 'Wed Dec 31 16:15:00 1969 PST'::timestamp with time zone)
< (18 rows)
---
> -> Seq Scan on _hyper_1_1_0_1_data
> Filter: ("time" < 'Wed Dec 31 16:15:00 1969 PST'::timestamp with time zone)
> -> Seq Scan on _hyper_1_1_0_2_data
> Filter: ("time" < 'Wed Dec 31 16:15:00 1969 PST'::timestamp with time zone)
> (15 rows)

View File

@ -0,0 +1,50 @@
CREATE TABLE PUBLIC.hyper_1 (
time TIMESTAMP NOT NULL,
series_0 DOUBLE PRECISION NULL,
series_1 DOUBLE PRECISION NULL,
series_2 DOUBLE PRECISION NULL
);
CREATE INDEX "time_plain" ON PUBLIC.hyper_1 (time DESC, series_0);
SELECT * FROM create_hypertable('"public"."hyper_1"'::regclass, 'time'::name, number_partitions => 1, chunk_size_bytes=>10000);
INSERT INTO hyper_1 SELECT to_timestamp(ser), ser, ser+10000, sqrt(ser::numeric) FROM generate_series(0,10000) ser;
INSERT INTO hyper_1 SELECT to_timestamp(ser), ser, ser+10000, sqrt(ser::numeric) FROM generate_series(10001,20000) ser;
--non-aggregates use MergeAppend in both optimized and non-optimized
EXPLAIN (costs off) SELECT * FROM hyper_1 ORDER BY "time" DESC limit 2;
SELECT * FROM hyper_1 ORDER BY "time" DESC limit 2;
--aggregates use MergeAppend only in optimized
EXPLAIN (costs off) SELECT date_trunc('minute', time) t, avg(series_0), min(series_1), avg(series_2) FROM hyper_1 GROUP BY t ORDER BY t DESC limit 2;
--the minute and second results should be diff
SELECT date_trunc('minute', time) t, avg(series_0), min(series_1), avg(series_2) FROM hyper_1 GROUP BY t ORDER BY t DESC limit 2;
SELECT date_trunc('second', time) t, avg(series_0), min(series_1), avg(series_2) FROM hyper_1 GROUP BY t ORDER BY t DESC limit 2;
--test that when index on time used by constraint, still works correctly
EXPLAIN (costs off)
SELECT date_trunc('minute', time) t, avg(series_0), min(series_1), avg(series_2)
FROM hyper_1
WHERE time < to_timestamp(900)
GROUP BY t
ORDER BY t DESC
LIMIT 2;
SELECT date_trunc('minute', time) t, avg(series_0), min(series_1), avg(series_2)
FROM hyper_1
WHERE time < to_timestamp(900)
GROUP BY t
ORDER BY t DESC
LIMIT 2;
--test that still works with an expression index on date_trunc.
DROP INDEX "time_plain";
CREATE INDEX "time_trunc" ON PUBLIC.hyper_1 (date_trunc('minute', time));
EXPLAIN (costs off) SELECT date_trunc('minute', time) t, avg(series_0), min(series_1), avg(series_2) FROM hyper_1 GROUP BY t ORDER BY t DESC limit 2;
SELECT date_trunc('minute', time) t, avg(series_0), min(series_1), avg(series_2) FROM hyper_1 GROUP BY t ORDER BY t DESC limit 2;
--test that works with both indexes
CREATE INDEX "time_plain" ON PUBLIC.hyper_1 (time DESC, series_0);
EXPLAIN (costs off) SELECT date_trunc('minute', time) t, avg(series_0), min(series_1), avg(series_2) FROM hyper_1 GROUP BY t ORDER BY t DESC limit 2;
SELECT date_trunc('minute', time) t, avg(series_0), min(series_1), avg(series_2) FROM hyper_1 GROUP BY t ORDER BY t DESC limit 2;

View File

@ -30,20 +30,4 @@ EXPLAIN (verbose ON, costs off)SELECT "timeCustom"/10 t, min(series_0) FROM PUBL
EXPLAIN (verbose ON, costs off)SELECT "timeCustom"%10 t, min(series_0) FROM PUBLIC."two_Partitions" GROUP BY t ORDER BY t DESC NULLS LAST limit 2;
--make table with timestamp. Test timestamp instead of int time.
CREATE TABLE PUBLIC.hyper_1 (
time TIMESTAMPTZ NOT NULL,
series_0 DOUBLE PRECISION NULL,
series_1 DOUBLE PRECISION NULL,
series_2 DOUBLE PRECISION NULL
);
CREATE INDEX ON PUBLIC.hyper_1 (time DESC, series_0);
SELECT * FROM create_hypertable('"public"."hyper_1"'::regclass, 'time'::name, number_partitions => 1, chunk_size_bytes=>100000);
INSERT INTO hyper_1 SELECT to_timestamp(generate_series(0,10000)), random(), random(), random();
--non-aggregated uses MergeAppend correctly
EXPLAIN (verbose ON, costs off)SELECT * FROM hyper_1 ORDER BY "time" DESC limit 2;
--TODO: aggregated with date_trunc doesn't work
EXPLAIN (verbose ON, costs off)SELECT date_trunc('minute', time) t, min(series_0) FROM hyper_1 GROUP BY t ORDER BY t DESC limit 2;

View File

@ -0,0 +1,6 @@
\o /dev/null
\ir include/create_single_db.sql
\o
\ir include/sql_query_results.sql

View File

@ -0,0 +1,6 @@
\o /dev/null
\ir include/create_single_db.sql
\o
SET timescaledb.disable_optimizations= 'true';
\ir include/sql_query_results.sql

View File

@ -0,0 +1,2 @@
--make sure diff only has explain output not result output
\! diff ../results/sql_query_results_optimized.out ../results/sql_query_results_unoptimized.out