Avoid distributed chunks scan plans

The current approach to planning queries on distributed hypertables
first builds a "naive" plan that scans each remote chunk individually
(the chunks are children of an Append), and then replaces that plan
with a data node scan plan (each data node is a child of the Append).

While we need to include each chunk in the planning for cost reasons
(we need to know the stats for chunk rels), we need not actually create
paths for scanning the chunks. This unnecessary work is now avoided, and
data node scans are planned directly.

Fixes #3685
This commit is contained in:
Nikhil Sontakke 2022-01-12 17:24:52 +05:30 committed by Nikhil
parent e320679c4c
commit 2a2b394172
3 changed files with 44 additions and 1 deletions

View File

@ -747,14 +747,43 @@ reenable_inheritance(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntr
if (set_pathlist_for_current_rel)
{
bool do_distributed;
Hypertable *ht = get_hypertable(rte->relid, CACHE_FLAG_NOCREATE);
Assert(ht != NULL);
/* the hypertable will have been planned as if it was a regular table
* with no data. Since such a plan would be cheaper than any real plan,
* it would always be used, and we need to remove these plans before
* adding ours.
*
* Also, if it's a distributed hypertable and per data node queries are
* enabled then we will be throwing this below append path away. So only
* build it otherwise
*/
do_distributed = !IS_DUMMY_REL(rel) && hypertable_is_distributed(ht) &&
ts_guc_enable_per_data_node_queries;
rel->pathlist = NIL;
rel->partial_pathlist = NIL;
ts_set_append_rel_pathlist(root, rel, rti, rte);
/* allow a session parameter to override the use of this datanode only path */
#ifdef TS_DEBUG
if (do_distributed)
{
const char *allow_dn_path =
GetConfigOption("timescaledb.debug_allow_datanode_only_path", true, false);
if (allow_dn_path && pg_strcasecmp(allow_dn_path, "on") != 0)
{
do_distributed = false;
elog(DEBUG2, "creating per chunk append paths");
}
else
elog(DEBUG2, "avoiding per chunk append paths");
}
#endif
if (!do_distributed)
ts_set_append_rel_pathlist(root, rel, rti, rte);
}
}

View File

@ -63,6 +63,8 @@ ANALYZE hyper;
-- Optimizer debug messages shown at debug level 2
SET client_min_messages TO DEBUG2;
-- Turning on show_rel should show a message
-- But disable the code which avoids dist chunk planning
SET timescaledb.debug_allow_datanode_only_path = 'off';
SET timescaledb.debug_optimizer_flags = 'show_rel';
SHOW timescaledb.debug_optimizer_flags;
timescaledb.debug_optimizer_flags
@ -75,6 +77,7 @@ FROM hyper
WHERE time BETWEEN '2018-04-19 00:01' AND '2018-06-01 00:00'
GROUP BY 1, 2
ORDER BY 1, 2;
DEBUG: creating per chunk append paths
DEBUG: RELOPTINFO [rel name: _timescaledb_internal._dist_hyper_1_8_chunk hyper, type: FOREIGN_TABLE, kind: OTHER_MEMBER_REL, base rel names: hyper] rows=1 width=20
Path list:
ForeignScan [rel type: FOREIGN_TABLE, kind: OTHER_MEMBER_REL, parent's base rels: hyper] rows=1 with pathkeys: ((hyper.time, hyper.time, hyper.time, hyper.time, hyper.time), (hyper.device, hyper.device, hyper.device, hyper.device, hyper.device))
@ -172,6 +175,9 @@ Path list:
Wed May 30 13:02:00 2018 PDT | 3 | 9
(5 rows)
-- Enable session level datanode only path parameter which doesn't
-- plan distributed chunk scans unnecessarily
SET timescaledb.debug_allow_datanode_only_path = 'on';
-- Turning off the show_rel (and turning on another flag) should not
-- show a notice on the relations, but show the upper paths.
SET timescaledb.debug_optimizer_flags = 'show_upper=*';
@ -186,6 +192,7 @@ FROM hyper
WHERE time BETWEEN '2018-04-19 00:01' AND '2018-06-01 00:00'
GROUP BY 1, 2
ORDER BY 1, 2;
DEBUG: avoiding per chunk append paths
DEBUG: Upper rel stage GROUP_AGG:
RELOPTINFO [rel name: Aggregate on (public.hyper), type: DATA_NODE, kind: OTHER_UPPER_REL, base rel names: hyper] rows=0 width=20
Path list:
@ -232,6 +239,7 @@ FROM hyper
WHERE time BETWEEN '2018-04-19 00:01' AND '2018-06-01 00:00'
GROUP BY 1, 2
ORDER BY 1, 2;
DEBUG: avoiding per chunk append paths
time | device | temp
------------------------------+--------+------
Thu Apr 19 13:01:00 2018 PDT | 1 | 7.6

View File

@ -51,6 +51,8 @@ ANALYZE hyper;
SET client_min_messages TO DEBUG2;
-- Turning on show_rel should show a message
-- But disable the code which avoids dist chunk planning
SET timescaledb.debug_allow_datanode_only_path = 'off';
SET timescaledb.debug_optimizer_flags = 'show_rel';
SHOW timescaledb.debug_optimizer_flags;
@ -60,6 +62,10 @@ WHERE time BETWEEN '2018-04-19 00:01' AND '2018-06-01 00:00'
GROUP BY 1, 2
ORDER BY 1, 2;
-- Enable session level datanode only path parameter which doesn't
-- plan distributed chunk scans unnecessarily
SET timescaledb.debug_allow_datanode_only_path = 'on';
-- Turning off the show_rel (and turning on another flag) should not
-- show a notice on the relations, but show the upper paths.
SET timescaledb.debug_optimizer_flags = 'show_upper=*';