mirror of
https://github.com/timescale/timescaledb.git
synced 2025-05-15 18:13:18 +08:00
Avoid distributed chunks scan plans
The current approach to planning queries on distributed hypertables first creates a "naive" plan that scans each remote chunk individually (the chunks are children of an Append), and then replaces that plan with a data node scan plan (each data node is a child of the Append). While we need to include each chunk in the planning for cost reasons (we need to know the stats for chunk rels), we need not actually create paths for scanning chunks. This unnecessary work is now avoided and we now plan data node scans directly. Fixes #3685
This commit is contained in:
parent
e320679c4c
commit
2a2b394172
@ -747,14 +747,43 @@ reenable_inheritance(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeTblEntr
|
||||
|
||||
if (set_pathlist_for_current_rel)
|
||||
{
|
||||
bool do_distributed;
|
||||
|
||||
Hypertable *ht = get_hypertable(rte->relid, CACHE_FLAG_NOCREATE);
|
||||
Assert(ht != NULL);
|
||||
|
||||
/* the hypertable will have been planned as if it was a regular table
|
||||
* with no data. Since such a plan would be cheaper than any real plan,
|
||||
* it would always be used, and we need to remove these plans before
|
||||
* adding ours.
|
||||
*
|
||||
* Also, if it's a distributed hypertable and per data node queries are
|
||||
* enabled, then the append path built below would be thrown away. So only
|
||||
* build it when that is not the case.
|
||||
*/
|
||||
do_distributed = !IS_DUMMY_REL(rel) && hypertable_is_distributed(ht) &&
|
||||
ts_guc_enable_per_data_node_queries;
|
||||
|
||||
rel->pathlist = NIL;
|
||||
rel->partial_pathlist = NIL;
|
||||
ts_set_append_rel_pathlist(root, rel, rti, rte);
|
||||
/* allow a session parameter to override the use of this datanode only path */
|
||||
#ifdef TS_DEBUG
|
||||
if (do_distributed)
|
||||
{
|
||||
const char *allow_dn_path =
|
||||
GetConfigOption("timescaledb.debug_allow_datanode_only_path", true, false);
|
||||
if (allow_dn_path && pg_strcasecmp(allow_dn_path, "on") != 0)
|
||||
{
|
||||
do_distributed = false;
|
||||
elog(DEBUG2, "creating per chunk append paths");
|
||||
}
|
||||
else
|
||||
elog(DEBUG2, "avoiding per chunk append paths");
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!do_distributed)
|
||||
ts_set_append_rel_pathlist(root, rel, rti, rte);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -63,6 +63,8 @@ ANALYZE hyper;
|
||||
-- Optimizer debug messages shown at debug level 2
|
||||
SET client_min_messages TO DEBUG2;
|
||||
-- Turning on show_rel should show a message
|
||||
-- But disable the code which avoids dist chunk planning
|
||||
SET timescaledb.debug_allow_datanode_only_path = 'off';
|
||||
SET timescaledb.debug_optimizer_flags = 'show_rel';
|
||||
SHOW timescaledb.debug_optimizer_flags;
|
||||
timescaledb.debug_optimizer_flags
|
||||
@ -75,6 +77,7 @@ FROM hyper
|
||||
WHERE time BETWEEN '2018-04-19 00:01' AND '2018-06-01 00:00'
|
||||
GROUP BY 1, 2
|
||||
ORDER BY 1, 2;
|
||||
DEBUG: creating per chunk append paths
|
||||
DEBUG: RELOPTINFO [rel name: _timescaledb_internal._dist_hyper_1_8_chunk hyper, type: FOREIGN_TABLE, kind: OTHER_MEMBER_REL, base rel names: hyper] rows=1 width=20
|
||||
Path list:
|
||||
ForeignScan [rel type: FOREIGN_TABLE, kind: OTHER_MEMBER_REL, parent's base rels: hyper] rows=1 with pathkeys: ((hyper.time, hyper.time, hyper.time, hyper.time, hyper.time), (hyper.device, hyper.device, hyper.device, hyper.device, hyper.device))
|
||||
@ -172,6 +175,9 @@ Path list:
|
||||
Wed May 30 13:02:00 2018 PDT | 3 | 9
|
||||
(5 rows)
|
||||
|
||||
-- Enable session level datanode only path parameter which doesn't
|
||||
-- plan distributed chunk scans unnecessarily
|
||||
SET timescaledb.debug_allow_datanode_only_path = 'on';
|
||||
-- Turning off the show_rel (and turning on another flag) should not
|
||||
-- show a notice on the relations, but show the upper paths.
|
||||
SET timescaledb.debug_optimizer_flags = 'show_upper=*';
|
||||
@ -186,6 +192,7 @@ FROM hyper
|
||||
WHERE time BETWEEN '2018-04-19 00:01' AND '2018-06-01 00:00'
|
||||
GROUP BY 1, 2
|
||||
ORDER BY 1, 2;
|
||||
DEBUG: avoiding per chunk append paths
|
||||
DEBUG: Upper rel stage GROUP_AGG:
|
||||
RELOPTINFO [rel name: Aggregate on (public.hyper), type: DATA_NODE, kind: OTHER_UPPER_REL, base rel names: hyper] rows=0 width=20
|
||||
Path list:
|
||||
@ -232,6 +239,7 @@ FROM hyper
|
||||
WHERE time BETWEEN '2018-04-19 00:01' AND '2018-06-01 00:00'
|
||||
GROUP BY 1, 2
|
||||
ORDER BY 1, 2;
|
||||
DEBUG: avoiding per chunk append paths
|
||||
time | device | temp
|
||||
------------------------------+--------+------
|
||||
Thu Apr 19 13:01:00 2018 PDT | 1 | 7.6
|
||||
|
@ -51,6 +51,8 @@ ANALYZE hyper;
|
||||
SET client_min_messages TO DEBUG2;
|
||||
|
||||
-- Turning on show_rel should show a message
|
||||
-- But disable the code which avoids dist chunk planning
|
||||
SET timescaledb.debug_allow_datanode_only_path = 'off';
|
||||
SET timescaledb.debug_optimizer_flags = 'show_rel';
|
||||
SHOW timescaledb.debug_optimizer_flags;
|
||||
|
||||
@ -60,6 +62,10 @@ WHERE time BETWEEN '2018-04-19 00:01' AND '2018-06-01 00:00'
|
||||
GROUP BY 1, 2
|
||||
ORDER BY 1, 2;
|
||||
|
||||
-- Enable session level datanode only path parameter which doesn't
|
||||
-- plan distributed chunk scans unnecessarily
|
||||
SET timescaledb.debug_allow_datanode_only_path = 'on';
|
||||
|
||||
-- Turning off the show_rel (and turning on another flag) should not
|
||||
-- show a notice on the relations, but show the upper paths.
|
||||
SET timescaledb.debug_optimizer_flags = 'show_upper=*';
|
||||
|
Loading…
x
Reference in New Issue
Block a user