From 04ce1bc498f8a2f7732a50bf673de175db97fd04 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Thu, 10 Aug 2023 15:32:40 +0200 Subject: [PATCH] Use cached Chunk struct when considering compressed paths Full catalog lookups for a Chunk are expensive, avoiding them speeds up the planning. --- src/planner/expand_hypertable.c | 2 +- src/planner/planner.c | 13 +++++----- src/planner/planner.h | 2 +- tsl/src/fdw/data_node_chunk_assignment.c | 4 +-- tsl/src/fdw/relinfo.c | 7 +++--- tsl/src/planner.c | 31 ++++++++++++++++++++---- 6 files changed, 40 insertions(+), 19 deletions(-) diff --git a/src/planner/expand_hypertable.c b/src/planner/expand_hypertable.c index 8298c9f92..d609df487 100644 --- a/src/planner/expand_hypertable.c +++ b/src/planner/expand_hypertable.c @@ -1507,7 +1507,7 @@ ts_plan_expand_hypertable_chunks(Hypertable *ht, PlannerInfo *root, RelOptInfo * #endif } - ts_get_private_reloptinfo(child_rel)->chunk = chunks[i]; + ts_get_private_reloptinfo(child_rel)->cached_chunk_struct = chunks[i]; Assert(chunks[i]->table_id == root->simple_rte_array[child_rtindex]->relid); } } diff --git a/src/planner/planner.c b/src/planner/planner.c index a42d19d78..699dbb0d8 100644 --- a/src/planner/planner.c +++ b/src/planner/planner.c @@ -1096,18 +1096,17 @@ apply_optimizations(PlannerInfo *root, TsRelType reltype, RelOptInfo *rel, Range case TS_REL_CHUNK_STANDALONE: case TS_REL_CHUNK_CHILD: ts_sort_transform_optimization(root, rel); + /* + * Since the sort optimization adds new paths to the rel it has + * to happen before any optimizations that replace pathlist. + */ + if (ts_cm_functions->set_rel_pathlist_query != NULL) + ts_cm_functions->set_rel_pathlist_query(root, rel, rel->relid, rte, ht); break; default: break; } - /* - * Since the sort optimization adds new paths to the rel it has - * to happen before any optimizations that replace pathlist. - */ - if (ts_cm_functions->set_rel_pathlist_query != NULL) - ts_cm_functions->set_rel_pathlist_query(root, rel, rel->relid, rte, ht); - if (reltype == TS_REL_HYPERTABLE && #if PG14_GE (root->parse->commandType == CMD_SELECT || root->parse->commandType == CMD_DELETE || diff --git a/src/planner/planner.h b/src/planner/planner.h index 5e41ba9b7..51f34f33d 100644 --- a/src/planner/planner.h +++ b/src/planner/planner.h @@ -40,7 +40,7 @@ typedef struct TimescaleDBPrivate TsFdwRelInfo *fdw_relation_info; /* Cached chunk data for the chunk relinfo. */ - Chunk *chunk; + Chunk *cached_chunk_struct; } TimescaleDBPrivate; extern TSDLLEXPORT bool ts_rte_is_hypertable(const RangeTblEntry *rte, bool *isdistributed); diff --git a/tsl/src/fdw/data_node_chunk_assignment.c b/tsl/src/fdw/data_node_chunk_assignment.c index 7b40b1a47..ac5533371 100644 --- a/tsl/src/fdw/data_node_chunk_assignment.c +++ b/tsl/src/fdw/data_node_chunk_assignment.c @@ -78,7 +78,7 @@ data_node_chunk_assignment_assign_chunk(DataNodeChunkAssignments *scas, RelOptIn */ Oid remote_chunk_relid = InvalidOid; ListCell *lc; - foreach (lc, chunk_private->chunk->data_nodes) + foreach (lc, chunk_private->cached_chunk_struct->data_nodes) { ChunkDataNode *cdn = (ChunkDataNode *) lfirst(lc); if (cdn->foreign_server_oid == chunkrel->serverid) @@ -94,7 +94,7 @@ data_node_chunk_assignment_assign_chunk(DataNodeChunkAssignments *scas, RelOptIn */ old = MemoryContextSwitchTo(scas->mctx); sca->chunk_relids = bms_add_member(sca->chunk_relids, chunkrel->relid); - sca->chunks = lappend(sca->chunks, chunk_private->chunk); + sca->chunks = lappend(sca->chunks, chunk_private->cached_chunk_struct); sca->remote_chunk_ids = lappend_int(sca->remote_chunk_ids, remote_chunk_relid); sca->pages += chunkrel->pages; sca->rows += chunkrel->rows; diff --git a/tsl/src/fdw/relinfo.c b/tsl/src/fdw/relinfo.c index c9d9a06fe..8e34a9065 100644 --- a/tsl/src/fdw/relinfo.c +++ b/tsl/src/fdw/relinfo.c @@ -300,10 +300,10 @@ estimate_chunk_size(PlannerInfo *root, RelOptInfo *chunk_rel) * exclusion, so we'll have to look up this info now. */ TimescaleDBPrivate *chunk_private = ts_get_private_reloptinfo(chunk_rel); - if (chunk_private->chunk == NULL) + if (chunk_private->cached_chunk_struct == NULL) { RangeTblEntry *chunk_rte = planner_rt_fetch(chunk_rel->relid, root); - chunk_private->chunk = + chunk_private->cached_chunk_struct = ts_chunk_get_by_relid(chunk_rte->relid, true /* fail_if_not_found */); } @@ -319,7 +319,8 @@ estimate_chunk_size(PlannerInfo *root, RelOptInfo *chunk_rel) Hypertable *ht = ts_hypertable_cache_get_entry(hcache, parent_rte->relid, CACHE_FLAG_NONE); Hyperspace *hyperspace = ht->space; - const double fillfactor = estimate_chunk_fillfactor(chunk_private->chunk, hyperspace); + const double fillfactor = + estimate_chunk_fillfactor(chunk_private->cached_chunk_struct, hyperspace); /* Can't have nonzero tuples in zero pages */ Assert(parent_private->average_chunk_pages != 0 || parent_private->average_chunk_tuples <= 0); diff --git a/tsl/src/planner.c b/tsl/src/planner.c index 373ca5e38..074fb2de7 100644 --- a/tsl/src/planner.c +++ b/tsl/src/planner.c @@ -121,16 +121,37 @@ tsl_set_rel_pathlist_query(PlannerInfo *root, RelOptInfo *rel, Index rti, RangeT * We check if it is the ONLY case by calling ts_rte_is_marked_for_expansion. * Respecting ONLY here is important to not break postgres tools like pg_dump. */ + TimescaleDBPrivate *fdw_private = (TimescaleDBPrivate *) rel->fdw_private; if (ts_guc_enable_transparent_decompression && ht && (rel->reloptkind == RELOPT_OTHER_MEMBER_REL || (rel->reloptkind == RELOPT_BASEREL && ts_rte_is_marked_for_expansion(rte))) && - TS_HYPERTABLE_HAS_COMPRESSION_TABLE(ht) && rel->fdw_private != NULL && - ((TimescaleDBPrivate *) rel->fdw_private)->compressed) + TS_HYPERTABLE_HAS_COMPRESSION_TABLE(ht) && fdw_private != NULL && fdw_private->compressed) { - Chunk *chunk = ts_chunk_get_by_relid(rte->relid, true); + if (fdw_private->cached_chunk_struct == NULL) + { + /* + * We can not have the cached Chunk struct, + * 1) if it was a direct query on the chunk; + * 2) if it is not a SELECT QUERY. + * Caching is done by our hypertable expansion, which doesn't run in + * these cases. + * + * Also on PG13 when a DELETE query runs through SPI, its command + * type is CMD_SELECT. Apparently it goes into inheritance_planner, + * which uses a hack to pretend it's actually a SELECT query, but + * for some reason for non-SPI queries the query type is still + * correct. You can observe it in the continuous_aggs-13 test. + * Just ignore this assertion on 13 and look up the chunk. + */ +#if PG14_GE + Assert(rel->reloptkind == RELOPT_BASEREL || root->parse->commandType != CMD_SELECT); +#endif + fdw_private->cached_chunk_struct = + ts_chunk_get_by_relid(rte->relid, /* fail_if_not_found = */ true); + } - if (chunk->fd.compressed_chunk_id != INVALID_CHUNK_ID) - ts_decompress_chunk_generate_paths(root, rel, ht, chunk); + if (fdw_private->cached_chunk_struct->fd.compressed_chunk_id != INVALID_CHUNK_ID) + ts_decompress_chunk_generate_paths(root, rel, ht, fdw_private->cached_chunk_struct); } }