Missing segmentby compression option in CAGGs

Timescale 2.7 released a new version of Continuous Aggregate (#4269)
that stores the final aggregation state instead of the byte array of
the partial aggregate state, offering multiple opportunities for
optimization as well as a more compact form.

This new version also removes the unnecessary `chunk_id` column from the
materialization hypertable and, consequently, the re-aggregation in the
user view. This means the user view that queries the materialization
hypertable no longer has a GROUP BY clause, which was problematic
for query performance.

Before 2.7, when users turned compression ON, we inferred the compression
options `segmentby` and `orderby` based on the GROUP BY clause and time
bucket respectively. With the new version, without a GROUP BY clause in the
user view, the inference of the `segmentby` compression option stopped
working.

Fixed it by changing the code so that compression on the new version of
Continuous Aggregate (aka finals form) behaves the same as on the old
version.

Fix #4816
This commit is contained in:
Fabrízio de Royes Mello 2022-10-13 20:05:04 -03:00
parent bde337e92d
commit 043bd55c0b
3 changed files with 74 additions and 25 deletions

View File

@ -67,22 +67,55 @@ update_materialized_only(ContinuousAgg *agg, bool materialized_only)
}
/*
* Retrieve the cagg view query and find the groupby clause and
* time_bucket clause. Map them to the column names(of mat.hypertable)
* Note that cagg_view_query has 2 forms : with union and without UNION
* We have to extract the part of the query that has finalize_agg on
* the materialized hypertable to find the group by clauses.
* (see continuous_aggs/create.c for more info on the query structure)
* Returns: list of column names used in group by clause of the cagg query.
* This function is responsible for returning a list of column names used in
* the GROUP BY clause of the cagg query. It behaves a bit differently depending
* on the type of the Continuous Aggregate.
*
* 1) Partials form (finalized=false)
*
* Retrieve the "user view query" and find the GROUP BY clause and
* "time_bucket" clause. Map them to the column names (of mat.hypertable)
*
* Note that the "user view query" has 2 forms:
* - with UNION
* - without UNION
*
* We have to extract the part of the query that has "finalize_agg" on
* the materialized hypertable to find the GROUP BY clauses.
* (see continuous_aggs/create.c for more info on the query structure)
*
* 2) Finals form (finalized=true) (>= 2.7)
*
* Retrieve the "direct view query" and find the GROUP BY clause and
* "time_bucket" clause. We use the "direct view query" because in the
* "user view query" we removed the re-aggregation in the part that queries
* the materialization hypertable, so we don't have a GROUP BY clause
* anymore.
*
* Get the column name from the GROUP BY clause because all the column
* names are the same in all underlying objects (user view, direct view,
* partial view and materialization hypertable).
*/
static List *
cagg_find_groupingcols(ContinuousAgg *agg, Hypertable *mat_ht)
{
List *retlist = NIL;
ListCell *lc;
Oid cagg_view_oid =
get_relname_relid(NameStr(agg->data.user_view_name),
get_namespace_oid(NameStr(agg->data.user_view_schema), false));
Oid cagg_view_oid;
/*
* Get the direct_view definition for the finalized version because
* the user view doesn't have the "GROUP BY" clause anymore.
*/
if (ContinuousAggIsFinalized(agg))
cagg_view_oid =
get_relname_relid(NameStr(agg->data.direct_view_name),
get_namespace_oid(NameStr(agg->data.direct_view_schema), false));
else
cagg_view_oid =
get_relname_relid(NameStr(agg->data.user_view_name),
get_namespace_oid(NameStr(agg->data.user_view_schema), false));
Relation cagg_view_rel = table_open(cagg_view_oid, AccessShareLock);
RuleLock *cagg_view_rules = cagg_view_rel->rd_rules;
Assert(cagg_view_rules && cagg_view_rules->numLocks == 1);
@ -117,11 +150,19 @@ cagg_find_groupingcols(ContinuousAgg *agg, Hypertable *mat_ht)
{
SortGroupClause *cagg_gc = (SortGroupClause *) lfirst(lc);
TargetEntry *cagg_tle = get_sortgroupclause_tle(cagg_gc, finalize_query->targetList);
/* groupby clauses are columns from the mat hypertable */
Assert(IsA(cagg_tle->expr, Var));
Var *mat_var = castNode(Var, cagg_tle->expr);
char *mat_colname = get_attname(mat_relid, mat_var->varattno, false);
retlist = lappend(retlist, mat_colname);
if (ContinuousAggIsFinalized(agg))
{
/* "resname" is the same as "mat column names" in the finalized version */
if (!cagg_tle->resjunk && cagg_tle->resname)
retlist = lappend(retlist, cagg_tle->resname);
}
else
{
/* groupby clauses are columns from the mat hypertable */
Var *mat_var = castNode(Var, cagg_tle->expr);
retlist = lappend(retlist, get_attname(mat_relid, mat_var->varattno, false));
}
}
return retlist;
}

View File

@ -1511,6 +1511,14 @@ WHERE hypertable_name = :'MAT_TABLE_NAME';
-[ RECORD 1 ]----------+----------------------------
hypertable_schema | _timescaledb_internal
hypertable_name | _materialized_hypertable_41
attname | search_query
segmentby_column_index | 1
orderby_column_index |
orderby_asc |
orderby_nullsfirst |
-[ RECORD 2 ]----------+----------------------------
hypertable_schema | _timescaledb_internal
hypertable_name | _materialized_hypertable_41
attname | bucket
segmentby_column_index |
orderby_column_index | 1

View File

@ -433,21 +433,21 @@ SELECT jsonb_pretty(rels) AS relations FROM relations;
"num_reltuples": 697 +
}, +
"continuous_aggregates": { +
"heap_size": 147456, +
"heap_size": 180224, +
"toast_size": 24576, +
"compression": { +
"compressed_heap_size": 8192, +
"compressed_row_count": 1, +
"compressed_heap_size": 40960, +
"compressed_row_count": 10, +
"num_compressed_caggs": 1, +
"compressed_toast_size": 8192, +
"num_compressed_chunks": 1, +
"uncompressed_heap_size": 49152, +
"uncompressed_row_count": 452, +
"compressed_indexes_size": 0, +
"compressed_indexes_size": 16384, +
"uncompressed_toast_size": 0, +
"uncompressed_indexes_size": 81920 +
}, +
"indexes_size": 163840, +
"indexes_size": 180224, +
"num_children": 4, +
"num_relations": 2, +
"num_reltuples": 452, +
@ -932,21 +932,21 @@ FROM relations;
continuous_aggregates
------------------------------------------------
{ +
"heap_size": 335872, +
"heap_size": 368640, +
"toast_size": 40960, +
"compression": { +
"compressed_heap_size": 8192, +
"compressed_row_count": 1, +
"compressed_heap_size": 40960, +
"compressed_row_count": 10, +
"num_compressed_caggs": 1, +
"compressed_toast_size": 8192, +
"num_compressed_chunks": 1, +
"uncompressed_heap_size": 49152, +
"uncompressed_row_count": 452, +
"compressed_indexes_size": 0, +
"compressed_indexes_size": 16384, +
"uncompressed_toast_size": 0, +
"uncompressed_indexes_size": 81920 +
}, +
"indexes_size": 393216, +
"indexes_size": 409600, +
"num_children": 8, +
"num_relations": 4, +
"num_reltuples": 452, +