Add default index for continuous aggregates

Add indexes on the materialization table created for a continuous aggregate.
This behavior can be turned on/off with the timescaledb.create_group_indexes parameter
of the WITH clause when the continuous aggregate is created.
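
For example (an illustrative sketch: the conditions hypertable and its
timec/location/temperature columns are borrowed from the regression tests
below; the view names are made up for this example):

    -- Default (create_group_indexes = true): besides the usual index on
    -- time_partition_col, one index per additional group-by column is
    -- created on the materialization table, e.g. (location, time_partition_col DESC).
    CREATE VIEW conditions_summary
    WITH (timescaledb.continuous)
    AS
    SELECT time_bucket('1 day', timec), location, avg(temperature)
    FROM conditions
    GROUP BY time_bucket('1 day', timec), location;

    -- Opt out of the group-by indexes at creation time:
    CREATE VIEW conditions_summary_noidx
    WITH (timescaledb.continuous, timescaledb.create_group_indexes = false)
    AS
    SELECT time_bucket('1 day', timec), location, avg(temperature)
    FROM conditions
    GROUP BY time_bucket('1 day', timec), location;

The option can only be set at creation time; ALTER VIEW ... SET
(timescaledb.create_group_indexes = ...) is rejected (see the updated tests).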
This commit is contained in:
gayyappan 2019-04-26 17:03:57 -04:00 committed by gayyappan
parent 74f8d204a5
commit 297b9ed66a
10 changed files with 162 additions and 8 deletions

View File

@ -50,6 +50,11 @@ static const WithClauseDefinition continuous_aggregate_with_clause_def[] = {
.arg_name = "max_interval_per_job",
.type_id = TEXTOID,
},
[ContinuousViewOptionCreateGroupIndex] = {
.arg_name = "create_group_indexes",
.type_id = BOOLOID,
.default_val = BoolGetDatum(true),
},
};
WithClauseResult *

View File

@ -22,6 +22,7 @@ typedef enum ContinuousAggViewOption
ContinuousViewOptionRefreshLag,
ContinuousViewOptionRefreshInterval,
ContinuousViewOptionMaxIntervalPerRun,
ContinuousViewOptionCreateGroupIndex,
} ContinuousAggViewOption;
typedef enum ContinuousAggViewType

View File

@ -19,12 +19,15 @@
#include <nodes/parsenodes.h>
#include <nodes/makefuncs.h>
#include <nodes/nodeFuncs.h>
#include <catalog/index.h>
#include <catalog/pg_type.h>
#include <catalog/pg_aggregate.h>
#include <catalog/toasting.h>
#include <catalog/pg_collation.h>
#include <catalog/pg_trigger.h>
#include <commands/defrem.h>
#include <commands/tablecmds.h>
#include <commands/tablespace.h>
#include <commands/trigger.h>
#include <commands/view.h>
#include <access/xact.h>
@ -135,8 +138,12 @@ typedef struct MatTableColumnInfo
List *matcollist; /* column defns for materialization tbl*/
List *partial_seltlist; /* tlist entries for populating the materialization table columns */
List *partial_grouplist; /* group clauses used for populating the materialization table */
int matpartcolno; /*index of partitioning column in matcollist */
char *matpartcolname; /*name of the partition column */
List *mat_groupcolname_list; /* names of the columns populated by the group-by clause;
these correspond to partial_grouplist. The
time_bucket column is not included here: it is the
matpartcolname */
int matpartcolno; /*index of partitioning column in matcollist */
char *matpartcolname; /*name of the partition column */
} MatTableColumnInfo;
typedef struct FinalizeQueryInfo
@ -178,6 +185,7 @@ static void mattablecolumninfo_addinternal(MatTableColumnInfo *matcolinfo,
static int32 mattablecolumninfo_create_materialization_table(MatTableColumnInfo *matcolinfo,
int32 hypertable_id, RangeVar *mat_rel,
CAggTimebucketInfo *origquery_tblinfo,
bool create_addl_index,
ObjectAddress *mataddress);
static Query *mattablecolumninfo_get_partial_select_query(MatTableColumnInfo *matcolinfo,
Query *userview_query);
@ -317,6 +325,58 @@ cagg_add_trigger_hypertable(Oid relid, char *trigarg)
ts_cache_release(hcache);
}
/* Add additional indexes to the materialization table for the columns derived from
* the group-by column list of the partial select query.
* If the partial select query has:
* GROUP BY timebucket_expr, <grpcol1, grpcol2, grpcol3 ...>
* the indexes on the mattable are <grpcol1, timebucketcol>, <grpcol2, timebucketcol>, ... and so on,
* i.e. #indexes = (#grp-cols - 1)
*/
static void
mattablecolumninfo_add_mattable_index(MatTableColumnInfo *matcolinfo, Hypertable *ht)
{
IndexStmt stmt = {
.type = T_IndexStmt,
.accessMethod = DEFAULT_INDEX_TYPE,
.idxname = NULL,
.relation = makeRangeVar(NameStr(ht->fd.schema_name), NameStr(ht->fd.table_name), 0),
.tableSpace = get_tablespace_name(get_rel_tablespace(ht->main_table_relid)),
};
IndexElem timeelem = { .type = T_IndexElem,
.name = matcolinfo->matpartcolname,
.ordering = SORTBY_DESC };
ListCell *le = NULL;
foreach (le, matcolinfo->mat_groupcolname_list)
{
NameData indxname;
ObjectAddress indxaddr;
HeapTuple indxtuple;
char *grpcolname = (char *) lfirst(le);
IndexElem grpelem = { .type = T_IndexElem, .name = grpcolname };
stmt.indexParams = list_make2(&grpelem, &timeelem);
indxaddr = DefineIndexCompat(ht->main_table_relid,
&stmt,
InvalidOid,
false, /* is alter table */
false, /* check rights */
false, /* skip_build */
false); /* quiet */
indxtuple = SearchSysCache1(RELOID, ObjectIdGetDatum(indxaddr.objectId));
if (!HeapTupleIsValid(indxtuple))
elog(ERROR, "cache lookup failed for index relid %d", indxaddr.objectId);
indxname = ((Form_pg_class) GETSTRUCT(indxtuple))->relname;
elog(NOTICE,
"adding index %s ON %s.%s USING BTREE(%s, %s)",
NameStr(indxname),
NameStr(ht->fd.schema_name),
NameStr(ht->fd.table_name),
grpcolname,
matcolinfo->matpartcolname);
ReleaseSysCache(indxtuple);
}
}
/*
* Create the materialization hypertable root by faking up a
* CREATE TABLE parsetree and passing it to DefineRelation.
@ -333,7 +393,7 @@ static int32
mattablecolumninfo_create_materialization_table(MatTableColumnInfo *matcolinfo, int32 hypertable_id,
RangeVar *mat_rel,
CAggTimebucketInfo *origquery_tblinfo,
ObjectAddress *mataddress)
bool create_addl_index, ObjectAddress *mataddress)
{
Oid uid, saved_uid;
int sec_ctx;
@ -379,6 +439,10 @@ mattablecolumninfo_create_materialization_table(MatTableColumnInfo *matcolinfo,
hcache = ts_hypertable_cache_pin();
ht = ts_hypertable_cache_get_entry(hcache, mat_relid);
mat_htid = ht->fd.id;
/* create additional index on the group-by columns for the materialization table */
if (create_addl_index)
mattablecolumninfo_add_mattable_index(matcolinfo, ht);
ts_cache_release(hcache);
return mat_htid;
}
@ -999,6 +1063,7 @@ mattablecolumninfo_init(MatTableColumnInfo *matcolinfo, List *collist, List *tli
matcolinfo->matcollist = collist;
matcolinfo->partial_seltlist = tlist;
matcolinfo->partial_grouplist = grouplist;
matcolinfo->mat_groupcolname_list = NIL;
matcolinfo->matpartcolno = -1;
matcolinfo->matpartcolname = NULL;
}
@ -1048,6 +1113,7 @@ mattablecolumninfo_addentry(MatTableColumnInfo *out, Node *input, int original_q
case T_TargetEntry:
{
TargetEntry *tle = (TargetEntry *) input;
bool timebkt_chk = false;
if (tle->resname)
colname = pstrdup(tle->resname);
else
@ -1058,8 +1124,8 @@ mattablecolumninfo_addentry(MatTableColumnInfo *out, Node *input, int original_q
/* is this the time_bucket column */
if (IsA(tle->expr, FuncExpr))
{
bool chk = is_timebucket_expr(((FuncExpr *) tle->expr)->funcid);
if (chk)
timebkt_chk = is_timebucket_expr(((FuncExpr *) tle->expr)->funcid);
if (timebkt_chk)
{
colname = MATPARTCOLNM;
tle->resname = pstrdup(colname);
@ -1067,6 +1133,10 @@ mattablecolumninfo_addentry(MatTableColumnInfo *out, Node *input, int original_q
out->matpartcolname = pstrdup(colname);
}
}
if (!timebkt_chk) /* add the name to the groupcolname list */
{
out->mat_groupcolname_list = lappend(out->mat_groupcolname_list, pstrdup(colname));
}
coltype = exprType((Node *) tle->expr);
coltypmod = exprTypmod((Node *) tle->expr);
colcollation = exprCollation((Node *) tle->expr);
@ -1518,6 +1588,7 @@ cagg_create(ViewStmt *stmt, Query *panquery, CAggTimebucketInfo *origquery_ht,
MatTableColumnInfo mattblinfo;
FinalizeQueryInfo finalqinfo;
CatalogSecurityContext sec_ctx;
bool is_create_mattbl_index;
Query *final_selquery;
Query *partial_selquery; /* query to populate the mattable*/
@ -1558,10 +1629,12 @@ cagg_create(ViewStmt *stmt, Query *panquery, CAggTimebucketInfo *origquery_ht,
ts_catalog_restore_user(&sec_ctx);
PRINT_MATINTERNAL_NAME(relnamebuf, "_materialized_hypertable_%d", materialize_hypertable_id);
mat_rel = makeRangeVar(pstrdup(INTERNAL_SCHEMA_NAME), pstrdup(relnamebuf), -1);
/* use the parsed boolean (defaults to true) so an explicit create_group_indexes=true also builds the indexes */
is_create_mattbl_index = DatumGetBool(with_clause_options[ContinuousViewOptionCreateGroupIndex].parsed);
mattablecolumninfo_create_materialization_table(&mattblinfo,
materialize_hypertable_id,
mat_rel,
origquery_ht,
is_create_mattbl_index,
&mataddress);
/* Step 2: create view with select finalize from materialization
* table

View File

@ -198,4 +198,8 @@ continuous_agg_update_options(ContinuousAgg *agg, WithClauseResult *with_clause_
*DatumGetIntervalP(with_clause_options[ContinuousViewOptionRefreshInterval].parsed);
ts_bgw_job_update_by_id(agg->data.job_id, job);
}
if (!with_clause_options[ContinuousViewOptionCreateGroupIndex].is_default)
{
elog(ERROR, "cannot alter create_group_indexes option for continuous aggregates");
}
}

View File

@ -49,6 +49,7 @@ select a, count(b)
from foo
group by time_bucket(1, a), a;
NOTICE: adding not-null constraint to column "time_partition_col"
NOTICE: adding index _materialized_hypertable_2_a_time_partition_col_idx ON _timescaledb_internal._materialized_hypertable_2 USING BTREE(a, time_partition_col)
SELECT * FROM _timescaledb_config.bgw_job;
id | application_name | job_type | schedule_interval | max_runtime | max_retries | retry_period
------+-------------------------------------+----------------------+-------------------+-------------+-------------+--------------
@ -514,6 +515,7 @@ SELECT
\set ECHO errors
psql:include/cont_agg_equal.sql:8: NOTICE: drop cascades to 2 other objects
psql:include/cont_agg_equal.sql:13: NOTICE: adding not-null constraint to column "time_partition_col"
psql:include/cont_agg_equal.sql:13: NOTICE: adding index _materialized_hypertable_13_location_time_partition_col_idx ON _timescaledb_internal._materialized_hypertable_13 USING BTREE(location, time_partition_col)
?column? | count
---------------------------------------------------------------+-------
Number of rows different between view and original (expect 0) | 0
@ -756,8 +758,11 @@ WITH ( timescaledb.continuous, timescaledb.refresh_lag = '5 hours', timescaledb.
as
select time_bucket('1day', timec), min(location), sum(temperature),sum(humidity)
from conditions
group by time_bucket('1day', timec);
group by time_bucket('1day', timec), location, humidity, temperature;
NOTICE: adding not-null constraint to column "time_partition_col"
NOTICE: adding index _materialized_hypertable_17_grp_5_5_time_partition_col_idx ON _timescaledb_internal._materialized_hypertable_17 USING BTREE(grp_5_5, time_partition_col)
NOTICE: adding index _materialized_hypertable_17_grp_6_6_time_partition_col_idx ON _timescaledb_internal._materialized_hypertable_17 USING BTREE(grp_6_6, time_partition_col)
NOTICE: adding index _materialized_hypertable_17_grp_7_7_time_partition_col_idx ON _timescaledb_internal._materialized_hypertable_17 USING BTREE(grp_7_7, time_partition_col)
SELECT schedule_interval FROM _timescaledb_config.bgw_job;
schedule_interval
-------------------
@ -783,6 +788,39 @@ SELECT schedule_interval FROM _timescaledb_config.bgw_job;
@ 2 hours
(1 row)
select indexname, indexdef from pg_indexes where tablename =
(SELECT h.table_name
FROM _timescaledb_catalog.continuous_agg ca
INNER JOIN _timescaledb_catalog.hypertable h ON(h.id = ca.mat_hypertable_id)
WHERE user_view_name = 'mat_with_test')
order by indexname;
indexname | indexdef
------------------------------------------------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
_materialized_hypertable_17_grp_5_5_time_partition_col_idx | CREATE INDEX _materialized_hypertable_17_grp_5_5_time_partition_col_idx ON _timescaledb_internal._materialized_hypertable_17 USING btree (grp_5_5, time_partition_col DESC)
_materialized_hypertable_17_grp_6_6_time_partition_col_idx | CREATE INDEX _materialized_hypertable_17_grp_6_6_time_partition_col_idx ON _timescaledb_internal._materialized_hypertable_17 USING btree (grp_6_6, time_partition_col DESC)
_materialized_hypertable_17_grp_7_7_time_partition_col_idx | CREATE INDEX _materialized_hypertable_17_grp_7_7_time_partition_col_idx ON _timescaledb_internal._materialized_hypertable_17 USING btree (grp_7_7, time_partition_col DESC)
_materialized_hypertable_17_time_partition_col_idx | CREATE INDEX _materialized_hypertable_17_time_partition_col_idx ON _timescaledb_internal._materialized_hypertable_17 USING btree (time_partition_col DESC)
(4 rows)
drop view mat_with_test cascade;
--no additional indexes
create or replace view mat_with_test( timec, minl, sumt , sumh)
WITH ( timescaledb.continuous, timescaledb.refresh_lag = '5 hours', timescaledb.refresh_interval = '1h', timescaledb.create_group_indexes=false)
as
select time_bucket('1day', timec), min(location), sum(temperature),sum(humidity)
from conditions
group by time_bucket('1day', timec), location, humidity, temperature;
NOTICE: adding not-null constraint to column "time_partition_col"
select indexname, indexdef from pg_indexes where tablename =
(SELECT h.table_name
FROM _timescaledb_catalog.continuous_agg ca
INNER JOIN _timescaledb_catalog.hypertable h ON(h.id = ca.mat_hypertable_id)
WHERE user_view_name = 'mat_with_test');
indexname | indexdef
----------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------
_materialized_hypertable_18_time_partition_col_idx | CREATE INDEX _materialized_hypertable_18_time_partition_col_idx ON _timescaledb_internal._materialized_hypertable_18 USING btree (time_partition_col DESC)
(1 row)
DROP TABLE conditions CASCADE;
NOTICE: drop cascades to 2 other objects
--test WITH using a hypertable with an integer time dimension
@ -903,7 +941,7 @@ WARNING: type integer text
(3 rows)
DROP view mat_ffunc_test cascade;
NOTICE: drop cascades to table _timescaledb_internal._hyper_21_61_chunk
NOTICE: drop cascades to table _timescaledb_internal._hyper_22_61_chunk
create or replace view mat_ffunc_test
WITH ( timescaledb.continuous, timescaledb.refresh_lag = '-200')
as

View File

@ -99,6 +99,7 @@ CREATE VIEW mat_before
WITH ( timescaledb.continuous, timescaledb.refresh_lag='-30 day')
AS :QUERY_BEFORE;
NOTICE: adding not-null constraint to column "time_partition_col"
NOTICE: adding index _materialized_hypertable_3_location_time_partition_col_idx ON _timescaledb_internal._materialized_hypertable_3 USING BTREE(location, time_partition_col)
--materialize this VIEW after dump this tests
--that the partialize VIEW and refresh mechanics
--survives the dump intact
@ -106,6 +107,7 @@ CREATE VIEW mat_after
WITH ( timescaledb.continuous, timescaledb.refresh_lag='-30 day')
AS :QUERY_AFTER;
NOTICE: adding not-null constraint to column "time_partition_col"
NOTICE: adding index _materialized_hypertable_4_location_time_partition_col_idx ON _timescaledb_internal._materialized_hypertable_4 USING BTREE(location, time_partition_col)
--materialize mat_before
REFRESH MATERIALIZED VIEW mat_before;
INFO: new materialization range for public.conditions_before (time column timec) (1548633600000000)

View File

@ -370,6 +370,10 @@ INNER JOIN _timescaledb_catalog.hypertable h ON(h.id = ca.mat_hypertable_id)
WHERE user_view_name = 'mat_with_test'
\gset
\set ON_ERROR_STOP 0
ALTER VIEW mat_with_test SET(timescaledb.create_group_indexes = 'false');
ERROR: cannot alter create_group_indexes option for continuous aggregates
ALTER VIEW mat_with_test SET(timescaledb.create_group_indexes = 'true');
ERROR: cannot alter create_group_indexes option for continuous aggregates
ALTER VIEW mat_with_test SET(timescaledb.refresh_lag = '1 joule');
ERROR: invalid input syntax for type interval: "1 joule"
ALTER VIEW mat_with_test RESET(timescaledb.refresh_lag);

View File

@ -40,6 +40,7 @@ FROM
device_readings
GROUP BY bucket, device_id; --We have to group by the bucket column, but can also add other group-by columns
NOTICE: adding not-null constraint to column "time_partition_col"
NOTICE: adding index _materialized_hypertable_2_device_id_time_partition_col_idx ON _timescaledb_internal._materialized_hypertable_2 USING BTREE(device_id, time_partition_col)
--Next, insert some data into the raw hypertable
INSERT INTO device_readings
SELECT ts, 'device_1', (EXTRACT(EPOCH FROM ts)) from generate_series('2018-12-01 00:00'::timestamp, '2018-12-31 00:00'::timestamp, '30 minutes') ts;
@ -237,6 +238,7 @@ FROM
device_readings
GROUP BY bucket, device_id;
NOTICE: adding not-null constraint to column "time_partition_col"
NOTICE: adding index _materialized_hypertable_3_device_id_time_partition_col_idx ON _timescaledb_internal._materialized_hypertable_3 USING BTREE(device_id, time_partition_col)
DROP VIEW device_summary CASCADE;
-- Option 2: Keep things as TIMESTAMPTZ in the view and convert to local time when
-- querying from the view
@ -255,6 +257,7 @@ FROM
device_readings
GROUP BY bucket, device_id;
NOTICE: adding not-null constraint to column "time_partition_col"
NOTICE: adding index _materialized_hypertable_4_device_id_time_partition_col_idx ON _timescaledb_internal._materialized_hypertable_4 USING BTREE(device_id, time_partition_col)
REFRESH MATERIALIZED VIEW device_summary;
INFO: new materialization range for public.device_readings larger than allowed in one run, truncating (time column observation_time) (1546236000000000)
INFO: new materialization range for public.device_readings (time column observation_time) (1543723200000000)

View File

@ -558,7 +558,7 @@ WITH ( timescaledb.continuous, timescaledb.refresh_lag = '5 hours', timescaledb.
as
select time_bucket('1day', timec), min(location), sum(temperature),sum(humidity)
from conditions
group by time_bucket('1day', timec);
group by time_bucket('1day', timec), location, humidity, temperature;
SELECT schedule_interval FROM _timescaledb_config.bgw_job;
SELECT _timescaledb_internal.to_interval(refresh_lag) FROM _timescaledb_catalog.continuous_agg WHERE user_view_name = 'mat_with_test';
@ -567,6 +567,28 @@ ALTER VIEW mat_with_test SET(timescaledb.refresh_lag = '6 h', timescaledb.refres
SELECT _timescaledb_internal.to_interval(refresh_lag) FROM _timescaledb_catalog.continuous_agg WHERE user_view_name = 'mat_with_test';
SELECT schedule_interval FROM _timescaledb_config.bgw_job;
select indexname, indexdef from pg_indexes where tablename =
(SELECT h.table_name
FROM _timescaledb_catalog.continuous_agg ca
INNER JOIN _timescaledb_catalog.hypertable h ON(h.id = ca.mat_hypertable_id)
WHERE user_view_name = 'mat_with_test')
order by indexname;
drop view mat_with_test cascade;
--no additional indexes
create or replace view mat_with_test( timec, minl, sumt , sumh)
WITH ( timescaledb.continuous, timescaledb.refresh_lag = '5 hours', timescaledb.refresh_interval = '1h', timescaledb.create_group_indexes=false)
as
select time_bucket('1day', timec), min(location), sum(temperature),sum(humidity)
from conditions
group by time_bucket('1day', timec), location, humidity, temperature;
select indexname, indexdef from pg_indexes where tablename =
(SELECT h.table_name
FROM _timescaledb_catalog.continuous_agg ca
INNER JOIN _timescaledb_catalog.hypertable h ON(h.id = ca.mat_hypertable_id)
WHERE user_view_name = 'mat_with_test');
DROP TABLE conditions CASCADE;
--test WITH using a hypertable with an integer time dimension

View File

@ -367,6 +367,8 @@ WHERE user_view_name = 'mat_with_test'
\gset
\set ON_ERROR_STOP 0
ALTER VIEW mat_with_test SET(timescaledb.create_group_indexes = 'false');
ALTER VIEW mat_with_test SET(timescaledb.create_group_indexes = 'true');
ALTER VIEW mat_with_test SET(timescaledb.refresh_lag = '1 joule');
ALTER VIEW mat_with_test RESET(timescaledb.refresh_lag);
ALTER VIEW mat_with_test ALTER timec DROP default;