Improve relation size estimate

When there are no stats (number of tuples/pages), we can use two
approaches to estimate the relation size: interpolate the relation
size using stats from previous chunks (if they exist), or estimate
it using the shared buffer size (the shared buffer size should
align with the chunk size).
Authored by niksa on 2019-11-06 22:24:34 +01:00; committed by Erik Nordström
parent 29ce1510a5
commit c60cabd768
19 changed files with 1872 additions and 799 deletions
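As a rough, self-contained sketch of the shared-buffer-based fallback described in the commit message (this is not code from this commit): BLOCK_SIZE and TUPLE_HEADER_SIZE are assumed stand-ins for PostgreSQL's BLCKSZ and MAXALIGN(SizeofHeapTupleHeader), and the chunk target size is passed in directly rather than derived from shared buffers as the real code does.

#include <stdint.h>
#include <stdio.h>

#define BLOCK_SIZE 8192      /* assumed stand-in for PostgreSQL's BLCKSZ */
#define TUPLE_HEADER_SIZE 24 /* assumed stand-in for MAXALIGN(SizeofHeapTupleHeader) */

typedef struct
{
	double tuples;
	unsigned int pages;
} RelEstimatesSketch;

/*
 * Fallback when no previous-chunk stats exist: assume a chunk roughly fills
 * the target size (derived from shared buffers in the real code), divide it
 * among the closed-dimension (space) slices, then derive tuple and page
 * counts from the estimated tuple width.
 */
static RelEstimatesSketch
estimate_without_stats(int64_t chunk_target_size, int result_width, int total_closed_slices)
{
	RelEstimatesSketch est;

	if (total_closed_slices > 0)
		chunk_target_size /= total_closed_slices;

	est.tuples = (double) chunk_target_size / (result_width + TUPLE_HEADER_SIZE);
	est.pages = (unsigned int) (chunk_target_size / BLOCK_SIZE);
	return est;
}

int
main(void)
{
	/* e.g., a 128 MB target size, 20-byte rows, 3 space partitions */
	RelEstimatesSketch est = estimate_without_stats((int64_t) 128 * 1024 * 1024, 20, 3);

	printf("tuples=%.0f pages=%u\n", est.tuples, est.pages);
	return 0;
}

With these example inputs the sketch yields roughly one million tuples and about 5,400 pages per chunk, the same order of magnitude as the pre-ANALYZE row estimates in the test output further down.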


@ -2299,6 +2299,36 @@ ts_chunk_get_by_id(int32 id, bool fail_if_not_found)
return chunk_scan_find(CHUNK_ID_INDEX, scankey, 1, CurrentMemoryContext, fail_if_not_found);
}
/*
 * Number of chunks created after the given chunk.
 * If chunk2.id > chunk1.id, then chunk2 was created after chunk1.
 */
int
ts_chunk_num_of_chunks_created_after(const Chunk *chunk)
{
ScanKeyData scankey[1];
/*
 * Try to find chunks with a greater id than the given chunk
 */
ScanKeyInit(&scankey[0],
Anum_chunk_idx_id,
BTGreaterStrategyNumber,
F_INT4GT,
Int32GetDatum(chunk->fd.id));
return chunk_scan_internal(CHUNK_ID_INDEX,
scankey,
1,
NULL,
NULL,
NULL,
0,
ForwardScanDirection,
AccessShareLock,
CurrentMemoryContext);
}
/*
* Simple scans provide lightweight ways to access chunk information without the
* overhead of getting a full chunk (i.e., no extra metadata, like constraints,


@ -136,6 +136,7 @@ extern Oid ts_chunk_get_schema_id(int32 chunk_id, bool missing_ok);
extern bool ts_chunk_get_id(const char *schema, const char *table, int32 *chunk_id,
bool missing_ok);
extern bool ts_chunk_exists_relid(Oid relid);
extern TSDLLEXPORT int ts_chunk_num_of_chunks_created_after(const Chunk *chunk);
extern TSDLLEXPORT bool ts_chunk_exists_with_compression(int32 hypertable_id);
extern void ts_chunk_recreate_all_constraints_for_dimension(Hyperspace *hs, int32 dimension_id);
extern TSDLLEXPORT void ts_chunk_drop_fks(Chunk *const chunk);
@ -144,7 +145,8 @@ extern int ts_chunk_delete_by_hypertable_id(int32 hypertable_id);
extern int ts_chunk_delete_by_name(const char *schema, const char *table, DropBehavior behavior);
extern bool ts_chunk_set_name(Chunk *chunk, const char *newname);
extern bool ts_chunk_set_schema(Chunk *chunk, const char *newschema);
extern List *ts_chunk_get_window(int32 dimension_id, int64 point, int count, MemoryContext mctx);
extern TSDLLEXPORT List *ts_chunk_get_window(int32 dimension_id, int64 point, int count,
MemoryContext mctx);
extern void ts_chunks_rename_schema_name(char *old_schema, char *new_schema);
extern TSDLLEXPORT bool ts_chunk_set_compressed_chunk(Chunk *chunk, int32 compressed_chunk_id,
bool isnull);


@ -114,8 +114,8 @@ get_memory_cache_size(void)
*/
#define DEFAULT_CACHE_MEMORY_SLACK 0.9
static inline int64
calculate_initial_chunk_target_size(void)
extern inline int64
ts_chunk_calculate_initial_chunk_target_size(void)
{
return (int64)((double) get_memory_cache_size() * DEFAULT_CACHE_MEMORY_SLACK);
}
@ -662,7 +662,7 @@ chunk_target_size_in_bytes(const text *target_size_text)
return 0;
if (pg_strcasecmp(target_size, "estimate") == 0)
target_size_bytes = calculate_initial_chunk_target_size();
target_size_bytes = ts_chunk_calculate_initial_chunk_target_size();
else
target_size_bytes = convert_text_memory_amount_to_bytes(target_size);


@ -27,4 +27,6 @@ extern void ts_chunk_adaptive_sizing_info_validate(ChunkSizingInfo *info);
extern void ts_chunk_sizing_func_validate(regproc func, ChunkSizingInfo *info);
extern TSDLLEXPORT ChunkSizingInfo *ts_chunk_sizing_info_get_default_disabled(Oid table_relid);
extern TSDLLEXPORT int64 ts_chunk_calculate_initial_chunk_target_size(void);
#endif /* TIMESCALEDB_CHUNK_ADAPTIVE_H */


@ -279,3 +279,6 @@ $BODY$
BEGIN
END
$BODY$;
CREATE OR REPLACE FUNCTION ts_test_override_current_timestamptz(new_value TIMESTAMPTZ)
RETURNS VOID AS :TSL_MODULE_PATHNAME, 'ts_test_override_current_timestamptz' LANGUAGE C VOLATILE STRICT;


@ -103,7 +103,7 @@ get_upper_rel_estimate(PlannerInfo *root, RelOptInfo *rel, CostEstimate *ce)
ce->startup_cost = ofpinfo->rel_startup_cost;
ce->startup_cost += aggcosts.transCost.startup;
ce->startup_cost += aggcosts.transCost.per_tuple * input_rows;
ce->startup_cost += (cpu_operator_cost * num_group_cols) * input_rows;
ce->startup_cost += cpu_operator_cost * num_group_cols * input_rows;
ce->startup_cost += ptarget->cost.startup;
/*-----


@ -12,6 +12,7 @@
#include <utils/hsearch.h>
#include <utils/builtins.h>
#include <utils/lsyscache.h>
#include <utils/syscache.h>
#include <miscadmin.h>
#include <extension_constants.h>
@ -22,6 +23,15 @@
#include "deparse.h"
#include "relinfo.h"
#include "estimate.h"
#include "chunk_adaptive.h"
#include "cache.h"
#include "hypertable.h"
#include "hypertable_cache.h"
#include "dimension.h"
#include "chunk.h"
#include "hypercube.h"
#include "errors.h"
#include "scan_exec.h"
/* Default CPU cost to start up a foreign query. */
#define DEFAULT_FDW_STARTUP_COST 100.0
@ -31,6 +41,8 @@
#define DEFAULT_FDW_FETCH_SIZE 10000
#define DEFAULT_CHUNK_LOOKBACK_WINDOW 10
/*
* Parse options from foreign server and apply them to fpinfo.
*
@ -97,6 +109,257 @@ get_relation_qualified_name(Oid relid)
return name->data;
}
static const double FILL_FACTOR_CURRENT_CHUNK = 0.5;
static const double FILL_FACTOR_HISTORICAL_CHUNK = 1;
static DimensionSlice *
get_chunk_time_slice(Chunk *chunk, Hyperspace *space)
{
int32 time_dim_id = hyperspace_get_open_dimension(space, 0)->fd.id;
return ts_hypercube_get_slice_by_dimension_id(chunk->cube, time_dim_id);
}
/*
 * Sum of the number of slices in all closed (space) dimensions
 */
static int
get_total_number_of_slices(Hyperspace *space)
{
int dim_idx;
int total_slices = 0;
for (dim_idx = 0; dim_idx < space->num_dimensions; dim_idx++)
{
Dimension *dim = &space->dimensions[dim_idx];
if (IS_CLOSED_DIMENSION(dim))
total_slices += dim->fd.num_slices;
}
return total_slices;
}
/*
 * Fillfactor values are between 0 and 1 and indicate how much data is in the chunk.
 *
 * The two major drivers of the estimate are the current time and the number of chunks created
 * after the given chunk.
 *
 * The fillfactor estimate assumes that written data is 'recent' with respect to the time
 * dimension (e.g., almost real-time). When writing historical data, the estimate may be further
 * off, since we assume that historical chunks have a fillfactor of 1 unless the number of chunks
 * created afterwards is smaller than the total number of slices. Even when writing historical
 * data we might not be far off, since the data most likely has monotonically increasing time.
 *
 * The estimate handles two possible hypertable configurations: 1. the time dimension is of
 * timestamp type, 2. the time dimension is of integer type. If the hypertable uses a timestamp
 * type to partition data, there are three possible scenarios: we are beyond the chunk end time
 * (historical chunk), we are somewhere in between the chunk time boundaries (current chunk), or
 * the chunk start time is in the future (highly unlikely). For integer types we assume that all
 * chunks except the current one have a fillfactor of 1.
 *
 * To explain how the number of chunks created after a chunk affects the estimate, imagine a
 * table that is space partitioned with one dimension having 3 partitions. If data is equally
 * distributed among the partitions, there will be 3 current chunks. If there are two new chunks
 * created after chunk X, then chunk X is still considered a current chunk.
 */
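/*
 * Illustrative example (not from the original code): a chunk covering
 * [Jan 1, Jan 8) with "now" at Jan 3 yields elapsed / interval = 2 / 7 ~= 0.29.
 * A chunk whose end time is already in the past is assumed full (fillfactor 1),
 * unless fewer chunks were created after it than there are closed-dimension
 * slices, in which case it is still treated as a current chunk (fillfactor 0.5).
 */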
static double
estimate_chunk_fillfactor(Chunk *chunk, Hyperspace *space)
{
Dimension *time_dim = hyperspace_get_open_dimension(space, 0);
DimensionSlice *time_slice = get_chunk_time_slice(chunk, space);
Oid time_dim_type = ts_dimension_get_partition_type(time_dim);
int num_created_after = ts_chunk_num_of_chunks_created_after(chunk);
int total_slices = get_total_number_of_slices(space);
if (IS_TIMESTAMP_TYPE(time_dim_type))
{
TimestampTz now = GetSQLCurrentTimestamp(-1);
int64 now_internal_time;
#ifdef TS_DEBUG
if (ts_current_timestamp_override_value >= 0)
now = ts_current_timestamp_override_value;
#endif
now_internal_time = ts_time_value_to_internal(TimestampTzGetDatum(now), TIMESTAMPTZOID);
/* if we are beyond the end of the range then the chunk can possibly be completely filled */
if (time_slice->fd.range_end <= now_internal_time)
{
/* If there are fewer newly created chunks than the number of slices, then this is a current
 * chunk. This also works better when writing historical data */
return num_created_after < total_slices ? FILL_FACTOR_CURRENT_CHUNK :
FILL_FACTOR_HISTORICAL_CHUNK;
}
/* for chunks in the future (highly unlikely) we assume the same as for the `current` chunk */
if (time_slice->fd.range_start >= now_internal_time)
return FILL_FACTOR_CURRENT_CHUNK;
/* current time falls within chunk time constraints */
double elapsed = (now_internal_time - time_slice->fd.range_start);
double interval = (time_slice->fd.range_end - time_slice->fd.range_start);
Assert(interval != 0);
return elapsed / interval;
}
else
{
/* if current chunk is the last created we assume it has 0.5 fill factor */
return num_created_after < total_slices ? FILL_FACTOR_CURRENT_CHUNK :
FILL_FACTOR_HISTORICAL_CHUNK;
}
}
typedef struct RelEstimates
{
double tuples;
BlockNumber pages;
} RelEstimates;
/*
 * The idea is to look at the number of tuples and pages of the N previous chunks
 * and calculate an average. Ideally we could add weights to this calculation
 * and give more importance to newer chunks, but a ballpark estimate should be
 * just fine.
 */
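/*
 * Illustrative example (not from the original code): if the previous chunks
 * report reltuples of 120, 0 and 180, the zero entry is skipped and the tuple
 * estimate becomes (120 + 180) / 2 = 150; relpages are averaged the same way.
 */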
static RelEstimates *
estimate_tuples_and_pages_using_prev_chunks(PlannerInfo *root, Hyperspace *space,
Chunk *current_chunk)
{
RelEstimates *estimates = palloc0(sizeof(RelEstimates));
ListCell *lc;
float4 total_tuples = 0;
int32 total_pages = 0;
int non_zero_reltuples_cnt = 0;
int non_zero_relpages_cnt = 0;
DimensionSlice *time_slice = get_chunk_time_slice(current_chunk, space);
List *prev_chunks = ts_chunk_get_window(time_slice->fd.dimension_id,
time_slice->fd.range_start,
DEFAULT_CHUNK_LOOKBACK_WINDOW,
CurrentMemoryContext);
foreach (lc, prev_chunks)
{
Chunk *pc = lfirst(lc);
HeapTuple rel_tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(pc->table_id));
Form_pg_class rel_form;
if (!HeapTupleIsValid(rel_tuple))
ereport(ERROR,
(errcode(ERRCODE_TS_INTERNAL_ERROR),
errmsg("cache lookup failed for chunk relation %u", pc->table_id),
errdetail("Failed to estimate number of tuples and pages for chunk %d.",
pc->fd.id)));
rel_form = (Form_pg_class) GETSTRUCT(rel_tuple);
if (rel_form->reltuples > 0)
{
total_tuples += rel_form->reltuples;
non_zero_reltuples_cnt++;
}
if (rel_form->relpages > 0)
{
total_pages += rel_form->relpages;
non_zero_relpages_cnt++;
}
ReleaseSysCache(rel_tuple);
}
if (non_zero_reltuples_cnt > 0)
estimates->tuples = total_tuples / non_zero_reltuples_cnt;
if (non_zero_relpages_cnt > 0)
estimates->pages = total_pages / non_zero_relpages_cnt;
return estimates;
}
static RelEstimates *
estimate_tuples_and_pages_using_shared_buffers(PlannerInfo *root, Hypertable *ht, int result_width)
{
RelEstimates *estimates = palloc(sizeof(RelEstimates));
int64 chunk_size_estimate = ts_chunk_calculate_initial_chunk_target_size();
if (ht != NULL)
{
int total_slices = get_total_number_of_slices(ht->space);
if (total_slices > 0)
chunk_size_estimate /= total_slices;
}
else
/* half-size seems to be the safest bet */
chunk_size_estimate /= 2;
estimates->tuples = chunk_size_estimate / (result_width + MAXALIGN(SizeofHeapTupleHeader));
estimates->pages = chunk_size_estimate / BLCKSZ;
return estimates;
}
static void
set_rel_estimates(RelOptInfo *rel, RelEstimates *estimates)
{
rel->tuples = estimates->tuples;
rel->pages = estimates->pages;
}
static void
rel_estimates_apply_fillfactor(RelEstimates *estimates, double fillfactor)
{
estimates->pages *= fillfactor;
estimates->tuples *= fillfactor;
}
/*
 * When there are no local stats, we try estimating by either using stats from previous chunks (if
 * they exist) or the shared buffers size.
 */
static void
estimate_tuples_and_pages(PlannerInfo *root, RelOptInfo *rel)
{
int parent_relid;
RangeTblEntry *hyper_rte;
Cache *hcache;
Hypertable *ht;
double chunk_fillfactor;
RangeTblEntry *chunk_rte;
Chunk *chunk;
Hyperspace *hyperspace;
RelEstimates *estimates;
Assert(rel->tuples == 0);
Assert(rel->pages == 0);
/* In some cases (e.g., UPDATE stmt) top_parent_relids is not set, so the best
we can do is use the shared buffers size without partitioning information.
Since updates are not something we generally optimize for, this should be fine. */
if (rel->top_parent_relids == NULL)
{
estimates =
estimate_tuples_and_pages_using_shared_buffers(root, NULL, rel->reltarget->width);
set_rel_estimates(rel, estimates);
return;
}
parent_relid = bms_next_member(rel->top_parent_relids, -1);
hyper_rte = planner_rt_fetch(parent_relid, root);
hcache = ts_hypertable_cache_pin();
ht = ts_hypertable_cache_get_entry(hcache, hyper_rte->relid, CACHE_FLAG_NONE);
hyperspace = ht->space;
chunk_rte = planner_rt_fetch(rel->relid, root);
chunk = ts_chunk_get_by_relid(chunk_rte->relid, true);
/* First try to figure out the number of tuples/pages using stats from previous chunks,
otherwise make an estimate based on the shared buffers size */
estimates = estimate_tuples_and_pages_using_prev_chunks(root, hyperspace, chunk);
if (estimates->tuples == 0 || estimates->pages == 0)
estimates = estimate_tuples_and_pages_using_shared_buffers(root, ht, rel->reltarget->width);
chunk_fillfactor = estimate_chunk_fillfactor(chunk, hyperspace);
/* adjust tuples/pages using chunk_fillfactor */
rel_estimates_apply_fillfactor(estimates, chunk_fillfactor);
set_rel_estimates(rel, estimates);
ts_cache_release(hcache);
}
TsFdwRelInfo *
fdw_relinfo_create(PlannerInfo *root, RelOptInfo *rel, Oid server_oid, Oid local_table_id,
@ -199,17 +462,10 @@ fdw_relinfo_create(PlannerInfo *root, RelOptInfo *rel, Oid server_oid, Oid local
/*
* If the foreign table has never been ANALYZEd, it will have relpages
* and reltuples equal to zero, which most likely has nothing to do
* with reality. We can't do a whole lot about that if we're not
* allowed to consult the data node, but we can use a hack similar
* to plancat.c's treatment of empty relations: use a minimum size
* estimate of 10 pages, and divide by the column-datatype-based width
* estimate to get the corresponding number of tuples.
with reality. The best we can do is estimate the number of tuples/pages.
*/
if (rel->pages == 0 && rel->tuples == 0)
{
rel->pages = 10;
rel->tuples = (10 * BLCKSZ) / (rel->reltarget->width + MAXALIGN(SizeofHeapTupleHeader));
}
if (rel->pages == 0 && rel->tuples == 0 && type == TS_FDW_RELINFO_FOREIGN_TABLE)
estimate_tuples_and_pages(root, rel);
/* Estimate rel size as best we can with local statistics. There are
* no local statistics for data node rels since they aren't real base


@ -191,7 +191,7 @@ prepare_query_params(PlanState *node, List *fdw_exprs, int num_params, FmgrInfo
TimestampTz ts_current_timestamp_override_value = -1;
extern void
fdw_scan_debug_override_pushdown_timestamp(TimestampTz time)
fdw_scan_debug_override_current_timestamp(TimestampTz time)
{
ts_current_timestamp_override_value = time;
}


@ -51,8 +51,10 @@ extern void fdw_scan_explain(ScanState *ss, List *fdw_private, ExplainState *es,
extern DataFetcher *create_data_fetcher(ScanState *ss, TsFdwScanState *fsstate, FetchMode mode);
#ifdef TS_DEBUG
extern TimestampTz ts_current_timestamp_override_value;
/* Allow tests to specify the time to push down in place of now() */
extern void fdw_scan_debug_override_pushdown_timestamp(TimestampTz time);
extern void fdw_scan_debug_override_current_timestamp(TimestampTz time);
#endif
#endif /* TIMESCALEDB_TSL_FDW_SCAN_EXEC_H */


@ -61,6 +61,7 @@ INSERT INTO hyper VALUES
('2018-05-19 13:01', 4, 4, 5.1),
('2018-05-20 15:08', 5, 1, 9.4),
('2018-05-30 13:02', 3, 2, 9.0);
-- Update table stats
ANALYZE hyper;
-- Optimizer debug messages shown at debug level 2
SET client_min_messages TO DEBUG2;


@ -208,37 +208,39 @@ SET enable_partitionwise_aggregate = ON;
FROM :TEST_TABLE
GROUP BY :GROUPING
ORDER BY :GROUPING;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Custom Scan (AsyncAppend)
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Sort
Output: location, (last(highlow, timec)), (first(highlow, timec))
-> Merge Append
Sort Key: conditions.location
-> GroupAggregate
Output: conditions.location, last(conditions.highlow, conditions.timec), first(conditions.highlow, conditions.timec)
Group Key: conditions.location
-> Custom Scan (DataNodeScan) on public.conditions
Output: conditions.location, conditions.highlow, conditions.timec
Data node: data_node_1
Chunks: _hyper_1_1_dist_chunk, _hyper_1_2_dist_chunk, _hyper_1_3_dist_chunk, _hyper_1_4_dist_chunk
Remote SQL: SELECT timec, location, highlow FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY location ASC NULLS LAST
-> GroupAggregate
Output: conditions_1.location, last(conditions_1.highlow, conditions_1.timec), first(conditions_1.highlow, conditions_1.timec)
Group Key: conditions_1.location
-> Custom Scan (DataNodeScan) on public.conditions conditions_1
Output: conditions_1.location, conditions_1.highlow, conditions_1.timec
Data node: data_node_2
Chunks: _hyper_1_9_dist_chunk, _hyper_1_10_dist_chunk, _hyper_1_11_dist_chunk, _hyper_1_12_dist_chunk
Remote SQL: SELECT timec, location, highlow FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY location ASC NULLS LAST
-> GroupAggregate
Output: conditions_2.location, last(conditions_2.highlow, conditions_2.timec), first(conditions_2.highlow, conditions_2.timec)
Group Key: conditions_2.location
-> Custom Scan (DataNodeScan) on public.conditions conditions_2
Output: conditions_2.location, conditions_2.highlow, conditions_2.timec
Data node: data_node_3
Chunks: _hyper_1_5_dist_chunk, _hyper_1_6_dist_chunk, _hyper_1_7_dist_chunk, _hyper_1_8_dist_chunk
Remote SQL: SELECT timec, location, highlow FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY location ASC NULLS LAST
(28 rows)
Sort Key: location
-> Custom Scan (AsyncAppend)
Output: location, (last(highlow, timec)), (first(highlow, timec))
-> Append
-> HashAggregate
Output: conditions.location, last(conditions.highlow, conditions.timec), first(conditions.highlow, conditions.timec)
Group Key: conditions.location
-> Custom Scan (DataNodeScan) on public.conditions
Output: conditions.location, conditions.highlow, conditions.timec
Data node: data_node_1
Chunks: _hyper_1_1_dist_chunk, _hyper_1_2_dist_chunk, _hyper_1_3_dist_chunk, _hyper_1_4_dist_chunk
Remote SQL: SELECT timec, location, highlow FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4])
-> HashAggregate
Output: conditions_1.location, last(conditions_1.highlow, conditions_1.timec), first(conditions_1.highlow, conditions_1.timec)
Group Key: conditions_1.location
-> Custom Scan (DataNodeScan) on public.conditions conditions_1
Output: conditions_1.location, conditions_1.highlow, conditions_1.timec
Data node: data_node_2
Chunks: _hyper_1_9_dist_chunk, _hyper_1_10_dist_chunk, _hyper_1_11_dist_chunk, _hyper_1_12_dist_chunk
Remote SQL: SELECT timec, location, highlow FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4])
-> HashAggregate
Output: conditions_2.location, last(conditions_2.highlow, conditions_2.timec), first(conditions_2.highlow, conditions_2.timec)
Group Key: conditions_2.location
-> Custom Scan (DataNodeScan) on public.conditions conditions_2
Output: conditions_2.location, conditions_2.highlow, conditions_2.timec
Data node: data_node_3
Chunks: _hyper_1_5_dist_chunk, _hyper_1_6_dist_chunk, _hyper_1_7_dist_chunk, _hyper_1_8_dist_chunk
Remote SQL: SELECT timec, location, highlow FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4])
(30 rows)
-- Mix of aggregates that push down and those that don't
:PREFIX SELECT :GROUPING,
@ -256,37 +258,39 @@ SET enable_partitionwise_aggregate = ON;
FROM :TEST_TABLE
GROUP BY :GROUPING
ORDER BY :GROUPING;
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Custom Scan (AsyncAppend)
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Sort
Output: location, (min(allnull)), (max(temperature)), ((sum(temperature) + sum(humidity))), (avg(humidity)), (round(stddev((humidity)::integer), 5)), (bit_and(bit_int)), (bit_or(bit_int)), (bool_and(good_life)), (every((temperature > '0'::double precision))), (bool_or(good_life)), (first(highlow, timec))
-> Merge Append
Sort Key: conditions.location
-> GroupAggregate
Output: conditions.location, min(conditions.allnull), max(conditions.temperature), (sum(conditions.temperature) + sum(conditions.humidity)), avg(conditions.humidity), round(stddev((conditions.humidity)::integer), 5), bit_and(conditions.bit_int), bit_or(conditions.bit_int), bool_and(conditions.good_life), every((conditions.temperature > '0'::double precision)), bool_or(conditions.good_life), first(conditions.highlow, conditions.timec)
Group Key: conditions.location
-> Custom Scan (DataNodeScan) on public.conditions
Output: conditions.location, conditions.allnull, conditions.temperature, conditions.humidity, conditions.bit_int, conditions.good_life, conditions.highlow, conditions.timec
Data node: data_node_1
Chunks: _hyper_1_1_dist_chunk, _hyper_1_2_dist_chunk, _hyper_1_3_dist_chunk, _hyper_1_4_dist_chunk
Remote SQL: SELECT timec, location, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY location ASC NULLS LAST
-> GroupAggregate
Output: conditions_1.location, min(conditions_1.allnull), max(conditions_1.temperature), (sum(conditions_1.temperature) + sum(conditions_1.humidity)), avg(conditions_1.humidity), round(stddev((conditions_1.humidity)::integer), 5), bit_and(conditions_1.bit_int), bit_or(conditions_1.bit_int), bool_and(conditions_1.good_life), every((conditions_1.temperature > '0'::double precision)), bool_or(conditions_1.good_life), first(conditions_1.highlow, conditions_1.timec)
Group Key: conditions_1.location
-> Custom Scan (DataNodeScan) on public.conditions conditions_1
Output: conditions_1.location, conditions_1.allnull, conditions_1.temperature, conditions_1.humidity, conditions_1.bit_int, conditions_1.good_life, conditions_1.highlow, conditions_1.timec
Data node: data_node_2
Chunks: _hyper_1_9_dist_chunk, _hyper_1_10_dist_chunk, _hyper_1_11_dist_chunk, _hyper_1_12_dist_chunk
Remote SQL: SELECT timec, location, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY location ASC NULLS LAST
-> GroupAggregate
Output: conditions_2.location, min(conditions_2.allnull), max(conditions_2.temperature), (sum(conditions_2.temperature) + sum(conditions_2.humidity)), avg(conditions_2.humidity), round(stddev((conditions_2.humidity)::integer), 5), bit_and(conditions_2.bit_int), bit_or(conditions_2.bit_int), bool_and(conditions_2.good_life), every((conditions_2.temperature > '0'::double precision)), bool_or(conditions_2.good_life), first(conditions_2.highlow, conditions_2.timec)
Group Key: conditions_2.location
-> Custom Scan (DataNodeScan) on public.conditions conditions_2
Output: conditions_2.location, conditions_2.allnull, conditions_2.temperature, conditions_2.humidity, conditions_2.bit_int, conditions_2.good_life, conditions_2.highlow, conditions_2.timec
Data node: data_node_3
Chunks: _hyper_1_5_dist_chunk, _hyper_1_6_dist_chunk, _hyper_1_7_dist_chunk, _hyper_1_8_dist_chunk
Remote SQL: SELECT timec, location, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY location ASC NULLS LAST
(28 rows)
Sort Key: location
-> Custom Scan (AsyncAppend)
Output: location, (min(allnull)), (max(temperature)), ((sum(temperature) + sum(humidity))), (avg(humidity)), (round(stddev((humidity)::integer), 5)), (bit_and(bit_int)), (bit_or(bit_int)), (bool_and(good_life)), (every((temperature > '0'::double precision))), (bool_or(good_life)), (first(highlow, timec))
-> Append
-> HashAggregate
Output: conditions.location, min(conditions.allnull), max(conditions.temperature), (sum(conditions.temperature) + sum(conditions.humidity)), avg(conditions.humidity), round(stddev((conditions.humidity)::integer), 5), bit_and(conditions.bit_int), bit_or(conditions.bit_int), bool_and(conditions.good_life), every((conditions.temperature > '0'::double precision)), bool_or(conditions.good_life), first(conditions.highlow, conditions.timec)
Group Key: conditions.location
-> Custom Scan (DataNodeScan) on public.conditions
Output: conditions.location, conditions.allnull, conditions.temperature, conditions.humidity, conditions.bit_int, conditions.good_life, conditions.highlow, conditions.timec
Data node: data_node_1
Chunks: _hyper_1_1_dist_chunk, _hyper_1_2_dist_chunk, _hyper_1_3_dist_chunk, _hyper_1_4_dist_chunk
Remote SQL: SELECT timec, location, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4])
-> HashAggregate
Output: conditions_1.location, min(conditions_1.allnull), max(conditions_1.temperature), (sum(conditions_1.temperature) + sum(conditions_1.humidity)), avg(conditions_1.humidity), round(stddev((conditions_1.humidity)::integer), 5), bit_and(conditions_1.bit_int), bit_or(conditions_1.bit_int), bool_and(conditions_1.good_life), every((conditions_1.temperature > '0'::double precision)), bool_or(conditions_1.good_life), first(conditions_1.highlow, conditions_1.timec)
Group Key: conditions_1.location
-> Custom Scan (DataNodeScan) on public.conditions conditions_1
Output: conditions_1.location, conditions_1.allnull, conditions_1.temperature, conditions_1.humidity, conditions_1.bit_int, conditions_1.good_life, conditions_1.highlow, conditions_1.timec
Data node: data_node_2
Chunks: _hyper_1_9_dist_chunk, _hyper_1_10_dist_chunk, _hyper_1_11_dist_chunk, _hyper_1_12_dist_chunk
Remote SQL: SELECT timec, location, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4])
-> HashAggregate
Output: conditions_2.location, min(conditions_2.allnull), max(conditions_2.temperature), (sum(conditions_2.temperature) + sum(conditions_2.humidity)), avg(conditions_2.humidity), round(stddev((conditions_2.humidity)::integer), 5), bit_and(conditions_2.bit_int), bit_or(conditions_2.bit_int), bool_and(conditions_2.good_life), every((conditions_2.temperature > '0'::double precision)), bool_or(conditions_2.good_life), first(conditions_2.highlow, conditions_2.timec)
Group Key: conditions_2.location
-> Custom Scan (DataNodeScan) on public.conditions conditions_2
Output: conditions_2.location, conditions_2.allnull, conditions_2.temperature, conditions_2.humidity, conditions_2.bit_int, conditions_2.good_life, conditions_2.highlow, conditions_2.timec
Data node: data_node_3
Chunks: _hyper_1_5_dist_chunk, _hyper_1_6_dist_chunk, _hyper_1_7_dist_chunk, _hyper_1_8_dist_chunk
Remote SQL: SELECT timec, location, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4])
(30 rows)
\set GROUPING 'region'
\ir 'include/aggregate_queries.sql'
@ -370,40 +374,42 @@ SET enable_partitionwise_aggregate = ON;
FROM :TEST_TABLE
GROUP BY :GROUPING
ORDER BY :GROUPING;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Finalize GroupAggregate
Output: region, last(highlow, timec), first(highlow, timec)
Group Key: region
-> Custom Scan (AsyncAppend)
-> Sort
Output: region, (PARTIAL last(highlow, timec)), (PARTIAL first(highlow, timec))
-> Merge Append
Sort Key: conditions.region
-> Partial GroupAggregate
Output: conditions.region, PARTIAL last(conditions.highlow, conditions.timec), PARTIAL first(conditions.highlow, conditions.timec)
Group Key: conditions.region
-> Custom Scan (DataNodeScan) on public.conditions
Output: conditions.region, conditions.highlow, conditions.timec
Data node: data_node_1
Chunks: _hyper_1_1_dist_chunk, _hyper_1_2_dist_chunk, _hyper_1_3_dist_chunk, _hyper_1_4_dist_chunk
Remote SQL: SELECT timec, region, highlow FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY region ASC NULLS LAST
-> Partial GroupAggregate
Output: conditions_1.region, PARTIAL last(conditions_1.highlow, conditions_1.timec), PARTIAL first(conditions_1.highlow, conditions_1.timec)
Group Key: conditions_1.region
-> Custom Scan (DataNodeScan) on public.conditions conditions_1
Output: conditions_1.region, conditions_1.highlow, conditions_1.timec
Data node: data_node_2
Chunks: _hyper_1_9_dist_chunk, _hyper_1_10_dist_chunk, _hyper_1_11_dist_chunk, _hyper_1_12_dist_chunk
Remote SQL: SELECT timec, region, highlow FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY region ASC NULLS LAST
-> Partial GroupAggregate
Output: conditions_2.region, PARTIAL last(conditions_2.highlow, conditions_2.timec), PARTIAL first(conditions_2.highlow, conditions_2.timec)
Group Key: conditions_2.region
-> Custom Scan (DataNodeScan) on public.conditions conditions_2
Output: conditions_2.region, conditions_2.highlow, conditions_2.timec
Data node: data_node_3
Chunks: _hyper_1_5_dist_chunk, _hyper_1_6_dist_chunk, _hyper_1_7_dist_chunk, _hyper_1_8_dist_chunk
Remote SQL: SELECT timec, region, highlow FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY region ASC NULLS LAST
(31 rows)
Sort Key: region
-> Custom Scan (AsyncAppend)
Output: region, (PARTIAL last(highlow, timec)), (PARTIAL first(highlow, timec))
-> Append
-> Partial HashAggregate
Output: conditions.region, PARTIAL last(conditions.highlow, conditions.timec), PARTIAL first(conditions.highlow, conditions.timec)
Group Key: conditions.region
-> Custom Scan (DataNodeScan) on public.conditions
Output: conditions.region, conditions.highlow, conditions.timec
Data node: data_node_1
Chunks: _hyper_1_1_dist_chunk, _hyper_1_2_dist_chunk, _hyper_1_3_dist_chunk, _hyper_1_4_dist_chunk
Remote SQL: SELECT timec, region, highlow FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4])
-> Partial HashAggregate
Output: conditions_1.region, PARTIAL last(conditions_1.highlow, conditions_1.timec), PARTIAL first(conditions_1.highlow, conditions_1.timec)
Group Key: conditions_1.region
-> Custom Scan (DataNodeScan) on public.conditions conditions_1
Output: conditions_1.region, conditions_1.highlow, conditions_1.timec
Data node: data_node_2
Chunks: _hyper_1_9_dist_chunk, _hyper_1_10_dist_chunk, _hyper_1_11_dist_chunk, _hyper_1_12_dist_chunk
Remote SQL: SELECT timec, region, highlow FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4])
-> Partial HashAggregate
Output: conditions_2.region, PARTIAL last(conditions_2.highlow, conditions_2.timec), PARTIAL first(conditions_2.highlow, conditions_2.timec)
Group Key: conditions_2.region
-> Custom Scan (DataNodeScan) on public.conditions conditions_2
Output: conditions_2.region, conditions_2.highlow, conditions_2.timec
Data node: data_node_3
Chunks: _hyper_1_5_dist_chunk, _hyper_1_6_dist_chunk, _hyper_1_7_dist_chunk, _hyper_1_8_dist_chunk
Remote SQL: SELECT timec, region, highlow FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4])
(33 rows)
-- Mix of aggregates that push down and those that don't
:PREFIX SELECT :GROUPING,
@ -421,40 +427,42 @@ SET enable_partitionwise_aggregate = ON;
FROM :TEST_TABLE
GROUP BY :GROUPING
ORDER BY :GROUPING;
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Finalize GroupAggregate
Output: region, min(allnull), max(temperature), (sum(temperature) + sum(humidity)), avg(humidity), round(stddev((humidity)::integer), 5), bit_and(bit_int), bit_or(bit_int), bool_and(good_life), every((temperature > '0'::double precision)), bool_or(good_life), first(highlow, timec)
Group Key: region
-> Custom Scan (AsyncAppend)
-> Sort
Output: region, (PARTIAL min(allnull)), (PARTIAL max(temperature)), (PARTIAL sum(temperature)), (PARTIAL sum(humidity)), (PARTIAL avg(humidity)), (PARTIAL stddev((humidity)::integer)), (PARTIAL bit_and(bit_int)), (PARTIAL bit_or(bit_int)), (PARTIAL bool_and(good_life)), (PARTIAL every((temperature > '0'::double precision))), (PARTIAL bool_or(good_life)), (PARTIAL first(highlow, timec))
-> Merge Append
Sort Key: conditions.region
-> Partial GroupAggregate
Output: conditions.region, PARTIAL min(conditions.allnull), PARTIAL max(conditions.temperature), PARTIAL sum(conditions.temperature), PARTIAL sum(conditions.humidity), PARTIAL avg(conditions.humidity), PARTIAL stddev((conditions.humidity)::integer), PARTIAL bit_and(conditions.bit_int), PARTIAL bit_or(conditions.bit_int), PARTIAL bool_and(conditions.good_life), PARTIAL every((conditions.temperature > '0'::double precision)), PARTIAL bool_or(conditions.good_life), PARTIAL first(conditions.highlow, conditions.timec)
Group Key: conditions.region
-> Custom Scan (DataNodeScan) on public.conditions
Output: conditions.region, conditions.allnull, conditions.temperature, conditions.humidity, conditions.bit_int, conditions.good_life, conditions.highlow, conditions.timec
Data node: data_node_1
Chunks: _hyper_1_1_dist_chunk, _hyper_1_2_dist_chunk, _hyper_1_3_dist_chunk, _hyper_1_4_dist_chunk
Remote SQL: SELECT timec, region, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY region ASC NULLS LAST
-> Partial GroupAggregate
Output: conditions_1.region, PARTIAL min(conditions_1.allnull), PARTIAL max(conditions_1.temperature), PARTIAL sum(conditions_1.temperature), PARTIAL sum(conditions_1.humidity), PARTIAL avg(conditions_1.humidity), PARTIAL stddev((conditions_1.humidity)::integer), PARTIAL bit_and(conditions_1.bit_int), PARTIAL bit_or(conditions_1.bit_int), PARTIAL bool_and(conditions_1.good_life), PARTIAL every((conditions_1.temperature > '0'::double precision)), PARTIAL bool_or(conditions_1.good_life), PARTIAL first(conditions_1.highlow, conditions_1.timec)
Group Key: conditions_1.region
-> Custom Scan (DataNodeScan) on public.conditions conditions_1
Output: conditions_1.region, conditions_1.allnull, conditions_1.temperature, conditions_1.humidity, conditions_1.bit_int, conditions_1.good_life, conditions_1.highlow, conditions_1.timec
Data node: data_node_2
Chunks: _hyper_1_9_dist_chunk, _hyper_1_10_dist_chunk, _hyper_1_11_dist_chunk, _hyper_1_12_dist_chunk
Remote SQL: SELECT timec, region, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY region ASC NULLS LAST
-> Partial GroupAggregate
Output: conditions_2.region, PARTIAL min(conditions_2.allnull), PARTIAL max(conditions_2.temperature), PARTIAL sum(conditions_2.temperature), PARTIAL sum(conditions_2.humidity), PARTIAL avg(conditions_2.humidity), PARTIAL stddev((conditions_2.humidity)::integer), PARTIAL bit_and(conditions_2.bit_int), PARTIAL bit_or(conditions_2.bit_int), PARTIAL bool_and(conditions_2.good_life), PARTIAL every((conditions_2.temperature > '0'::double precision)), PARTIAL bool_or(conditions_2.good_life), PARTIAL first(conditions_2.highlow, conditions_2.timec)
Group Key: conditions_2.region
-> Custom Scan (DataNodeScan) on public.conditions conditions_2
Output: conditions_2.region, conditions_2.allnull, conditions_2.temperature, conditions_2.humidity, conditions_2.bit_int, conditions_2.good_life, conditions_2.highlow, conditions_2.timec
Data node: data_node_3
Chunks: _hyper_1_5_dist_chunk, _hyper_1_6_dist_chunk, _hyper_1_7_dist_chunk, _hyper_1_8_dist_chunk
Remote SQL: SELECT timec, region, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4]) ORDER BY region ASC NULLS LAST
(31 rows)
Sort Key: region
-> Custom Scan (AsyncAppend)
Output: region, (PARTIAL min(allnull)), (PARTIAL max(temperature)), (PARTIAL sum(temperature)), (PARTIAL sum(humidity)), (PARTIAL avg(humidity)), (PARTIAL stddev((humidity)::integer)), (PARTIAL bit_and(bit_int)), (PARTIAL bit_or(bit_int)), (PARTIAL bool_and(good_life)), (PARTIAL every((temperature > '0'::double precision))), (PARTIAL bool_or(good_life)), (PARTIAL first(highlow, timec))
-> Append
-> Partial HashAggregate
Output: conditions.region, PARTIAL min(conditions.allnull), PARTIAL max(conditions.temperature), PARTIAL sum(conditions.temperature), PARTIAL sum(conditions.humidity), PARTIAL avg(conditions.humidity), PARTIAL stddev((conditions.humidity)::integer), PARTIAL bit_and(conditions.bit_int), PARTIAL bit_or(conditions.bit_int), PARTIAL bool_and(conditions.good_life), PARTIAL every((conditions.temperature > '0'::double precision)), PARTIAL bool_or(conditions.good_life), PARTIAL first(conditions.highlow, conditions.timec)
Group Key: conditions.region
-> Custom Scan (DataNodeScan) on public.conditions
Output: conditions.region, conditions.allnull, conditions.temperature, conditions.humidity, conditions.bit_int, conditions.good_life, conditions.highlow, conditions.timec
Data node: data_node_1
Chunks: _hyper_1_1_dist_chunk, _hyper_1_2_dist_chunk, _hyper_1_3_dist_chunk, _hyper_1_4_dist_chunk
Remote SQL: SELECT timec, region, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4])
-> Partial HashAggregate
Output: conditions_1.region, PARTIAL min(conditions_1.allnull), PARTIAL max(conditions_1.temperature), PARTIAL sum(conditions_1.temperature), PARTIAL sum(conditions_1.humidity), PARTIAL avg(conditions_1.humidity), PARTIAL stddev((conditions_1.humidity)::integer), PARTIAL bit_and(conditions_1.bit_int), PARTIAL bit_or(conditions_1.bit_int), PARTIAL bool_and(conditions_1.good_life), PARTIAL every((conditions_1.temperature > '0'::double precision)), PARTIAL bool_or(conditions_1.good_life), PARTIAL first(conditions_1.highlow, conditions_1.timec)
Group Key: conditions_1.region
-> Custom Scan (DataNodeScan) on public.conditions conditions_1
Output: conditions_1.region, conditions_1.allnull, conditions_1.temperature, conditions_1.humidity, conditions_1.bit_int, conditions_1.good_life, conditions_1.highlow, conditions_1.timec
Data node: data_node_2
Chunks: _hyper_1_9_dist_chunk, _hyper_1_10_dist_chunk, _hyper_1_11_dist_chunk, _hyper_1_12_dist_chunk
Remote SQL: SELECT timec, region, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4])
-> Partial HashAggregate
Output: conditions_2.region, PARTIAL min(conditions_2.allnull), PARTIAL max(conditions_2.temperature), PARTIAL sum(conditions_2.temperature), PARTIAL sum(conditions_2.humidity), PARTIAL avg(conditions_2.humidity), PARTIAL stddev((conditions_2.humidity)::integer), PARTIAL bit_and(conditions_2.bit_int), PARTIAL bit_or(conditions_2.bit_int), PARTIAL bool_and(conditions_2.good_life), PARTIAL every((conditions_2.temperature > '0'::double precision)), PARTIAL bool_or(conditions_2.good_life), PARTIAL first(conditions_2.highlow, conditions_2.timec)
Group Key: conditions_2.region
-> Custom Scan (DataNodeScan) on public.conditions conditions_2
Output: conditions_2.region, conditions_2.allnull, conditions_2.temperature, conditions_2.humidity, conditions_2.bit_int, conditions_2.good_life, conditions_2.highlow, conditions_2.timec
Data node: data_node_3
Chunks: _hyper_1_5_dist_chunk, _hyper_1_6_dist_chunk, _hyper_1_7_dist_chunk, _hyper_1_8_dist_chunk
Remote SQL: SELECT timec, region, temperature, humidity, allnull, highlow, bit_int, good_life FROM public.conditions WHERE _timescaledb_internal.chunks_in(public.conditions.*, ARRAY[1, 2, 3, 4])
(33 rows)
-- Full aggregate pushdown correctness check, compare location grouped query results with partionwise aggregates on and off
\set GROUPING 'location'
@ -480,11 +488,3 @@ SELECT format('\! diff %s %s', :'RESULTS_CONTROL2', :'RESULTS_TEST2') as "DIFF_C
-- multiple values for "col" that has the same timestamp, so the
-- output depends on the order of arriving tuples.
:DIFF_CMD2
14c14
< west | (1,2) | (1,2)
---
> west | (1,2) |
22c22
< west | | 85 | 993674 | 67.5 | 9.68309 | 0 | 10 | t | t | t | (1,2)
---
> west | | 85 | 993674 | 67.5 | 9.68309 | 0 | 10 | t | t | t |


@ -3,20 +3,10 @@
-- LICENSE-TIMESCALE for a copy of the license.
-- Need to be super user to create extension and add data nodes
\c :TEST_DBNAME :ROLE_CLUSTER_SUPERUSER;
\ir include/remote_exec.sql
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
CREATE SCHEMA IF NOT EXISTS test;
psql:include/remote_exec.sql:5: NOTICE: schema "test" already exists, skipping
GRANT USAGE ON SCHEMA test TO PUBLIC;
CREATE OR REPLACE FUNCTION test.remote_exec(srv_name name[], command text)
RETURNS VOID
AS :TSL_MODULE_PATHNAME, 'ts_remote_exec'
LANGUAGE C;
-- Support for execute_sql_and_filter_data_node_name_on_error()
\unset ECHO
psql:include/filter_exec.sql:5: NOTICE: schema "test" already exists, skipping
psql:include/remote_exec.sql:5: NOTICE: schema "test" already exists, skipping
psql:utils/testsupport.sql:8: NOTICE: schema "test" already exists, skipping
-- Cleanup from other potential tests that created these databases
SET client_min_messages TO ERROR;
DROP DATABASE IF EXISTS data_node_1;
@ -2358,27 +2348,27 @@ UPDATE disttable_replicated SET device = 2 WHERE device = (SELECT device FROM de
-> Foreign Scan on _timescaledb_internal._hyper_6_11_dist_chunk _hyper_6_11_dist_chunk_1 (actual rows=2 loops=1)
Output: _hyper_6_11_dist_chunk_1.device
Data node: data_node_1
Remote SQL: SELECT device FROM _timescaledb_internal._hyper_6_11_dist_chunk ORDER BY device ASC NULLS LAST
Remote SQL: SELECT device FROM _timescaledb_internal._hyper_6_11_dist_chunk
-> Foreign Scan on _timescaledb_internal._hyper_6_12_dist_chunk _hyper_6_12_dist_chunk_1 (actual rows=1 loops=1)
Output: _hyper_6_12_dist_chunk_1.device
Data node: data_node_2
Remote SQL: SELECT device FROM _timescaledb_internal._hyper_6_12_dist_chunk ORDER BY device ASC NULLS LAST
Remote SQL: SELECT device FROM _timescaledb_internal._hyper_6_12_dist_chunk
-> Foreign Scan on _timescaledb_internal._hyper_6_13_dist_chunk _hyper_6_13_dist_chunk_1 (actual rows=1 loops=1)
Output: _hyper_6_13_dist_chunk_1.device
Data node: data_node_3
Remote SQL: SELECT device FROM _timescaledb_internal._hyper_6_13_dist_chunk ORDER BY device ASC NULLS LAST
Remote SQL: SELECT device FROM _timescaledb_internal._hyper_6_13_dist_chunk
-> Foreign Scan on _timescaledb_internal._hyper_6_14_dist_chunk _hyper_6_14_dist_chunk_1 (actual rows=2 loops=1)
Output: _hyper_6_14_dist_chunk_1.device
Data node: data_node_1
Remote SQL: SELECT device FROM _timescaledb_internal._hyper_6_14_dist_chunk ORDER BY device ASC NULLS LAST
Remote SQL: SELECT device FROM _timescaledb_internal._hyper_6_14_dist_chunk
-> Foreign Scan on _timescaledb_internal._hyper_6_15_dist_chunk _hyper_6_15_dist_chunk_1 (actual rows=1 loops=1)
Output: _hyper_6_15_dist_chunk_1.device
Data node: data_node_2
Remote SQL: SELECT device FROM _timescaledb_internal._hyper_6_15_dist_chunk ORDER BY device ASC NULLS LAST
Remote SQL: SELECT device FROM _timescaledb_internal._hyper_6_15_dist_chunk
-> Foreign Scan on _timescaledb_internal._hyper_6_16_dist_chunk _hyper_6_16_dist_chunk_1 (actual rows=1 loops=1)
Output: _hyper_6_16_dist_chunk_1.device
Data node: data_node_3
Remote SQL: SELECT device FROM _timescaledb_internal._hyper_6_16_dist_chunk ORDER BY device ASC NULLS LAST
Remote SQL: SELECT device FROM _timescaledb_internal._hyper_6_16_dist_chunk
InitPlan 2 (returns $1)
-> Limit (actual rows=1 loops=1)
Output: devices.device
@ -3295,12 +3285,12 @@ INSERT INTO dist_device VALUES
('2017-01-01 06:01', 1, 1.1),
('2017-01-01 09:11', 3, 2.1),
('2017-01-01 08:01', 1, 1.2);
EXPLAIN VERBOSE
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM dist_device;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------
Append (cost=100.00..175.14 rows=1861 width=20)
-> Custom Scan (DataNodeScan) on public.dist_device (cost=100.00..165.83 rows=1861 width=20)
Append
-> Custom Scan (DataNodeScan) on public.dist_device
Output: dist_device."time", dist_device.dist_device, dist_device.temp
Data node: data_node_1
Chunks: _hyper_15_36_dist_chunk
@ -3315,3 +3305,89 @@ SELECT * FROM dist_device;
Sun Jan 01 08:01:00 2017 PST | 1 | 1.2
(3 rows)
-- Test estimating relation size without stats
CREATE TABLE hyper_estimate(time timestamptz, device int, temp float);
SELECT * FROM create_distributed_hypertable('hyper_estimate', 'time', 'device', number_partitions => 3, replication_factor => 1, chunk_time_interval => INTERVAL '7 days');
NOTICE: adding not-null constraint to column "time"
hypertable_id | schema_name | table_name | created
---------------+-------------+----------------+---------
16 | public | hyper_estimate | t
(1 row)
-- This will enable us to more easily see estimates per chunk
SET timescaledb.enable_per_data_node_queries = false;
-- Estimating chunk progress uses current timestamp so we override it for test purposes
SELECT ts_test_override_current_timestamptz('2019-11-11 00:00'::timestamptz);
ts_test_override_current_timestamptz
--------------------------------------
(1 row)
-- Test estimates when backfilling. 3 chunks should be historical and 3 should be considered current when estimating.
-- Note that the estimated numbers are way off since we are using the shared buffer size as the starting point. This will not be
-- an issue in a production-like environment since the chunk size should be similar to the shared buffer size.
INSERT INTO hyper_estimate VALUES
('2017-01-01 06:01', 1, 1.1),
('2017-01-01 09:11', 1, 2.1),
('2017-01-01 08:01', 1, 1.2),
('2017-01-02 08:01', 1, 1.3),
('2017-01-02 08:01', 2, 1.6),
('2017-01-02 06:01', 2, 1.4),
('2017-01-03 01:01', 3, 2),
('2017-01-03 01:16', 3, 3),
('2017-01-03 01:17', 3, 4),
('2018-01-13 01:01', 1, 2),
('2018-01-13 01:10', 1, 0.4),
('2018-01-13 02:10', 2, 1.4),
('2018-01-13 05:01', 2, 2),
('2018-01-13 05:50', 2, 4),
('2018-01-13 16:01', 3, 2);
-- Since there are no stats we use shared buffers size to estimate number of rows
EXPLAIN (COSTS ON)
SELECT *
FROM hyper_estimate;
QUERY PLAN
---------------------------------------------------------------------------------------------
Append (cost=100.00..166847.40 rows=4118040 width=20)
-> Foreign Scan on _hyper_16_37_dist_chunk (cost=100.00..32468.60 rows=915120 width=20)
-> Foreign Scan on _hyper_16_38_dist_chunk (cost=100.00..32468.60 rows=915120 width=20)
-> Foreign Scan on _hyper_16_39_dist_chunk (cost=100.00..32468.60 rows=915120 width=20)
-> Foreign Scan on _hyper_16_40_dist_chunk (cost=100.00..16283.80 rows=457560 width=20)
-> Foreign Scan on _hyper_16_41_dist_chunk (cost=100.00..16283.80 rows=457560 width=20)
-> Foreign Scan on _hyper_16_42_dist_chunk (cost=100.00..16283.80 rows=457560 width=20)
(7 rows)
-- This will calculate the stats
ANALYZE hyper_estimate;
EXPLAIN (COSTS ON)
SELECT *
FROM hyper_estimate;
QUERY PLAN
--------------------------------------------------------------------------------------
Append (cost=100.00..606.52 rows=15 width=20)
-> Foreign Scan on _hyper_16_37_dist_chunk (cost=100.00..101.12 rows=4 width=20)
-> Foreign Scan on _hyper_16_38_dist_chunk (cost=100.00..101.06 rows=2 width=20)
-> Foreign Scan on _hyper_16_39_dist_chunk (cost=100.00..101.09 rows=3 width=20)
-> Foreign Scan on _hyper_16_40_dist_chunk (cost=100.00..101.06 rows=2 width=20)
-> Foreign Scan on _hyper_16_41_dist_chunk (cost=100.00..101.09 rows=3 width=20)
-> Foreign Scan on _hyper_16_42_dist_chunk (cost=100.00..101.03 rows=1 width=20)
(7 rows)
-- Let's insert data into a new chunk. This will result in chunk creation.
INSERT INTO hyper_estimate VALUES ('2019-11-11 06:01', 1, 1.1);
-- We have stats for previous chunks so we can interpolate number of rows for the new chunk
EXPLAIN (COSTS ON)
SELECT *
FROM hyper_estimate;
QUERY PLAN
--------------------------------------------------------------------------------------
Append (cost=100.00..706.58 rows=17 width=20)
-> Foreign Scan on _hyper_16_37_dist_chunk (cost=100.00..101.12 rows=4 width=20)
-> Foreign Scan on _hyper_16_38_dist_chunk (cost=100.00..101.06 rows=2 width=20)
-> Foreign Scan on _hyper_16_39_dist_chunk (cost=100.00..101.09 rows=3 width=20)
-> Foreign Scan on _hyper_16_40_dist_chunk (cost=100.00..101.06 rows=2 width=20)
-> Foreign Scan on _hyper_16_41_dist_chunk (cost=100.00..101.09 rows=3 width=20)
-> Foreign Scan on _hyper_16_42_dist_chunk (cost=100.00..101.03 rows=1 width=20)
-> Foreign Scan on _hyper_16_43_dist_chunk (cost=100.00..100.05 rows=2 width=20)
(8 rows)


@ -3,20 +3,10 @@
-- LICENSE-TIMESCALE for a copy of the license.
-- Need to be super user to create extension and add data nodes
\c :TEST_DBNAME :ROLE_CLUSTER_SUPERUSER;
\ir include/remote_exec.sql
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
CREATE SCHEMA IF NOT EXISTS test;
psql:include/remote_exec.sql:5: NOTICE: schema "test" already exists, skipping
GRANT USAGE ON SCHEMA test TO PUBLIC;
CREATE OR REPLACE FUNCTION test.remote_exec(srv_name name[], command text)
RETURNS VOID
AS :TSL_MODULE_PATHNAME, 'ts_remote_exec'
LANGUAGE C;
-- Support for execute_sql_and_filter_data_node_name_on_error()
\unset ECHO
psql:include/filter_exec.sql:5: NOTICE: schema "test" already exists, skipping
psql:include/remote_exec.sql:5: NOTICE: schema "test" already exists, skipping
psql:utils/testsupport.sql:8: NOTICE: schema "test" already exists, skipping
-- Cleanup from other potential tests that created these databases
SET client_min_messages TO ERROR;
DROP DATABASE IF EXISTS data_node_1;
@ -3289,11 +3279,11 @@ INSERT INTO dist_device VALUES
('2017-01-01 06:01', 1, 1.1),
('2017-01-01 09:11', 3, 2.1),
('2017-01-01 08:01', 1, 1.2);
EXPLAIN VERBOSE
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM dist_device;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------
Custom Scan (DataNodeScan) on public.dist_device (cost=100.00..165.83 rows=1861 width=20)
Custom Scan (DataNodeScan) on public.dist_device
Output: dist_device."time", dist_device.dist_device, dist_device.temp
Data node: data_node_1
Chunks: _hyper_15_36_dist_chunk
@ -3308,3 +3298,89 @@ SELECT * FROM dist_device;
Sun Jan 01 08:01:00 2017 PST | 1 | 1.2
(3 rows)
-- Test estimating relation size without stats
CREATE TABLE hyper_estimate(time timestamptz, device int, temp float);
SELECT * FROM create_distributed_hypertable('hyper_estimate', 'time', 'device', number_partitions => 3, replication_factor => 1, chunk_time_interval => INTERVAL '7 days');
NOTICE: adding not-null constraint to column "time"
hypertable_id | schema_name | table_name | created
---------------+-------------+----------------+---------
16 | public | hyper_estimate | t
(1 row)
-- This will enable us to more easily see estimates per chunk
SET timescaledb.enable_per_data_node_queries = false;
-- Estimating chunk progress uses current timestamp so we override it for test purposes
SELECT ts_test_override_current_timestamptz('2019-11-11 00:00'::timestamptz);
ts_test_override_current_timestamptz
--------------------------------------
(1 row)
-- Test estimates when backfilling. 3 chunks should be historical and 3 should be considered current when estimating.
-- Note that the estimated numbers are way off since we are using the shared buffer size as the starting point. This will not be
-- an issue in a production-like environment since the chunk size should be similar to the shared buffer size.
INSERT INTO hyper_estimate VALUES
('2017-01-01 06:01', 1, 1.1),
('2017-01-01 09:11', 1, 2.1),
('2017-01-01 08:01', 1, 1.2),
('2017-01-02 08:01', 1, 1.3),
('2017-01-02 08:01', 2, 1.6),
('2017-01-02 06:01', 2, 1.4),
('2017-01-03 01:01', 3, 2),
('2017-01-03 01:16', 3, 3),
('2017-01-03 01:17', 3, 4),
('2018-01-13 01:01', 1, 2),
('2018-01-13 01:10', 1, 0.4),
('2018-01-13 02:10', 2, 1.4),
('2018-01-13 05:01', 2, 2),
('2018-01-13 05:50', 2, 4),
('2018-01-13 16:01', 3, 2);
-- Since there are no stats we use shared buffers size to estimate number of rows
EXPLAIN (COSTS ON)
SELECT *
FROM hyper_estimate;
QUERY PLAN
---------------------------------------------------------------------------------------------
Append (cost=100.00..166847.40 rows=4118040 width=20)
-> Foreign Scan on _hyper_16_37_dist_chunk (cost=100.00..32468.60 rows=915120 width=20)
-> Foreign Scan on _hyper_16_38_dist_chunk (cost=100.00..32468.60 rows=915120 width=20)
-> Foreign Scan on _hyper_16_39_dist_chunk (cost=100.00..32468.60 rows=915120 width=20)
-> Foreign Scan on _hyper_16_40_dist_chunk (cost=100.00..16283.80 rows=457560 width=20)
-> Foreign Scan on _hyper_16_41_dist_chunk (cost=100.00..16283.80 rows=457560 width=20)
-> Foreign Scan on _hyper_16_42_dist_chunk (cost=100.00..16283.80 rows=457560 width=20)
(7 rows)
-- This will calculate the stats
ANALYZE hyper_estimate;
EXPLAIN (COSTS ON)
SELECT *
FROM hyper_estimate;
QUERY PLAN
--------------------------------------------------------------------------------------
Append (cost=100.00..606.52 rows=15 width=20)
-> Foreign Scan on _hyper_16_37_dist_chunk (cost=100.00..101.12 rows=4 width=20)
-> Foreign Scan on _hyper_16_38_dist_chunk (cost=100.00..101.06 rows=2 width=20)
-> Foreign Scan on _hyper_16_39_dist_chunk (cost=100.00..101.09 rows=3 width=20)
-> Foreign Scan on _hyper_16_40_dist_chunk (cost=100.00..101.06 rows=2 width=20)
-> Foreign Scan on _hyper_16_41_dist_chunk (cost=100.00..101.09 rows=3 width=20)
-> Foreign Scan on _hyper_16_42_dist_chunk (cost=100.00..101.03 rows=1 width=20)
(7 rows)
-- Let's insert data into a new chunk. This will result in chunk creation.
INSERT INTO hyper_estimate VALUES ('2019-11-11 06:01', 1, 1.1);
-- We have stats for previous chunks so we can interpolate number of rows for the new chunk
EXPLAIN (COSTS ON)
SELECT *
FROM hyper_estimate;
QUERY PLAN
--------------------------------------------------------------------------------------
Append (cost=100.00..706.58 rows=17 width=20)
-> Foreign Scan on _hyper_16_37_dist_chunk (cost=100.00..101.12 rows=4 width=20)
-> Foreign Scan on _hyper_16_38_dist_chunk (cost=100.00..101.06 rows=2 width=20)
-> Foreign Scan on _hyper_16_39_dist_chunk (cost=100.00..101.09 rows=3 width=20)
-> Foreign Scan on _hyper_16_40_dist_chunk (cost=100.00..101.06 rows=2 width=20)
-> Foreign Scan on _hyper_16_41_dist_chunk (cost=100.00..101.09 rows=3 width=20)
-> Foreign Scan on _hyper_16_42_dist_chunk (cost=100.00..101.03 rows=1 width=20)
-> Foreign Scan on _hyper_16_43_dist_chunk (cost=100.00..100.05 rows=2 width=20)
(8 rows)

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -48,6 +48,7 @@ INSERT INTO hyper VALUES
('2018-05-20 15:08', 5, 1, 9.4),
('2018-05-30 13:02', 3, 2, 9.0);
-- Update table stats
ANALYZE hyper;
-- Optimizer debug messages shown at debug level 2


@ -4,12 +4,12 @@
-- Need to be super user to create extension and add data nodes
\c :TEST_DBNAME :ROLE_CLUSTER_SUPERUSER;
\ir include/remote_exec.sql
-- Support for execute_sql_and_filter_data_node_name_on_error()
\unset ECHO
\o /dev/null
\ir include/filter_exec.sql
\ir include/remote_exec.sql
\ir utils/testsupport.sql
\o
\set ECHO all
@ -950,7 +950,56 @@ INSERT INTO dist_device VALUES
('2017-01-01 09:11', 3, 2.1),
('2017-01-01 08:01', 1, 1.2);
EXPLAIN VERBOSE
EXPLAIN (VERBOSE, COSTS OFF)
SELECT * FROM dist_device;
SELECT * FROM dist_device;
-- Test estimating relation size without stats
CREATE TABLE hyper_estimate(time timestamptz, device int, temp float);
SELECT * FROM create_distributed_hypertable('hyper_estimate', 'time', 'device', number_partitions => 3, replication_factor => 1, chunk_time_interval => INTERVAL '7 days');
-- This will enable us to more easily see estimates per chunk
SET timescaledb.enable_per_data_node_queries = false;
-- Estimating chunk progress uses current timestamp so we override it for test purposes
SELECT ts_test_override_current_timestamptz('2019-11-11 00:00'::timestamptz);
-- Test estimates when backfilling. 3 chunks should be historical and 3 should be considered current when estimating.
-- Note that the estimated numbers are way off since we are using the shared buffer size as the starting point. This will not be
-- an issue in a production-like environment since the chunk size should be similar to the shared buffer size.
INSERT INTO hyper_estimate VALUES
('2017-01-01 06:01', 1, 1.1),
('2017-01-01 09:11', 1, 2.1),
('2017-01-01 08:01', 1, 1.2),
('2017-01-02 08:01', 1, 1.3),
('2017-01-02 08:01', 2, 1.6),
('2017-01-02 06:01', 2, 1.4),
('2017-01-03 01:01', 3, 2),
('2017-01-03 01:16', 3, 3),
('2017-01-03 01:17', 3, 4),
('2018-01-13 01:01', 1, 2),
('2018-01-13 01:10', 1, 0.4),
('2018-01-13 02:10', 2, 1.4),
('2018-01-13 05:01', 2, 2),
('2018-01-13 05:50', 2, 4),
('2018-01-13 16:01', 3, 2);
-- Since there are no stats we use shared buffers size to estimate number of rows
EXPLAIN (COSTS ON)
SELECT *
FROM hyper_estimate;
-- This will calculate the stats
ANALYZE hyper_estimate;
EXPLAIN (COSTS ON)
SELECT *
FROM hyper_estimate;
-- Let's insert data into a new chunk. This will result in chunk creation.
INSERT INTO hyper_estimate VALUES ('2019-11-11 06:01', 1, 1.1);
-- We have stats for previous chunks so we can interpolate number of rows for the new chunk
EXPLAIN (COSTS ON)
SELECT *
FROM hyper_estimate;


@ -5,10 +5,7 @@
-- Need to be super user to create extension and add data nodes
\c :TEST_DBNAME :ROLE_CLUSTER_SUPERUSER;
\ir include/remote_exec.sql
CREATE OR REPLACE FUNCTION ts_test_override_pushdown_timestamptz(new_value TIMESTAMPTZ) RETURNS VOID
AS :TSL_MODULE_PATHNAME, 'ts_test_override_pushdown_timestamptz'
LANGUAGE C VOLATILE STRICT;
\ir utils/testsupport.sql
-- Cleanup from other potential tests that created these databases
SET client_min_messages TO ERROR;
@ -658,7 +655,7 @@ GROUP BY 1, 2
LIMIT 1;
-- contains whitelisted time expressions
SELECT ts_test_override_pushdown_timestamptz('2018-06-01 00:00'::timestamptz);
SELECT ts_test_override_current_timestamptz('2018-06-01 00:00'::timestamptz);
EXPLAIN (VERBOSE, COSTS OFF)
SELECT time, device, avg(temp)
@ -685,7 +682,7 @@ LIMIT 1;
EXPLAIN (VERBOSE, COSTS OFF)
EXECUTE timestamp_pushdown_test;
SELECT ts_test_override_pushdown_timestamptz('2019-10-15 00:00'::timestamptz);
SELECT ts_test_override_current_timestamptz('2019-10-15 00:00'::timestamptz);
EXPLAIN (VERBOSE, COSTS OFF)
EXECUTE timestamp_pushdown_test;


@ -8,13 +8,13 @@
#include "export.h"
#include "fdw/scan_exec.h"
TS_FUNCTION_INFO_V1(ts_test_override_pushdown_timestamptz);
TS_FUNCTION_INFO_V1(ts_test_override_current_timestamptz);
Datum
ts_test_override_pushdown_timestamptz(PG_FUNCTION_ARGS)
ts_test_override_current_timestamptz(PG_FUNCTION_ARGS)
{
#ifdef TS_DEBUG
fdw_scan_debug_override_pushdown_timestamp(PG_GETARG_INT64(0));
fdw_scan_debug_override_current_timestamp(PG_GETARG_INT64(0));
PG_RETURN_VOID();
#else
elog(ERROR, "unable to handle ts_test_override_current_timestamptz without TS_DEBUG flag set");