Start refactoring to support any number of partitioning dimensions

The current schema and code support only one "time" and one "space"
dimension for partitioning. While this is probably good enough for
many applications, it won't allow partitioning along two "space"
dimensions, like longitude and latitude, for instance.

This commit is a first stab at refactoring the metadata schema and
internal functionality to support any number of partitioning
dimensions. The idea is to think of a hypertable as partitioned in N
dimensions, where a partition (chunk) is a hypercube in the
N-dimensional hyperspace. Each dimension is divided into a number of
"slices" (dimensional partitions) that each occupies a range along the
dimension's keyspace axis.

A dimension can either be "closed" or "open", indicating a space-like
or time-like dimension, respectively. Closed dimensions have a limited
number of partitions that cover the entire domain. Open dimensions have
an unlimited number of partitions, so their partitions must be created
on demand as needed for the inserted data.

Note that the open/closed notation is preferred over time/space, since
an open dimension could be used for something other than time (for
instance, a sequence number). Conversely, closed dimensions need not be
space-like as it is entirely possible to use time as a closed
dimension.

Another advantage of this refactoring is that it now unifies a lot
of the logic for time and space that used to be separate.

On the schema level, this gets rid of the partition/partition_epoch tables
and replaces them with dimension, dimension_slice, and chunk_constraint. It
also removes the time columns associated with a chunk and instead makes
time a separate dimension stored as a chunk_constraint.
This commit is contained in:
Matvey Arye 2017-06-14 18:57:38 -04:00 committed by Erik Nordström
parent 216afe6ad7
commit d3bdcbaf1b
4 changed files with 65 additions and 173 deletions

26
sql/dimensions.sql Normal file
View File

@ -0,0 +1,26 @@
-- Find the chunk that contains a given point in two dimensions.
--
-- Parameters:
--   time_dimension_id     id of the first dimension to match (dimension_slice.dimension_id)
--   time_value            point along that dimension
--   space_dimension_id    id of the second dimension to match (dimension_slice.dimension_id)
--   space_dimension_hash  point along that dimension
--
-- A chunk qualifies when it is linked, through chunk_constraint, to a slice of
-- each of the two dimensions whose range contains the corresponding value.
-- Both range_start and range_end are compared inclusively.
--
-- Returns the matching chunk row, or NULL when no chunk qualifies. The chunk
-- id is compared against a scalar subquery, so the lookup raises an error if
-- more than one chunk id satisfies both dimensions.
CREATE OR REPLACE FUNCTION _timescaledb_internal.find_chunk(
    time_dimension_id INTEGER,
    time_value BIGINT,
    space_dimension_id INTEGER,
    space_dimension_hash BIGINT
)
RETURNS _timescaledb_catalog.chunk
LANGUAGE SQL STABLE
AS $BODY$
    SELECT c.*
    FROM _timescaledb_catalog.chunk AS c
    WHERE c.id = (
        -- chunk ids constrained by a slice of the first dimension that contains time_value
        SELECT first_cc.chunk_id
        FROM _timescaledb_catalog.dimension_slice AS first_slice
        INNER JOIN _timescaledb_catalog.chunk_constraint AS first_cc
            ON first_cc.dimension_slice_id = first_slice.id
        WHERE first_slice.dimension_id = time_dimension_id
          AND first_slice.range_start <= time_value
          AND first_slice.range_end >= time_value

        INTERSECT

        -- chunk ids constrained by a slice of the second dimension that contains space_dimension_hash
        SELECT second_cc.chunk_id
        FROM _timescaledb_catalog.dimension_slice AS second_slice
        INNER JOIN _timescaledb_catalog.chunk_constraint AS second_cc
            ON second_cc.dimension_slice_id = second_slice.id
        WHERE second_slice.dimension_id = space_dimension_id
          AND second_slice.range_start <= space_dimension_hash
          AND second_slice.range_end >= space_dimension_hash
    )
$BODY$;

View File

@ -23,71 +23,58 @@ CREATE TABLE IF NOT EXISTS _timescaledb_catalog.hypertable (
SELECT pg_catalog.pg_extension_config_dump('_timescaledb_catalog.hypertable', ''); SELECT pg_catalog.pg_extension_config_dump('_timescaledb_catalog.hypertable', '');
SELECT pg_catalog.pg_extension_config_dump(pg_get_serial_sequence('_timescaledb_catalog.hypertable','id'), ''); SELECT pg_catalog.pg_extension_config_dump(pg_get_serial_sequence('_timescaledb_catalog.hypertable','id'), '');
-- A partition_epoch represents a different partitioning of the data. CREATE TABLE _timescaledb_catalog.dimension (
-- It has a start and end time (data time). Data needs to be placed in the correct epoch by time. id SERIAL NOT NULL PRIMARY KEY,
-- Partitionings are defined by a function, column, and modulo:
-- 1) partitioning_func - Takes the partitioning_column and returns a number
-- which is modulo'd to place the data correctly
-- 2) partitioning_mod - Number used in modulo operation
-- 3) partitioning_column - column in data to partition by (input to partitioning_func)
--
-- Changing a data's partitioning, and thus creating a new epoch, should be done
-- INFREQUENTLY as it's expensive operation.
CREATE TABLE IF NOT EXISTS _timescaledb_catalog.partition_epoch (
id SERIAL NOT NULL PRIMARY KEY,
hypertable_id INTEGER NOT NULL REFERENCES _timescaledb_catalog.hypertable(id) ON DELETE CASCADE, hypertable_id INTEGER NOT NULL REFERENCES _timescaledb_catalog.hypertable(id) ON DELETE CASCADE,
start_time BIGINT NULL CHECK (start_time >= 0), column_name NAME NOT NULL,
end_time BIGINT NULL CHECK (end_time >= 0), time_type BOOLEAN NOT NULL,
num_partitions SMALLINT NOT NULL CHECK (num_partitions >= 0), -- space-columns
num_slices SMALLINT NULL,
partitioning_func_schema NAME NULL, partitioning_func_schema NAME NULL,
partitioning_func NAME NULL, -- function name of a function of the form func(data_value, partitioning_mod) -> [0, partitioning_mod) partitioning_func NAME NULL, -- function name of a function of the form func(data_value) -> [0, 65535)
partitioning_mod INT NOT NULL CHECK (partitioning_mod < 65536), -- time-columns
partitioning_column NAME NULL, interval_length BIGINT NULL,
UNIQUE (hypertable_id, start_time),
UNIQUE (hypertable_id, end_time),
CHECK (start_time <= end_time),
CHECK (num_partitions <= partitioning_mod),
CHECK ((partitioning_func_schema IS NULL AND partitioning_func IS NULL) OR (partitioning_func_schema IS NOT NULL AND partitioning_func IS NOT NULL))
);
CREATE INDEX ON _timescaledb_catalog.partition_epoch(hypertable_id, start_time, end_time);
SELECT pg_catalog.pg_extension_config_dump('_timescaledb_catalog.partition_epoch', '');
SELECT pg_catalog.pg_extension_config_dump(pg_get_serial_sequence('_timescaledb_catalog.partition_epoch','id'), '');
-- A partition defines a partition witin a partition_epoch. CHECK (
-- For any partition the keyspace is defined as [keyspace_start, keyspace_end]. (partitioning_func_schema IS NULL AND partitioning_func IS NULL) OR
-- For any epoch, there must be a partition that covers every element in the (partitioning_func_schema IS NOT NULL AND partitioning_func IS NOT NULL)
-- keyspace, i.e. from [0, partition_epoch.partitioning_mod]. ),
-- Parent: "hypertable.schema_name"."hypertable.table_name" CHECK (
-- Children: "chunk.schema_name"."chunk.table_name" (time_type AND interval_length IS NOT NULL) OR
CREATE TABLE IF NOT EXISTS _timescaledb_catalog.partition ( (NOT time_type AND num_slices IS NOT NULL)
id SERIAL NOT NULL PRIMARY KEY, )
epoch_id INT NOT NULL REFERENCES _timescaledb_catalog.partition_epoch (id) ON DELETE CASCADE,
keyspace_start SMALLINT NOT NULL CHECK (keyspace_start >= 0), -- start inclusive
keyspace_end SMALLINT NOT NULL CHECK (keyspace_end > 0), -- end inclusive; compatible with between operator
tablespace NAME NULL,
UNIQUE (epoch_id, keyspace_start),
CHECK (keyspace_end > keyspace_start)
); );
CREATE INDEX ON _timescaledb_catalog.partition(epoch_id); SELECT pg_catalog.pg_extension_config_dump('_timescaledb_catalog.dimension', '');
SELECT pg_catalog.pg_extension_config_dump('_timescaledb_catalog.partition', ''); SELECT pg_catalog.pg_extension_config_dump(pg_get_serial_sequence('_timescaledb_catalog.dimension','id'), '');
SELECT pg_catalog.pg_extension_config_dump(pg_get_serial_sequence('_timescaledb_catalog.partition','id'), '');
-- A dimension_slice is one range along the keyspace of a single dimension.
-- Chunks are tied to slices through _timescaledb_catalog.chunk_constraint.
-- NOTE(review): find_chunk compares both range_start and range_end
-- inclusively; confirm that slices are stored with an inclusive end.
CREATE TABLE _timescaledb_catalog.dimension_slice (
id SERIAL NOT NULL PRIMARY KEY,
-- Owning dimension; deleting the dimension deletes its slices.
dimension_id INTEGER NOT NULL REFERENCES _timescaledb_catalog.dimension(id) ON DELETE CASCADE,
-- Range bounds along the dimension; both must be non-negative.
range_start BIGINT NOT NULL CHECK (range_start >= 0),
range_end BIGINT NOT NULL CHECK (range_end >= 0),
-- A slice may be a single point (start = end) but never inverted.
CHECK (range_start <= range_end),
-- Prevents exact duplicates only; overlapping ranges within a dimension
-- are not rejected by this constraint.
UNIQUE (dimension_id, range_start, range_end)
);
-- Register the table and its id sequence as extension configuration so that
-- pg_dump includes their contents (an empty filter means all rows).
SELECT pg_catalog.pg_extension_config_dump('_timescaledb_catalog.dimension_slice', '');
SELECT pg_catalog.pg_extension_config_dump(pg_get_serial_sequence('_timescaledb_catalog.dimension_slice','id'), '');
-- A chunk_constraint links a chunk to one dimension slice that bounds it.
-- A chunk has one row here per slice it is constrained by; find_chunk
-- resolves a chunk by intersecting the chunk ids of the matching slices.
-- NOTE(review): this table references _timescaledb_catalog.chunk, whose
-- CREATE TABLE appears after this statement in this view; confirm that chunk
-- exists before this statement runs.
CREATE TABLE _timescaledb_catalog.chunk_constraint (
    -- Must point at dimension_slice (not dimension): find_chunk joins this
    -- column against dimension_slice.id.
    dimension_slice_id INTEGER NOT NULL REFERENCES _timescaledb_catalog.dimension_slice(id) ON DELETE CASCADE,
    -- Constrained chunk; deleting the chunk deletes its constraints.
    chunk_id INTEGER NOT NULL REFERENCES _timescaledb_catalog.chunk(id) ON DELETE CASCADE,
    PRIMARY KEY (dimension_slice_id, chunk_id)
);
-- Register the table as extension configuration so that pg_dump includes its
-- contents (an empty filter means all rows).
SELECT pg_catalog.pg_extension_config_dump('_timescaledb_catalog.chunk_constraint', '');
-- Represent a chunk of data, which is data in a hypertable that is -- Represent a chunk of data, which is data in a hypertable that is
-- partitioned by both the partition_column and time. -- partitioned by both the partition_column and time.
CREATE TABLE IF NOT EXISTS _timescaledb_catalog.chunk ( CREATE TABLE IF NOT EXISTS _timescaledb_catalog.chunk (
id SERIAL NOT NULL PRIMARY KEY, id SERIAL NOT NULL PRIMARY KEY,
partition_id INT NOT NULL REFERENCES _timescaledb_catalog.partition (id) ON DELETE CASCADE, partition_id INT NOT NULL REFERENCES _timescaledb_catalog.partition (id) ON DELETE CASCADE,
start_time BIGINT NOT NULL CHECK (start_time >= 0),
end_time BIGINT NOT NULL CHECK (end_time >= 0),
schema_name NAME NOT NULL, schema_name NAME NOT NULL,
table_name NAME NOT NULL, table_name NAME NOT NULL,
UNIQUE (schema_name, table_name), UNIQUE (schema_name, table_name),
UNIQUE (partition_id, start_time),
UNIQUE (partition_id, end_time),
CHECK (start_time <= end_time)
); );
CREATE UNIQUE INDEX ON _timescaledb_catalog.chunk (partition_id) WHERE start_time IS NULL;
CREATE UNIQUE INDEX ON _timescaledb_catalog.chunk (partition_id) WHERE end_time IS NULL;
CREATE INDEX ON _timescaledb_catalog.chunk(partition_id, start_time, end_time); CREATE INDEX ON _timescaledb_catalog.chunk(partition_id, start_time, end_time);
SELECT pg_catalog.pg_extension_config_dump('_timescaledb_catalog.chunk', ''); SELECT pg_catalog.pg_extension_config_dump('_timescaledb_catalog.chunk', '');
SELECT pg_catalog.pg_extension_config_dump(pg_get_serial_sequence('_timescaledb_catalog.chunk','id'), ''); SELECT pg_catalog.pg_extension_config_dump(pg_get_serial_sequence('_timescaledb_catalog.chunk','id'), '');

View File

@ -9,8 +9,6 @@
static const char *catalog_table_names[_MAX_CATALOG_TABLES] = { static const char *catalog_table_names[_MAX_CATALOG_TABLES] = {
[HYPERTABLE] = HYPERTABLE_TABLE_NAME, [HYPERTABLE] = HYPERTABLE_TABLE_NAME,
[PARTITION] = PARTITION_TABLE_NAME,
[PARTITION_EPOCH] = PARTITION_EPOCH_TABLE_NAME,
[CHUNK] = CHUNK_TABLE_NAME [CHUNK] = CHUNK_TABLE_NAME
}; };
@ -28,20 +26,6 @@ const static TableIndexDef catalog_table_index_definitions[_MAX_CATALOG_TABLES]
[HYPERTABLE_NAME_INDEX] = "hypertable_schema_name_table_name_key", [HYPERTABLE_NAME_INDEX] = "hypertable_schema_name_table_name_key",
} }
}, },
[PARTITION] = {
.length = _MAX_PARTITION_INDEX,
.names = (char *[]) {
[PARTITION_ID_INDEX] = "partition_pkey",
[PARTITION_PARTITION_EPOCH_ID_INDEX] = "partition_epoch_id_idx",
}
},
[PARTITION_EPOCH] = {
.length = _MAX_PARTITION_EPOCH_INDEX,
.names = (char *[]) {
[PARTITION_EPOCH_ID_INDEX] = "partition_epoch_pkey",
[PARTITION_EPOCH_TIME_INDEX] = "partition_epoch_hypertable_id_start_time_end_time_idx",
}
},
[CHUNK] = { [CHUNK] = {
.length = _MAX_CHUNK_INDEX, .length = _MAX_CHUNK_INDEX,
.names = (char *[]) { .names = (char *[]) {

View File

@ -21,8 +21,6 @@
enum CatalogTable enum CatalogTable
{ {
HYPERTABLE = 0, HYPERTABLE = 0,
PARTITION_EPOCH,
PARTITION,
CHUNK, CHUNK,
_MAX_CATALOG_TABLES, _MAX_CATALOG_TABLES,
}; };
@ -83,90 +81,6 @@ enum Anum_hypertable_name_idx
#define Natts_hypertable_name_idx (_Anum_hypertable_name_max - 1) #define Natts_hypertable_name_idx (_Anum_hypertable_name_max - 1)
/***********************************
*
* Partition epoch table definitions
*
***********************************/
#define PARTITION_EPOCH_TABLE_NAME "partition_epoch"
enum
{
PARTITION_EPOCH_ID_INDEX = 0,
PARTITION_EPOCH_TIME_INDEX,
_MAX_PARTITION_EPOCH_INDEX,
};
enum Anum_partition_epoch
{
Anum_partition_epoch_id = 1,
Anum_partition_epoch_hypertable_id,
Anum_partition_epoch_start_time,
Anum_partition_epoch_end_time,
Anum_partition_epoch_num_partitions,
Anum_partition_epoch_partitioning_func_schema,
Anum_partition_epoch_partitioning_func,
Anum_partition_epoch_partitioning_mod,
Anum_partition_epoch_partitioning_column,
_Anum_partition_epoch_max,
};
#define Natts_partition_epoch \
(_Anum_partition_epoch_max - 1)
enum Anum_partition_epoch_hypertable_start_time_end_time_idx
{
Anum_partition_epoch_hypertable_start_time_end_time_idx_hypertable_id = 1,
Anum_partition_epoch_hypertable_start_time_end_time_idx_start_time,
Anum_partition_epoch_hypertable_start_time_end_time_idx_end_time,
_Anum_partition_epoch_hypertable_start_time_end_time_idx_max,
};
#define Natts_partition_epoch_hypertable_start_time_end_time_idx \
(_Anum_partition_epoch_hypertable_start_time_end_time_idx_max - 1)
enum Anum_partition_epoch_id_idx
{
Anum_partition_epoch_id_idx_epoch_id = 1,
_Anum_partition_epoch_id_idx_max,
};
#define Natts_partition_epoch_id_idx \
(_Anum_partition_epoch_id_idx_max - 1)
/*****************************
*
* Partition table definitions
*
*****************************/
#define PARTITION_TABLE_NAME "partition"
enum
{
PARTITION_ID_INDEX = 0,
PARTITION_PARTITION_EPOCH_ID_INDEX,
_MAX_PARTITION_INDEX,
};
enum Anum_partition
{
Anum_partition_id = 1,
Anum_partition_partition_epoch_id,
Anum_partition_keyspace_start,
Anum_partition_keyspace_end,
Anum_partition_tablespace,
Anum_partition_schema_name,
Anum_partition_table_name,
_Anum_partition_max,
};
#define Natts_partition \
(_Anum_partition_max - 1)
/************************* /*************************
* *
* Chunk table definitions * Chunk table definitions
@ -186,8 +100,6 @@ enum Anum_chunk
{ {
Anum_chunk_id = 1, Anum_chunk_id = 1,
Anum_chunk_partition_id, Anum_chunk_partition_id,
Anum_chunk_start_time,
Anum_chunk_end_time,
Anum_chunk_schema_name, Anum_chunk_schema_name,
Anum_chunk_table_name, Anum_chunk_table_name,
_Anum_chunk_max, _Anum_chunk_max,
@ -196,29 +108,12 @@ enum Anum_chunk
#define Natts_chunk \ #define Natts_chunk \
(_Anum_chunk_max - 1) (_Anum_chunk_max - 1)
enum Anum_chunk_partition_start_time_end_time_idx
{
Anum_chunk_partition_start_time_end_time_idx_partition_id = 1,
Anum_chunk_partition_start_time_end_time_idx_start_time,
Anum_chunk_partition_start_time_end_time_idx_end_time,
_Anum_chunk_partition_start_time_end_time_idx_max,
};
#define Natts_chunk_partition_start_time_end_time_idx \
(_Anum_chunk_partition_start_time_end_time_idx_max -1)
/**************************************
*
* Chunk replica node table definitions
*
**************************************/
#define MAX(a, b) \ #define MAX(a, b) \
((long)(a) > (long)(b) ? (a) : (b)) ((long)(a) > (long)(b) ? (a) : (b))
#define _MAX_TABLE_INDEXES MAX(_MAX_HYPERTABLE_INDEX,\ #define _MAX_TABLE_INDEXES MAX(_MAX_HYPERTABLE_INDEX,\
MAX(_MAX_PARTITION_EPOCH_INDEX, \ _MAX_CHUNK_INDEX)
MAX(_MAX_PARTITION_INDEX, _MAX_CHUNK_INDEX)))
typedef enum CacheType typedef enum CacheType
{ {