timescaledb/sql/common/tables.sql
2016-11-22 16:42:19 -05:00

172 lines
8.0 KiB
SQL

-- Data nodes of the cluster, keyed by database name.
CREATE TABLE IF NOT EXISTS node (
    database_name NAME    NOT NULL PRIMARY KEY,
    schema_name   NAME    NOT NULL UNIQUE,       -- public schema of remote
    server_name   NAME    NOT NULL UNIQUE,
    hostname      TEXT    NOT NULL,
    active        BOOLEAN NOT NULL DEFAULT TRUE,
    id            SERIAL  NOT NULL UNIQUE        -- numeric id for the node; used in naming
);
-- Singleton table holding info about the meta database.
-- At most one row may exist; this is enforced by the unique index below.
CREATE TABLE IF NOT EXISTS meta (
    database_name NAME NOT NULL PRIMARY KEY,
    hostname      TEXT NOT NULL,
    server_name   NAME NOT NULL
);
-- Unique index over a constant expression: every row maps to the same key (1),
-- so inserting a second row always violates uniqueness.
-- IF NOT EXISTS keeps the script re-runnable, in line with the CREATE TABLE
-- statements and the one_partition_field index in this file.
CREATE UNIQUE INDEX IF NOT EXISTS there_can_be_only_one_meta
    ON meta ((1));
-- Users that should exist on all nodes and on the meta database in the cluster.
CREATE TABLE IF NOT EXISTS cluster_user (
    username TEXT NOT NULL PRIMARY KEY,
    -- Not any more of a security hole than usual, since the password is
    -- stored in pg_user_mapping anyway.
    password TEXT NULL
);
-- A hypertable is an abstraction that represents a replicated table which is
-- partitioned on 2 dimensions: one dimension is time, the other is arbitrary.
-- The abstraction also tracks the distinct value set of any columns marked as `distinct`.
-- Each row creates 3 tables:
--   i)   main table: just an alias to the 0'th replica for now.
--        Represents the hypertable to the user.
--   ii)  root table: the ancestor of all the data tables (across replicas).
--        Should not be queryable for data (TODO).
--   iii) distinct root table: the ancestor of all distinct tables (across replicas).
--        Should not be queryable for data (TODO).
CREATE TABLE IF NOT EXISTS hypertable (
    -- The CHECK rejects names matching the LIKE pattern below
    -- (a leading underscore, given backslash as the LIKE escape character).
    name                    NAME     NOT NULL PRIMARY KEY CHECK (name NOT LIKE '\_%'),
    main_schema_name        NAME     NOT NULL,
    main_table_name         NAME     NOT NULL,
    associated_schema_name  NAME     NOT NULL,
    associated_table_prefix NAME     NOT NULL,
    root_schema_name        NAME     NOT NULL,
    root_table_name         NAME     NOT NULL,
    distinct_schema_name    NAME     NOT NULL,
    distinct_table_name     NAME     NOT NULL,
    replication_factor      SMALLINT NOT NULL CHECK (replication_factor > 0),
    UNIQUE (main_schema_name, main_table_name),
    UNIQUE (associated_schema_name, associated_table_prefix),
    UNIQUE (root_schema_name, root_table_name)
);
-- One replica of a hypertable's data, spanning all partitions and all time.
-- Each row creates 2 tables:
--   i)  data replica table (schema_name.table_name)
--         parent is the hypertable root table.
--         children are the partition_replica tables.
--   ii) distinct replica table (distinct_schema_name.distinct_table_name)
--         parent is the hypertable distinct root table.
--         children are created by the distinct_replica_node table.
CREATE TABLE IF NOT EXISTS hypertable_replica (
    hypertable_name      NAME     NOT NULL REFERENCES hypertable (name),
    replica_id           SMALLINT NOT NULL CHECK (replica_id >= 0),
    schema_name          NAME     NOT NULL,
    table_name           NAME     NOT NULL,
    distinct_schema_name NAME     NOT NULL,
    distinct_table_name  NAME     NOT NULL,
    PRIMARY KEY (hypertable_name, replica_id),
    UNIQUE (schema_name, table_name)
);
-- There should be one distinct_replica_node for each node that has a chunk from
-- the replica, so one hypertable-replica can have multiple rows on different nodes.
-- That way writes are local. Optimized reads are also local for many queries,
-- but some read queries are cross-node.
-- Each row creates a table.
--   Parent table: hypertable_replica.distinct_table
--   No children; the created table contains data.
CREATE TABLE IF NOT EXISTS distinct_replica_node (
    hypertable_name NAME     NOT NULL,
    replica_id      SMALLINT NOT NULL,
    database_name   NAME     NOT NULL REFERENCES node (database_name),
    schema_name     NAME     NOT NULL,
    table_name      NAME     NOT NULL,
    PRIMARY KEY (hypertable_name, replica_id, database_name),
    UNIQUE (schema_name, table_name),
    FOREIGN KEY (hypertable_name, replica_id)
        REFERENCES hypertable_replica (hypertable_name, replica_id)
);
-- A partition epoch represents a different partitioning of the data.
-- It has a start and end time (data time). Data needs to be placed in the
-- correct epoch by time.
-- This should change very infrequently. Expensive to start a new epoch.
CREATE TABLE IF NOT EXISTS partition_epoch (
    id                 SERIAL NOT NULL PRIMARY KEY,
    hypertable_name    NAME   NOT NULL REFERENCES hypertable (name),
    -- start_time / end_time are nullable (presumably meaning an open-ended
    -- epoch -- TODO confirm); when present they must be positive.
    start_time         BIGINT NULL CHECK (start_time > 0),
    end_time           BIGINT NULL CHECK (end_time > 0),
    partitioning_func  NAME   NOT NULL,
    -- The modulus bounds the partition keyspace, so it must be strictly
    -- positive: zero or a negative value would describe an empty/invalid keyspace.
    -- NOTE(review): partition.keyspace_start/keyspace_end are SMALLINT, which
    -- cannot hold values above 32767 -- confirm the upper bound here is intended.
    partitioning_mod   INT    NOT NULL CHECK (partitioning_mod > 0 AND partitioning_mod < 65536),
    partitioning_field NAME   NOT NULL,
    -- NULLs are treated as distinct by UNIQUE, so these only constrain
    -- epochs with a non-NULL bound.
    UNIQUE (hypertable_name, start_time),
    UNIQUE (hypertable_name, end_time),
    -- Passes when either bound is NULL (CHECK accepts an unknown result).
    CHECK (start_time < end_time)
);
-- A partition defines one partition within a partition_epoch.
-- For any partition the keyspace is defined as [0, partition_epoch.partitioning_mod].
-- For any epoch, there must be a partition that covers every element in the keyspace.
-- NOTE(review): the keyspace columns are SMALLINT while partition_epoch allows
-- partitioning_mod up to 65535 -- confirm large moduli are not expected.
CREATE TABLE IF NOT EXISTS partition (
    id             SERIAL   NOT NULL PRIMARY KEY,
    epoch_id       INT      NOT NULL REFERENCES partition_epoch (id),
    keyspace_start SMALLINT NOT NULL CHECK (keyspace_start >= 0), -- start inclusive
    keyspace_end   SMALLINT NOT NULL CHECK (keyspace_end > 0),    -- end inclusive; compatible with the BETWEEN operator
    UNIQUE (epoch_id, keyspace_start),
    CHECK (keyspace_end > keyspace_start)
);
-- A replica of a partition.
-- Each row creates a table:
--   Parent:   "hypertable_replica.schema_name"."hypertable_replica.table_name"
--   Children: "chunk_replica_node.schema_name"."chunk_replica_node.table_name"
-- TODO: trigger to verify that the partition_epoch hypertable name matches this hypertable_name
CREATE TABLE IF NOT EXISTS partition_replica (
    id              SERIAL   NOT NULL PRIMARY KEY,
    partition_id    INT      NOT NULL REFERENCES partition (id),
    hypertable_name NAME     NOT NULL,
    replica_id      SMALLINT NOT NULL,
    schema_name     NAME     NOT NULL,
    table_name      NAME     NOT NULL,
    UNIQUE (schema_name, table_name),
    FOREIGN KEY (hypertable_name, replica_id)
        REFERENCES hypertable_replica (hypertable_name, replica_id)
);
-- Represents a chunk of data,
-- i.e. the data for a particular hypertable partition for a particular time range.
CREATE TABLE IF NOT EXISTS chunk (
    id           SERIAL NOT NULL PRIMARY KEY,
    partition_id INT    NOT NULL REFERENCES partition (id),
    start_time   BIGINT NULL CHECK (start_time > 0),
    end_time     BIGINT NULL CHECK (end_time > 0),
    UNIQUE (partition_id, start_time),
    UNIQUE (partition_id, end_time),
    CHECK (start_time < end_time)
);
-- A mapping between chunks, partition_replica rows, and nodes.
-- This represents the table where the actual data is stored.
-- Each row represents a table:
--   Parent table: "partition_replica.schema_name"."partition_replica.table_name"
CREATE TABLE IF NOT EXISTS chunk_replica_node (
    chunk_id             INT  NOT NULL REFERENCES chunk (id),
    partition_replica_id INT  NOT NULL REFERENCES partition_replica (id),
    database_name        NAME NOT NULL REFERENCES node (database_name),
    schema_name          NAME NOT NULL,
    table_name           NAME NOT NULL,
    PRIMARY KEY (chunk_id, partition_replica_id), -- a single (chunk, replica) tuple
    UNIQUE (chunk_id, database_name),             -- no two chunk replicas on the same node
    UNIQUE (schema_name, table_name)
);
-- A field (column) of a hypertable.
-- TODO: remove is_partitioning; it is defined in the partition_epoch table.
CREATE TABLE IF NOT EXISTS field (
    hypertable_name NAME                NOT NULL REFERENCES hypertable (name),
    name            NAME                NOT NULL,
    data_type       REGTYPE             NOT NULL,
    is_partitioning BOOLEAN             NOT NULL DEFAULT FALSE,
    is_distinct     BOOLEAN             NOT NULL DEFAULT FALSE,
    index_types     field_index_type [] NOT NULL,
    PRIMARY KEY (hypertable_name, name)
);
-- Partial unique index: at most one field per hypertable may have
-- is_partitioning set.
CREATE UNIQUE INDEX IF NOT EXISTS one_partition_field
    ON field (hypertable_name)
    WHERE is_partitioning;