timescaledb/sql/common/tables.sql

-- Data nodes, keyed by database name.
-- schema_name, server_name and id must each be unique across nodes as well.
CREATE TABLE IF NOT EXISTS node (
    database_name NAME    NOT NULL,
    schema_name   NAME    NOT NULL, -- public schema of the remote
    server_name   NAME    NOT NULL,
    hostname      TEXT    NOT NULL,
    active        BOOLEAN NOT NULL DEFAULT TRUE,
    id            SERIAL  NOT NULL, -- id for the node; used in naming
    PRIMARY KEY (database_name),
    UNIQUE (schema_name),
    UNIQUE (server_name),
    UNIQUE (id)
);

-- Singleton holding info about the meta database.
-- The unique index below is built on the constant expression (1), so every
-- row maps to the same index key and at most one row can exist in the table.
CREATE TABLE IF NOT EXISTS meta (
    database_name NAME NOT NULL PRIMARY KEY,
    hostname      TEXT NOT NULL,
    server_name   NAME NOT NULL
);
-- IF NOT EXISTS keeps the script re-runnable, in line with the CREATE TABLE
-- above; without it a second run fails because the index already exists.
CREATE UNIQUE INDEX IF NOT EXISTS there_can_be_only_one_meta
    ON meta ((1));

-- Users that should exist on all nodes and on meta in the cluster.
CREATE TABLE IF NOT EXISTS cluster_user (
    username TEXT NOT NULL,
    -- Nullable. Not any more of a security hole than usual, since it is
    -- stored in pg_user_mapping anyway.
    password TEXT NULL,
    PRIMARY KEY (username)
);

-- The hypertable is an abstraction that represents a replicated table that is
-- partitioned on 2 dimensions. One of the dimensions is time, the other is arbitrary.
-- This abstraction also tracks the distinct value set of any columns marked as `distinct`.
-- Each row creates 3 tables:
--    i)   main table is just an alias to the 0'th replica for now. Represents the
--         hypertable to the user.
--    ii)  root table is the ancestor of all the data tables (across replicas).
--         Should not be queryable for data (TODO).
--    iii) distinct root table is the ancestor of all distinct tables (across replicas).
--         Should not be queryable for data (TODO).
-- Because every row stands for three physical tables, each of the three qualified
-- names (main, root, distinct) is required to be unique across hypertables.
CREATE TABLE IF NOT EXISTS hypertable (
    -- Names starting with an underscore are rejected ('\_' escapes the LIKE wildcard).
    name                    NAME     NOT NULL PRIMARY KEY CHECK (name NOT LIKE '\_%'),
    main_schema_name        NAME     NOT NULL,
    main_table_name         NAME     NOT NULL,
    associated_schema_name  NAME     NOT NULL,
    associated_table_prefix NAME     NOT NULL,
    root_schema_name        NAME     NOT NULL,
    root_table_name         NAME     NOT NULL,
    distinct_schema_name    NAME     NOT NULL,
    distinct_table_name     NAME     NOT NULL,
    replication_factor      SMALLINT NOT NULL CHECK (replication_factor > 0),
    UNIQUE (main_schema_name, main_table_name),
    UNIQUE (associated_schema_name, associated_table_prefix),
    UNIQUE (root_schema_name, root_table_name),
    -- Two hypertables must not claim the same distinct root table either.
    UNIQUE (distinct_schema_name, distinct_table_name)
);

-- This represents one replica of the data across all partitions and time.
-- Each row creates 2 tables:
--   i)  data replica table (schema_name.table_name)
--       parent is the hypertable root table.
--       children are the partition_replica tables.
--   ii) distinct replica table (distinct_schema_name.distinct_table_name)
--       parent is the hypertable distinct root table.
--       children are created by the distinct_replica_node table.
-- Both created tables need a qualified name that no other replica row uses.
CREATE TABLE IF NOT EXISTS hypertable_replica (
    hypertable_name      NAME     NOT NULL  REFERENCES hypertable (name),
    replica_id           SMALLINT NOT NULL  CHECK (replica_id >= 0),
    schema_name          NAME     NOT NULL,
    table_name           NAME     NOT NULL,
    distinct_schema_name NAME     NOT NULL,
    distinct_table_name  NAME     NOT NULL,
    PRIMARY KEY (hypertable_name, replica_id),
    UNIQUE (schema_name, table_name),
    -- The distinct replica table must be as unique as the data replica table.
    UNIQUE (distinct_schema_name, distinct_table_name)
);

-- There should be one distinct_replica_node for each node holding a chunk from
-- that replica, so one hypertable-replica can have multiple rows, each on a
-- different node. That way writes are local. Optimized reads are also local
-- for many queries, but some read queries are cross-node.
-- Each row creates a table.
--   Parent table: hypertable_replica.distinct_table
--   No children; the created table contains data.
CREATE TABLE IF NOT EXISTS distinct_replica_node (
    hypertable_name NAME     NOT NULL,
    replica_id      SMALLINT NOT NULL,
    database_name   NAME     NOT NULL,
    schema_name     NAME     NOT NULL,
    table_name      NAME     NOT NULL,
    PRIMARY KEY (hypertable_name, replica_id, database_name),
    UNIQUE (schema_name, table_name),
    FOREIGN KEY (database_name)
        REFERENCES node (database_name),
    FOREIGN KEY (hypertable_name, replica_id)
        REFERENCES hypertable_replica (hypertable_name, replica_id)
);

-- A partition epoch represents a different partitioning of the data.
-- It has a start and end time (data time). Data needs to be placed in the correct epoch by time.
-- This should change very infrequently. Expensive to start new epoch.
--
-- start_time and end_time are nullable; the CHECKs on them pass whenever the
-- value is NULL (presumably meaning "unbounded" -- TODO confirm).
-- NOTE(review): partition.keyspace_start/keyspace_end are SMALLINT (max 32767),
-- so the upper bound of 65536 on partitioning_mod looks wider than what the
-- partition table can represent -- confirm the intended limit.
CREATE TABLE IF NOT EXISTS partition_epoch (
    id                 SERIAL NOT NULL  PRIMARY KEY,
    hypertable_name    NAME   NOT NULL  REFERENCES hypertable (name),
    start_time         BIGINT NULL      CHECK (start_time > 0),
    end_time           BIGINT NULL      CHECK (end_time > 0),
    partitioning_func  NAME   NOT NULL,
    -- A modulus must be positive; zero or negative values describe no usable keyspace.
    partitioning_mod   INT    NOT NULL  CHECK (partitioning_mod > 0 AND partitioning_mod < 65536),
    partitioning_field NAME   NOT NULL,
    UNIQUE (hypertable_name, start_time),
    UNIQUE (hypertable_name, end_time),
    CHECK (start_time < end_time)
);

-- A partition defines one slice of the keyspace within a partition_epoch.
-- For any partition the keyspace is defined as [0, partition_epoch.partitioning_mod].
-- For any epoch, there must be a partition that covers every element in the keyspace.
-- NOTE(review): the keyspace columns are SMALLINT while partitioning_mod may be
-- as large as 65535, and a modulus would normally top out at mod - 1 -- confirm
-- the intended bounds.
CREATE TABLE IF NOT EXISTS partition (
    id             SERIAL   NOT NULL,
    epoch_id       INT      NOT NULL,
    keyspace_start SMALLINT NOT NULL, -- start inclusive
    keyspace_end   SMALLINT NOT NULL, -- end inclusive; compatible with the BETWEEN operator
    PRIMARY KEY (id),
    FOREIGN KEY (epoch_id) REFERENCES partition_epoch (id),
    UNIQUE (epoch_id, keyspace_start),
    CHECK (keyspace_start >= 0),
    CHECK (keyspace_end > 0),
    CHECK (keyspace_end > keyspace_start)
);

-- Represents a replica for a partition.
-- Each row creates a table:
--   Parent:   "hypertable_replica.schema_name"."hypertable_replica.table_name"
--   Children: "chunk_replica_node.schema_name"."chunk_replica_node.table_name"
-- TODO: trigger to verify partition_epoch hypertable name matches this hypertable_name
CREATE TABLE IF NOT EXISTS partition_replica (
    id              SERIAL   NOT NULL,
    partition_id    INT      NOT NULL,
    hypertable_name NAME     NOT NULL,
    replica_id      SMALLINT NOT NULL,
    schema_name     NAME     NOT NULL,
    table_name      NAME     NOT NULL,
    PRIMARY KEY (id),
    FOREIGN KEY (partition_id)
        REFERENCES partition (id),
    UNIQUE (schema_name, table_name),
    FOREIGN KEY (hypertable_name, replica_id)
        REFERENCES hypertable_replica (hypertable_name, replica_id)
);

-- Represents a chunk of data,
-- i.e. the data of one partition for a particular time range.
-- start_time and end_time are nullable; when both are set, start must precede end.
CREATE TABLE IF NOT EXISTS chunk (
    id           SERIAL NOT NULL,
    partition_id INT    NOT NULL,
    start_time   BIGINT NULL,
    end_time     BIGINT NULL,
    PRIMARY KEY (id),
    FOREIGN KEY (partition_id) REFERENCES partition (id),
    UNIQUE (partition_id, start_time),
    UNIQUE (partition_id, end_time),
    CHECK (start_time > 0),
    CHECK (end_time > 0),
    CHECK (start_time < end_time)
);

-- A mapping between chunks, partition_replica, and node.
-- This represents the table where the actual data is stored.
-- Each row represents a table:
--   Parent table: "partition_replica.schema_name"."partition_replica.table_name"
CREATE TABLE IF NOT EXISTS chunk_replica_node (
    chunk_id             INT  NOT NULL,
    partition_replica_id INT  NOT NULL,
    database_name        NAME NOT NULL,
    schema_name          NAME NOT NULL,
    table_name           NAME NOT NULL,
    -- a single (chunk, replica) tuple
    PRIMARY KEY (chunk_id, partition_replica_id),
    -- no two replicas of the same chunk on the same node
    UNIQUE (chunk_id, database_name),
    UNIQUE (schema_name, table_name),
    FOREIGN KEY (chunk_id)             REFERENCES chunk (id),
    FOREIGN KEY (partition_replica_id) REFERENCES partition_replica (id),
    FOREIGN KEY (database_name)        REFERENCES node (database_name)
);

-- Represents a field of a hypertable, keyed by (hypertable_name, name).
-- TODO: remove is_partitioning; it is defined in the partition_epoch table.
CREATE TABLE IF NOT EXISTS field (
    hypertable_name NAME               NOT NULL,
    name            NAME               NOT NULL,
    data_type       REGTYPE            NOT NULL,
    is_partitioning BOOLEAN            NOT NULL DEFAULT FALSE,
    is_distinct     BOOLEAN            NOT NULL DEFAULT FALSE,
    index_types     field_index_type[] NOT NULL,
    PRIMARY KEY (hypertable_name, name),
    FOREIGN KEY (hypertable_name) REFERENCES hypertable (name)
);

-- Partial unique index: only rows with is_partitioning set are indexed, so a
-- hypertable can have at most one field flagged as the partitioning field.
CREATE UNIQUE INDEX IF NOT EXISTS one_partition_field
ON field (hypertable_name) WHERE is_partitioning;