Use direct index scan for hypertable lookups.

This is a first stab at moving from SPI queries
to direct heap/index scans for cleaner and more
efficient code. With direct scans there is no
need to prepare and cache a set of query plans,
which added both overhead and complexity.

This patch also adds a catalog module that caches
OIDs and other metadata for catalog tables. The
cached information is updated every time the
backend switches to a new database. A permission
check is also performed when the catalog
information is accessed; it should probably be
extended to individual tables and schemas in the
future.
Author: Erik Nordström, 2017-02-26 20:41:20 +01:00 (committed by Erik Nordström)
parent 67ad21ee36
commit f99669c880
10 changed files with 215 additions and 120 deletions
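
For context, this is the scan pattern the commit moves to, reduced to a minimal sketch. It uses only PostgreSQL 9.x APIs that appear in the diff below (heap_open, index_open, ScanKeyInit, index_beginscan, index_getnext); the helper name and the hard-coded key column are illustrative, not part of the patch:

#include <postgres.h>
#include <access/relscan.h>
#include <utils/fmgroids.h>
#include <utils/rel.h>
#include <utils/tqual.h>

/* Illustrative only: fetch the tuple matching `id` via a primary-key
 * index scan, without preparing or caching any query plan. */
static HeapTuple
scan_by_id(Oid table_oid, Oid index_oid, int32 id)
{
	Relation	table = heap_open(table_oid, AccessShareLock);
	Relation	index = index_open(index_oid, AccessShareLock);
	ScanKeyData scankey[1];
	IndexScanDesc scan;
	HeapTuple	tuple;

	/* Equality key on the first index column: WHERE id = $1 */
	ScanKeyInit(&scankey[0], 1, BTEqualStrategyNumber,
				F_INT4EQ, Int32GetDatum(id));

	scan = index_beginscan(table, index, SnapshotSelf, 1, 0);
	index_rescan(scan, scankey, 1, NULL, 0);
	tuple = index_getnext(scan, ForwardScanDirection);

	if (HeapTupleIsValid(tuple))
		tuple = heap_copytuple(tuple);	/* keep a copy past index_endscan() */

	index_endscan(scan);
	index_close(index, AccessShareLock);
	heap_close(table, AccessShareLock);

	return tuple;
}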


@@ -12,6 +12,7 @@ SRCS = \
 	src/murmur3.c \
 	src/pgmurmur3.c \
 	src/utils.c \
+	src/catalog.c \
 	src/metadata_queries.c \
 	src/cache.c \
 	src/cache_invalidate.c \

src/catalog.c (new file, +83)

@@ -0,0 +1,83 @@
#include <postgres.h>
#include <catalog/namespace.h>
#include <utils/lsyscache.h>
#include <utils/acl.h>
#include <miscadmin.h>
#include <commands/dbcommands.h>

#include "catalog.h"

static char *catalog_table_names[_MAX_CATALOG_TABLES] = {
	[HYPERTABLE] = HYPERTABLE_TABLE_NAME,
	[PARTITION] = PARTITION_TABLE_NAME,
	[PARTITION_EPOCH] = PARTITION_EPOCH_TABLE_NAME,
	[CHUNK] = CHUNK_TABLE_NAME,
};

static char *catalog_table_index_names[_MAX_CATALOG_TABLES] = {
	[HYPERTABLE] = HYPERTABLE_INDEX_NAME,
	[PARTITION] = PARTITION_INDEX_NAME,
	[PARTITION_EPOCH] = PARTITION_EPOCH_INDEX_NAME,
	[CHUNK] = CHUNK_INDEX_NAME,
};

/* Catalog information for the current database. Should probably be invalidated
 * if the extension is unloaded for the database. */
static Catalog catalog = {
	.database_id = InvalidOid,
};

Catalog *
catalog_get(void)
{
	AclResult	aclresult;
	int			i;

	if (MyDatabaseId == InvalidOid)
		elog(ERROR, "Invalid database ID");

	/* Check that the user has CREATE permissions on the database, since the
	 * operation may involve creating chunks and inserting into them. */
	aclresult = pg_database_aclcheck(MyDatabaseId, GetUserId(), ACL_CREATE);

	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, ACL_KIND_DATABASE,
					   get_database_name(MyDatabaseId));

	if (MyDatabaseId == catalog.database_id)
		return &catalog;

	memset(&catalog, 0, sizeof(Catalog));
	catalog.database_id = MyDatabaseId;
	strncpy(catalog.database_name, get_database_name(MyDatabaseId), NAMEDATALEN);
	catalog.schema_id = get_namespace_oid(CATALOG_SCHEMA_NAME, false);

	if (catalog.schema_id == InvalidOid)
	{
		elog(ERROR, "Oid lookup failed for schema %s", CATALOG_SCHEMA_NAME);
	}

	for (i = 0; i < _MAX_CATALOG_TABLES; i++)
	{
		Oid			id;

		id = get_relname_relid(catalog_table_names[i], catalog.schema_id);

		if (id == InvalidOid)
		{
			elog(ERROR, "Oid lookup failed for table %s", catalog_table_names[i]);
		}

		catalog.tables[i].id = id;

		id = get_relname_relid(catalog_table_index_names[i], catalog.schema_id);

		if (id == InvalidOid)
		{
			elog(ERROR, "Oid lookup failed for table index %s", catalog_table_index_names[i]);
		}

		catalog.tables[i].index_id = id;
		catalog.tables[i].name = catalog_table_names[i];
	}

	return &catalog;
}
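
Call sites can then open catalog relations by cached OID rather than by interpolating table names into SQL. A minimal usage sketch (the CHUNK lookup here is illustrative, not taken from this patch):

	Catalog    *catalog = catalog_get();
	Relation	chunk_rel;

	/* Open the chunk catalog table directly by its cached OID. */
	chunk_rel = heap_open(catalog->tables[CHUNK].id, AccessShareLock);
	/* ... scan it via catalog->tables[CHUNK].index_id ... */
	heap_close(chunk_rel, AccessShareLock);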

src/catalog.h (new file, +39)

@@ -0,0 +1,39 @@
#ifndef IOBEAMDB_CATALOG_H
#define IOBEAMDB_CATALOG_H

#include <postgres.h>

enum catalog_table {
	HYPERTABLE = 0,
	CHUNK,
	PARTITION,
	PARTITION_EPOCH,
	_MAX_CATALOG_TABLES,
};

#define CATALOG_SCHEMA_NAME "_iobeamdb_catalog"

#define HYPERTABLE_TABLE_NAME "hypertable"
#define CHUNK_TABLE_NAME "chunk"
#define PARTITION_TABLE_NAME "partition"
#define PARTITION_EPOCH_TABLE_NAME "partition_epoch"

#define HYPERTABLE_INDEX_NAME "hypertable_pkey"
#define CHUNK_INDEX_NAME "chunk_pkey"
#define PARTITION_INDEX_NAME "partition_pkey"
#define PARTITION_EPOCH_INDEX_NAME "partition_epoch_pkey"

typedef struct Catalog {
	char		database_name[NAMEDATALEN];
	Oid			database_id;
	Oid			schema_id;
	struct {
		const char *name;
		Oid			id;
		Oid			index_id;
	} tables[_MAX_CATALOG_TABLES];
} Catalog;

Catalog *catalog_get(void);

#endif /* IOBEAMDB_CATALOG_H */


@@ -188,17 +188,17 @@ get_copy_table_insert_sql(ChunkCacheQueryCtx *ctx)
 	if (ctx->chunk_start_time != OPEN_START_TIME)
 	{
 		appendStringInfo(where_clause, " AND (%1$s >= %2$s) ",
-						 quote_identifier(ctx->hci->info->time_column_name.data),
+						 quote_identifier(ctx->hci->time_column_name),
 						 internal_time_to_column_literal_sql(ctx->chunk_start_time,
-															 ctx->hci->info->time_column_type));
+															 ctx->hci->time_column_type));
 	}
 
 	if (ctx->chunk_end_time != OPEN_END_TIME)
 	{
 		appendStringInfo(where_clause, " AND (%1$s <= %2$s) ",
-						 quote_identifier(ctx->hci->info->time_column_name.data),
+						 quote_identifier(ctx->hci->time_column_name),
 						 internal_time_to_column_literal_sql(ctx->chunk_end_time,
-															 ctx->hci->info->time_column_type));
+															 ctx->hci->time_column_type));
 	}
 
 	i = 0;


@@ -1,13 +1,18 @@
 #include <postgres.h>
+#include <access/relscan.h>
 #include <utils/catcache.h>
 #include <utils/rel.h>
+#include <utils/fmgroids.h>
+#include <utils/tqual.h>
+#include <utils/acl.h>
 
 #include "hypertable_cache.h"
+#include "catalog.h"
 #include "cache.h"
 #include "metadata_queries.h"
 #include "utils.h"
 
-static void hypertable_cache_pre_invalidate(Cache *cache);
 static void *hypertable_cache_create_entry(Cache *cache, CacheQueryCtx *ctx);
 
 typedef struct HypertableCacheQueryCtx
@@ -34,33 +39,72 @@ static Cache hypertable_cache = {
 	.flags = HASH_ELEM | HASH_CONTEXT | HASH_BLOBS,
 	.get_key = hypertable_cache_get_key,
 	.create_entry = hypertable_cache_create_entry,
-	.pre_invalidate_hook = hypertable_cache_pre_invalidate,
 	.post_invalidate_hook = cache_init,
 };
 
-static void
-hypertable_cache_pre_invalidate(Cache *cache)
-{
-	hypertable_cache_entry *entry;
-	HASH_SEQ_STATUS scan;
-
-	hash_seq_init(&scan, cache->htab);
-	while ((entry = hash_seq_search(&scan)))
-	{
-		SPI_freeplan(entry->info->get_one_tuple_copyt_plan);
-	}
-}
+/* Column numbers for 'hypertable' table in sql/common/tables.sql */
+#define HT_COL_ID 1
+#define HT_COL_TIME_COL_NAME 10
+#define HT_COL_TIME_TYPE 11
+
+/* Primary key Index column number */
+#define HT_INDEX_COL_ID 1
 
 static void *
 hypertable_cache_create_entry(Cache *cache, CacheQueryCtx *ctx)
 {
 	HypertableCacheQueryCtx *hctx = (HypertableCacheQueryCtx *) ctx;
-	hypertable_cache_entry *he = ctx->entry;
+	hypertable_cache_entry *he = NULL;
+	Relation	table, index;
+	ScanKeyData scankey[1];
+	int			nkeys = 1, norderbys = 0;
+	IndexScanDesc scan;
+	HeapTuple	tuple;
+	TupleDesc	tuple_desc;
+	Catalog    *catalog = catalog_get();
 
-	he->info = fetch_hypertable_info(NULL, hctx->hypertable_id);
-	he->num_epochs = 0;
+	/* Perform an index scan on primary key. */
+	table = heap_open(catalog->tables[HYPERTABLE].id, AccessShareLock);
+	index = index_open(catalog->tables[HYPERTABLE].index_id, AccessShareLock);
+
+	ScanKeyInit(&scankey[0], HT_INDEX_COL_ID, BTEqualStrategyNumber,
+				F_INT4EQ, Int32GetDatum(hctx->hypertable_id));
+
+	scan = index_beginscan(table, index, SnapshotSelf, nkeys, norderbys);
+	index_rescan(scan, scankey, nkeys, NULL, norderbys);
+
+	tuple_desc = RelationGetDescr(table);
+
+	tuple = index_getnext(scan, ForwardScanDirection);
+
+	if (HeapTupleIsValid(tuple))
+	{
+		bool		is_null;
+		Datum		id_datum = heap_getattr(tuple, HT_COL_ID, tuple_desc, &is_null);
+		Datum		time_col_datum = heap_getattr(tuple, HT_COL_TIME_COL_NAME, tuple_desc, &is_null);
+		Datum		time_type_datum = heap_getattr(tuple, HT_COL_TIME_TYPE, tuple_desc, &is_null);
+		int32		id = DatumGetInt32(id_datum);
+
+		if (id != hctx->hypertable_id)
+		{
+			elog(ERROR, "Expected hypertable ID %u, got %u", hctx->hypertable_id, id);
+		}
+
+		he = ctx->entry;
+		he->num_epochs = 0;
+		he->id = hctx->hypertable_id;
+		strncpy(he->time_column_name, DatumGetCString(time_col_datum), NAMEDATALEN);
+		he->time_column_type = DatumGetObjectId(time_type_datum);
+	}
+	else
+	{
+		elog(ERROR, "Could not find hypertable entry");
+	}
+
+	index_endscan(scan);
+	index_close(index, AccessShareLock);
+	heap_close(table, AccessShareLock);
 
 	return he;
 }
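
Worth noting: a single is_null flag is shared by the three heap_getattr() calls above and never inspected, so a NULL attribute would silently be read as a bogus value. A more defensive variant (a sketch, not what this patch does) would check each attribute separately:

	bool		id_is_null, col_is_null, type_is_null;
	Datum		id_datum = heap_getattr(tuple, HT_COL_ID, tuple_desc, &id_is_null);
	Datum		time_col_datum = heap_getattr(tuple, HT_COL_TIME_COL_NAME, tuple_desc, &col_is_null);
	Datum		time_type_datum = heap_getattr(tuple, HT_COL_TIME_TYPE, tuple_desc, &type_is_null);

	if (id_is_null || col_is_null || type_is_null)
		elog(ERROR, "Unexpected NULL attribute in hypertable catalog tuple");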
@@ -79,7 +123,7 @@ get_hypertable_cache_entry(int32 hypertable_id)
 	HypertableCacheQueryCtx ctx = {
 		.hypertable_id = hypertable_id,
 	};
 
 	return cache_fetch(&hypertable_cache, &ctx.cctx);
 }
 
@@ -131,7 +175,7 @@ get_partition_epoch_cache_entry(hypertable_cache_entry *hce, int64 time_pt, Oid
 	}
 
 	old = cache_switch_to_memory_context(&hypertable_cache);
-	entry = fetch_epoch_and_partitions_set(NULL, hce->info->id, time_pt, relid);
+	entry = fetch_epoch_and_partitions_set(NULL, hce->id, time_pt, relid);
 
 	/* check if full */
 	if (hce->num_epochs == MAX_EPOCHS_PER_HYPERTABLE_CACHE_ENTRY)
 
@@ -202,8 +246,7 @@ get_partition_info(epoch_and_partitions_set *epoch, int16 keyspace_pt)
 	return *part;
 }
 
-void
-_hypertable_cache_init(void)
+void _hypertable_cache_init(void)
 {
 	CreateCacheMemoryContext();
 	cache_init(&hypertable_cache);

@@ -16,7 +16,8 @@ typedef struct partition_info partition_info;
 typedef struct hypertable_cache_entry
 {
 	int32		id;
-	hypertable_basic_info *info;
+	char		time_column_name[NAMEDATALEN];
+	Oid			time_column_type;
 	int			num_epochs;
 	/* Array of epoch_and_partitions_set*. Order by start_time */
 	epoch_and_partitions_set *epochs[MAX_EPOCHS_PER_HYPERTABLE_CACHE_ENTRY];


@@ -25,6 +25,7 @@
 #include "tcop/tcopprot.h"
 #include "tcop/utility.h"
 #include "deps/dblink.h"
+#include "utils/tqual.h"
 #include "access/xact.h"
 #include "parser/parse_oper.h"
 
@@ -45,7 +46,6 @@
 /* private funcs */
 static int	tuple_fnumber(TupleDesc tupdesc, const char *fname);
-static HeapTuple get_one_tuple_from_copy_table(hypertable_cache_entry *hci);
 
 /*
  * Inserts rows from the temporary copy table into correct hypertable child tables.
@@ -100,7 +100,10 @@ insert_trigger_on_copy_table_c(PG_FUNCTION_ARGS)
 	 * two different hypertables.
 	 */
 	char	   *insert_guard = GetConfigOptionByName("io.insert_data_guard", NULL, true);
+	HeapScanDesc scan;
+	ScanKeyData scankey[1];
+	int			nkeys = 0;
 
 	if (insert_guard != NULL && strcmp(insert_guard, "on") == 0)
 	{
 		ereport(ERROR,
@@ -116,24 +119,28 @@ insert_trigger_on_copy_table_c(PG_FUNCTION_ARGS)
 	 * column fnum for time field
 	 */
 	hci = get_hypertable_cache_entry(atoi(hypertable_id_arg));
-	time_fnum = tuple_fnumber(trigdata->tg_relation->rd_att, NameStr(hci->info->time_column_name));
+	time_fnum = tuple_fnumber(trigdata->tg_relation->rd_att, hci->time_column_name);
+
+	scan = heap_beginscan(trigdata->tg_relation, SnapshotSelf, nkeys, scankey);
 
 	/* get one row in a loop until the copy table is empty. */
-	while ((firstrow = get_one_tuple_from_copy_table(hci)))
+	while ((firstrow = heap_getnext(scan, ForwardScanDirection)))
 	{
-		Datum		time_datum = heap_getattr(firstrow, time_fnum, trigdata->tg_relation->rd_att, &isnull);
+		Datum		time_datum;
 		int64		time_internal;
 		epoch_and_partitions_set *pe_entry;
 		partition_info *part = NULL;
 		chunk_cache_entry *chunk;
 		int			ret;
 
+		time_datum = heap_getattr(firstrow, time_fnum, trigdata->tg_relation->rd_att, &isnull);
+
 		if (isnull)
 		{
 			elog(ERROR, "Time column is null");
 		}
 
-		time_internal = time_value_to_internal(time_datum, hci->info->time_column_type);
+		time_internal = time_value_to_internal(time_datum, hci->time_column_type);
 
 		pe_entry = get_partition_epoch_cache_entry(hci, time_internal, trigdata->tg_relation->rd_id);
 
 		if (pe_entry->partitioning_func != NULL)
@@ -161,6 +168,7 @@ insert_trigger_on_copy_table_c(PG_FUNCTION_ARGS)
 	}
 
 	chunk = get_chunk_cache_entry(hci, pe_entry, part, time_internal, true);
 
 	if (chunk->chunk->end_time == OPEN_END_TIME)
 	{
 		chunk_id_list = lappend_int(chunk_id_list, chunk->id);
 
@@ -178,6 +186,8 @@ insert_trigger_on_copy_table_c(PG_FUNCTION_ARGS)
 	}
 
+	heap_endscan(scan);
+
 	/* build chunk id array */
 	num_chunks = list_length(chunk_id_list);
 	chunk_id_array = palloc(sizeof(int) * num_chunks);
@@ -297,29 +307,3 @@ tuple_fnumber(TupleDesc tupdesc, const char *fname)
 	elog(ERROR, "field not found: %s", fname);
 }
-
-static HeapTuple
-get_one_tuple_from_copy_table(hypertable_cache_entry *hci)
-{
-	HeapTuple	res;
-	int			ret;
-
-	if (SPI_connect() < 0)
-	{
-		elog(ERROR, "Got an SPI connect error");
-	}
-
-	ret = SPI_execute_plan(hci->info->get_one_tuple_copyt_plan, NULL, NULL, false, 1);
-
-	if (ret <= 0)
-	{
-		elog(ERROR, "Got an SPI error %d", ret);
-	}
-
-	if (SPI_processed != 1)
-	{
-		SPI_finish();
-		return NULL;
-	}
-
-	res = SPI_copytuple(SPI_tuptable->vals[0]);
-	SPI_finish();
-	return res;
-}
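
For comparison, the heap scan that replaces this helper (shown in full in the trigger hunk above) boils down to the following shape: one scan stays open for the whole batch instead of a SPI connect/execute/copy cycle per row. A sketch, with `rel` standing in for the trigger's relation:

	HeapScanDesc scan = heap_beginscan(rel, SnapshotSelf, 0, NULL);
	HeapTuple	tuple;

	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		/* process tuple; it is only valid until the next heap_getnext() call */
	}

	heap_endscan(scan);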


@@ -194,13 +194,17 @@ SPIPlanPtr get_hypertable_info_plan()
 bool
 IobeamLoaded(void)
 {
 	if (!isLoaded)
 	{
-		Oid			id = get_extension_oid("iobeamdb", true);
+		Oid			id;
+
+		if (!IsTransactionState())
+		{
+			return false;
+		}
+
+		id = get_extension_oid("iobeamdb", true);
 
 		if (id != InvalidOid && !(creating_extension && id == CurrentExtensionObject))
 		{


@@ -53,57 +53,7 @@ prepare_plan(const char *src, int nargs, Oid *argtypes)
 #define HYPERTABLE_QUERY_ARGS (Oid[]) { INT4OID }
 #define HYPERTABLE_QUERY "SELECT id, time_column_name, time_column_type FROM _iobeamdb_catalog.hypertable h WHERE h.id = $1"
 DEFINE_PLAN(get_hypertable_plan, HYPERTABLE_QUERY, 1, HYPERTABLE_QUERY_ARGS)
 
-hypertable_basic_info *
-fetch_hypertable_info(hypertable_basic_info *entry, int32 hypertable_id)
-{
-	SPIPlanPtr	plan = get_hypertable_plan();
-	Datum		args[1] = {Int32GetDatum(hypertable_id)};
-	int			ret;
-	bool		is_null;
-	TupleDesc	tupdesc;
-	HeapTuple	tuple;
-	Name		time_column_name;
-	int			sql_len = NAMEDATALEN * 2 + 100;
-	char		get_one_tuple_copyt_sql[sql_len];
-
-	if (entry == NULL)
-	{
-		entry = palloc(sizeof(hypertable_basic_info));
-	}
-
-	CACHE2_elog(WARNING, "Looking up hypertable info: %d", hypertable_id);
-
-	if (SPI_connect() < 0)
-	{
-		elog(ERROR, "Got an SPI connect error");
-	}
-
-	ret = SPI_execute_plan(plan, args, NULL, true, 2);
-
-	if (ret <= 0)
-	{
-		elog(ERROR, "Got an SPI error %d", ret);
-	}
-
-	if (SPI_processed != 1)
-	{
-		elog(ERROR, "Got not 1 row but %lu", SPI_processed);
-	}
-
-	tupdesc = SPI_tuptable->tupdesc;
-	tuple = SPI_tuptable->vals[0];
-
-	entry->id = DatumGetInt32(SPI_getbinval(tuple, tupdesc, 1, &is_null));
-	time_column_name = DatumGetName(SPI_getbinval(tuple, tupdesc, 2, &is_null));
-	memcpy(entry->time_column_name.data, time_column_name, NAMEDATALEN);
-	entry->time_column_type = DatumGetObjectId(SPI_getbinval(tuple, tupdesc, 3, &is_null));
-
-	SPI_finish();
-
-	snprintf(get_one_tuple_copyt_sql, sql_len, "SELECT * FROM %s LIMIT 1", copy_table_name(entry->id));
-	entry->get_one_tuple_copyt_plan = prepare_plan(get_one_tuple_copyt_sql, 0, NULL);
-
-	return entry;
-}
 
 //DEFINE_PLAN(get_hypertable_plan, HYPERTABLE_QUERY, 1, HYPERTABLE_QUERY_ARGS)
 
 #define EPOCH_AND_PARTITION_ARGS (Oid[]) { INT4OID, INT8OID }
 #define EPOCH_AND_PARTITION_QUERY "SELECT pe.id as epoch_id, hypertable_id, start_time, end_time, \


@@ -10,14 +10,6 @@
 #define OPEN_START_TIME -1
 #define OPEN_END_TIME PG_INT64_MAX
 
-typedef struct hypertable_basic_info
-{
-	int32		id;
-	NameData	time_column_name;
-	Oid			time_column_type;
-	SPIPlanPtr	get_one_tuple_copyt_plan;
-} hypertable_basic_info;
 
 typedef struct partition_info
 {
 	int32		id;
 
@@ -73,8 +65,6 @@ extern epoch_and_partitions_set *fetch_epoch_and_partitions_set(epoch_and_partit
 extern void free_epoch(epoch_and_partitions_set *epoch);
-extern hypertable_basic_info *fetch_hypertable_info(hypertable_basic_info *entry, int32 hypertable_id);
-
 extern chunk_row *fetch_chunk_row(chunk_row *entry, int32 partition_id, int64 time_pt, bool lock);
 extern crn_set *fetch_crn_set(crn_set *entry, int32 chunk_id);