Use direct index scan for hypertable lookups.

This is a first stab at moving from SPI queries to direct
heap/index scans, making the code cleaner and more efficient.
Direct scans remove the need to prepare and cache a set of
query plans that made the code both slower and more complex.
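For context, this is roughly the scan pattern the commit adopts (a minimal
sketch, not part of the diff, assuming PostgreSQL 9.x-era APIs; scan_by_id
and its parameters are illustrative, with the table and index OIDs expected
to come from the new catalog module):

#include <postgres.h>
#include <access/genam.h>
#include <access/heapam.h>
#include <access/htup_details.h>
#include <access/relscan.h>
#include <access/stratnum.h>
#include <utils/fmgroids.h>
#include <utils/rel.h>
#include <utils/tqual.h>

/* Fetch the row whose first index column equals 'id' via a direct
 * index scan on the table's primary key, instead of going through SPI. */
static HeapTuple
scan_by_id(Oid table_relid, Oid index_relid, int32 id)
{
	Relation	table = heap_open(table_relid, AccessShareLock);
	Relation	index = index_open(index_relid, AccessShareLock);
	ScanKeyData scankey[1];
	IndexScanDesc scan;
	HeapTuple	tuple;

	/* Equality condition on the first index column */
	ScanKeyInit(&scankey[0], 1, BTEqualStrategyNumber,
				F_INT4EQ, Int32GetDatum(id));

	scan = index_beginscan(table, index, SnapshotSelf, 1, 0);
	index_rescan(scan, scankey, 1, NULL, 0);

	tuple = index_getnext(scan, ForwardScanDirection);

	/* Copy the tuple out before the scan and relations are released */
	if (HeapTupleIsValid(tuple))
		tuple = heap_copytuple(tuple);

	index_endscan(scan);
	index_close(index, AccessShareLock);
	heap_close(table, AccessShareLock);

	return tuple;
}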

This patch also adds a catalog module that caches OIDs and
other information for catalog tables. The cached information
is updated every time the backend switches to a new database.
A permission check is also performed when accessing the
catalog information, but it should probably be extended to
tables and schemas in the future.
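
A hypothetical caller of the new module might look like this (names as
declared in src/catalog.h below):

	/* Resolve the 'hypertable' table and its primary-key index by
	 * cached OID rather than by an SPI query. */
	Catalog *catalog = catalog_get();
	Oid ht_relid = catalog->tables[HYPERTABLE].id;
	Oid ht_index_relid = catalog->tables[HYPERTABLE].index_id;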
Erik Nordström authored on 2017-02-26 20:41:20 +01:00 (committed by Erik Nordström)
parent 67ad21ee36
commit f99669c880
10 changed files with 215 additions and 120 deletions


@@ -12,6 +12,7 @@ SRCS = \
 	src/murmur3.c \
 	src/pgmurmur3.c \
 	src/utils.c \
+	src/catalog.c \
 	src/metadata_queries.c \
 	src/cache.c \
 	src/cache_invalidate.c \

src/catalog.c (new file, 83 lines)

@@ -0,0 +1,83 @@
#include <postgres.h>
#include <catalog/namespace.h>
#include <utils/lsyscache.h>
#include <miscadmin.h>
#include <commands/dbcommands.h>

#include "catalog.h"

static char *catalog_table_names[_MAX_CATALOG_TABLES] = {
	[HYPERTABLE] = HYPERTABLE_TABLE_NAME,
	[PARTITION] = PARTITION_TABLE_NAME,
	[PARTITION_EPOCH] = PARTITION_EPOCH_TABLE_NAME,
	[CHUNK] = CHUNK_TABLE_NAME,
};

static char *catalog_table_index_names[_MAX_CATALOG_TABLES] = {
	[HYPERTABLE] = HYPERTABLE_INDEX_NAME,
	[PARTITION] = PARTITION_INDEX_NAME,
	[PARTITION_EPOCH] = PARTITION_EPOCH_INDEX_NAME,
	[CHUNK] = CHUNK_INDEX_NAME,
};

/* Catalog information for the current database. Should probably be invalidated
 * if the extension is unloaded for the database. */
static Catalog catalog = {
	.database_id = InvalidOid,
};

Catalog *
catalog_get(void)
{
	AclResult aclresult;
	int i;

	if (MyDatabaseId == InvalidOid)
		elog(ERROR, "Invalid database ID");

	/* Check that the user has CREATE permissions on the database, since the
	 * operation may involve creating chunks and inserting into them. */
	aclresult = pg_database_aclcheck(MyDatabaseId, GetUserId(), ACL_CREATE);

	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, ACL_KIND_DATABASE,
					   get_database_name(MyDatabaseId));

	if (MyDatabaseId == catalog.database_id)
		return &catalog;

	memset(&catalog, 0, sizeof(Catalog));
	catalog.database_id = MyDatabaseId;
	strncpy(catalog.database_name, get_database_name(MyDatabaseId), NAMEDATALEN);
	catalog.schema_id = get_namespace_oid(CATALOG_SCHEMA_NAME, false);

	if (catalog.schema_id == InvalidOid)
	{
		elog(ERROR, "Oid lookup failed for schema %s", CATALOG_SCHEMA_NAME);
	}

	for (i = 0; i < _MAX_CATALOG_TABLES; i++)
	{
		Oid id;

		id = get_relname_relid(catalog_table_names[i], catalog.schema_id);

		if (id == InvalidOid)
		{
			elog(ERROR, "Oid lookup failed for table %s", catalog_table_names[i]);
		}

		catalog.tables[i].id = id;

		id = get_relname_relid(catalog_table_index_names[i], catalog.schema_id);

		if (id == InvalidOid)
		{
			elog(ERROR, "Oid lookup failed for table index %s", catalog_table_index_names[i]);
		}

		catalog.tables[i].index_id = id;
		catalog.tables[i].name = catalog_table_names[i];
	}

	return &catalog;
}

src/catalog.h (new file, 39 lines)

@@ -0,0 +1,39 @@
#ifndef IOBEAMDB_CATALOG_H
#define IOBEAMDB_CATALOG_H

#include <postgres.h>

enum catalog_table {
	HYPERTABLE = 0,
	CHUNK,
	PARTITION,
	PARTITION_EPOCH,
	_MAX_CATALOG_TABLES,
};

#define CATALOG_SCHEMA_NAME "_iobeamdb_catalog"

#define HYPERTABLE_TABLE_NAME "hypertable"
#define CHUNK_TABLE_NAME "chunk"
#define PARTITION_TABLE_NAME "partition"
#define PARTITION_EPOCH_TABLE_NAME "partition_epoch"

#define HYPERTABLE_INDEX_NAME "hypertable_pkey"
#define CHUNK_INDEX_NAME "chunk_pkey"
#define PARTITION_INDEX_NAME "partition_pkey"
#define PARTITION_EPOCH_INDEX_NAME "partition_epoch_pkey"

typedef struct Catalog {
	char database_name[NAMEDATALEN];
	Oid database_id;
	Oid schema_id;
	struct {
		const char *name;
		Oid id;
		Oid index_id;
	} tables[_MAX_CATALOG_TABLES];
} Catalog;

Catalog *catalog_get(void);

#endif /* IOBEAMDB_CATALOG_H */


@@ -188,17 +188,17 @@ get_copy_table_insert_sql(ChunkCacheQueryCtx *ctx)
 	if (ctx->chunk_start_time != OPEN_START_TIME)
 	{
 		appendStringInfo(where_clause, " AND (%1$s >= %2$s) ",
-						 quote_identifier(ctx->hci->info->time_column_name.data),
+						 quote_identifier(ctx->hci->time_column_name),
 						 internal_time_to_column_literal_sql(ctx->chunk_start_time,
-															 ctx->hci->info->time_column_type));
+															 ctx->hci->time_column_type));
 	}
 
 	if (ctx->chunk_end_time != OPEN_END_TIME)
 	{
 		appendStringInfo(where_clause, " AND (%1$s <= %2$s) ",
-						 quote_identifier(ctx->hci->info->time_column_name.data),
+						 quote_identifier(ctx->hci->time_column_name),
 						 internal_time_to_column_literal_sql(ctx->chunk_end_time,
-															 ctx->hci->info->time_column_type));
+															 ctx->hci->time_column_type));
 	}
 
 	i = 0;


@@ -1,13 +1,18 @@
 #include <postgres.h>
+#include <access/relscan.h>
 #include <utils/catcache.h>
+#include <utils/rel.h>
+#include <utils/fmgroids.h>
+#include <utils/tqual.h>
+#include <utils/acl.h>
 
 #include "hypertable_cache.h"
+#include "catalog.h"
 #include "cache.h"
 #include "metadata_queries.h"
 #include "utils.h"
 
-static void hypertable_cache_pre_invalidate(Cache *cache);
 static void *hypertable_cache_create_entry(Cache *cache, CacheQueryCtx *ctx);
 
 typedef struct HypertableCacheQueryCtx
@@ -34,33 +39,72 @@ static Cache hypertable_cache = {
 	.flags = HASH_ELEM | HASH_CONTEXT | HASH_BLOBS,
 	.get_key = hypertable_cache_get_key,
 	.create_entry = hypertable_cache_create_entry,
-	.pre_invalidate_hook = hypertable_cache_pre_invalidate,
 	.post_invalidate_hook = cache_init,
 };
 
-static void
-hypertable_cache_pre_invalidate(Cache *cache)
-{
-	hypertable_cache_entry *entry;
-	HASH_SEQ_STATUS scan;
-
-	hash_seq_init(&scan, cache->htab);
-	while ((entry = hash_seq_search(&scan)))
-	{
-		SPI_freeplan(entry->info->get_one_tuple_copyt_plan);
-	}
-}
+/* Column numbers for 'hypertable' table in sql/common/tables.sql */
+#define HT_COL_ID 1
+#define HT_COL_TIME_COL_NAME 10
+#define HT_COL_TIME_TYPE 11
+
+/* Primary key index column number */
+#define HT_INDEX_COL_ID 1
 
 static void *
 hypertable_cache_create_entry(Cache *cache, CacheQueryCtx *ctx)
 {
 	HypertableCacheQueryCtx *hctx = (HypertableCacheQueryCtx *) ctx;
-	hypertable_cache_entry *he = ctx->entry;
+	hypertable_cache_entry *he = NULL;
+	Relation table, index;
+	ScanKeyData scankey[1];
+	int nkeys = 1, norderbys = 0;
+	IndexScanDesc scan;
+	HeapTuple tuple;
+	TupleDesc tuple_desc;
+	Catalog *catalog = catalog_get();
 
-	he->info = fetch_hypertable_info(NULL, hctx->hypertable_id);
-	he->num_epochs = 0;
+	/* Perform an index scan on primary key. */
+	table = heap_open(catalog->tables[HYPERTABLE].id, AccessShareLock);
+	index = index_open(catalog->tables[HYPERTABLE].index_id, AccessShareLock);
+
+	ScanKeyInit(&scankey[0], HT_INDEX_COL_ID, BTEqualStrategyNumber,
+				F_INT4EQ, Int32GetDatum(hctx->hypertable_id));
+
+	scan = index_beginscan(table, index, SnapshotSelf, nkeys, norderbys);
+	index_rescan(scan, scankey, nkeys, NULL, norderbys);
+
+	tuple_desc = RelationGetDescr(table);
+
+	tuple = index_getnext(scan, ForwardScanDirection);
+
+	if (HeapTupleIsValid(tuple))
+	{
+		bool is_null;
+		Datum id_datum = heap_getattr(tuple, HT_COL_ID, tuple_desc, &is_null);
+		Datum time_col_datum = heap_getattr(tuple, HT_COL_TIME_COL_NAME, tuple_desc, &is_null);
+		Datum time_type_datum = heap_getattr(tuple, HT_COL_TIME_TYPE, tuple_desc, &is_null);
+		int32 id = DatumGetInt32(id_datum);
+
+		if (id != hctx->hypertable_id)
+		{
+			elog(ERROR, "Expected hypertable ID %u, got %u", hctx->hypertable_id, id);
+		}
+
+		he = ctx->entry;
+		he->num_epochs = 0;
+		he->id = hctx->hypertable_id;
+		strncpy(he->time_column_name, DatumGetCString(time_col_datum), NAMEDATALEN);
+		he->time_column_type = DatumGetObjectId(time_type_datum);
+	}
+	else
+	{
+		elog(ERROR, "Could not find hypertable entry");
+	}
+
+	index_endscan(scan);
+	index_close(index, AccessShareLock);
+	heap_close(table, AccessShareLock);
 
 	return he;
 }
@@ -79,7 +123,7 @@ get_hypertable_cache_entry(int32 hypertable_id)
 	HypertableCacheQueryCtx ctx = {
 		.hypertable_id = hypertable_id,
 	};
 
 	return cache_fetch(&hypertable_cache, &ctx.cctx);
 }
@@ -131,7 +175,7 @@ get_partition_epoch_cache_entry(hypertable_cache_entry *hce, int64 time_pt, Oid
 	}
 
 	old = cache_switch_to_memory_context(&hypertable_cache);
-	entry = fetch_epoch_and_partitions_set(NULL, hce->info->id, time_pt, relid);
+	entry = fetch_epoch_and_partitions_set(NULL, hce->id, time_pt, relid);
 
 	/* check if full */
 	if (hce->num_epochs == MAX_EPOCHS_PER_HYPERTABLE_CACHE_ENTRY)
@@ -202,8 +246,7 @@ get_partition_info(epoch_and_partitions_set *epoch, int16 keyspace_pt)
 	return *part;
 }
 
-void
-_hypertable_cache_init(void)
+void _hypertable_cache_init(void)
 {
 	CreateCacheMemoryContext();
 	cache_init(&hypertable_cache);


@@ -16,7 +16,8 @@ typedef struct partition_info partition_info;
 typedef struct hypertable_cache_entry
 {
 	int32 id;
-	hypertable_basic_info *info;
+	char time_column_name[NAMEDATALEN];
+	Oid time_column_type;
 	int num_epochs;
 	/* Array of epoch_and_partitions_set*. Order by start_time */
 	epoch_and_partitions_set *epochs[MAX_EPOCHS_PER_HYPERTABLE_CACHE_ENTRY];


@@ -25,6 +25,7 @@
 #include "tcop/tcopprot.h"
 #include "tcop/utility.h"
 #include "deps/dblink.h"
+#include "utils/tqual.h"
 #include "access/xact.h"
 #include "parser/parse_oper.h"
@@ -45,7 +46,6 @@
 /* private funcs */
 
 static int tuple_fnumber(TupleDesc tupdesc, const char *fname);
-static HeapTuple get_one_tuple_from_copy_table(hypertable_cache_entry *hci);
 
 /*
  * Inserts rows from the temporary copy table into correct hypertable child tables.
@@ -100,7 +100,10 @@ insert_trigger_on_copy_table_c(PG_FUNCTION_ARGS)
 	 * two different hypertables.
 	 */
 	char *insert_guard = GetConfigOptionByName("io.insert_data_guard", NULL, true);
+	HeapScanDesc scan;
+	ScanKeyData scankey[1];
+	int nkeys = 0;
 
 	if (insert_guard != NULL && strcmp(insert_guard, "on") == 0)
 	{
 		ereport(ERROR,
@@ -116,24 +119,28 @@ insert_trigger_on_copy_table_c(PG_FUNCTION_ARGS)
 	 * column fnum for time field
 	 */
 	hci = get_hypertable_cache_entry(atoi(hypertable_id_arg));
-	time_fnum = tuple_fnumber(trigdata->tg_relation->rd_att, NameStr(hci->info->time_column_name));
+	time_fnum = tuple_fnumber(trigdata->tg_relation->rd_att, hci->time_column_name);
+
+	scan = heap_beginscan(trigdata->tg_relation, SnapshotSelf, nkeys, scankey);
 
 	/* get one row in a loop until the copy table is empty. */
-	while ((firstrow = get_one_tuple_from_copy_table(hci)))
+	while ((firstrow = heap_getnext(scan, ForwardScanDirection)))
 	{
-		Datum time_datum = heap_getattr(firstrow, time_fnum, trigdata->tg_relation->rd_att, &isnull);
+		Datum time_datum;
 		int64 time_internal;
 		epoch_and_partitions_set *pe_entry;
 		partition_info *part = NULL;
 		chunk_cache_entry *chunk;
 		int ret;
 
+		time_datum = heap_getattr(firstrow, time_fnum, trigdata->tg_relation->rd_att, &isnull);
+
 		if (isnull)
 		{
 			elog(ERROR, "Time column is null");
 		}
 
-		time_internal = time_value_to_internal(time_datum, hci->info->time_column_type);
+		time_internal = time_value_to_internal(time_datum, hci->time_column_type);
 
 		pe_entry = get_partition_epoch_cache_entry(hci, time_internal, trigdata->tg_relation->rd_id);
 
 		if (pe_entry->partitioning_func != NULL)
@@ -161,6 +168,7 @@ insert_trigger_on_copy_table_c(PG_FUNCTION_ARGS)
 		}
 
 		chunk = get_chunk_cache_entry(hci, pe_entry, part, time_internal, true);
+
 		if (chunk->chunk->end_time == OPEN_END_TIME)
 		{
 			chunk_id_list = lappend_int(chunk_id_list, chunk->id);
@@ -178,6 +186,8 @@ insert_trigger_on_copy_table_c(PG_FUNCTION_ARGS)
 	}
 
+	heap_endscan(scan);
+
 	/* build chunk id array */
 	num_chunks = list_length(chunk_id_list);
 	chunk_id_array = palloc(sizeof(int) * num_chunks);
@@ -297,29 +307,3 @@ tuple_fnumber(TupleDesc tupdesc, const char *fname)
 
 	elog(ERROR, "field not found: %s", fname);
 }
-
-static HeapTuple
-get_one_tuple_from_copy_table(hypertable_cache_entry *hci)
-{
-	HeapTuple res;
-	int ret;
-
-	if (SPI_connect() < 0)
-	{
-		elog(ERROR, "Got an SPI connect error");
-	}
-
-	ret = SPI_execute_plan(hci->info->get_one_tuple_copyt_plan, NULL, NULL, false, 1);
-
-	if (ret <= 0)
-	{
-		elog(ERROR, "Got an SPI error %d", ret);
-	}
-
-	if (SPI_processed != 1)
-	{
-		SPI_finish();
-		return NULL;
-	}
-
-	res = SPI_copytuple(SPI_tuptable->vals[0]);
-	SPI_finish();
-
-	return res;
-}


@@ -194,13 +194,17 @@ SPIPlanPtr get_hypertable_info_plan()
 bool
 IobeamLoaded(void)
 {
 	if (!isLoaded)
 	{
+		Oid id;
+
 		if(!IsTransactionState())
 		{
 			return false;
 		}
-		Oid id = get_extension_oid("iobeamdb", true);
+
+		id = get_extension_oid("iobeamdb", true);
+
 		if (id != InvalidOid && !(creating_extension && id == CurrentExtensionObject))
 		{


@@ -53,57 +53,7 @@ prepare_plan(const char *src, int nargs, Oid *argtypes)
 #define HYPERTABLE_QUERY_ARGS (Oid[]) { INT4OID }
 #define HYPERTABLE_QUERY "SELECT id, time_column_name, time_column_type FROM _iobeamdb_catalog.hypertable h WHERE h.id = $1"
 
-DEFINE_PLAN(get_hypertable_plan, HYPERTABLE_QUERY, 1, HYPERTABLE_QUERY_ARGS)
-
-hypertable_basic_info *
-fetch_hypertable_info(hypertable_basic_info *entry, int32 hypertable_id)
-{
-	SPIPlanPtr plan = get_hypertable_plan();
-	Datum args[1] = {Int32GetDatum(hypertable_id)};
-	int ret;
-	bool is_null;
-	TupleDesc tupdesc;
-	HeapTuple tuple;
-	Name time_column_name;
-	int sql_len = NAMEDATALEN * 2 + 100;
-	char get_one_tuple_copyt_sql[sql_len];
-
-	if (entry == NULL)
-	{
-		entry = palloc(sizeof(hypertable_basic_info));
-	}
-
-	CACHE2_elog(WARNING, "Looking up hypertable info: %d", hypertable_id);
-
-	if (SPI_connect() < 0)
-	{
-		elog(ERROR, "Got an SPI connect error");
-	}
-
-	ret = SPI_execute_plan(plan, args, NULL, true, 2);
-
-	if (ret <= 0)
-	{
-		elog(ERROR, "Got an SPI error %d", ret);
-	}
-
-	if (SPI_processed != 1)
-	{
-		elog(ERROR, "Got not 1 row but %lu", SPI_processed);
-	}
-
-	tupdesc = SPI_tuptable->tupdesc;
-	tuple = SPI_tuptable->vals[0];
-
-	entry->id = DatumGetInt32(SPI_getbinval(tuple, tupdesc, 1, &is_null));
-	time_column_name = DatumGetName(SPI_getbinval(tuple, tupdesc, 2, &is_null));
-	memcpy(entry->time_column_name.data, time_column_name, NAMEDATALEN);
-	entry->time_column_type = DatumGetObjectId(SPI_getbinval(tuple, tupdesc, 3, &is_null));
-
-	SPI_finish();
-
-	snprintf(get_one_tuple_copyt_sql, sql_len, "SELECT * FROM %s LIMIT 1", copy_table_name(entry->id));
-	entry->get_one_tuple_copyt_plan = prepare_plan(get_one_tuple_copyt_sql, 0, NULL);
-
-	return entry;
-}
+//DEFINE_PLAN(get_hypertable_plan, HYPERTABLE_QUERY, 1, HYPERTABLE_QUERY_ARGS)
 
 #define EPOCH_AND_PARTITION_ARGS (Oid[]) { INT4OID, INT8OID }
 #define EPOCH_AND_PARTITION_QUERY "SELECT pe.id as epoch_id, hypertable_id, start_time, end_time, \


@@ -10,14 +10,6 @@
 #define OPEN_START_TIME -1
 #define OPEN_END_TIME PG_INT64_MAX
 
-typedef struct hypertable_basic_info
-{
-	int32 id;
-	NameData time_column_name;
-	Oid time_column_type;
-	SPIPlanPtr get_one_tuple_copyt_plan;
-} hypertable_basic_info;
-
 typedef struct partition_info
 {
 	int32 id;
@@ -73,8 +65,6 @@ extern epoch_and_partitions_set *fetch_epoch_and_partitions_set(epoch_and_partit
 extern void free_epoch(epoch_and_partitions_set *epoch);
 
-extern hypertable_basic_info *fetch_hypertable_info(hypertable_basic_info *entry, int32 hypertable_id);
-
 extern chunk_row *fetch_chunk_row(chunk_row *entry, int32 partition_id, int64 time_pt, bool lock);
 extern crn_set *fetch_crn_set(crn_set *entry, int32 chunk_id);