Fix conversion between TIMESTAMP and internal BIGINT UNIX time representation.

Currently, the internal metadata tables for hypertables track time
as a BIGINT integer. Converting hypertable time columns in TIMESTAMP
format to this internal representation requires using Postgres' conversion
functions that are imprecise due to floating-point arithmetic. This patch
adds C-based conversion functions that offer the following conversions
using accurate integer arithmetic:

- TIMESTAMP to UNIX epoch BIGINT in microseconds
- UNIX epoch BIGINT in microseconds to TIMESTAMP
- TIMESTAMP to Postgres epoch BIGINT in microseconds
- Postgres epoch BIGINT in microseconds to TIMESTAMP

The downside of the UNIX epoch functions is that they don't offer the full
date range offered by the Postgres to_timestamp() function. This is
because the required epoch shift might otherwise overflow the BIGINT.
All functions should, however, offer appropriate range checks and will
throw errors if outside the range.
This commit is contained in:
Erik Nordström 2017-02-10 16:08:49 +01:00
parent a910d100aa
commit 4683d8e03e
7 changed files with 320 additions and 49 deletions

View File

@ -7,7 +7,7 @@ EXT_SQL_FILE = sql/$(EXTENSION)--$(EXT_VERSION).sql
DATA = $(EXT_SQL_FILE)
MODULE_big = $(EXTENSION)
SRCS = src/iobeamdb.c src/murmur3.c src/pgmurmur3.c
SRCS = src/iobeamdb.c src/murmur3.c src/pgmurmur3.c src/utils.c
OBJS = $(SRCS:.c=.o)
MKFILE_PATH := $(abspath $(MAKEFILE_LIST))

View File

@ -1,4 +1,17 @@
-- This file contains utilities for time conversion.
-- TIMESTAMPTZ -> BIGINT microseconds relative to the Postgres epoch.
CREATE OR REPLACE FUNCTION _iobeamdb_internal.to_microseconds(ts TIMESTAMPTZ)
    RETURNS BIGINT
    LANGUAGE C IMMUTABLE STRICT
    AS '$libdir/iobeamdb', 'pg_timestamp_to_microseconds';

-- TIMESTAMPTZ -> BIGINT microseconds relative to the UNIX epoch.
CREATE OR REPLACE FUNCTION _iobeamdb_internal.to_unix_microseconds(ts TIMESTAMPTZ)
    RETURNS BIGINT
    LANGUAGE C IMMUTABLE STRICT
    AS '$libdir/iobeamdb', 'pg_timestamp_to_unix_microseconds';

-- BIGINT microseconds relative to the UNIX epoch -> TIMESTAMPTZ.
CREATE OR REPLACE FUNCTION _iobeamdb_internal.to_timestamp(unixtime_us BIGINT)
    RETURNS TIMESTAMPTZ
    LANGUAGE C IMMUTABLE STRICT
    AS '$libdir/iobeamdb', 'pg_unix_microseconds_to_timestamp';

-- BIGINT microseconds relative to the Postgres epoch -> TIMESTAMPTZ.
CREATE OR REPLACE FUNCTION _iobeamdb_internal.to_timestamp_pg(postgres_us BIGINT)
    RETURNS TIMESTAMPTZ
    LANGUAGE C IMMUTABLE STRICT
    AS '$libdir/iobeamdb', 'pg_microseconds_to_timestamp';
-- Time can be represented in a hypertable as an int* (bigint/integer/smallint) or as a timestamp type (
-- with or without timezones). In our metatables and other internal systems all time values are stored as bigint.
-- Converting from int* columns to internal representation is a cast to bigint.
@ -17,30 +30,11 @@ BEGIN
WHEN 'BIGINT'::regtype, 'INTEGER'::regtype, 'SMALLINT'::regtype THEN
RETURN format('%s::bigint', identifier); --scale determined by user.
WHEN 'TIMESTAMP'::regtype, 'TIMESTAMPTZ'::regtype THEN
RETURN format('((EXTRACT(epoch FROM %s::timestamptz)*1e6)::bigint)', identifier); --microseconds since UTC epoch
RETURN format('(_iobeamdb_internal.to_unix_microseconds(%s::timestamptz))', identifier); --microseconds since UTC epoch
END CASE;
END
$BODY$;
-- Converts an internal BIGINT time value, treated here as microseconds, into a
-- TIMESTAMPTZ. The value is split into a seconds part, which is handed to the
-- built-in to_timestamp(), and a microsecond remainder that is added back as
-- an interval.
CREATE OR REPLACE FUNCTION _iobeamdb_internal.time_value_to_timestamp(
time_value BIGINT
)
RETURNS TIMESTAMPTZ LANGUAGE PLPGSQL STABLE AS
$BODY$
DECLARE
seconds BIGINT;
microseconds BIGINT;
microseconds_interval INTERVAL;
timestamp_value TIMESTAMPTZ;
BEGIN
-- Seconds part of the value.
-- NOTE(review): the ::bigint cast presumably rounds to nearest rather than
-- truncating, in which case the remainder computed below can be negative;
-- the sum is still the same instant -- confirm.
seconds := (time_value / 1e6)::bigint;
-- What is left of time_value, in microseconds, once the seconds part is removed.
microseconds := time_value - (seconds * 1e6);
-- make_interval() takes seconds, so scale the remainder back down.
microseconds_interval := make_interval(secs => microseconds / 1e6);
SELECT to_timestamp(seconds) + microseconds_interval INTO timestamp_value;
RETURN timestamp_value;
END
$BODY$;
-- Gets the sql code for representing the literal for the given time value (in the internal representation) as the column_type.
CREATE OR REPLACE FUNCTION _iobeamdb_internal.time_literal_sql(
time_value BIGINT,
@ -58,7 +52,7 @@ BEGIN
RETURN format('%L', time_value); --scale determined by user.
WHEN 'TIMESTAMP'::regtype, 'TIMESTAMPTZ'::regtype THEN
--assume time_value is in microsec
RETURN format('%2$s %1$L', _iobeamdb_internal.time_value_to_timestamp(time_value), column_type); --microseconds
RETURN format('%2$s %1$L', _iobeamdb_internal.to_timestamp(time_value), column_type); --microseconds
END CASE;
END
$BODY$;

132
src/utils.c Normal file
View File

@ -0,0 +1,132 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: t; c-basic-offset: 4 -*- */
#include <postgres.h>
#include <math.h>
#include <unistd.h>
#include <fmgr.h>
#include <utils/datetime.h>
Datum pg_timestamp_to_microseconds(PG_FUNCTION_ARGS);
Datum pg_microseconds_to_timestamp(PG_FUNCTION_ARGS);
Datum pg_timestamp_to_unix_microseconds(PG_FUNCTION_ARGS);
Datum pg_unix_microseconds_to_timestamp(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(pg_timestamp_to_microseconds);
/*
* Convert a Postgres TIMESTAMP to BIGINT microseconds relative the Postgres epoch.
*/
Datum
pg_timestamp_to_microseconds(PG_FUNCTION_ARGS)
{
TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(0);
int64 microseconds;
if (!IS_VALID_TIMESTAMP(timestamp))
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("timestamp out of range")));
#ifdef HAVE_INT64_TIMESTAMP
microseconds = timestamp;
#else
if (1)
{
int64 seconds = (int64)timestamp;
microseconds = (seconds * USECS_PER_SEC) + ((timestamp - seconds) * USECS_PER_SEC);
}
#endif
PG_RETURN_INT64(microseconds);
}
PG_FUNCTION_INFO_V1(pg_microseconds_to_timestamp);
/*
* Convert BIGINT microseconds relative the UNIX epoch to a Postgres TIMESTAMP.
*/
Datum
pg_microseconds_to_timestamp(PG_FUNCTION_ARGS)
{
int64 microseconds = PG_GETARG_INT64(0);
TimestampTz timestamp;
#ifdef HAVE_INT64_TIMESTAMP
timestamp = microseconds;
#else
timestamp = microseconds / USECS_PER_SEC;
#endif
if (!IS_VALID_TIMESTAMP(timestamp))
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("timestamp out of range")));
PG_RETURN_TIMESTAMPTZ(timestamp);
}
PG_FUNCTION_INFO_V1(pg_timestamp_to_unix_microseconds);
/*
* Convert a Postgres TIMESTAMP to BIGINT microseconds relative the UNIX epoch.
*/
Datum
pg_timestamp_to_unix_microseconds(PG_FUNCTION_ARGS)
{
TimestampTz timestamp = PG_GETARG_TIMESTAMPTZ(0);
int64 epoch_diff_microseconds = (POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * USECS_PER_DAY;
int64 microseconds;
if (timestamp < MIN_TIMESTAMP)
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("timestamp out of range")));
if (timestamp >= (END_TIMESTAMP - epoch_diff_microseconds))
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("timestamp out of range")));
#ifdef HAVE_INT64_TIMESTAMP
microseconds = timestamp + epoch_diff_microseconds;
#else
if (1)
{
int64 seconds = (int64)timestamp;
microseconds = (seconds * USECS_PER_SEC) + ((timestamp - seconds) * USECS_PER_SEC) + epoch_diff_microseconds;
}
#endif
PG_RETURN_INT64(microseconds);
}
PG_FUNCTION_INFO_V1(pg_unix_microseconds_to_timestamp);
/*
* Convert BIGINT microseconds relative the UNIX epoch to a Postgres TIMESTAMP.
*/
Datum
pg_unix_microseconds_to_timestamp(PG_FUNCTION_ARGS)
{
int64 microseconds = PG_GETARG_INT64(0);
TimestampTz timestamp;
/*
Test that the UNIX us timestamp is within bounds.
Note that an int64 at UNIX epoch and microsecond precision cannot represent
the upper limit of the supported date range (Julian end date), so INT64_MAX
is the natural upper bound for this function.
*/
if (microseconds < ((int64)USECS_PER_DAY * (DATETIME_MIN_JULIAN - UNIX_EPOCH_JDATE)))
ereport(ERROR,
(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
errmsg("timestamp out of range")));
#ifdef HAVE_INT64_TIMESTAMP
timestamp = microseconds - ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * USECS_PER_DAY);
#else
/* Shift the epoch using integer arithmetic to reduce precision errors */
timestamp = microseconds / USECS_PER_SEC; /* seconds */
microseconds = microseconds - ((int64)timestamp * USECS_PER_SEC);
timestamp = (float8)((int64)seconds - ((POSTGRES_EPOCH_JDATE - UNIX_EPOCH_JDATE) * SECS_PER_DAY))
+ (float8)microseconds / USECS_PER_SEC;
#endif
PG_RETURN_TIMESTAMPTZ(timestamp);
}

View File

@ -1,5 +1,4 @@
\set ON_ERROR_STOP 1
\set VERBOSITY verbose
\set SHOW_CONTEXT never
\ir include/create_clustered_db.sql
SET client_min_messages = WARNING;
@ -12,12 +11,9 @@ CREATE DATABASE "Test1";
CREATE DATABASE test2;
\c meta
CREATE EXTENSION IF NOT EXISTS iobeamdb CASCADE;
psql:include/create_clustered_db.sql:12: NOTICE: 00000: installing required extension "dblink"
LOCATION: CreateExtensionInternal, extension.c:1441
psql:include/create_clustered_db.sql:12: NOTICE: 00000: installing required extension "postgres_fdw"
LOCATION: CreateExtensionInternal, extension.c:1441
psql:include/create_clustered_db.sql:12: NOTICE: 00000: installing required extension "hstore"
LOCATION: CreateExtensionInternal, extension.c:1441
psql:include/create_clustered_db.sql:12: NOTICE: installing required extension "dblink"
psql:include/create_clustered_db.sql:12: NOTICE: installing required extension "postgres_fdw"
psql:include/create_clustered_db.sql:12: NOTICE: installing required extension "hstore"
select set_meta('localhost');
set_meta
----------
@ -27,12 +23,9 @@ select set_meta('localhost');
\c Test1
CREATE SCHEMA io_test;
CREATE EXTENSION IF NOT EXISTS iobeamdb SCHEMA io_test CASCADE;
psql:include/create_clustered_db.sql:17: NOTICE: 00000: installing required extension "dblink"
LOCATION: CreateExtensionInternal, extension.c:1441
psql:include/create_clustered_db.sql:17: NOTICE: 00000: installing required extension "postgres_fdw"
LOCATION: CreateExtensionInternal, extension.c:1441
psql:include/create_clustered_db.sql:17: NOTICE: 00000: installing required extension "hstore"
LOCATION: CreateExtensionInternal, extension.c:1441
psql:include/create_clustered_db.sql:17: NOTICE: installing required extension "dblink"
psql:include/create_clustered_db.sql:17: NOTICE: installing required extension "postgres_fdw"
psql:include/create_clustered_db.sql:17: NOTICE: installing required extension "hstore"
ALTER DATABASE "Test1" SET search_path = "io_test";
SET search_path = 'io_test';
select join_cluster(meta_database => 'meta', meta_hostname => 'localhost', node_hostname => 'localhost');
@ -43,12 +36,9 @@ select join_cluster(meta_database => 'meta', meta_hostname => 'localhost', node_
\c test2
CREATE EXTENSION IF NOT EXISTS iobeamdb CASCADE;
psql:include/create_clustered_db.sql:23: NOTICE: 00000: installing required extension "dblink"
LOCATION: CreateExtensionInternal, extension.c:1441
psql:include/create_clustered_db.sql:23: NOTICE: 00000: installing required extension "postgres_fdw"
LOCATION: CreateExtensionInternal, extension.c:1441
psql:include/create_clustered_db.sql:23: NOTICE: 00000: installing required extension "hstore"
LOCATION: CreateExtensionInternal, extension.c:1441
psql:include/create_clustered_db.sql:23: NOTICE: installing required extension "dblink"
psql:include/create_clustered_db.sql:23: NOTICE: installing required extension "postgres_fdw"
psql:include/create_clustered_db.sql:23: NOTICE: installing required extension "hstore"
select join_cluster(meta_database => 'meta', meta_hostname => 'localhost', node_hostname => 'localhost');
join_cluster
--------------
@ -57,6 +47,11 @@ select join_cluster(meta_database => 'meta', meta_hostname => 'localhost', node_
\set ECHO ALL
\c Test1
\set ON_ERROR_STOP 0
SET client_min_messages = WARNING;
drop tablespace if exists tspace1;
SET client_min_messages = NOTICE;
\set VERBOSITY verbose
--test hypertable with tables space
create tablespace tspace1 location :TEST_TABLESPACE_PATH;
create table test_tspace(time timestamp, temp float, device_id text) tablespace tspace1;

View File

@ -203,10 +203,100 @@ FROM PUBLIC."testNs" GROUP BY time ORDER BY time ASC LIMIT 2;
Wed Nov 11 19:00:00 2009 | 3
(2 rows)
-- check time conversion
SELECT _iobeamdb_internal.time_value_to_timestamp(1486480176236538);
time_value_to_timestamp
------------------------------------
-- Test time conversion functions --
------------------------------------
\set ON_ERROR_STOP 0
SET timezone = 'UTC';
ALTER DATABASE test2 SET timezone ='UTC';
-- Conversion to timestamp using Postgres built-in function taking double
SELECT to_timestamp(1486480176.236538);
to_timestamp
-------------------------------------
Tue Feb 07 10:09:36.236538 2017 EST
Tue Feb 07 15:09:36.236537 2017 UTC
(1 row)
-- extension-specific version taking microsecond UNIX timestamp
SELECT _iobeamdb_internal.to_timestamp(1486480176236538);
to_timestamp
-------------------------------------
Tue Feb 07 15:09:36.236538 2017 UTC
(1 row)
-- Should be the inverse of the statement above.
SELECT _iobeamdb_internal.to_unix_microseconds('2017-02-07 15:09:36.236538+00');
to_unix_microseconds
----------------------
1486480176236538
(1 row)
-- In UNIX microseconds, BIGINT MAX is smaller than internal date upper bound
-- and should therefore be OK. Further, converting to the internal postgres
-- epoch cannot overflow a 64-bit INTEGER since the postgres epoch is at a
-- later date compared to the UNIX epoch, and is therefore represented by a
-- smaller number
SELECT _iobeamdb_internal.to_timestamp(9223372036854775807);
to_timestamp
---------------------------------------
Sun Jan 10 04:00:54.775807 294247 UTC
(1 row)
-- Julian day zero is -210866803200000000 microseconds from UNIX epoch
SELECT _iobeamdb_internal.to_timestamp(-210866803200000000);
to_timestamp
---------------------------------
Mon Nov 24 00:00:00 4714 UTC BC
(1 row)
-- Going beyond Julian day zero should give out-of-range error
SELECT _iobeamdb_internal.to_timestamp(-210866803200000001);
ERROR: timestamp out of range
-- Lower bound on date (should return the Julian day zero UNIX timestamp above)
SELECT _iobeamdb_internal.to_unix_microseconds('4714-11-24 00:00:00+00 BC');
to_unix_microseconds
----------------------
-210866803200000000
(1 row)
-- Going beyond lower bound on date should return out-of-range
SELECT _iobeamdb_internal.to_unix_microseconds('4714-11-23 23:59:59.999999+00 BC');
ERROR: timestamp out of range: "4714-11-23 23:59:59.999999+00 BC"
LINE 1: SELECT _iobeamdb_internal.to_unix_microseconds('4714-11-23 2...
^
-- The upper bound for Postgres TIMESTAMPTZ
SELECT timestamp '294276-12-31 23:59:59.999999+00';
timestamp
-----------------------------------
Sun Dec 31 23:59:59.999999 294276
(1 row)
-- Going beyond the upper bound, should fail
SELECT timestamp '294276-12-31 23:59:59.999999+00' + interval '1 us';
ERROR: timestamp out of range
-- Cannot represent the upper bound timestamp with a UNIX microsecond timestamp
-- since the Postgres epoch is at a later date than the UNIX epoch.
SELECT _iobeamdb_internal.to_unix_microseconds('294276-12-31 23:59:59.999999+00');
ERROR: timestamp out of range
-- Subtracting the difference between the two epochs (10957 days) should bring
-- us within range.
SELECT timestamp '294276-12-31 23:59:59.999999+00' - interval '10957 days';
?column?
-----------------------------------
Fri Jan 01 23:59:59.999999 294247
(1 row)
SELECT _iobeamdb_internal.to_unix_microseconds('294247-01-01 23:59:59.999999');
to_unix_microseconds
----------------------
9223371331199999999
(1 row)
-- Adding one microsecond should take us out-of-range again
SELECT timestamp '294247-01-01 23:59:59.999999' + interval '1 us';
?column?
----------------------------
Sat Jan 02 00:00:00 294247
(1 row)
SELECT _iobeamdb_internal.to_unix_microseconds(timestamp '294247-01-01 23:59:59.999999' + interval '1 us');
ERROR: timestamp out of range

View File

@ -1,6 +1,5 @@
\set ON_ERROR_STOP 1
\set VERBOSITY verbose
\set SHOW_CONTEXT never
\ir include/create_clustered_db.sql
@ -8,6 +7,14 @@
\set ECHO ALL
\c Test1
\set ON_ERROR_STOP 0
SET client_min_messages = WARNING;
drop tablespace if exists tspace1;
SET client_min_messages = NOTICE;
\set VERBOSITY verbose
--test hypertable with tables space
create tablespace tspace1 location :TEST_TABLESPACE_PATH;
create table test_tspace(time timestamp, temp float, device_id text) tablespace tspace1;
@ -22,4 +29,4 @@ insert into test_tspace values ('2017-01-20T09:00:02', 22.3, 'dev7');
--cleanup
drop table test_tspace;
drop tablespace tspace1;
drop tablespace tspace1;

View File

@ -101,5 +101,58 @@ SELECT dblink_disconnect(conn) FROM unnest(dblink_get_connections()) conn;
SELECT date_group("timeCustom", '1 day') AS time, sum(series_0)
FROM PUBLIC."testNs" GROUP BY time ORDER BY time ASC LIMIT 2;
-- check time conversion
SELECT _iobeamdb_internal.time_value_to_timestamp(1486480176236538);
------------------------------------
-- Test time conversion functions --
------------------------------------
\set ON_ERROR_STOP 0
SET timezone = 'UTC';
ALTER DATABASE test2 SET timezone ='UTC';
-- Conversion to timestamp using Postgres built-in function taking double
SELECT to_timestamp(1486480176.236538);
-- extension-specific version taking microsecond UNIX timestamp
SELECT _iobeamdb_internal.to_timestamp(1486480176236538);
-- Should be the inverse of the statement above.
SELECT _iobeamdb_internal.to_unix_microseconds('2017-02-07 15:09:36.236538+00');
-- In UNIX microseconds, BIGINT MAX is smaller than internal date upper bound
-- and should therefore be OK. Further, converting to the internal postgres
-- epoch cannot overflow a 64-bit INTEGER since the postgres epoch is at a
-- later date compared to the UNIX epoch, and is therefore represented by a
-- smaller number
SELECT _iobeamdb_internal.to_timestamp(9223372036854775807);
-- Julian day zero is -210866803200000000 microseconds from UNIX epoch
SELECT _iobeamdb_internal.to_timestamp(-210866803200000000);
-- Going beyond Julian day zero should give out-of-range error
SELECT _iobeamdb_internal.to_timestamp(-210866803200000001);
-- Lower bound on date (should return the Julian day zero UNIX timestamp above)
SELECT _iobeamdb_internal.to_unix_microseconds('4714-11-24 00:00:00+00 BC');
-- Going beyond lower bound on date should return out-of-range
SELECT _iobeamdb_internal.to_unix_microseconds('4714-11-23 23:59:59.999999+00 BC');
-- The upper bound for Postgres TIMESTAMPTZ
SELECT timestamp '294276-12-31 23:59:59.999999+00';
-- Going beyond the upper bound, should fail
SELECT timestamp '294276-12-31 23:59:59.999999+00' + interval '1 us';
-- Cannot represent the upper bound timestamp with a UNIX microsecond timestamp
-- since the Postgres epoch is at a later date than the UNIX epoch.
SELECT _iobeamdb_internal.to_unix_microseconds('294276-12-31 23:59:59.999999+00');
-- Subtracting the difference between the two epochs (10957 days) should bring
-- us within range.
SELECT timestamp '294276-12-31 23:59:59.999999+00' - interval '10957 days';
SELECT _iobeamdb_internal.to_unix_microseconds('294247-01-01 23:59:59.999999');
-- Adding one microsecond should take us out-of-range again
SELECT timestamp '294247-01-01 23:59:59.999999' + interval '1 us';
SELECT _iobeamdb_internal.to_unix_microseconds(timestamp '294247-01-01 23:59:59.999999' + interval '1 us');