From 9c7191e89882e159c6355e1be4878ddd1560693e Mon Sep 17 00:00:00 2001 From: Matvey Arye Date: Thu, 16 Nov 2017 14:48:25 -0500 Subject: [PATCH] Change TIMESTAMP partitioning to be completely tz-independent Previously, for timezones w/o tz. The range_end and range_start were defined as UTC, but the constraints on the table were written as as the local time at the time of chunk creation. This does not work well if timezones change over the life of the hypertable. This change removes the dependency on local time for all timestamp partitioning. Namely, the range_start and range_end remain as UTC but the constraints are now always written in UTC too. Since old constraints correctly describe the data currently in the chunks, the update script to handle this change changes range_start and range_end instead of the constraints. Fixes #300. --- scripts/test_updates.sh | 8 ++-- sql/updates/post-0.6.1--0.7.0-dev.sql | 18 ++++++++ sql/util_time.sql | 7 ++- src/utils.c | 4 +- test/expected/ddl_alter_column.out | 3 ++ test/expected/insert_single.out | 63 +++++++++++++++++++-------- test/expected/reindex.out | 6 ++- test/expected/vacuum.out | 18 ++++---- test/sql/ddl_alter_column.sql | 3 ++ test/sql/insert_single.sql | 14 +++++- test/sql/updates/setup.sql | 18 +++++++- test/sql/vacuum.sql | 12 ++--- 12 files changed, 131 insertions(+), 43 deletions(-) diff --git a/scripts/test_updates.sh b/scripts/test_updates.sh index 5aef565be..3945965dc 100755 --- a/scripts/test_updates.sh +++ b/scripts/test_updates.sh @@ -44,8 +44,8 @@ cleanup() { set +e # do not exit immediately on failure in cleanup handler if [ $status -eq 0 ]; then rm -rf ${PGTEST_TMPDIR} + docker rm -vf timescaledb-orig timescaledb-clean-restore timescaledb-updated 2>/dev/null fi - docker rm -vf timescaledb-orig timescaledb-clean-restore timescaledb-updated 2>/dev/null echo "Exit status is $status" exit $status } @@ -61,7 +61,7 @@ docker_pgcmd() { } docker_pgscript() { - docker_exec $1 "psql -h localhost -U postgres -f $2" + docker_exec $1 "psql -h localhost -U postgres -v ON_ERROR_STOP=1 -f $2" } docker_pgtest() { @@ -78,12 +78,12 @@ docker_pgdiff() { } docker_run() { - docker run -d --name $1 -v ${BASE_DIR}:/src $2 + docker run -d --name $1 -v ${BASE_DIR}:/src $2 -c timezone="US/Eastern" wait_for_pg $1 } docker_run_vol() { - docker run -d --name $1 -v ${BASE_DIR}:/src -v $2 $3 + docker run -d --name $1 -v ${BASE_DIR}:/src -v $2 $3 -c timezone="US/Eastern" wait_for_pg $1 } diff --git a/sql/updates/post-0.6.1--0.7.0-dev.sql b/sql/updates/post-0.6.1--0.7.0-dev.sql index ca256e8c4..de8defe24 100644 --- a/sql/updates/post-0.6.1--0.7.0-dev.sql +++ b/sql/updates/post-0.6.1--0.7.0-dev.sql @@ -34,3 +34,21 @@ BEGIN END LOOP; END$$; + +--for timestamp (non-tz) columns we used to have internal_time -> constraint_time via local_time. +--So the internal time was interpreted as UTC but the constraint was printed in terms of the local time. +--Now we interpret the internal_time as UTC and the constraints is generated as UTC as well. +--These constraints should not be re-written since they are correct for the data. But we should adjust the internal time +--to be consistent. + +-- So _timescaledb_internal.to_timestamp(internal_time)::timestamp gives you the old constraint +-- We then convert it to timestamptz as though it was at UTC +-- finally, we convert it to the internal represtentation back. + +UPDATE _timescaledb_catalog.dimension_slice ds +SET +range_end = _timescaledb_internal.to_unix_microseconds(timezone('UTC',_timescaledb_internal.to_timestamp(range_end)::timestamp)), +range_start = _timescaledb_internal.to_unix_microseconds(timezone('UTC',_timescaledb_internal.to_timestamp(range_start)::timestamp)) +FROM _timescaledb_catalog.dimension d +WHERE ds.dimension_id = d.id AND d.column_type = 'timestamp'::regtype; + diff --git a/sql/util_time.sql b/sql/util_time.sql index 1ed018cab..4733d8765 100644 --- a/sql/util_time.sql +++ b/sql/util_time.sql @@ -32,9 +32,12 @@ BEGIN CASE column_type WHEN 'BIGINT'::regtype, 'INTEGER'::regtype, 'SMALLINT'::regtype THEN RETURN format('%L', time_value); -- scale determined by user. - WHEN 'TIMESTAMP'::regtype, 'TIMESTAMPTZ'::regtype THEN + WHEN 'TIMESTAMP'::regtype THEN + --the time_value for timestamps w/o tz does not depend on local timezones. So perform at UTC. + RETURN format('TIMESTAMP %1$L', timezone('UTC',_timescaledb_internal.to_timestamp(time_value))); -- microseconds + WHEN 'TIMESTAMPTZ'::regtype THEN -- assume time_value is in microsec - RETURN format('%2$s %1$L', _timescaledb_internal.to_timestamp(time_value), column_type); -- microseconds + RETURN format('TIMESTAMPTZ %1$L', _timescaledb_internal.to_timestamp(time_value)); -- microseconds WHEN 'DATE'::regtype THEN RETURN format('%L', timezone('UTC',_timescaledb_internal.to_timestamp(time_value))::date); END CASE; diff --git a/src/utils.c b/src/utils.c index 1454959d4..9b7fb9409 100644 --- a/src/utils.c +++ b/src/utils.c @@ -159,8 +159,8 @@ time_value_to_internal(Datum time_val, Oid type) } if (type == TIMESTAMPOID) { - Datum tz = DirectFunctionCall1(timestamp_timestamptz, time_val); - Datum res = DirectFunctionCall1(pg_timestamp_to_unix_microseconds, tz); + /* for timestamps, ignore timezones, make believe the timestamp is at UTC */ + Datum res = DirectFunctionCall1(pg_timestamp_to_unix_microseconds, time_val); return DatumGetInt64(res); } diff --git a/test/expected/ddl_alter_column.out b/test/expected/ddl_alter_column.out index f1d45a68c..dd8668331 100644 --- a/test/expected/ddl_alter_column.out +++ b/test/expected/ddl_alter_column.out @@ -61,7 +61,10 @@ SELECT * FROM alter_test WHERE time > '2017-05-20T10:00:01'; -- rename column and change its type ALTER TABLE alter_test RENAME COLUMN time TO time_us; +--converting timestamptz->timestamp should happen under UTC +SET timezone = 'UTC'; ALTER TABLE alter_test ALTER COLUMN time_us TYPE timestamp; +RESET timezone; ALTER TABLE alter_test RENAME COLUMN color TO colorname; \set ON_ERROR_STOP 0 -- Changing types on hash-partitioned columns is not safe for some diff --git a/test/expected/insert_single.out b/test/expected/insert_single.out index ec3f7c4e6..897cb585f 100644 --- a/test/expected/insert_single.out +++ b/test/expected/insert_single.out @@ -242,13 +242,14 @@ SELECT * FROM _timescaledb_catalog.chunk; 5 | 3 | _timescaledb_internal | _hyper_3_5_chunk 6 | 3 | _timescaledb_internal | _hyper_3_6_chunk 7 | 3 | _timescaledb_internal | _hyper_3_7_chunk - 9 | 5 | _timescaledb_internal | _hyper_5_9_chunk - 10 | 6 | _timescaledb_internal | _hyper_6_10_chunk + 8 | 3 | _timescaledb_internal | _hyper_3_8_chunk + 10 | 5 | _timescaledb_internal | _hyper_5_10_chunk 11 | 6 | _timescaledb_internal | _hyper_6_11_chunk 12 | 6 | _timescaledb_internal | _hyper_6_12_chunk 13 | 6 | _timescaledb_internal | _hyper_6_13_chunk 14 | 6 | _timescaledb_internal | _hyper_6_14_chunk -(13 rows) + 15 | 6 | _timescaledb_internal | _hyper_6_15_chunk +(14 rows) SELECT * FROM _timescaledb_catalog.dimension_slice; id | dimension_id | range_start | range_end @@ -257,16 +258,17 @@ SELECT * FROM _timescaledb_catalog.dimension_slice; 2 | 1 | 1257897600000000000 | 1257900192000000000 3 | 1 | 1257985728000000000 | 1257988320000000000 4 | 2 | 1482624000000000 | 1485216000000000 - 5 | 3 | -2592000000000 | 0 - 6 | 3 | 0 | 2592000000000 - 7 | 3 | -28512000000000 | -25920000000000 - 9 | 5 | -2581200000000 | -2581199000000 - 10 | 6 | -20 | -10 - 11 | 6 | -10 | 0 - 12 | 6 | 0 | 10 - 13 | 6 | 10 | 20 - 14 | 6 | 20 | 30 -(13 rows) + 5 | 3 | -5184000000000 | -2592000000000 + 6 | 3 | -2592000000000 | 0 + 7 | 3 | 0 | 2592000000000 + 8 | 3 | -28512000000000 | -25920000000000 + 10 | 5 | -2610000000000 | -2609999000000 + 11 | 6 | -20 | -10 + 12 | 6 | -10 | 0 + 13 | 6 | 0 | 10 + 14 | 6 | 10 | 20 + 15 | 6 | 20 | 30 +(14 rows) -- Create a three-dimensional table CREATE TABLE "3dim" (time timestamp, temp float, device text, location text); @@ -287,12 +289,12 @@ INSERT INTO "3dim" VALUES('2017-01-20T09:00:01', 22.5, 'blue', 'nyc'); INSERT INTO "3dim" VALUES('2017-01-20T09:00:21', 21.2, 'brown', 'sthlm'); INSERT INTO "3dim" VALUES('2017-01-20T09:00:47', 25.1, 'yellow', 'la'); --show the constraints on the three-dimensional chunk -SELECT * FROM test.show_constraints('_timescaledb_internal._hyper_7_15_chunk'); +SELECT * FROM test.show_constraints('_timescaledb_internal._hyper_7_16_chunk'); Constraint | Type | Columns | Index | Expr ---------------+------+------------+-------+---------------------------------------------------------------------------------------------------------------------------------------------- - constraint_15 | c | {time} | - | (("time" >= 'Sat Dec 24 16:00:00 2016'::timestamp without time zone) AND ("time" < 'Mon Jan 23 16:00:00 2017'::timestamp without time zone)) - constraint_16 | c | {device} | - | (_timescaledb_internal.get_partition_hash(device) < 1073741823) - constraint_17 | c | {location} | - | (_timescaledb_internal.get_partition_hash(location) >= 1073741823) + constraint_16 | c | {time} | - | (("time" >= 'Sun Dec 25 00:00:00 2016'::timestamp without time zone) AND ("time" < 'Tue Jan 24 00:00:00 2017'::timestamp without time zone)) + constraint_17 | c | {device} | - | (_timescaledb_internal.get_partition_hash(device) < 1073741823) + constraint_18 | c | {location} | - | (_timescaledb_internal.get_partition_hash(location) >= 1073741823) (3 rows) --queries should work in three dimensions @@ -411,3 +413,30 @@ NOTICE: Adding NOT NULL constraint to time column time (NULL time values not al SET timezone=+1; INSERT INTO "hyper_date" VALUES('2011-01-26', 22.5); RESET timezone; +--make sure timestamp inserts work even when the timezone changes the +SET timezone = 'UTC'; +CREATE TABLE "test_tz"(time timestamp PRIMARY KEY, temp float); +SELECT create_hypertable('"test_tz"', 'time', chunk_time_interval=> INTERVAL '1 day'); + create_hypertable +------------------- + +(1 row) + +INSERT INTO "test_tz" VALUES('2017-09-22 10:00:00', 21.2); +INSERT INTO "test_tz" VALUES('2017-09-21 19:00:00', 21.2); +SET timezone = 'US/central'; +INSERT INTO "test_tz" VALUES('2017-09-21 19:01:00', 21.2); +SELECT * FROM test.show_constraints('_timescaledb_internal._hyper_10_20_chunk'); + Constraint | Type | Columns | Index | Expr +---------------+------+---------+-------+-------------------------------------------------------------------- + constraint_23 | c | {time} | - | (("time" >= '01-26-2011'::date) AND ("time" < '02-25-2011'::date)) +(1 row) + +SELECT * FROM test_tz; + time | temp +--------------------------+------ + Fri Sep 22 10:00:00 2017 | 21.2 + Thu Sep 21 19:00:00 2017 | 21.2 + Thu Sep 21 19:01:00 2017 | 21.2 +(3 rows) + diff --git a/test/expected/reindex.out b/test/expected/reindex.out index 5770af272..9d931ad70 100644 --- a/test/expected/reindex.out +++ b/test/expected/reindex.out @@ -27,13 +27,17 @@ SELECT * FROM test.show_subtables('reindex_test'); _timescaledb_internal._hyper_1_1_chunk _timescaledb_internal._hyper_1_2_chunk _timescaledb_internal._hyper_1_3_chunk -(3 rows) + _timescaledb_internal._hyper_1_4_chunk + _timescaledb_internal._hyper_1_5_chunk +(5 rows) -- show reindexing REINDEX (VERBOSE) TABLE reindex_test; INFO: index "_hyper_1_1_chunk_reindex_test_time_unique_idx" was reindexed INFO: index "_hyper_1_2_chunk_reindex_test_time_unique_idx" was reindexed INFO: index "_hyper_1_3_chunk_reindex_test_time_unique_idx" was reindexed +INFO: index "_hyper_1_4_chunk_reindex_test_time_unique_idx" was reindexed +INFO: index "_hyper_1_5_chunk_reindex_test_time_unique_idx" was reindexed \set ON_ERROR_STOP 0 -- this one currently doesn't recurse to chunks and instead gives an -- error diff --git a/test/expected/vacuum.out b/test/expected/vacuum.out index 72d77c0c2..9c896a549 100644 --- a/test/expected/vacuum.out +++ b/test/expected/vacuum.out @@ -7,12 +7,12 @@ NOTICE: Adding NOT NULL constraint to time column time (NULL time values not al (1 row) -INSERT INTO vacuum_test VALUES ('2017-01-20T09:00:01', 17.5), - ('2017-01-21T09:00:01', 19.1), - ('2017-04-20T09:00:01', 89.5), - ('2017-04-21T09:00:01', 17.1), - ('2017-06-20T09:00:01', 18.5), - ('2017-06-21T09:00:01', 11.0); +INSERT INTO vacuum_test VALUES ('2017-01-20T16:00:01', 17.5), + ('2017-01-21T16:00:01', 19.1), + ('2017-04-20T16:00:01', 89.5), + ('2017-04-21T16:00:01', 17.1), + ('2017-06-20T16:00:01', 18.5), + ('2017-06-21T16:00:01', 11.0); -- no stats SELECT tablename, attname, histogram_bounds, n_distinct FROM pg_stats WHERE schemaname = '_timescaledb_internal' AND tablename LIKE '_hyper_%_chunk' @@ -43,11 +43,11 @@ WHERE schemaname = '_timescaledb_internal' AND tablename LIKE '_hyper_%_chunk' ORDER BY schemaname, tablename; tablename | attname | histogram_bounds | n_distinct ------------------+---------+---------------------------------------------------------+------------ - _hyper_1_1_chunk | time | {"Fri Jan 20 09:00:01 2017","Sat Jan 21 09:00:01 2017"} | -1 + _hyper_1_1_chunk | time | {"Fri Jan 20 16:00:01 2017","Sat Jan 21 16:00:01 2017"} | -1 _hyper_1_1_chunk | temp | {17.5,19.1} | -1 - _hyper_1_2_chunk | time | {"Thu Apr 20 09:00:01 2017","Fri Apr 21 09:00:01 2017"} | -1 + _hyper_1_2_chunk | time | {"Thu Apr 20 16:00:01 2017","Fri Apr 21 16:00:01 2017"} | -1 _hyper_1_2_chunk | temp | {17.1,89.5} | -1 - _hyper_1_3_chunk | time | {"Tue Jun 20 09:00:01 2017","Wed Jun 21 09:00:01 2017"} | -1 + _hyper_1_3_chunk | time | {"Tue Jun 20 16:00:01 2017","Wed Jun 21 16:00:01 2017"} | -1 _hyper_1_3_chunk | temp | {11,18.5} | -1 (6 rows) diff --git a/test/sql/ddl_alter_column.sql b/test/sql/ddl_alter_column.sql index 832b388a6..4cc247ea3 100644 --- a/test/sql/ddl_alter_column.sql +++ b/test/sql/ddl_alter_column.sql @@ -19,7 +19,10 @@ SELECT * FROM alter_test WHERE time > '2017-05-20T10:00:01'; -- rename column and change its type ALTER TABLE alter_test RENAME COLUMN time TO time_us; +--converting timestamptz->timestamp should happen under UTC +SET timezone = 'UTC'; ALTER TABLE alter_test ALTER COLUMN time_us TYPE timestamp; +RESET timezone; ALTER TABLE alter_test RENAME COLUMN color TO colorname; \set ON_ERROR_STOP 0 -- Changing types on hash-partitioned columns is not safe for some diff --git a/test/sql/insert_single.sql b/test/sql/insert_single.sql index 7e6a72cf2..a8e0627ae 100644 --- a/test/sql/insert_single.sql +++ b/test/sql/insert_single.sql @@ -64,7 +64,7 @@ INSERT INTO "3dim" VALUES('2017-01-20T09:00:21', 21.2, 'brown', 'sthlm'); INSERT INTO "3dim" VALUES('2017-01-20T09:00:47', 25.1, 'yellow', 'la'); --show the constraints on the three-dimensional chunk -SELECT * FROM test.show_constraints('_timescaledb_internal._hyper_7_15_chunk'); +SELECT * FROM test.show_constraints('_timescaledb_internal._hyper_7_16_chunk'); --queries should work in three dimensions SELECT * FROM "3dim"; @@ -117,3 +117,15 @@ SELECT create_hypertable('"hyper_date"', 'time'); SET timezone=+1; INSERT INTO "hyper_date" VALUES('2011-01-26', 22.5); RESET timezone; + +--make sure timestamp inserts work even when the timezone changes the +SET timezone = 'UTC'; +CREATE TABLE "test_tz"(time timestamp PRIMARY KEY, temp float); +SELECT create_hypertable('"test_tz"', 'time', chunk_time_interval=> INTERVAL '1 day'); +INSERT INTO "test_tz" VALUES('2017-09-22 10:00:00', 21.2); +INSERT INTO "test_tz" VALUES('2017-09-21 19:00:00', 21.2); +SET timezone = 'US/central'; +INSERT INTO "test_tz" VALUES('2017-09-21 19:01:00', 21.2); + +SELECT * FROM test.show_constraints('_timescaledb_internal._hyper_10_20_chunk'); +SELECT * FROM test_tz; diff --git a/test/sql/updates/setup.sql b/test/sql/updates/setup.sql index fa62cbbc0..bf26009a0 100644 --- a/test/sql/updates/setup.sql +++ b/test/sql/updates/setup.sql @@ -21,7 +21,6 @@ CREATE INDEX ON PUBLIC."two_Partitions" ("timeCustom" DESC NULLS LAST, device_id SELECT * FROM create_hypertable('"public"."two_Partitions"'::regclass, 'timeCustom'::name, 'device_id'::name, associated_schema_name=>'_timescaledb_internal'::text, number_partitions => 2, chunk_time_interval=>_timescaledb_internal.interval_to_usec('1 month')); - INSERT INTO public."two_Partitions"("timeCustom", device_id, series_0, series_1, series_2) VALUES (1257987600000000000, 'dev1', 1.5, 1, 1), (1257987600000000000, 'dev1', 1.5, 2, 2), @@ -30,3 +29,20 @@ INSERT INTO public."two_Partitions"("timeCustom", device_id, series_0, series_1, INSERT INTO "two_Partitions"("timeCustom", device_id, series_0, series_1, series_2) VALUES (1257894000000000000, 'dev2', 1.5, 2, 6); + +CREATE TABLE PUBLIC.hyper_timestamp ( + time timestamp NOT NULL, + device_id TEXT NOT NULL, + value int NOT NULL +); + +SELECT * FROM create_hypertable('hyper_timestamp'::regclass, 'time'::name, 'device_id'::name, number_partitions => 2, + chunk_time_interval=> _timescaledb_internal.interval_to_usec('1 minute')); + +--some old versions use more slice_ids than newer ones. Make this uniform +ALTER SEQUENCE _timescaledb_catalog.dimension_slice_id_seq RESTART WITH 100; + +INSERT INTO hyper_timestamp VALUES +('2017-01-20T09:00:01', 'dev1', 1), +('2017-01-20T08:00:01', 'dev2', 2), +('2016-01-20T09:00:01', 'dev1', 3); diff --git a/test/sql/vacuum.sql b/test/sql/vacuum.sql index 9a5d882f3..e35fbde97 100644 --- a/test/sql/vacuum.sql +++ b/test/sql/vacuum.sql @@ -3,12 +3,12 @@ CREATE TABLE vacuum_test(time timestamp, temp float); -- create hypertable with three chunks SELECT create_hypertable('vacuum_test', 'time', chunk_time_interval => 2628000000000); -INSERT INTO vacuum_test VALUES ('2017-01-20T09:00:01', 17.5), - ('2017-01-21T09:00:01', 19.1), - ('2017-04-20T09:00:01', 89.5), - ('2017-04-21T09:00:01', 17.1), - ('2017-06-20T09:00:01', 18.5), - ('2017-06-21T09:00:01', 11.0); +INSERT INTO vacuum_test VALUES ('2017-01-20T16:00:01', 17.5), + ('2017-01-21T16:00:01', 19.1), + ('2017-04-20T16:00:01', 89.5), + ('2017-04-21T16:00:01', 17.1), + ('2017-06-20T16:00:01', 18.5), + ('2017-06-21T16:00:01', 11.0); -- no stats SELECT tablename, attname, histogram_bounds, n_distinct FROM pg_stats