1
0
mirror of https://github.com/timescale/timescaledb.git synced 2025-05-19 12:13:24 +08:00

Change TIMESTAMP partitioning to be completely tz-independent

Previously, for timezones w/o tz. The range_end and range_start were
defined as UTC, but the constraints on the table were written as as
the local time at the time of chunk creation. This does not work well
if timezones change over the life of the hypertable.

This change removes the dependency on local time for all timestamp
partitioning. Namely, the range_start and range_end remain as UTC
but the constraints are now always written in UTC too. Since old
constraints correctly describe the data currently in the chunks, the
update script to handle this change changes range_start and range_end
instead of the constraints.

Fixes .
This commit is contained in:
Matvey Arye 2017-11-16 14:48:25 -05:00 committed by Matvey Arye
parent 741b25662e
commit 9c7191e898
12 changed files with 131 additions and 43 deletions

@ -44,8 +44,8 @@ cleanup() {
set +e # do not exit immediately on failure in cleanup handler
if [ $status -eq 0 ]; then
rm -rf ${PGTEST_TMPDIR}
docker rm -vf timescaledb-orig timescaledb-clean-restore timescaledb-updated 2>/dev/null
fi
docker rm -vf timescaledb-orig timescaledb-clean-restore timescaledb-updated 2>/dev/null
echo "Exit status is $status"
exit $status
}
@ -61,7 +61,7 @@ docker_pgcmd() {
}
docker_pgscript() {
docker_exec $1 "psql -h localhost -U postgres -f $2"
docker_exec $1 "psql -h localhost -U postgres -v ON_ERROR_STOP=1 -f $2"
}
docker_pgtest() {
@ -78,12 +78,12 @@ docker_pgdiff() {
}
docker_run() {
docker run -d --name $1 -v ${BASE_DIR}:/src $2
docker run -d --name $1 -v ${BASE_DIR}:/src $2 -c timezone="US/Eastern"
wait_for_pg $1
}
docker_run_vol() {
docker run -d --name $1 -v ${BASE_DIR}:/src -v $2 $3
docker run -d --name $1 -v ${BASE_DIR}:/src -v $2 $3 -c timezone="US/Eastern"
wait_for_pg $1
}

@ -34,3 +34,21 @@ BEGIN
END LOOP;
END$$;
--for timestamp (non-tz) columns we used to have internal_time -> constraint_time via local_time.
--So the internal time was interpreted as UTC but the constraint was printed in terms of the local time.
--Now we interpret the internal_time as UTC and the constraints is generated as UTC as well.
--These constraints should not be re-written since they are correct for the data. But we should adjust the internal time
--to be consistent.
-- So _timescaledb_internal.to_timestamp(internal_time)::timestamp gives you the old constraint
-- We then convert it to timestamptz as though it was at UTC
-- finally, we convert it to the internal represtentation back.
UPDATE _timescaledb_catalog.dimension_slice ds
SET
range_end = _timescaledb_internal.to_unix_microseconds(timezone('UTC',_timescaledb_internal.to_timestamp(range_end)::timestamp)),
range_start = _timescaledb_internal.to_unix_microseconds(timezone('UTC',_timescaledb_internal.to_timestamp(range_start)::timestamp))
FROM _timescaledb_catalog.dimension d
WHERE ds.dimension_id = d.id AND d.column_type = 'timestamp'::regtype;

@ -32,9 +32,12 @@ BEGIN
CASE column_type
WHEN 'BIGINT'::regtype, 'INTEGER'::regtype, 'SMALLINT'::regtype THEN
RETURN format('%L', time_value); -- scale determined by user.
WHEN 'TIMESTAMP'::regtype, 'TIMESTAMPTZ'::regtype THEN
WHEN 'TIMESTAMP'::regtype THEN
--the time_value for timestamps w/o tz does not depend on local timezones. So perform at UTC.
RETURN format('TIMESTAMP %1$L', timezone('UTC',_timescaledb_internal.to_timestamp(time_value))); -- microseconds
WHEN 'TIMESTAMPTZ'::regtype THEN
-- assume time_value is in microsec
RETURN format('%2$s %1$L', _timescaledb_internal.to_timestamp(time_value), column_type); -- microseconds
RETURN format('TIMESTAMPTZ %1$L', _timescaledb_internal.to_timestamp(time_value)); -- microseconds
WHEN 'DATE'::regtype THEN
RETURN format('%L', timezone('UTC',_timescaledb_internal.to_timestamp(time_value))::date);
END CASE;

@ -159,8 +159,8 @@ time_value_to_internal(Datum time_val, Oid type)
}
if (type == TIMESTAMPOID)
{
Datum tz = DirectFunctionCall1(timestamp_timestamptz, time_val);
Datum res = DirectFunctionCall1(pg_timestamp_to_unix_microseconds, tz);
/* for timestamps, ignore timezones, make believe the timestamp is at UTC */
Datum res = DirectFunctionCall1(pg_timestamp_to_unix_microseconds, time_val);
return DatumGetInt64(res);
}

@ -61,7 +61,10 @@ SELECT * FROM alter_test WHERE time > '2017-05-20T10:00:01';
-- rename column and change its type
ALTER TABLE alter_test RENAME COLUMN time TO time_us;
--converting timestamptz->timestamp should happen under UTC
SET timezone = 'UTC';
ALTER TABLE alter_test ALTER COLUMN time_us TYPE timestamp;
RESET timezone;
ALTER TABLE alter_test RENAME COLUMN color TO colorname;
\set ON_ERROR_STOP 0
-- Changing types on hash-partitioned columns is not safe for some

@ -242,13 +242,14 @@ SELECT * FROM _timescaledb_catalog.chunk;
5 | 3 | _timescaledb_internal | _hyper_3_5_chunk
6 | 3 | _timescaledb_internal | _hyper_3_6_chunk
7 | 3 | _timescaledb_internal | _hyper_3_7_chunk
9 | 5 | _timescaledb_internal | _hyper_5_9_chunk
10 | 6 | _timescaledb_internal | _hyper_6_10_chunk
8 | 3 | _timescaledb_internal | _hyper_3_8_chunk
10 | 5 | _timescaledb_internal | _hyper_5_10_chunk
11 | 6 | _timescaledb_internal | _hyper_6_11_chunk
12 | 6 | _timescaledb_internal | _hyper_6_12_chunk
13 | 6 | _timescaledb_internal | _hyper_6_13_chunk
14 | 6 | _timescaledb_internal | _hyper_6_14_chunk
(13 rows)
15 | 6 | _timescaledb_internal | _hyper_6_15_chunk
(14 rows)
SELECT * FROM _timescaledb_catalog.dimension_slice;
id | dimension_id | range_start | range_end
@ -257,16 +258,17 @@ SELECT * FROM _timescaledb_catalog.dimension_slice;
2 | 1 | 1257897600000000000 | 1257900192000000000
3 | 1 | 1257985728000000000 | 1257988320000000000
4 | 2 | 1482624000000000 | 1485216000000000
5 | 3 | -2592000000000 | 0
6 | 3 | 0 | 2592000000000
7 | 3 | -28512000000000 | -25920000000000
9 | 5 | -2581200000000 | -2581199000000
10 | 6 | -20 | -10
11 | 6 | -10 | 0
12 | 6 | 0 | 10
13 | 6 | 10 | 20
14 | 6 | 20 | 30
(13 rows)
5 | 3 | -5184000000000 | -2592000000000
6 | 3 | -2592000000000 | 0
7 | 3 | 0 | 2592000000000
8 | 3 | -28512000000000 | -25920000000000
10 | 5 | -2610000000000 | -2609999000000
11 | 6 | -20 | -10
12 | 6 | -10 | 0
13 | 6 | 0 | 10
14 | 6 | 10 | 20
15 | 6 | 20 | 30
(14 rows)
-- Create a three-dimensional table
CREATE TABLE "3dim" (time timestamp, temp float, device text, location text);
@ -287,12 +289,12 @@ INSERT INTO "3dim" VALUES('2017-01-20T09:00:01', 22.5, 'blue', 'nyc');
INSERT INTO "3dim" VALUES('2017-01-20T09:00:21', 21.2, 'brown', 'sthlm');
INSERT INTO "3dim" VALUES('2017-01-20T09:00:47', 25.1, 'yellow', 'la');
--show the constraints on the three-dimensional chunk
SELECT * FROM test.show_constraints('_timescaledb_internal._hyper_7_15_chunk');
SELECT * FROM test.show_constraints('_timescaledb_internal._hyper_7_16_chunk');
Constraint | Type | Columns | Index | Expr
---------------+------+------------+-------+----------------------------------------------------------------------------------------------------------------------------------------------
constraint_15 | c | {time} | - | (("time" >= 'Sat Dec 24 16:00:00 2016'::timestamp without time zone) AND ("time" < 'Mon Jan 23 16:00:00 2017'::timestamp without time zone))
constraint_16 | c | {device} | - | (_timescaledb_internal.get_partition_hash(device) < 1073741823)
constraint_17 | c | {location} | - | (_timescaledb_internal.get_partition_hash(location) >= 1073741823)
constraint_16 | c | {time} | - | (("time" >= 'Sun Dec 25 00:00:00 2016'::timestamp without time zone) AND ("time" < 'Tue Jan 24 00:00:00 2017'::timestamp without time zone))
constraint_17 | c | {device} | - | (_timescaledb_internal.get_partition_hash(device) < 1073741823)
constraint_18 | c | {location} | - | (_timescaledb_internal.get_partition_hash(location) >= 1073741823)
(3 rows)
--queries should work in three dimensions
@ -411,3 +413,30 @@ NOTICE: Adding NOT NULL constraint to time column time (NULL time values not al
SET timezone=+1;
INSERT INTO "hyper_date" VALUES('2011-01-26', 22.5);
RESET timezone;
--make sure timestamp inserts work even when the timezone changes the
SET timezone = 'UTC';
CREATE TABLE "test_tz"(time timestamp PRIMARY KEY, temp float);
SELECT create_hypertable('"test_tz"', 'time', chunk_time_interval=> INTERVAL '1 day');
create_hypertable
-------------------
(1 row)
INSERT INTO "test_tz" VALUES('2017-09-22 10:00:00', 21.2);
INSERT INTO "test_tz" VALUES('2017-09-21 19:00:00', 21.2);
SET timezone = 'US/central';
INSERT INTO "test_tz" VALUES('2017-09-21 19:01:00', 21.2);
SELECT * FROM test.show_constraints('_timescaledb_internal._hyper_10_20_chunk');
Constraint | Type | Columns | Index | Expr
---------------+------+---------+-------+--------------------------------------------------------------------
constraint_23 | c | {time} | - | (("time" >= '01-26-2011'::date) AND ("time" < '02-25-2011'::date))
(1 row)
SELECT * FROM test_tz;
time | temp
--------------------------+------
Fri Sep 22 10:00:00 2017 | 21.2
Thu Sep 21 19:00:00 2017 | 21.2
Thu Sep 21 19:01:00 2017 | 21.2
(3 rows)

@ -27,13 +27,17 @@ SELECT * FROM test.show_subtables('reindex_test');
_timescaledb_internal._hyper_1_1_chunk
_timescaledb_internal._hyper_1_2_chunk
_timescaledb_internal._hyper_1_3_chunk
(3 rows)
_timescaledb_internal._hyper_1_4_chunk
_timescaledb_internal._hyper_1_5_chunk
(5 rows)
-- show reindexing
REINDEX (VERBOSE) TABLE reindex_test;
INFO: index "_hyper_1_1_chunk_reindex_test_time_unique_idx" was reindexed
INFO: index "_hyper_1_2_chunk_reindex_test_time_unique_idx" was reindexed
INFO: index "_hyper_1_3_chunk_reindex_test_time_unique_idx" was reindexed
INFO: index "_hyper_1_4_chunk_reindex_test_time_unique_idx" was reindexed
INFO: index "_hyper_1_5_chunk_reindex_test_time_unique_idx" was reindexed
\set ON_ERROR_STOP 0
-- this one currently doesn't recurse to chunks and instead gives an
-- error

@ -7,12 +7,12 @@ NOTICE: Adding NOT NULL constraint to time column time (NULL time values not al
(1 row)
INSERT INTO vacuum_test VALUES ('2017-01-20T09:00:01', 17.5),
('2017-01-21T09:00:01', 19.1),
('2017-04-20T09:00:01', 89.5),
('2017-04-21T09:00:01', 17.1),
('2017-06-20T09:00:01', 18.5),
('2017-06-21T09:00:01', 11.0);
INSERT INTO vacuum_test VALUES ('2017-01-20T16:00:01', 17.5),
('2017-01-21T16:00:01', 19.1),
('2017-04-20T16:00:01', 89.5),
('2017-04-21T16:00:01', 17.1),
('2017-06-20T16:00:01', 18.5),
('2017-06-21T16:00:01', 11.0);
-- no stats
SELECT tablename, attname, histogram_bounds, n_distinct FROM pg_stats
WHERE schemaname = '_timescaledb_internal' AND tablename LIKE '_hyper_%_chunk'
@ -43,11 +43,11 @@ WHERE schemaname = '_timescaledb_internal' AND tablename LIKE '_hyper_%_chunk'
ORDER BY schemaname, tablename;
tablename | attname | histogram_bounds | n_distinct
------------------+---------+---------------------------------------------------------+------------
_hyper_1_1_chunk | time | {"Fri Jan 20 09:00:01 2017","Sat Jan 21 09:00:01 2017"} | -1
_hyper_1_1_chunk | time | {"Fri Jan 20 16:00:01 2017","Sat Jan 21 16:00:01 2017"} | -1
_hyper_1_1_chunk | temp | {17.5,19.1} | -1
_hyper_1_2_chunk | time | {"Thu Apr 20 09:00:01 2017","Fri Apr 21 09:00:01 2017"} | -1
_hyper_1_2_chunk | time | {"Thu Apr 20 16:00:01 2017","Fri Apr 21 16:00:01 2017"} | -1
_hyper_1_2_chunk | temp | {17.1,89.5} | -1
_hyper_1_3_chunk | time | {"Tue Jun 20 09:00:01 2017","Wed Jun 21 09:00:01 2017"} | -1
_hyper_1_3_chunk | time | {"Tue Jun 20 16:00:01 2017","Wed Jun 21 16:00:01 2017"} | -1
_hyper_1_3_chunk | temp | {11,18.5} | -1
(6 rows)

@ -19,7 +19,10 @@ SELECT * FROM alter_test WHERE time > '2017-05-20T10:00:01';
-- rename column and change its type
ALTER TABLE alter_test RENAME COLUMN time TO time_us;
--converting timestamptz->timestamp should happen under UTC
SET timezone = 'UTC';
ALTER TABLE alter_test ALTER COLUMN time_us TYPE timestamp;
RESET timezone;
ALTER TABLE alter_test RENAME COLUMN color TO colorname;
\set ON_ERROR_STOP 0
-- Changing types on hash-partitioned columns is not safe for some

@ -64,7 +64,7 @@ INSERT INTO "3dim" VALUES('2017-01-20T09:00:21', 21.2, 'brown', 'sthlm');
INSERT INTO "3dim" VALUES('2017-01-20T09:00:47', 25.1, 'yellow', 'la');
--show the constraints on the three-dimensional chunk
SELECT * FROM test.show_constraints('_timescaledb_internal._hyper_7_15_chunk');
SELECT * FROM test.show_constraints('_timescaledb_internal._hyper_7_16_chunk');
--queries should work in three dimensions
SELECT * FROM "3dim";
@ -117,3 +117,15 @@ SELECT create_hypertable('"hyper_date"', 'time');
SET timezone=+1;
INSERT INTO "hyper_date" VALUES('2011-01-26', 22.5);
RESET timezone;
--make sure timestamp inserts work even when the timezone changes the
SET timezone = 'UTC';
CREATE TABLE "test_tz"(time timestamp PRIMARY KEY, temp float);
SELECT create_hypertable('"test_tz"', 'time', chunk_time_interval=> INTERVAL '1 day');
INSERT INTO "test_tz" VALUES('2017-09-22 10:00:00', 21.2);
INSERT INTO "test_tz" VALUES('2017-09-21 19:00:00', 21.2);
SET timezone = 'US/central';
INSERT INTO "test_tz" VALUES('2017-09-21 19:01:00', 21.2);
SELECT * FROM test.show_constraints('_timescaledb_internal._hyper_10_20_chunk');
SELECT * FROM test_tz;

@ -21,7 +21,6 @@ CREATE INDEX ON PUBLIC."two_Partitions" ("timeCustom" DESC NULLS LAST, device_id
SELECT * FROM create_hypertable('"public"."two_Partitions"'::regclass, 'timeCustom'::name, 'device_id'::name, associated_schema_name=>'_timescaledb_internal'::text, number_partitions => 2, chunk_time_interval=>_timescaledb_internal.interval_to_usec('1 month'));
INSERT INTO public."two_Partitions"("timeCustom", device_id, series_0, series_1, series_2) VALUES
(1257987600000000000, 'dev1', 1.5, 1, 1),
(1257987600000000000, 'dev1', 1.5, 2, 2),
@ -30,3 +29,20 @@ INSERT INTO public."two_Partitions"("timeCustom", device_id, series_0, series_1,
INSERT INTO "two_Partitions"("timeCustom", device_id, series_0, series_1, series_2) VALUES
(1257894000000000000, 'dev2', 1.5, 2, 6);
CREATE TABLE PUBLIC.hyper_timestamp (
time timestamp NOT NULL,
device_id TEXT NOT NULL,
value int NOT NULL
);
SELECT * FROM create_hypertable('hyper_timestamp'::regclass, 'time'::name, 'device_id'::name, number_partitions => 2,
chunk_time_interval=> _timescaledb_internal.interval_to_usec('1 minute'));
--some old versions use more slice_ids than newer ones. Make this uniform
ALTER SEQUENCE _timescaledb_catalog.dimension_slice_id_seq RESTART WITH 100;
INSERT INTO hyper_timestamp VALUES
('2017-01-20T09:00:01', 'dev1', 1),
('2017-01-20T08:00:01', 'dev2', 2),
('2016-01-20T09:00:01', 'dev1', 3);

@ -3,12 +3,12 @@ CREATE TABLE vacuum_test(time timestamp, temp float);
-- create hypertable with three chunks
SELECT create_hypertable('vacuum_test', 'time', chunk_time_interval => 2628000000000);
INSERT INTO vacuum_test VALUES ('2017-01-20T09:00:01', 17.5),
('2017-01-21T09:00:01', 19.1),
('2017-04-20T09:00:01', 89.5),
('2017-04-21T09:00:01', 17.1),
('2017-06-20T09:00:01', 18.5),
('2017-06-21T09:00:01', 11.0);
INSERT INTO vacuum_test VALUES ('2017-01-20T16:00:01', 17.5),
('2017-01-21T16:00:01', 19.1),
('2017-04-20T16:00:01', 89.5),
('2017-04-21T16:00:01', 17.1),
('2017-06-20T16:00:01', 18.5),
('2017-06-21T16:00:01', 11.0);
-- no stats
SELECT tablename, attname, histogram_bounds, n_distinct FROM pg_stats