timescaledb/tsl/test/expected/compression_conflicts.out
Ante Kresic a49fdbcffb Reduce decompression during constraint checking
When inserting into a compressed chunk with constraints present,
we need to decompress relevant tuples in order to do speculative
inserting. Usually we used segment by column values to limit the
amount of compressed segments to decompress. This change expands
on that by also using segment metadata to further filter
compressed rows that need to be decompressed.
2023-04-20 12:17:12 +02:00

496 lines
17 KiB
Plaintext

-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
-- test conflict handling on compressed hypertables with unique constraints
-- In the tests below, count(*) FROM ONLY :CHUNK is the probe used to see how
-- many rows currently sit in the uncompressed chunk.
-- test 1: single column primary key
CREATE TABLE comp_conflicts_1(time timestamptz, device text, value float, PRIMARY KEY(time));
SELECT table_name FROM create_hypertable('comp_conflicts_1','time');
table_name
------------------
comp_conflicts_1
(1 row)
ALTER TABLE comp_conflicts_1 SET (timescaledb.compress);
-- implicitly create chunk
INSERT INTO comp_conflicts_1 VALUES ('2020-01-01','d1',0.1);
-- sanity check behaviour without compression
-- should fail due to multiple entries with same time value
\set ON_ERROR_STOP 0
INSERT INTO comp_conflicts_1 VALUES ('2020-01-01','d1',0.1);
ERROR: duplicate key value violates unique constraint "1_1_comp_conflicts_1_pkey"
INSERT INTO comp_conflicts_1 VALUES
('2020-01-01','d1',0.1),
('2020-01-01','d2',0.2),
('2020-01-01','d3',0.3);
ERROR: duplicate key value violates unique constraint "1_1_comp_conflicts_1_pkey"
\set ON_ERROR_STOP 1
-- should succeed since there are no conflicts in the values
-- (rolled back, so only the single row inserted above gets compressed)
BEGIN;
INSERT INTO comp_conflicts_1 VALUES
('2020-01-01 0:00:01','d1',0.1),
('2020-01-01 0:00:02','d2',0.2),
('2020-01-01 0:00:03','d3',0.3);
ROLLBACK;
-- compress the chunk and keep its name in the psql variable CHUNK
SELECT compress_chunk(c) AS "CHUNK" FROM show_chunks('comp_conflicts_1') c
\gset
-- after compression no data should be in uncompressed chunk
SELECT count(*) FROM ONLY :CHUNK;
count
-------
0
(1 row)
-- repeat tests on an actual compressed chunk
-- should fail due to multiple entries with same time value
\set ON_ERROR_STOP 0
INSERT INTO comp_conflicts_1 VALUES ('2020-01-01','d1',0.1);
ERROR: duplicate key value violates unique constraint "1_1_comp_conflicts_1_pkey"
INSERT INTO comp_conflicts_1 VALUES
('2020-01-01','d1',0.1),
('2020-01-01','d2',0.2),
('2020-01-01','d3',0.3);
ERROR: duplicate key value violates unique constraint "1_1_comp_conflicts_1_pkey"
\set ON_ERROR_STOP 1
-- no data should be in uncompressed chunk since the inserts failed and their transaction rolled back
SELECT count(*) FROM ONLY :CHUNK;
count
-------
0
(1 row)
-- should succeed since there are no conflicts in the values
BEGIN;
INSERT INTO comp_conflicts_1 VALUES
('2020-01-01 0:00:01','d1',0.1),
('2020-01-01 0:00:02','d2',0.2),
('2020-01-01 0:00:03','d3',0.3);
-- no data should have moved into uncompressed chunk for conflict check
-- since we used metadata optimization to guarantee uniqueness
-- (count = 3 is just the 3 rows inserted in this transaction)
SELECT count(*) FROM ONLY :CHUNK;
count
-------
3
(1 row)
ROLLBACK;
-- no data should be in uncompressed chunk since we did rollback
SELECT count(*) FROM ONLY :CHUNK;
count
-------
0
(1 row)
-- should fail since it conflicts with existing row
\set ON_ERROR_STOP 0
INSERT INTO comp_conflicts_1 VALUES ('2020-01-01','d1',0.1);
ERROR: duplicate key value violates unique constraint "1_1_comp_conflicts_1_pkey"
\set ON_ERROR_STOP 1
-- same conflicting row, but ON CONFLICT DO NOTHING means no error is raised
INSERT INTO comp_conflicts_1 VALUES ('2020-01-01','d1',0.1) ON CONFLICT DO NOTHING;
-- data should have moved into uncompressed chunk for conflict check
-- (count = 1: presumably the pre-existing row, since the INSERT itself did nothing)
SELECT count(*) FROM ONLY :CHUNK;
count
-------
1
(1 row)
-- test 2: multi-column unique without segmentby
CREATE TABLE comp_conflicts_2(time timestamptz NOT NULL, device text, value float, UNIQUE(time, device));
SELECT table_name FROM create_hypertable('comp_conflicts_2','time');
table_name
------------------
comp_conflicts_2
(1 row)
-- compression is enabled without segmentby/orderby settings, hence the WARNING about "device"
ALTER TABLE comp_conflicts_2 SET (timescaledb.compress);
WARNING: column "device" should be used for segmenting or ordering
-- implicitly create chunk
INSERT INTO comp_conflicts_2 VALUES ('2020-01-01','d1',0.1);
INSERT INTO comp_conflicts_2 VALUES ('2020-01-01','d2',0.2);
-- compress the chunk and keep its name in the psql variable CHUNK
SELECT compress_chunk(c) AS "CHUNK" FROM show_chunks('comp_conflicts_2') c
\gset
-- after compression no data should be in uncompressed chunk
SELECT count(*) FROM ONLY :CHUNK;
count
-------
0
(1 row)
-- should fail due to multiple entries with same time, device value
\set ON_ERROR_STOP 0
INSERT INTO comp_conflicts_2 VALUES ('2020-01-01','d1',0.1);
ERROR: duplicate key value violates unique constraint "3_2_comp_conflicts_2_time_device_key"
INSERT INTO comp_conflicts_2 VALUES ('2020-01-01','d2',0.2);
ERROR: duplicate key value violates unique constraint "3_2_comp_conflicts_2_time_device_key"
INSERT INTO comp_conflicts_2 VALUES
('2020-01-01','d1',0.1),
('2020-01-01','d2',0.2),
('2020-01-01','d3',0.3);
ERROR: duplicate key value violates unique constraint "3_2_comp_conflicts_2_time_device_key"
\set ON_ERROR_STOP 1
-- no data should be in uncompressed chunk since the inserts failed and their transaction rolled back
SELECT count(*) FROM ONLY :CHUNK;
count
-------
0
(1 row)
-- should succeed since there are no conflicts in the values
BEGIN;
INSERT INTO comp_conflicts_2 VALUES
('2020-01-01 0:00:01','d1',0.1),
('2020-01-01 0:00:01','d2',0.2),
('2020-01-01 0:00:01','d3',0.3);
-- no data should have moved into uncompressed chunk for conflict check
-- since we used metadata optimization to guarantee uniqueness
-- (count = 3 is just the 3 rows inserted in this transaction)
SELECT count(*) FROM ONLY :CHUNK;
count
-------
3
(1 row)
ROLLBACK;
-- no data should be in uncompressed chunk since we did rollback
SELECT count(*) FROM ONLY :CHUNK;
count
-------
0
(1 row)
-- should fail since it conflicts with existing row
\set ON_ERROR_STOP 0
INSERT INTO comp_conflicts_2 VALUES ('2020-01-01','d1',0.1);
ERROR: duplicate key value violates unique constraint "3_2_comp_conflicts_2_time_device_key"
\set ON_ERROR_STOP 1
-- same conflicting row, but ON CONFLICT DO NOTHING means no error is raised
INSERT INTO comp_conflicts_2 VALUES ('2020-01-01','d1',0.1) ON CONFLICT DO NOTHING;
-- data should have moved into uncompressed chunk for conflict check
-- (count = 2: presumably both rows compressed earlier, d1 and d2, as there is
-- no segmentby column to separate them -- confirm)
SELECT count(*) FROM ONLY :CHUNK;
count
-------
2
(1 row)
-- test 3: multi-column unique constraint with segmentby
-- (the table declares UNIQUE(time, device), not a PRIMARY KEY, so device may be NULL)
CREATE TABLE comp_conflicts_3(time timestamptz NOT NULL, device text, value float, UNIQUE(time, device));
SELECT table_name FROM create_hypertable('comp_conflicts_3','time');
table_name
------------------
comp_conflicts_3
(1 row)
ALTER TABLE comp_conflicts_3 SET (timescaledb.compress,timescaledb.compress_segmentby='device');
-- implicitly create chunk
-- one row each for device d1, d2 and NULL
INSERT INTO comp_conflicts_3 VALUES ('2020-01-01','d1',0.1);
INSERT INTO comp_conflicts_3 VALUES ('2020-01-01','d2',0.2);
INSERT INTO comp_conflicts_3 VALUES ('2020-01-01',NULL,0.3);
-- compress the chunk and keep its name in the psql variable CHUNK
SELECT compress_chunk(c) AS "CHUNK" FROM show_chunks('comp_conflicts_3') c
\gset
-- after compression no data should be in uncompressed chunk
SELECT count(*) FROM ONLY :CHUNK;
count
-------
0
(1 row)
-- should fail due to multiple entries with same time, device value
\set ON_ERROR_STOP 0
INSERT INTO comp_conflicts_3 VALUES ('2020-01-01','d1',0.1);
ERROR: duplicate key value violates unique constraint "5_3_comp_conflicts_3_time_device_key"
INSERT INTO comp_conflicts_3 VALUES ('2020-01-01','d2',0.2);
ERROR: duplicate key value violates unique constraint "5_3_comp_conflicts_3_time_device_key"
INSERT INTO comp_conflicts_3 VALUES
('2020-01-01','d1',0.1),
('2020-01-01','d2',0.2),
('2020-01-01','d3',0.3);
ERROR: duplicate key value violates unique constraint "5_3_comp_conflicts_3_time_device_key"
\set ON_ERROR_STOP 1
-- no data should be in uncompressed chunk since the inserts failed and their transaction rolled back
SELECT count(*) FROM ONLY :CHUNK;
count
-------
0
(1 row)
-- NULL is considered distinct from other NULL so even though the next INSERT looks
-- like a conflict it is not a constraint violation (PG15 makes NULL behaviour configurable)
BEGIN;
INSERT INTO comp_conflicts_3 VALUES ('2020-01-01',NULL,0.3);
-- data for 1 segment (count = 1 value + 1 inserted) should be present in uncompressed chunk
-- we treat NULLs as NOT DISTINCT and let the constraint configuration handle the check
SELECT count(*) FROM ONLY :CHUNK;
count
-------
2
(1 row)
ROLLBACK;
-- should succeed since there are no conflicts in the values
BEGIN;
INSERT INTO comp_conflicts_3 VALUES ('2020-01-01 0:00:01','d1',0.1);
-- no data should have moved into uncompressed chunk for conflict check
-- since we used metadata optimization to guarantee uniqueness
-- (count = 1 is just the row inserted in this transaction)
SELECT count(*) FROM ONLY :CHUNK;
count
-------
1
(1 row)
ROLLBACK;
BEGIN;
INSERT INTO comp_conflicts_3 VALUES
('2020-01-01 0:00:01','d1',0.1),
('2020-01-01 0:00:01','d2',0.2),
('2020-01-01 0:00:01','d3',0.3);
-- no data should have moved into uncompressed chunk for conflict check
-- since we used metadata optimization to guarantee uniqueness
-- (count = 3 is just the 3 rows inserted in this transaction)
SELECT count(*) FROM ONLY :CHUNK;
count
-------
3
(1 row)
ROLLBACK;
BEGIN;
INSERT INTO comp_conflicts_3 VALUES ('2020-01-01 0:00:01','d3',0.2);
-- count = 1 since no data should have moved into uncompressed chunk for conflict check: d3 is a new segment
SELECT count(*) FROM ONLY :CHUNK;
count
-------
1
(1 row)
ROLLBACK;
-- no data should be in uncompressed chunk since we did rollback
SELECT count(*) FROM ONLY :CHUNK;
count
-------
0
(1 row)
-- should fail since it conflicts with existing row
\set ON_ERROR_STOP 0
INSERT INTO comp_conflicts_3 VALUES ('2020-01-01','d1',0.1);
ERROR: duplicate key value violates unique constraint "5_3_comp_conflicts_3_time_device_key"
\set ON_ERROR_STOP 1
-- same conflicting row, but ON CONFLICT DO NOTHING means no error is raised
INSERT INTO comp_conflicts_3 VALUES ('2020-01-01','d1',0.1) ON CONFLICT DO NOTHING;
-- data should have moved into uncompressed chunk for conflict check
-- (count = 1: presumably only the d1 segment's single row was decompressed)
SELECT count(*) FROM ONLY :CHUNK;
count
-------
1
(1 row)
-- test 4: multi-column unique constraint with multi-column orderby compression
-- (the table declares UNIQUE(time, device), not a PRIMARY KEY; no segmentby is configured)
CREATE TABLE comp_conflicts_4(time timestamptz NOT NULL, device text, value float, UNIQUE(time, device));
SELECT table_name FROM create_hypertable('comp_conflicts_4','time');
table_name
------------------
comp_conflicts_4
(1 row)
ALTER TABLE comp_conflicts_4 SET (timescaledb.compress,timescaledb.compress_orderby='time,device');
-- implicitly create chunk
-- one d1 row per second over a two hour range, plus one d2 row and one NULL-device row
INSERT INTO comp_conflicts_4 SELECT generate_series('2020-01-01'::timestamp, '2020-01-01 2:00:00', '1s'), 'd1',0.1;
INSERT INTO comp_conflicts_4 VALUES ('2020-01-01','d2',0.2);
INSERT INTO comp_conflicts_4 VALUES ('2020-01-01',NULL,0.3);
-- compress the chunk and keep its name in the psql variable CHUNK
SELECT compress_chunk(c) AS "CHUNK" FROM show_chunks('comp_conflicts_4') c
\gset
-- after compression no data should be in uncompressed chunk
SELECT count(*) FROM ONLY :CHUNK;
count
-------
0
(1 row)
-- NULL is considered distinct from other NULL so even though the next INSERT looks
-- like a conflict it is not a constraint violation (PG15 makes NULL behaviour configurable)
BEGIN;
INSERT INTO comp_conflicts_4 VALUES ('2020-01-01',NULL,0.3);
-- data for 1 segment (count = 1000 values + 1 inserted) should be present in uncompressed chunk
-- we treat NULLs as NOT DISTINCT and let the constraint configuration handle the check
-- NOTE(review): with no segmentby column, "segment" here presumably means one
-- compressed batch of 1000 rows -- confirm
SELECT count(*) FROM ONLY :CHUNK;
count
-------
1001
(1 row)
ROLLBACK;
-- should succeed since there are no conflicts in the values
BEGIN;
INSERT INTO comp_conflicts_4 VALUES ('2020-01-01 2:00:01','d1',0.1);
-- no data should have moved into uncompressed chunk for conflict check
-- since we used metadata optimization to guarantee uniqueness
-- (count = 1 is just the row inserted in this transaction)
SELECT count(*) FROM ONLY :CHUNK;
count
-------
1
(1 row)
ROLLBACK;
BEGIN;
INSERT INTO comp_conflicts_4 VALUES
('2020-01-01 2:00:01','d1',0.1),
('2020-01-01 2:00:01','d2',0.2),
('2020-01-01 2:00:01','d3',0.3);
-- no data should have moved into uncompressed chunk for conflict check
-- since we used metadata optimization to guarantee uniqueness
-- (count = 3 is just the 3 rows inserted in this transaction)
SELECT count(*) FROM ONLY :CHUNK;
count
-------
3
(1 row)
ROLLBACK;
BEGIN;
INSERT INTO comp_conflicts_4 VALUES ('2020-01-01 0:00:01','d3',0.2);
-- count = 1 since no data should have moved into uncompressed chunk for conflict check
-- NOTE(review): the original wording said "d3 is new segment", but this table has
-- no segmentby column; presumably the orderby metadata on device rules out d3 -- confirm
SELECT count(*) FROM ONLY :CHUNK;
count
-------
1
(1 row)
ROLLBACK;
-- no data should be in uncompressed chunk since we did rollback
SELECT count(*) FROM ONLY :CHUNK;
count
-------
0
(1 row)
-- should fail since it conflicts with existing row
\set ON_ERROR_STOP 0
INSERT INTO comp_conflicts_4 VALUES ('2020-01-01','d1',0.1);
ERROR: duplicate key value violates unique constraint "7_4_comp_conflicts_4_time_device_key"
\set ON_ERROR_STOP 1
-- data should not have moved into uncompressed chunk for conflict check
-- (the failed INSERT above was rolled back)
SELECT count(*) FROM ONLY :CHUNK;
count
-------
0
(1 row)
-- two conflicting rows at different timestamps; ON CONFLICT DO NOTHING means no error is raised
INSERT INTO comp_conflicts_4 VALUES ('2020-01-01 0:00:01','d1',0.1) ON CONFLICT DO NOTHING;
INSERT INTO comp_conflicts_4 VALUES ('2020-01-01 0:30:00','d1',0.1) ON CONFLICT DO NOTHING;
-- data should have moved into uncompressed chunk for conflict check
-- 2 segments (count = 2000)
-- (presumably the two timestamps fall into different compressed batches -- confirm)
SELECT count(*) FROM ONLY :CHUNK;
count
-------
2000
(1 row)
-- Helper view: one row per chunk with its hypertable's schema/name, the chunk's
-- own schema/name and status, and the schema/name of the chunk referenced by
-- compressed_chunk_id. The LEFT JOIN keeps chunks that have no such reference
-- (their compressed_chunk_* columns are NULL).
CREATE OR REPLACE VIEW compressed_chunk_info_view AS
SELECT
h.schema_name AS hypertable_schema,
h.table_name AS hypertable_name,
c.schema_name as chunk_schema,
c.table_name as chunk_name,
c.status as chunk_status,
comp.schema_name as compressed_chunk_schema,
comp.table_name as compressed_chunk_name
FROM
_timescaledb_catalog.hypertable h JOIN
_timescaledb_catalog.chunk c ON h.id = c.hypertable_id
LEFT JOIN _timescaledb_catalog.chunk comp
ON comp.id = c.compressed_chunk_id;
-- Fixture for the ON CONFLICT DO UPDATE tests: a hypertable whose uniqueness is
-- enforced by a separate unique index rather than a table constraint.
CREATE TABLE compressed_ht (
time TIMESTAMP WITH TIME ZONE NOT NULL,
sensor_id INTEGER NOT NULL,
cpu double precision null,
temperature double precision null,
name varchar(100) default 'this is a default string value'
);
-- unique index on (time, sensor_id); used as the conflict target by the upserts on this table
CREATE UNIQUE INDEX sensor_id_time_idx on compressed_ht(time, sensor_id);
SELECT * FROM create_hypertable('compressed_ht', 'time',
chunk_time_interval => INTERVAL '2 months');
WARNING: column type "character varying" used for "name" does not follow best practices
hypertable_id | schema_name | table_name | created
---------------+-------------+---------------+---------
9 | public | compressed_ht | t
(1 row)
-- create chunk 1
INSERT INTO compressed_ht VALUES ('2017-12-28 01:10:28.192199+05:30', '1', 0.876, 4.123, 'chunk 1');
INSERT INTO compressed_ht VALUES ('2017-12-24 01:10:28.192199+05:30', '1', 0.876, 4.123, 'chunk 1');
-- create chunk 2
INSERT INTO compressed_ht VALUES ('2017-03-28 01:10:28.192199+05:30', '2', 0.876, 4.123, 'chunk 2');
INSERT INTO compressed_ht VALUES ('2017-03-12 01:10:28.192199+05:30', '3', 0.876, 4.123, 'chunk 2');
-- create chunk 3
INSERT INTO compressed_ht VALUES ('2022-01-18 01:10:28.192199+05:30', '4', 0.876, 4.123, 'chunk 3');
INSERT INTO compressed_ht VALUES ('2022-01-08 01:10:28.192199+05:30', '4', 0.876, 4.123, 'chunk 3');
INSERT INTO compressed_ht VALUES ('2022-01-11 01:10:28.192199+05:30', '5', 0.876, 4.123, 'chunk 3');
INSERT INTO compressed_ht VALUES ('2022-01-24 01:10:28.192199+05:30', '6', 0.876, 4.123, 'chunk 3');
-- enable compression, segmenting by sensor_id
ALTER TABLE compressed_ht SET (
timescaledb.compress,
timescaledb.compress_segmentby = 'sensor_id'
);
-- compress all three chunks
SELECT COMPRESS_CHUNK(SHOW_CHUNKS('compressed_ht'));
compress_chunk
-----------------------------------------
_timescaledb_internal._hyper_9_9_chunk
_timescaledb_internal._hyper_9_10_chunk
_timescaledb_internal._hyper_9_11_chunk
(3 rows)
-- check compression status
-- (all three chunks report chunk_status = 1 right after compression)
SELECT chunk_status,
chunk_name as "CHUNK_NAME"
FROM compressed_chunk_info_view
WHERE hypertable_name = 'compressed_ht' ORDER BY chunk_name;
chunk_status | CHUNK_NAME
--------------+-------------------
1 | _hyper_9_10_chunk
1 | _hyper_9_11_chunk
1 | _hyper_9_9_chunk
(3 rows)
-- should report 0 rows
SELECT COUNT(*) FROM compressed_ht WHERE name = 'ON CONFLICT DO UPDATE';
count
-------
0
(1 row)
-- upsert a row whose (time, sensor_id) matches a row already inserted for chunk 1,
-- so the DO UPDATE branch rewrites its name
INSERT INTO compressed_ht VALUES ('2017-12-28 01:10:28.192199+05:30', '1', 0.876, 4.123, 'new insert row')
ON conflict(sensor_id, time)
DO UPDATE SET sensor_id = excluded.sensor_id , name = 'ON CONFLICT DO UPDATE';
-- should report 1 row
SELECT COUNT(*) FROM compressed_ht WHERE name = 'ON CONFLICT DO UPDATE';
count
-------
1
(1 row)
-- check that chunk 1 compression status is set to partial
-- (chunk_status of _hyper_9_9_chunk changes from 1 to 9)
SELECT chunk_status,
chunk_name as "CHUNK_NAME"
FROM compressed_chunk_info_view
WHERE hypertable_name = 'compressed_ht' ORDER BY chunk_name;
chunk_status | CHUNK_NAME
--------------+-------------------
1 | _hyper_9_10_chunk
1 | _hyper_9_11_chunk
9 | _hyper_9_9_chunk
(3 rows)
-- same upsert against a row already inserted for chunk 3; RETURNING * shows the updated row
INSERT INTO compressed_ht VALUES ('2022-01-24 01:10:28.192199+05:30', '6', 0.876, 4.123, 'new insert row')
ON conflict(sensor_id, time)
DO UPDATE SET sensor_id = excluded.sensor_id , name = 'ON CONFLICT DO UPDATE' RETURNING *;
time | sensor_id | cpu | temperature | name
-------------------------------------+-----------+-------+-------------+-----------------------
Sun Jan 23 11:40:28.192199 2022 PST | 6 | 0.876 | 4.123 | ON CONFLICT DO UPDATE
(1 row)
-- check that chunks 1 and 3 compression status is set to partial
-- (chunk_status of _hyper_9_11_chunk is now 9 as well; _hyper_9_10_chunk stays at 1)
SELECT chunk_status,
chunk_name as "CHUNK_NAME"
FROM compressed_chunk_info_view
WHERE hypertable_name = 'compressed_ht' ORDER BY chunk_name;
chunk_status | CHUNK_NAME
--------------+-------------------
1 | _hyper_9_10_chunk
9 | _hyper_9_11_chunk
9 | _hyper_9_9_chunk
(3 rows)