mirror of
https://github.com/timescale/timescaledb.git
synced 2025-05-18 03:23:37 +08:00
Add GUC option to enable or disable segmentwise recompression. If disabled, then a full recompression is done instead when recompression is attempted through `compress_chunk`. If `recompress_chunk_segmentwise` is used when GUC is disabled, then an error is thrown. Closes #7381.
311 lines
14 KiB
SQL
311 lines
14 KiB
SQL
-- This file and its contents are licensed under the Timescale License.
|
|
-- Please see the included NOTICE for copyright information and
|
|
-- LICENSE-TIMESCALE for a copy of the license.
|
|
|
|
\c :TEST_DBNAME :ROLE_SUPERUSER
|
|
|
|
-- Helper view: one row per chunk, joined to its owning hypertable and,
-- when the chunk has one, to the chunk that stores its compressed data.
CREATE OR REPLACE VIEW compressed_chunk_info_view AS
SELECT
    ht.schema_name AS hypertable_schema,
    ht.table_name AS hypertable_name,
    ch.schema_name AS chunk_schema,
    ch.table_name AS chunk_name,
    ch.status AS chunk_status,
    cc.schema_name AS compressed_chunk_schema,
    cc.table_name AS compressed_chunk_name,
    ch.id AS chunk_id
FROM _timescaledb_catalog.hypertable AS ht
INNER JOIN _timescaledb_catalog.chunk AS ch
    ON ht.id = ch.hypertable_id
-- LEFT JOIN: chunks without a compressed counterpart are kept, with the
-- compressed_chunk_* columns returned as NULL.
LEFT JOIN _timescaledb_catalog.chunk AS cc
    ON cc.id = ch.compressed_chunk_id;
|
|
|
|
-- Helper view: row counts recorded in compression_chunk_size, paired with
-- the schema-qualified name and id of the chunk they belong to.
CREATE OR REPLACE VIEW compression_rowcnt_view AS
SELECT
    sz.numrows_pre_compression,
    sz.numrows_post_compression,
    (info.chunk_schema || '.' || info.chunk_name) AS chunk_name,
    info.chunk_id AS chunk_id
FROM _timescaledb_catalog.compression_chunk_size AS sz
INNER JOIN compressed_chunk_info_view AS info
    ON sz.chunk_id = info.chunk_id;
|
|
|
|
------------- only one segment exists and only one segment affected ---------
-- Scenario: all rows share the same (a, c) segmentby values. The chunk is
-- compressed, one more row with the same (a, c) is inserted, and the chunk is
-- then recompressed with recompress_chunk_segmentwise.
|
|
|
|
create table mytab_oneseg (time timestamptz not null, a int, b int, c int);
|
|
|
|
SELECT create_hypertable('mytab_oneseg', 'time', chunk_time_interval => interval '1 day');
|
|
|
|
insert into mytab_oneseg values
|
|
('2023-01-01 21:56:20.048355+02'::timestamptz, 2, NULL, 2),
|
|
('2023-01-01 21:56:10.048355+02'::timestamptz, 2, NULL, 2); --same chunk same segment
|
|
|
|
alter table mytab_oneseg set (timescaledb.compress, timescaledb.compress_segmentby = 'a, c');
|
|
|
|
-- store the chunk name in the psql variable chunk_to_compress_1
select show_chunks as chunk_to_compress_1 from show_chunks('mytab_oneseg') limit 1 \gset
|
|
|
|
select compress_chunk(:'chunk_to_compress_1');
|
|
|
|
-- store the schema-qualified compressed chunk name in compressed_chunk_name_1
SELECT compressed_chunk_schema || '.' || compressed_chunk_name as compressed_chunk_name_1
|
|
from compressed_chunk_info_view where hypertable_name = 'mytab_oneseg' \gset
|
|
|
|
-- ctid is selected so the physical row location can be compared with the
-- output of the same query after recompression
SELECT ctid, * FROM :compressed_chunk_name_1;
|
|
|
|
-- after compressing the chunk
|
|
select numrows_pre_compression, numrows_post_compression from _timescaledb_catalog.compression_chunk_size;
|
|
|
|
insert into mytab_oneseg values ('2023-01-01 19:56:20.048355+02'::timestamptz, 2, NULL, 2);
|
|
-- after inserting new row in compressed chunk
|
|
select numrows_pre_compression, numrows_post_compression from _timescaledb_catalog.compression_chunk_size;
|
|
|
|
select _timescaledb_functions.recompress_chunk_segmentwise(:'chunk_to_compress_1');
|
|
|
|
-- check the ctid of the rows in the recompressed chunk to verify that we've written new data
|
|
SELECT ctid, * FROM :compressed_chunk_name_1;
|
|
-- after recompressing chunk
|
|
select numrows_pre_compression, numrows_post_compression from _timescaledb_catalog.compression_chunk_size;
|
|
|
|
|
|
-- add one more row, then look up the chunk id so its catalog status can be
-- overwritten directly
insert into mytab_oneseg values ('2023-01-01 19:56:20.048355+02'::timestamptz, 2, NULL, 2);
|
|
select chunk_id
|
|
from compressed_chunk_info_view where hypertable_name = 'mytab_oneseg' \gset
|
|
-- check we are handling unexpected chunk status (partially compressed but not compressed)
|
|
update _timescaledb_catalog.chunk set status = 8 where id = :chunk_id;
|
|
-- ON_ERROR_STOP is switched off around the call so the script keeps going
-- if it raises; presumably an error is the expected outcome here
\set ON_ERROR_STOP 0
|
|
select _timescaledb_functions.recompress_chunk_segmentwise(:'chunk_to_compress_1');
|
|
\set ON_ERROR_STOP 1
|
|
|
|
|
|
---------------- test1: one affected segment, one unaffected --------------
|
|
-- the unaffected segment will still be recompressed; in a future PR we want to avoid doing this
-- Scenario: the chunk holds two (a, c) segments, (2, 2) and (3, 3). After
-- compression a new row is inserted for (2, 2) only, then the chunk is
-- recompressed segmentwise.
|
|
create table mytab_twoseg (time timestamptz not null, a int, b int, c int);
|
|
|
|
SELECT create_hypertable('mytab_twoseg', 'time', chunk_time_interval => interval '1 day');
|
|
|
|
insert into mytab_twoseg values
|
|
('2023-01-01 21:56:20.048355+02'::timestamptz, 2, NULL, 2),
|
|
('2023-01-01 21:56:20.048355+02'::timestamptz, 3, NULL, 3), --same chunk diff segment
|
|
('2023-01-01 21:57:20.048355+02'::timestamptz, 3, NULL, 3);
|
|
|
|
alter table mytab_twoseg set (timescaledb.compress, timescaledb.compress_segmentby = 'a, c');
|
|
|
|
-- store the chunk name in the psql variable chunk_to_compress_2
select show_chunks as chunk_to_compress_2 from show_chunks('mytab_twoseg') limit 1 \gset
|
|
|
|
select compress_chunk(:'chunk_to_compress_2');
|
|
|
|
-- stats are no longer updated during segmentwise recompression
|
|
select * from compression_rowcnt_view where chunk_name = :'chunk_to_compress_2';
|
|
|
|
-- new row for the (2, 2) segment, inserted after the chunk was compressed
insert into mytab_twoseg values ('2023-01-01 19:56:20.048355+02'::timestamptz, 2, NULL, 2);
|
|
|
|
-- chunk contents before recompression; the same query is repeated afterwards
select * from :chunk_to_compress_2 ORDER BY a, c, time DESC;
|
|
|
|
-- store the schema-qualified compressed chunk name in compressed_chunk_name_2
SELECT compressed_chunk_schema || '.' || compressed_chunk_name as compressed_chunk_name_2
|
|
from compressed_chunk_info_view where hypertable_name = 'mytab_twoseg' \gset
|
|
|
|
select ctid, * from :compressed_chunk_name_2;
|
|
|
|
select _timescaledb_functions.recompress_chunk_segmentwise(:'chunk_to_compress_2');
|
|
|
|
-- verify that metadata count looks good
|
|
select ctid, * from :compressed_chunk_name_2;
|
|
|
|
-- verify that initial data is returned as expected
|
|
select * from :chunk_to_compress_2 ORDER BY a, c, time DESC;
|
|
|
|
-- stats are no longer updated during segmentwise recompression
|
|
select * from compression_rowcnt_view where chunk_name = :'chunk_to_compress_2';
|
|
|
|
----------------- more than one batch per segment ----------------------
|
|
-- test that metadata sequence number is correct
-- Scenario: one row every 30 seconds across one day is generated for each of
-- the segment values 0, 1 and 2 (a and c carry the same value). After
-- compression one extra row is inserted for segment (0, 0) and the chunk is
-- recompressed segmentwise.
|
|
create table mytab2(time timestamptz not null, a int, b int, c int);
|
|
|
|
select create_hypertable('mytab2', 'time', chunk_time_interval => interval '1 week');
|
|
|
|
insert into mytab2 (time, a, c) select t,s,s from
|
|
generate_series('2023-01-01 00:00:00+00'::timestamptz, '2023-01-01 00:00:00+00'::timestamptz + interval '1 day', interval '30 sec') t cross join generate_series(0,2, 1) s;
|
|
|
|
alter table mytab2 set (timescaledb.compress, timescaledb.compress_segmentby = 'a, c');
|
|
|
|
select compress_chunk(c) from show_chunks('mytab2') c;
|
|
|
|
-- store the schema-qualified compressed chunk name (this overwrites the
-- psql variable compressed_chunk_name_2 set by the previous scenario)
SELECT compressed_chunk_schema || '.' || compressed_chunk_name as compressed_chunk_name_2
|
|
from compressed_chunk_info_view where hypertable_name = 'mytab2'
|
|
and compressed_chunk_name is not null limit 1 \gset
|
|
|
|
insert into mytab2 values ('2023-01-01 00:00:02+00'::timestamptz, 0, NULL, 0); -- goes into the uncompressed chunk
|
|
|
|
-- NOTE(review): \gset needs exactly one result row, so this relies on
-- mytab2 having a single chunk
select show_chunks('mytab2') as chunk_to_compress_2 \gset
|
|
|
|
select ctid, * from :compressed_chunk_name_2;
|
|
-- after compression
|
|
-- stats are no longer updated during segmentwise recompression
|
|
select * from compression_rowcnt_view where chunk_name = :'chunk_to_compress_2';
|
|
|
|
select _timescaledb_functions.recompress_chunk_segmentwise(:'chunk_to_compress_2');
|
|
|
|
-- compressed chunk contents after recompression, for comparison with the
-- same query above
select ctid, * from :compressed_chunk_name_2;
|
|
-- stats are no longer updated during segmentwise recompression
|
|
select * from compression_rowcnt_view where chunk_name = :'chunk_to_compress_2';
|
|
|
|
-- failing test from compression_ddl
-- Scenario: compress one chunk, add a nullable column and a NOT NULL column
-- with a default, insert a new row into the compressed chunk's time range,
-- and call compress_chunk on that chunk again.
|
|
CREATE TABLE test_defaults(time timestamptz NOT NULL, device_id int);
|
|
SELECT create_hypertable('test_defaults','time');
|
|
|
|
ALTER TABLE test_defaults SET (timescaledb.compress,timescaledb.compress_segmentby='device_id');
|
|
|
|
-- create 2 chunks
|
|
INSERT INTO test_defaults SELECT '2000-01-01', 1;
|
|
INSERT INTO test_defaults SELECT '2001-01-01', 1;
|
|
|
|
-- compress only the first chunk (ordered by name) and keep the value returned
-- by compress_chunk in the psql variable compressed_chunk
SELECT compress_chunk(show_chunks) AS "compressed_chunk" FROM show_chunks('test_defaults') ORDER BY show_chunks::text LIMIT 1 \gset
|
|
|
|
-- stats are no longer updated during segmentwise recompression
|
|
select * from compression_rowcnt_view where chunk_name = :'compressed_chunk';
|
|
|
|
SELECT * FROM test_defaults ORDER BY 1;
|
|
|
|
-- c1 has no default; c2 is NOT NULL with default 42
ALTER TABLE test_defaults ADD COLUMN c1 int;
|
|
ALTER TABLE test_defaults ADD COLUMN c2 int NOT NULL DEFAULT 42;
|
|
SELECT * FROM test_defaults ORDER BY 1,2;
|
|
|
|
-- same timestamp as the first row above, different device_id
INSERT INTO test_defaults SELECT '2000-01-01', 2;
|
|
SELECT * FROM test_defaults ORDER BY 1,2;
|
|
|
|
-- compress_chunk is called again on the already compressed chunk
SELECT compress_chunk(:'compressed_chunk');
|
|
SELECT * FROM test_defaults ORDER BY 1,2;
|
|
-- stats are no longer updated during segmentwise recompression
|
|
select * from compression_rowcnt_view where chunk_name = :'compressed_chunk';
|
|
|
|
-- test prepared statements
|
|
-- PREPARE a SELECT before recompress and perform it after recompress
|
|
CREATE TABLE mytab_prep (time timestamptz, a int, b int, c int);
|
|
SELECT create_hypertable('mytab_prep', 'time');
|
|
INSERT INTO mytab_prep VALUES ('2023-01-01'::timestamptz, 2, NULL, 2),
|
|
('2023-01-01'::timestamptz, 2, NULL, 2);
|
|
|
|
alter table mytab_prep set (timescaledb.compress, timescaledb.compress_segmentby = 'a, c');
|
|
|
|
-- p1 is prepared while the chunk is still uncompressed
PREPARE p1 AS
|
|
SELECT * FROM mytab_prep ORDER BY a, c, time DESC;
|
|
|
|
-- store the chunk name in the psql variable chunk_to_compress_prep
select show_chunks as chunk_to_compress_prep from show_chunks('mytab_prep') limit 1 \gset
|
|
SELECT compress_chunk(:'chunk_to_compress_prep'); -- the output of the prepared plan would change before and after compress
|
|
INSERT INTO mytab_prep VALUES ('2023-01-01'::timestamptz, 2, 3, 2);
|
|
|
|
-- plan should be invalidated to return results from the uncompressed chunk also
|
|
EXPLAIN (COSTS OFF) EXECUTE p1;
|
|
EXECUTE p1;
|
|
|
|
-- check plan again after recompression
|
|
SELECT compress_chunk(:'chunk_to_compress_prep');
|
|
EXPLAIN (COSTS OFF) EXECUTE p1;
|
|
EXECUTE p1;
|
|
|
|
-- verify segmentwise recompression when index exists, decompress + compress otherwise
|
|
-- we verify by checking the compressed chunk after recompression in both cases.
|
|
-- in the first case, it is the same before and after
|
|
-- in the second case, a new compressed chunk is created
|
|
CREATE TABLE mytab (time timestamptz, a int, b int, c int);
|
|
SELECT create_hypertable('mytab', 'time');
|
|
INSERT INTO mytab VALUES ('2023-01-01'::timestamptz, 2, NULL, 2),
|
|
('2023-01-01'::timestamptz, 2, NULL, 2);
|
|
|
|
-- store the chunk name in the psql variable chunk_to_compress_mytab
select show_chunks as chunk_to_compress_mytab from show_chunks('mytab') limit 1 \gset
|
|
-- index exists, recompression should happen segment by segment so expect a debug message
|
|
alter table mytab set (timescaledb.compress, timescaledb.compress_segmentby = 'a, c');
|
|
select compress_chunk(show_chunks('mytab'));
|
|
-- remember the compressed chunk name before recompressing
select compressed_chunk_name as compressed_chunk_name_before_recompression from compressed_chunk_info_view where hypertable_name = 'mytab' \gset
|
|
INSERT INTO mytab VALUES ('2023-01-01'::timestamptz, 2, 3, 2);
|
|
-- segmentwise recompression should not create a new compressed chunk, so verify compressed chunk is the same after recompression
|
|
SELECT compress_chunk(:'chunk_to_compress_mytab');
|
|
select compressed_chunk_name as compressed_chunk_name_after_recompression from compressed_chunk_info_view where hypertable_name = 'mytab' \gset
|
|
-- print both names side by side for comparison
select :'compressed_chunk_name_before_recompression' as before_segmentwise_recompression, :'compressed_chunk_name_after_recompression' as after_segmentwise_recompression;
|
|
|
|
-- hourly rows across one day for a = 1..10, all with b = 3 and c = 2
INSERT INTO mytab
|
|
SELECT t, a, 3, 2
|
|
FROM generate_series('2023-01-01'::timestamptz, '2023-01-02'::timestamptz, '1 hour'::interval) t
|
|
CROSS JOIN generate_series(1, 10, 1) a;
|
|
-- recompress will insert newly inserted tuples into compressed chunk along with inserting into the compressed chunk index
|
|
SELECT compress_chunk(:'chunk_to_compress_mytab');
|
|
-- make sure we are hitting the index and that the index contains the tuples
|
|
SET enable_seqscan TO off;
|
|
EXPLAIN (COSTS OFF) SELECT count(*) FROM mytab where a = 2;
|
|
SELECT count(*) FROM mytab where a = 2;
|
|
RESET enable_seqscan;
|
|
|
|
-- second case: decompress, then turn compression off and on again without
-- compress_segmentby, and compress the chunk again
SELECT decompress_chunk(show_chunks('mytab'));
|
|
alter table mytab set (timescaledb.compress = false);
|
|
alter table mytab set (timescaledb.compress);
|
|
select compress_chunk(show_chunks('mytab'));
|
|
select compressed_chunk_name as compressed_chunk_name_before_recompression from compressed_chunk_info_view where hypertable_name = 'mytab' \gset
|
|
INSERT INTO mytab VALUES ('2023-01-01'::timestamptz, 2, 3, 2);
|
|
-- expect to see a different compressed chunk after recompressing now as the operation is decompress + compress
|
|
SELECT compress_chunk(:'chunk_to_compress_mytab');
|
|
select compressed_chunk_name as compressed_chunk_name_after_recompression from compressed_chunk_info_view where hypertable_name = 'mytab' \gset
|
|
select :'compressed_chunk_name_before_recompression' as before_recompression, :'compressed_chunk_name_after_recompression' as after_recompression;
|
|
|
|
-- check behavior with NULL values in segmentby columns
-- Scenario: a single segmentby column (a). The chunk is compressed with
-- a = 1 and a = 2, then rows with a = NULL are inserted and the chunk is
-- recompressed twice through compress_chunk.
|
|
-- start_time is a psql variable reused by this scenario and the next one
select '2022-01-01 09:00:00+00' as start_time \gset
|
|
create table nullseg_one (time timestamptz, a int, b int);
|
|
|
|
select create_hypertable('nullseg_one', 'time');
|
|
|
|
insert into nullseg_one values (:'start_time', 1, 1), (:'start_time', 1, 2), (:'start_time', 2,2), (:'start_time', 2,3);
|
|
|
|
alter table nullseg_one set (timescaledb.compress, timescaledb.compress_segmentby= 'a');
|
|
select compress_chunk(show_chunks('nullseg_one'));
|
|
|
|
-- first row with a NULL segmentby value, inserted after compression
insert into nullseg_one values (:'start_time', NULL, 4);
|
|
|
|
-- store the chunk name and the schema-qualified compressed chunk name
select show_chunks as chunk_to_compress from show_chunks('nullseg_one') limit 1 \gset
|
|
select compressed_chunk_schema || '.' || compressed_chunk_name as compressed_chunk_name from compressed_chunk_info_view where hypertable_name = 'nullseg_one' \gset
|
|
|
|
SELECT compress_chunk(:'chunk_to_compress');
|
|
|
|
select * from :compressed_chunk_name;
|
|
-- insert again, check both index insertion works and NULL values properly handled
|
|
insert into nullseg_one values (:'start_time', NULL, 4);
|
|
SELECT compress_chunk(:'chunk_to_compress');
|
|
select * from :compressed_chunk_name;
|
|
|
|
-- test multiple NULL segmentby columns
-- Scenario: two segmentby columns (a, c). Rows with c = NULL are inserted
-- after the initial compression and the chunk is recompressed twice through
-- compress_chunk. Relies on the psql variable start_time being set already.
|
|
create table nullseg_many (time timestamptz, a int, b int, c int);
|
|
|
|
select create_hypertable('nullseg_many', 'time');
|
|
|
|
insert into nullseg_many values (:'start_time', 1, 1, 1), (:'start_time', 1, 2, 2), (:'start_time', 2,2, 2), (:'start_time', 2,3, 3), (:'start_time', 2, NULL, 3);
|
|
|
|
alter table nullseg_many set (timescaledb.compress, timescaledb.compress_segmentby= 'a, c');
|
|
select compress_chunk(show_chunks('nullseg_many'));
|
|
-- new segment (1, NULL)
|
|
insert into nullseg_many values (:'start_time', 1, 4, NULL);
|
|
|
|
-- store the chunk name and the schema-qualified compressed chunk name
-- (both psql variables are overwritten here)
select show_chunks as chunk_to_compress from show_chunks('nullseg_many') limit 1 \gset
|
|
select compressed_chunk_schema || '.' || compressed_chunk_name as compressed_chunk_name from compressed_chunk_info_view where hypertable_name = 'nullseg_many' \gset
|
|
|
|
SELECT compress_chunk(:'chunk_to_compress');
|
|
|
|
select * from :compressed_chunk_name;
|
|
-- insert again, check both index insertion works and NULL values properly handled
|
|
-- should match existing segment (1, NULL)
|
|
insert into nullseg_many values (:'start_time', 1, NULL, NULL);
|
|
SELECT compress_chunk(:'chunk_to_compress');
|
|
select * from :compressed_chunk_name;
|
|
|
|
--- Test behaviour when enable_segmentwise_recompression GUC is OFF
-- Scenario: compress a chunk, insert another row into it, switch the GUC off,
-- then call recompress_chunk_segmentwise and compress_chunk on that chunk.
|
|
CREATE TABLE guc_test(time timestamptz not null, a int, b int, c int);
|
|
SELECT create_hypertable('guc_test', by_range('time', INTERVAL '1 day'));
|
|
|
|
ALTER TABLE guc_test set (timescaledb.compress, timescaledb.compress_segmentby = 'a, b');
|
|
INSERT INTO guc_test VALUES ('2024-10-30 14:04:00.501519-06'::timestamptz, 1, 1, 1);
|
|
-- store the chunk name in the psql variable chunk_to_compress
SELECT show_chunks as chunk_to_compress FROM show_chunks('guc_test') LIMIT 1 \gset
|
|
SELECT compress_chunk(:'chunk_to_compress');
|
|
|
|
-- second row, ten minutes later, with the same segmentby values (a = 1, b = 1)
INSERT INTO guc_test VALUES ('2024-10-30 14:14:00.501519-06'::timestamptz, 1, 1, 2);
|
|
-- When GUC is OFF, recompress function should throw an error
|
|
SET timescaledb.enable_segmentwise_recompression TO OFF;
|
|
-- keep the script running past the expected error
\set ON_ERROR_STOP 0
|
|
SELECT _timescaledb_functions.recompress_chunk_segmentwise(:'chunk_to_compress');
|
|
\set ON_ERROR_STOP 1
|
|
-- When GUC is OFF, entire chunk should be fully uncompressed and compressed instead
|
|
SELECT compress_chunk(:'chunk_to_compress');
|