timescaledb/tsl/test/sql/hypercore.sql
Erik Nordström 4128feb262 Add GUC for TAM arrow cache size
The arrow cache is used to cache decompressed arrow segments and is
mostly useful for index scans that access values across segments in
non-segment order.

The fixed cache size was set too small for index scans on bigger data
sets. Therefore, increase the default size and make it configurable via
a GUC.
2025-02-17 16:08:53 +01:00

471 lines
18 KiB
SQL

-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
\c :TEST_DBNAME :ROLE_SUPERUSER
show timescaledb.hypercore_indexam_whitelist;
-- We need this to be able to create an index for a chunk as a default
-- user.
grant all on schema _timescaledb_internal to :ROLE_DEFAULT_PERM_USER;
set role :ROLE_DEFAULT_PERM_USER;
SET timescaledb.hypercore_arrow_cache_max_entries = 4;
CREATE TABLE readings(
time timestamptz UNIQUE,
location int,
device int,
temp numeric(4,1),
humidity float,
jdata jsonb,
temp_far float8 generated always as (9 * temp / 5 + 32) stored
);
SELECT create_hypertable('readings', by_range('time', '1d'::interval));
-- Disable incremental sort to make tests stable
SET enable_incremental_sort = false;
SELECT setseed(1);
INSERT INTO readings (time, location, device, temp, humidity, jdata)
SELECT t, ceil(random()*10), ceil(random()*30), random()*40, random()*100, '{"a":1,"b":2}'::jsonb
FROM generate_series('2022-06-01'::timestamptz, '2022-07-01'::timestamptz, '5m') t;
ALTER TABLE readings SET (
timescaledb.compress,
timescaledb.compress_orderby = 'time',
timescaledb.compress_segmentby = 'device'
);
-- Set some test chunks as global variables
SELECT format('%I.%I', chunk_schema, chunk_name)::regclass AS chunk
FROM timescaledb_information.chunks
WHERE format('%I.%I', hypertable_schema, hypertable_name)::regclass = 'readings'::regclass
LIMIT 1 \gset
SELECT format('%I.%I', chunk_schema, chunk_name)::regclass AS chunk2
FROM timescaledb_information.chunks
WHERE format('%I.%I', hypertable_schema, hypertable_name)::regclass = 'readings'::regclass
ORDER BY chunk2 DESC
LIMIT 1 \gset
-- We do some basic checks that the compressed data is the same as the
-- uncompressed. In this case, we just count the rows for each device.
SELECT device, count(*) INTO orig FROM readings GROUP BY device;
-- Initially an index on time
SELECT * FROM test.show_indexes(:'chunk');
EXPLAIN (verbose, costs off)
SELECT count(*) FROM :chunk
WHERE time = '2022-06-01'::timestamptz;
SELECT count(*) FROM :chunk
WHERE time = '2022-06-01'::timestamptz;
SELECT count(*) FROM :chunk
WHERE location = 1;
-- We should be able to set the table access method for a chunk, which
-- will automatically compress the chunk.
ALTER TABLE :chunk SET ACCESS METHOD hypercore;
SET timescaledb.enable_transparent_decompression TO false;
vacuum analyze readings;
-- Show access method used on chunk
SELECT c.relname, a.amname FROM pg_class c
INNER JOIN pg_am a ON (c.relam = a.oid)
WHERE c.oid = :'chunk'::regclass;
-- This should show the chunk as compressed
SELECT chunk_name FROM chunk_compression_stats('readings') WHERE compression_status='Compressed';
-- Should give the same result as above
SELECT device, count(*) INTO comp FROM readings GROUP BY device;
-- Row counts for each device should match, so this should be empty.
SELECT device FROM orig JOIN comp USING (device) WHERE orig.count != comp.count;
EXPLAIN (verbose, costs off)
SELECT count(*) FROM :chunk
WHERE time = '2022-06-01'::timestamptz;
SELECT count(*) FROM :chunk
WHERE time = '2022-06-01'::timestamptz;
-- Create a new index on a compressed column
CREATE INDEX ON readings (location);
\set ON_ERROR_STOP 0
-- Check that we error out on unsupported index types
create index on readings using brin (device);
create index on readings using gin (jdata);
create index on readings using magicam (device);
-- Check that we error out when trying to build index concurrently.
create index concurrently on readings (device);
create index concurrently invalid_index on :chunk (device);
-- This will also create the index on the chunk (this is how it works,
-- see validate_index() in index.c for more information), but that
-- index is not valid, so we just drop it explicitly here to keep the
-- rest of the test clean.
drop index _timescaledb_internal.invalid_index;
\set ON_ERROR_STOP 1
-- Index added on location
SELECT * FROM test.show_indexes(:'chunk') ORDER BY "Index"::text;
-- Query by location should be an index scan
EXPLAIN (verbose, costs off)
SELECT count(*) FROM :chunk
WHERE location = 1;
-- Count by location should be the same as non-index scan before
-- compression above
SELECT count(*) FROM :chunk
WHERE location = 1;
SET enable_indexscan = false;
-- Columnar scan with qual on segmentby where filtering should be
-- turned into scankeys
EXPLAIN (costs off, timing off, summary off)
SELECT time, location, device, temp, humidity, jdata FROM :chunk WHERE device < 4 ORDER BY time, device LIMIT 5;
SELECT time, location, device, temp, humidity, jdata FROM :chunk WHERE device < 4 ORDER BY time, device LIMIT 5;
-- Show with indexscan
SET enable_indexscan = true;
SET enable_seqscan = false;
SET timescaledb.enable_columnarscan = false;
EXPLAIN (costs off, timing off, summary off)
SELECT time, location, device, temp, humidity, jdata FROM :chunk WHERE device < 4 ORDER BY time, device LIMIT 5;
SELECT time, location, device, temp, humidity, jdata FROM :chunk WHERE device < 4 ORDER BY time, device LIMIT 5;
SET enable_indexscan = false;
-- Compare the output to transparent decompression. Heap output is
-- shown further down.
SET timescaledb.enable_transparent_decompression TO 'hypercore';
EXPLAIN (costs off, timing off, summary off)
SELECT time, location, device, temp, humidity, jdata FROM :chunk WHERE device < 4 ORDER BY time, device LIMIT 5;
SELECT time, location, device, temp, humidity, jdata FROM :chunk WHERE device < 4 ORDER BY time, device LIMIT 5;
SET timescaledb.enable_transparent_decompression TO false;
-- Qual on compressed column with index
SET timescaledb.enable_columnarscan = true;
EXPLAIN (costs off, timing off, summary off)
SELECT time, location, device, temp, humidity, jdata FROM :chunk WHERE location < 4 ORDER BY time, device LIMIT 5;
SELECT time, location, device, temp, humidity, jdata FROM :chunk WHERE location < 4 ORDER BY time, device LIMIT 5;
-- With index scan
SET enable_indexscan = true;
SET timescaledb.enable_columnarscan = false;
EXPLAIN (costs off, timing off, summary off)
SELECT time, location, device, temp, humidity, jdata FROM :chunk WHERE location < 4 ORDER BY time, device LIMIT 5;
SELECT time, location, device, temp, humidity, jdata FROM :chunk WHERE location < 4 ORDER BY time, device LIMIT 5;
SET enable_indexscan = false;
SET enable_seqscan = true;
SET timescaledb.enable_columnarscan = true;
-- With transparent decompression
SET timescaledb.enable_transparent_decompression TO 'hypercore';
SELECT time, location, device, temp, humidity, jdata FROM :chunk WHERE location < 4 ORDER BY time, device LIMIT 5;
SET timescaledb.enable_transparent_decompression TO false;
-- Ordering on compressed column that has index
SET enable_indexscan = true;
EXPLAIN (costs off, timing off, summary off)
SELECT time, location, device, temp, humidity, jdata FROM :chunk ORDER BY location ASC LIMIT 5;
SELECT time, location, device, temp, humidity, jdata FROM :chunk ORDER BY location ASC LIMIT 5;
-- Show with transparent decompression
SET timescaledb.enable_transparent_decompression TO 'hypercore';
SELECT time, location, device, temp, humidity, jdata FROM :chunk ORDER BY location ASC LIMIT 5;
SET timescaledb.enable_transparent_decompression TO false;
-- We should be able to change it back to heap.
-- Compression metadata should be cleaned up
SELECT count(*) FROM _timescaledb_catalog.compression_chunk_size ccs
INNER JOIN _timescaledb_catalog.chunk c ON (c.id = ccs.chunk_id)
WHERE format('%I.%I', c.schema_name, c.table_name)::regclass = :'chunk'::regclass;
SELECT device, count(*) INTO num_rows_before FROM :chunk GROUP BY device;
SELECT format('%I.%I', c2.schema_name, c2.table_name)::regclass AS cchunk
FROM _timescaledb_catalog.chunk c1
INNER JOIN _timescaledb_catalog.chunk c2
ON (c1.compressed_chunk_id = c2.id);
ALTER TABLE :chunk SET ACCESS METHOD heap;
SET timescaledb.enable_transparent_decompression TO 'hypercore';
-- The compressed chunk should no longer exist
SELECT format('%I.%I', c2.schema_name, c2.table_name)::regclass AS cchunk
FROM _timescaledb_catalog.chunk c1
INNER JOIN _timescaledb_catalog.chunk c2
ON (c1.compressed_chunk_id = c2.id);
SELECT device, count(*) INTO num_rows_after FROM :chunk GROUP BY device;
SELECT device, num_rows_after.count AS after,
num_rows_before.count AS before,
(num_rows_after.count - num_rows_before.count) AS diff
FROM num_rows_after JOIN num_rows_before USING (device)
WHERE num_rows_after.count != num_rows_before.count;
SELECT count(*) FROM _timescaledb_catalog.compression_chunk_size ccs
INNER JOIN _timescaledb_catalog.chunk c ON (c.id = ccs.chunk_id)
WHERE format('%I.%I', c.schema_name, c.table_name)::regclass = :'chunk'::regclass;
-- Check that the generated columns contain the correct data before
-- compression.
select temp, temp_far from :chunk where temp_far != 9 * temp / 5 + 32;
SELECT compress_chunk(:'chunk');
-- A new compressed chunk should be created
SELECT format('%I.%I', c2.schema_name, c2.table_name)::regclass AS cchunk
FROM _timescaledb_catalog.chunk c1
INNER JOIN _timescaledb_catalog.chunk c2
ON (c1.compressed_chunk_id = c2.id);
-- Check that the generated columns contain the correct data after
-- compression.
select temp, temp_far from :chunk where temp_far != 9 * temp / 5 + 32;
-- Show same output as first query above but for heap
SELECT time, location, device, temp, humidity, jdata FROM :chunk WHERE device < 4 ORDER BY time, device LIMIT 5;
-- Show access method used on chunk
SELECT c.relname, a.amname FROM pg_class c
INNER JOIN pg_am a ON (c.relam = a.oid)
WHERE c.oid = :'chunk'::regclass;
-- Should give the same result as above
SELECT device, count(*) INTO decomp FROM readings GROUP BY device;
-- Row counts for each device should match, except for the chunk we did inserts on.
SELECT device, orig.count AS orig_count, decomp.count AS decomp_count, (decomp.count - orig.count) AS diff
FROM orig JOIN decomp USING (device) WHERE orig.count != decomp.count;
-- Convert back to hypercore to check that metadata was cleaned up
-- from last time this table used hypercore
ALTER TABLE :chunk SET ACCESS METHOD hypercore;
SET timescaledb.enable_transparent_decompression TO false;
-- Get the chunk's corresponding compressed chunk
SELECT format('%I.%I', c2.schema_name, c2.table_name)::regclass AS cchunk
FROM _timescaledb_catalog.chunk c1
INNER JOIN _timescaledb_catalog.chunk c2
ON (c1.compressed_chunk_id = c2.id) LIMIT 1 \gset
SELECT range_start, range_end
FROM timescaledb_information.chunks
WHERE format('%I.%I', chunk_schema, chunk_name)::regclass = :'chunk'::regclass;
-- Drop the generated column to make tests below easier.
alter table readings drop column temp_far;
--
-- ADD COLUMN
--
-- Check that adding a column works across recompression. First save
-- some sample data from the table that will be used as a comparison
-- to ensure adding a column doesn't mess up the data or column
-- mapping.
CREATE TEMP TABLE sample_readings AS
SELECT * FROM readings
WHERE time BETWEEN '2022-06-01 00:00:00' AND '2022-06-01 00:10:00'::timestamptz;
SELECT count(*) FROM sample_readings;
-- Now add the column
ALTER TABLE readings ADD COLUMN pressure float;
-- Check that the sample data remains the same in the modified
-- table. Should return the same count as above if everything is the
-- same.
SELECT count(*) FROM readings r
JOIN sample_readings s USING (time, location, device, temp, humidity);
-- insert some new (non-compressed) data into the chunk in order to
-- test recompression
INSERT INTO :chunk (time, location, device, temp, humidity, pressure)
SELECT t, ceil(random()*10), ceil(random()*30), random()*40, random()*100, random() * 30
FROM generate_series('2022-06-01 00:06:15'::timestamptz, '2022-06-01 17:00', '5m') t;
-- Check that new data is returned
SELECT * FROM :chunk WHERE time = '2022-06-01 00:06:15'::timestamptz;
-- Want to check that index scans work after recompression, so the
-- query should be an index scan.
EXPLAIN (verbose, costs off)
SELECT * FROM :chunk WHERE time = '2022-06-01 00:06:15'::timestamptz;
-- Show counts in compressed chunk prior to recompression
SELECT sum(_ts_meta_count) FROM :cchunk;
CALL recompress_chunk(:'chunk');
-- Data should be returned even after recompress, but now from the
-- compressed relation. Still using index scan.
EXPLAIN (verbose, costs off)
SELECT * FROM :chunk WHERE time = '2022-06-01 00:06:15'::timestamptz;
SELECT * FROM :chunk WHERE time = '2022-06-01 00:06:15'::timestamptz;
-- Drop column and add again, with a default this time
ALTER TABLE readings DROP COLUMN pressure;
EXPLAIN (verbose, costs off)
SELECT * FROM :chunk WHERE time = '2022-06-01 00:06:15'::timestamptz;
SELECT * FROM :chunk WHERE time = '2022-06-01 00:06:15'::timestamptz;
ALTER TABLE readings ADD COLUMN pressure float default 1.0;
EXPLAIN (verbose, costs off)
SELECT * FROM :chunk WHERE time = '2022-06-01 00:06:15'::timestamptz;
SELECT * FROM :chunk WHERE time = '2022-06-01 00:06:15'::timestamptz;
\set ON_ERROR_STOP 0
-- Can't recompress twice without new non-compressed rows
CALL recompress_chunk(:'chunk');
\set ON_ERROR_STOP 1
-- Compressed count after recompression
SELECT sum(_ts_meta_count) FROM :cchunk;
-- A count on the chunk should return the same count
SELECT count(*) FROM :chunk;
drop table readings;
---------------------------------------------
-- Test recompression via compress_chunk() --
---------------------------------------------
show timescaledb.enable_transparent_decompression;
create table recompress (time timestamptz, value int);
select create_hypertable('recompress', 'time', create_default_indexes => false);
insert into recompress values ('2024-01-01 01:00', 1), ('2024-01-01 02:00', 2);
select format('%I.%I', chunk_schema, chunk_name)::regclass as unique_chunk
from timescaledb_information.chunks
where format('%I.%I', hypertable_schema, hypertable_name)::regclass = 'recompress'::regclass
order by unique_chunk asc
limit 1 \gset
alter table recompress set (timescaledb.compress_orderby='time');
alter table :unique_chunk set access method hypercore;
-- Should already be compressed
select compress_chunk(:'unique_chunk');
-- Insert something to compress
insert into recompress values ('2024-01-01 03:00', 3);
select compress_chunk(:'unique_chunk');
-- Make sure we see the data after recompression and everything is
-- compressed
select _timescaledb_debug.is_compressed_tid(ctid), * from recompress order by time;
-- Add a time index to test recompression with index scan. Index scans
-- during compression is actually disabled for Hypercore TAM since the
-- index covers also compressed data, so this is only a check that the
-- GUC can be set without negative consequences.
create index on recompress (time);
set timescaledb.enable_compression_indexscan=true;
-- Insert another value to compress
insert into recompress values ('2024-01-02 04:00', 4);
select compress_chunk(:'unique_chunk');
select _timescaledb_debug.is_compressed_tid(ctid), * from recompress order by time;
-- Test using delete instead of truncate when compressing
set timescaledb.enable_delete_after_compression=true;
-- Insert another value to compress
insert into recompress values ('2024-01-02 05:00', 5);
select compress_chunk(:'unique_chunk');
select _timescaledb_debug.is_compressed_tid(ctid), * from recompress order by time;
-- Add a segmentby key to test segmentwise recompression
-- Insert another value to compress that goes into same segment
alter table :unique_chunk set access method heap;
alter table recompress set (timescaledb.compress_orderby='time', timescaledb.compress_segmentby='value');
alter table :unique_chunk set access method hypercore;
insert into recompress values ('2024-01-02 06:00', 5);
select compress_chunk(:'unique_chunk');
select _timescaledb_debug.is_compressed_tid(ctid), * from recompress order by time;
--------------------------------------
-- C-native tests for hypercore TAM --
--------------------------------------
-- Test rescan functionality and ability to return only non-compressed data
create table rescan (time timestamptz, device int, temp float);
select create_hypertable('rescan', 'time');
alter table rescan set (timescaledb.compress_orderby='time', timescaledb.compress_segmentby='device');
insert into rescan values ('2024-11-01 01:00', 1, 1.0), ('2024-11-01 02:00', 1, 2.0), ('2024-11-01 03:00', 1, 3.0), ('2024-11-01 06:00', 1, 4.0), ('2024-11-01 05:00', 1, 5.0);
select format('%I.%I', chunk_schema, chunk_name)::regclass as rescan_chunk
from timescaledb_information.chunks
where format('%I.%I', hypertable_schema, hypertable_name)::regclass = 'rescan'::regclass
order by rescan_chunk asc
limit 1 \gset
select compress_chunk(:'rescan_chunk', hypercore_use_access_method => true);
select relname, amname
from show_chunks('rescan') as chunk
join pg_class on (pg_class.oid = chunk)
join pg_am on (relam = pg_am.oid);
insert into rescan values ('2024-11-02 01:00', 2, 1.0), ('2024-11-02 02:00', 2, 2.0), ('2024-11-02 03:00', 1, 3.0), ('2024-11-02 05:00', 2, 4.0);
reset role;
create function test_hypercore(relid regclass)
returns void as :TSL_MODULE_PATHNAME, 'ts_test_hypercore' language c;
set role :ROLE_DEFAULT_PERM_USER;
select test_hypercore(:'rescan_chunk');
-- Simple test that loading the hypercore handler works on a fresh
-- backend.
--
-- Since TSL library is loaded late (after a query has been executed),
-- this can generate a license error if an attempt is made to use a
-- crossmodule function inside a query, and the hypertable handler is
-- used through GetTableAmRoutine().
--
-- To test this, we create a hypercore table, start a fresh
-- connection, and run a query on it.
\c :TEST_DBNAME :ROLE_SUPERUSER
create table crossmodule_test(
time timestamptz unique,
location int
);
select hypertable_id
from create_hypertable('crossmodule_test', by_range('time', '1d'::interval)) \gset
select setseed(1);
insert into crossmodule_test select t, ceil(random()*10)
from generate_series('2022-06-01'::timestamptz, '2022-06-10'::timestamptz, '1h') t;
alter table crossmodule_test
set access method hypercore,
set (timescaledb.compress_orderby = 'time');
\c :TEST_DBNAME :ROLE_SUPERUSER
select count(*) from crossmodule_test;
-- Verify that vacuum with fail without the correct license.
alter system set timescaledb.license to 'apache';
select pg_reload_conf();
\c :TEST_DBNAME :ROLE_SUPERUSER
\set ON_ERROR_STOP 0
vacuum crossmodule_test;
\set ON_ERROR_STOP 1
alter system reset timescaledb.license;
select pg_reload_conf();