Mirror of https://github.com/timescale/timescaledb.git
This should improve the throughput somewhat. This commit does several things:

* Simplify the loop condition when decompressing a compressed batch by using the count metadata column.
* Split out a separate function that decompresses the entire compressed batch and saves the decompressed tuple slots into RowDecompressor.
* Use the bulk table insert function for inserting the decompressed rows, which reduces WAL activity. If there are indexes on the uncompressed chunk, update each index for the entire batch at a time, to reduce the load on the shared buffers cache. Before, all indexes were updated for one row, then for the next, and so on.
* Add a test for memory leaks during (de)compression.
* Update the compression_update_delete test to use INFO messages plus a debug GUC instead of DEBUG messages, which are flaky.

This gives a 10%-30% speedup on tsbench for decompress_chunk and various compressed DML queries. This is very far from the performance we had in 2.10, but still a nice improvement.
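For context, here is a minimal sketch of the kinds of statements that exercise the batch decompression path and therefore benefit from the bulk inserts and per-index batch updates described above (the metrics hypertable is hypothetical, not taken from this commit):

-- Decompress every chunk of a hypothetical compressed hypertable.
SELECT decompress_chunk(c) FROM show_chunks('metrics') c;
-- Compressed DML: affected batches are decompressed before the update is applied.
UPDATE metrics SET value = value + 1 WHERE device = 7;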
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
\c :TEST_DBNAME :ROLE_CLUSTER_SUPERUSER;
CREATE TABLE ht_metrics_compressed(time timestamptz, device int, value float, tag text);
ALTER TABLE ht_metrics_compressed SET (autovacuum_enabled = false);
SELECT create_hypertable('ht_metrics_compressed','time',create_default_indexes:=false);
ALTER TABLE ht_metrics_compressed SET (timescaledb.compress, timescaledb.compress_segmentby='device', timescaledb.compress_orderby='time');
-- Helper function: float -> pseudorandom float in [0, 1].
CREATE OR REPLACE FUNCTION mix(x float4) RETURNS float4 AS $$ SELECT ((hashfloat4(x) / (pow(2., 31) - 1) + 1) / 2)::float4 $$ LANGUAGE SQL;
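-- Quick illustration (not part of the original test): mix() is deterministic,
-- so the same input always yields the same pseudorandom value in [0, 1].
SELECT x, mix(x::float4) FROM generate_series(1, 3) x;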
INSERT INTO ht_metrics_compressed
SELECT
    '2020-01-08'::timestamptz + interval '1 second' * (x + 0.1 * mix(device + x * 10)),
    device,
    100 * mix(device) * sin(x / 3600)
        + 100 * mix(device + 1) * sin(x / (3600 * 24))
        + 100 * mix(device + 2) * sin(x / (3600 * 24 * 7))
        + mix(device + x * 10 + 1),
    format('this-is-a-long-tag-#%s', x % 29)
FROM generate_series(1, 3600 * 24 * 356, 100) x, generate_series(1,2) device;
-- compress it all
SELECT count(compress_chunk(c, true)) FROM show_chunks('ht_metrics_compressed') c;
select count(*) from ht_metrics_compressed;
-- Helper function that returns the amount of memory currently allocated in a
-- given memory context.
create or replace function ts_debug_allocated_bytes(text = 'PortalContext') returns bigint
as :MODULE_PATHNAME, 'ts_debug_allocated_bytes'
language c strict volatile;
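-- Example call (illustrative): with no argument the function reports the bytes
-- currently allocated in 'PortalContext', the default declared above.
SELECT ts_debug_allocated_bytes();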
-- Check that decompression doesn't leak memory. Record memory usage after each
-- compressed chunk, and use linear regression to tell if memory usage grows.
with log as materialized (
select rank() over (order by c) n, ts_debug_allocated_bytes() b, decompress_chunk(c, true)
from show_chunks('ht_metrics_compressed') c order by c)
, regression as (select regr_slope(b, n) slope, regr_intercept(b, n) intercept from log)
select * from log
where (select slope / intercept::float > 0.01 from regression)
;
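-- Toy illustration of the check above (not part of the test): for a flat series
-- regr_slope() is ~0, so slope / intercept stays well below the 0.01 threshold;
-- a series that grows with every chunk would exceed it.
select regr_slope(b, n) slope, regr_intercept(b, n) intercept
from (values (1, 1000.), (2, 1000.), (3, 1000.), (4, 1000.)) flat(n, b);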
-- Same as above but for compression.
with log as materialized (
select rank() over (order by c) n, ts_debug_allocated_bytes() b, compress_chunk(c, true)
from show_chunks('ht_metrics_compressed') c order by c)
, regression as (select regr_slope(b, n) slope, regr_intercept(b, n) intercept from log)
select * from log
where (select slope / intercept::float > 0.01 from regression)
;