Mirror of https://github.com/timescale/timescaledb.git (synced 2025-05-17 11:03:36 +08:00)
Don't analyze the uncompressed chunk before compressing it
We don't use the statistics on the uncompressed chunk anyway. This significantly improves the compression performance.
This commit is contained in:
parent 1cd77b3a24
commit 4e7edf370a
@ -150,7 +150,6 @@ static int create_segment_filter_scankey(RowDecompressor *decompressor,
|
||||
char *segment_filter_col_name, StrategyNumber strategy,
|
||||
ScanKeyData *scankeys, int num_scankeys,
|
||||
Bitmapset **null_columns, Datum value, bool isnull);
|
||||
static void run_analyze_on_chunk(Oid chunk_relid);
|
||||
static void create_per_compressed_column(RowDecompressor *decompressor);
|
||||
|
||||
/********************
|
||||
@ -425,7 +424,6 @@ compress_chunk(Oid in_table, Oid out_table, int insert_options)
|
||||
row_compressor_process_ordered_slot(&row_compressor, slot, mycid);
|
||||
}
|
||||
|
||||
run_analyze_on_chunk(in_rel->rd_id);
|
||||
if (row_compressor.rows_compressed_into_current_value > 0)
|
||||
row_compressor_flush(&row_compressor, mycid, true);
|
||||
|
||||
@ -530,12 +528,6 @@ compress_chunk_sort_relation(CompressionSettings *settings, Relation in_rel)
|
||||
|
||||
table_endscan(scan);
|
||||
|
||||
/* Perform an analyze on the chunk to get up-to-date stats before compressing.
|
||||
* We do it at this point because we've just read out the entire chunk into
|
||||
* tuplesort, so its pages are likely to be cached and we can save on I/O.
|
||||
*/
|
||||
run_analyze_on_chunk(in_rel->rd_id);
|
||||
|
||||
ExecDropSingleTupleTableSlot(slot);
|
||||
|
||||
tuplesort_performsort(tuplesortstate);
|
||||
@ -591,26 +583,8 @@ compress_chunk_populate_sort_info_for_column(CompressionSettings *settings, Oid
|
||||
ReleaseSysCache(tp);
|
||||
}
|
||||
|
||||
static void
|
||||
run_analyze_on_chunk(Oid chunk_relid)
|
||||
{
|
||||
VacuumRelation vr = {
|
||||
.type = T_VacuumRelation,
|
||||
.relation = NULL,
|
||||
.oid = chunk_relid,
|
||||
.va_cols = NIL,
|
||||
};
|
||||
VacuumStmt vs = {
|
||||
.type = T_VacuumStmt,
|
||||
.rels = list_make1(&vr),
|
||||
.is_vacuumcmd = false,
|
||||
.options = NIL,
|
||||
};
|
||||
|
||||
ExecVacuum(NULL, &vs, true);
|
||||
}
|
||||
|
||||
/* Find segment by index for setting the correct sequence number if
|
||||
/*
|
||||
* Find segment by index for setting the correct sequence number if
|
||||
* we are trying to roll up chunks while compressing
|
||||
*/
|
||||
static Oid
|
||||
|
@ -23,6 +23,7 @@ SELECT compress_chunk(c) FROM show_chunks('testtable') c;
|
||||
_timescaledb_internal._hyper_1_2_chunk
|
||||
(2 rows)
|
||||
|
||||
ANALYZE testtable;
|
||||
-- Pushdown aggregation to the chunk level
|
||||
SELECT count(*), sum(v0), sum(v1), sum(v2), sum(v3) FROM testtable WHERE time >= '2000-01-01 00:00:00+0' AND time <= '2000-02-01 00:00:00+0';
|
||||
count | sum | sum | sum | sum
|
||||
@ -61,6 +62,7 @@ SELECT count(*), sum(v0), sum(v1), sum(v2), sum(v3) FROM testtable WHERE time >=
|
||||
INSERT INTO testtable(time,device_id,v0,v1,v2,v3)
|
||||
SELECT time, device_id, device_id+1, device_id + 2, device_id + 0.5, NULL
|
||||
FROM generate_series('2000-01-01 0:00:00+0'::timestamptz,'2000-01-10 23:55:00+0','1day') gtime(time), generate_series(1,5,1) gdevice(device_id);
|
||||
ANALYZE testtable;
|
||||
-- Pushdown aggregation to the chunk level
|
||||
SELECT count(*), sum(v0), sum(v1), sum(v2), sum(v3) FROM testtable WHERE time >= '2000-01-01 00:00:00+0' AND time <= '2000-02-01 00:00:00+0';
|
||||
count | sum | sum | sum | sum
|
||||
@ -177,10 +179,11 @@ SELECT count(*), sum(v0), sum(v1), sum(v2), sum(v3) FROM testtable WHERE time >=
|
||||
Filter: ((compress_hyper_2_4_chunk._ts_meta_max_1 >= ('2000-01-09 00:00:00+0'::cstring)::timestamp with time zone) AND (compress_hyper_2_4_chunk._ts_meta_min_1 <= ('2000-02-01 00:00:00+0'::cstring)::timestamp with time zone))
|
||||
-> Partial Aggregate (actual rows=1 loops=1)
|
||||
Output: PARTIAL count(*), PARTIAL sum(_hyper_1_2_chunk.v0), PARTIAL sum(_hyper_1_2_chunk.v1), PARTIAL sum(_hyper_1_2_chunk.v2), PARTIAL sum(_hyper_1_2_chunk.v3)
|
||||
-> Index Scan using _hyper_1_2_chunk_testtable_time_idx on _timescaledb_internal._hyper_1_2_chunk (actual rows=10 loops=1)
|
||||
-> Seq Scan on _timescaledb_internal._hyper_1_2_chunk (actual rows=10 loops=1)
|
||||
Output: _hyper_1_2_chunk.v0, _hyper_1_2_chunk.v1, _hyper_1_2_chunk.v2, _hyper_1_2_chunk.v3
|
||||
Index Cond: ((_hyper_1_2_chunk."time" >= ('2000-01-09 00:00:00+0'::cstring)::timestamp with time zone) AND (_hyper_1_2_chunk."time" <= ('2000-02-01 00:00:00+0'::cstring)::timestamp with time zone))
|
||||
(22 rows)
|
||||
Filter: ((_hyper_1_2_chunk."time" >= ('2000-01-09 00:00:00+0'::cstring)::timestamp with time zone) AND (_hyper_1_2_chunk."time" <= ('2000-02-01 00:00:00+0'::cstring)::timestamp with time zone))
|
||||
Rows Removed by Filter: 15
|
||||
(23 rows)
|
||||
|
||||
-- Force plain / sorted aggregation
|
||||
SET enable_hashagg = OFF;
|
||||
|
@ -1282,12 +1282,6 @@ SELECT relpages, CASE WHEN reltuples > 0 THEN reltuples ELSE 0 END as reltuples
|
||||
0 | 0
|
||||
(1 row)
|
||||
|
||||
SELECT histogram_bounds FROM pg_stats WHERE tablename = :statchunk AND attname = 'c1';
|
||||
histogram_bounds
|
||||
-------------------------------------------------------------------------------------------------------------------------------
|
||||
{0,250,500,750,1000,1250,1500,1750,2000,2250,2500,2750,3000,3250,3500,3750,4000,4250,4500,4750,5000,5250,5500,5750,6000,6250}
|
||||
(1 row)
|
||||
|
||||
SELECT compch.table_name as "STAT_COMP_CHUNK_NAME"
|
||||
FROM _timescaledb_catalog.hypertable ht, _timescaledb_catalog.chunk ch
|
||||
, _timescaledb_catalog.chunk compch
|
||||
@ -1300,15 +1294,8 @@ SELECT relpages, CASE WHEN reltuples > 0 THEN reltuples ELSE 0 END as reltuples
|
||||
0 | 0
|
||||
(1 row)
|
||||
|
||||
-- Now verify stats are updated on compressed chunk table when we analyze the hypertable.
|
||||
ANALYZE stattest;
|
||||
SELECT histogram_bounds FROM pg_stats WHERE tablename = :statchunk AND attname = 'c1';
|
||||
histogram_bounds
|
||||
-------------------------------------------------------------------------------------------------------------------------------
|
||||
{0,250,500,750,1000,1250,1500,1750,2000,2250,2500,2750,3000,3250,3500,3750,4000,4250,4500,4750,5000,5250,5500,5750,6000,6250}
|
||||
(1 row)
|
||||
|
||||
-- Unfortunately, the stats on the hypertable won't find any rows to sample from the chunk
|
||||
ANALYZE stattest;
|
||||
SELECT histogram_bounds FROM pg_stats WHERE tablename = 'stattest' AND attname = 'c1';
|
||||
histogram_bounds
|
||||
------------------
|
||||
|
@ -1481,6 +1481,7 @@ SELECT compress_chunk(i) FROM show_chunks('bugtab') i;
|
||||
_timescaledb_internal._hyper_11_23_chunk
|
||||
(1 row)
|
||||
|
||||
ANALYZE bugtab;
|
||||
:PREFIX
|
||||
SELECT "time","hin"::text,"model"::text,"block"::text,"message_name"::text,"signal_name"::text,"signal_numeric_value","signal_string_value"::text FROM :chunk_table_bugtab ORDER BY "time" DESC;
|
||||
QUERY PLAN
|
||||
|
@ -1481,6 +1481,7 @@ SELECT compress_chunk(i) FROM show_chunks('bugtab') i;
|
||||
_timescaledb_internal._hyper_11_23_chunk
|
||||
(1 row)
|
||||
|
||||
ANALYZE bugtab;
|
||||
:PREFIX
|
||||
SELECT "time","hin"::text,"model"::text,"block"::text,"message_name"::text,"signal_name"::text,"signal_numeric_value","signal_string_value"::text FROM :chunk_table_bugtab ORDER BY "time" DESC;
|
||||
QUERY PLAN
|
||||
|
@ -1481,6 +1481,7 @@ SELECT compress_chunk(i) FROM show_chunks('bugtab') i;
|
||||
_timescaledb_internal._hyper_11_23_chunk
|
||||
(1 row)
|
||||
|
||||
ANALYZE bugtab;
|
||||
:PREFIX
|
||||
SELECT "time","hin"::text,"model"::text,"block"::text,"message_name"::text,"signal_name"::text,"signal_numeric_value","signal_string_value"::text FROM :chunk_table_bugtab ORDER BY "time" DESC;
|
||||
QUERY PLAN
|
||||
|
@ -1481,6 +1481,7 @@ SELECT compress_chunk(i) FROM show_chunks('bugtab') i;
|
||||
_timescaledb_internal._hyper_11_23_chunk
|
||||
(1 row)
|
||||
|
||||
ANALYZE bugtab;
|
||||
:PREFIX
|
||||
SELECT "time","hin"::text,"model"::text,"block"::text,"message_name"::text,"signal_name"::text,"signal_numeric_value","signal_string_value"::text FROM :chunk_table_bugtab ORDER BY "time" DESC;
|
||||
QUERY PLAN
|
||||
|
@ -1,7 +1,6 @@
|
||||
-- This file and its contents are licensed under the Timescale License.
|
||||
-- Please see the included NOTICE for copyright information and
|
||||
-- LICENSE-TIMESCALE for a copy of the license.
|
||||
\c :TEST_DBNAME :ROLE_SUPERUSER
|
||||
-- github issue 5585
|
||||
create table test (
|
||||
time timestamptz not null,
|
||||
@ -33,8 +32,11 @@ select compress_chunk(show_chunks('test'));
|
||||
|
||||
-- force an index scan
|
||||
set enable_seqscan = 'off';
|
||||
-- disable jit to avoid test flakiness
|
||||
-- make some tweaks to avoid flakiness
|
||||
analyze test;
|
||||
analyze test_copy;
|
||||
set jit = off;
|
||||
set max_parallel_workers_per_gather = 0;
|
||||
explain (costs off) with query_params as (
|
||||
select distinct a, b
|
||||
from test_copy
|
||||
|
@ -1,7 +1,6 @@
|
||||
-- This file and its contents are licensed under the Timescale License.
|
||||
-- Please see the included NOTICE for copyright information and
|
||||
-- LICENSE-TIMESCALE for a copy of the license.
|
||||
\c :TEST_DBNAME :ROLE_SUPERUSER
|
||||
-- github issue 5585
|
||||
create table test (
|
||||
time timestamptz not null,
|
||||
@ -33,8 +32,11 @@ select compress_chunk(show_chunks('test'));
|
||||
|
||||
-- force an index scan
|
||||
set enable_seqscan = 'off';
|
||||
-- disable jit to avoid test flakiness
|
||||
-- make some tweaks to avoid flakiness
|
||||
analyze test;
|
||||
analyze test_copy;
|
||||
set jit = off;
|
||||
set max_parallel_workers_per_gather = 0;
|
||||
explain (costs off) with query_params as (
|
||||
select distinct a, b
|
||||
from test_copy
|
||||
|
@ -1,7 +1,6 @@
|
||||
-- This file and its contents are licensed under the Timescale License.
|
||||
-- Please see the included NOTICE for copyright information and
|
||||
-- LICENSE-TIMESCALE for a copy of the license.
|
||||
\c :TEST_DBNAME :ROLE_SUPERUSER
|
||||
-- github issue 5585
|
||||
create table test (
|
||||
time timestamptz not null,
|
||||
@ -33,8 +32,11 @@ select compress_chunk(show_chunks('test'));
|
||||
|
||||
-- force an index scan
|
||||
set enable_seqscan = 'off';
|
||||
-- disable jit to avoid test flakiness
|
||||
-- make some tweaks to avoid flakiness
|
||||
analyze test;
|
||||
analyze test_copy;
|
||||
set jit = off;
|
||||
set max_parallel_workers_per_gather = 0;
|
||||
explain (costs off) with query_params as (
|
||||
select distinct a, b
|
||||
from test_copy
|
||||
|
@ -1,7 +1,6 @@
|
||||
-- This file and its contents are licensed under the Timescale License.
|
||||
-- Please see the included NOTICE for copyright information and
|
||||
-- LICENSE-TIMESCALE for a copy of the license.
|
||||
\c :TEST_DBNAME :ROLE_SUPERUSER
|
||||
-- github issue 5585
|
||||
create table test (
|
||||
time timestamptz not null,
|
||||
@ -33,8 +32,11 @@ select compress_chunk(show_chunks('test'));
|
||||
|
||||
-- force an index scan
|
||||
set enable_seqscan = 'off';
|
||||
-- disable jit to avoid test flakiness
|
||||
-- make some tweaks to avoid flakiness
|
||||
analyze test;
|
||||
analyze test_copy;
|
||||
set jit = off;
|
||||
set max_parallel_workers_per_gather = 0;
|
||||
explain (costs off) with query_params as (
|
||||
select distinct a, b
|
||||
from test_copy
|
||||
|
@ -18,6 +18,7 @@ SELECT count(compress_chunk(ch)) FROM show_chunks('decompress_tracking') ch;
|
||||
2
|
||||
(1 row)
|
||||
|
||||
ANALYZE decompress_tracking;
|
||||
-- no tracking without analyze
|
||||
:EXPLAIN UPDATE decompress_tracking SET value = value + 3;
|
||||
QUERY PLAN
|
||||
@ -131,31 +132,35 @@ QUERY PLAN
|
||||
-- test prepared statements EXPLAIN still works after execution
|
||||
SET plan_cache_mode TO force_generic_plan;
|
||||
PREPARE p1 AS UPDATE decompress_tracking SET value = value + 3 WHERE device = 'd1';
|
||||
BEGIN; EXPLAIN EXECUTE p1; EXECUTE p1; EXPLAIN EXECUTE p1; ROLLBACK;
|
||||
BEGIN;
|
||||
EXPLAIN (COSTS OFF) EXECUTE p1;
|
||||
QUERY PLAN
|
||||
Custom Scan (HypertableModify) (cost=0.00..70.83 rows=433 width=18)
|
||||
-> Update on decompress_tracking (cost=0.00..70.83 rows=433 width=18)
|
||||
Custom Scan (HypertableModify)
|
||||
-> Update on decompress_tracking
|
||||
Update on _hyper_X_X_chunk decompress_tracking_1
|
||||
Update on _hyper_X_X_chunk decompress_tracking_2
|
||||
-> Result (cost=0.00..70.83 rows=433 width=18)
|
||||
-> Append (cost=0.00..65.42 rows=433 width=18)
|
||||
-> Seq Scan on _hyper_X_X_chunk decompress_tracking_1 (cost=0.00..31.62 rows=432 width=18)
|
||||
-> Result
|
||||
-> Append
|
||||
-> Seq Scan on _hyper_X_X_chunk decompress_tracking_1
|
||||
Filter: (device = 'd1'::text)
|
||||
-> Seq Scan on _hyper_X_X_chunk decompress_tracking_2 (cost=0.00..31.62 rows=1 width=18)
|
||||
-> Seq Scan on _hyper_X_X_chunk decompress_tracking_2
|
||||
Filter: (device = 'd1'::text)
|
||||
(10 rows)
|
||||
|
||||
EXECUTE p1;
|
||||
EXPLAIN (COSTS OFF) EXECUTE p1;
|
||||
QUERY PLAN
|
||||
Custom Scan (HypertableModify) (cost=0.00..70.83 rows=433 width=18)
|
||||
-> Update on decompress_tracking (cost=0.00..70.83 rows=433 width=18)
|
||||
Custom Scan (HypertableModify)
|
||||
-> Update on decompress_tracking
|
||||
Update on _hyper_X_X_chunk decompress_tracking_1
|
||||
Update on _hyper_X_X_chunk decompress_tracking_2
|
||||
-> Result (cost=0.00..70.83 rows=433 width=18)
|
||||
-> Append (cost=0.00..65.42 rows=433 width=18)
|
||||
-> Seq Scan on _hyper_X_X_chunk decompress_tracking_1 (cost=0.00..31.62 rows=432 width=18)
|
||||
-> Result
|
||||
-> Append
|
||||
-> Seq Scan on _hyper_X_X_chunk decompress_tracking_1
|
||||
Filter: (device = 'd1'::text)
|
||||
-> Seq Scan on _hyper_X_X_chunk decompress_tracking_2 (cost=0.00..31.62 rows=1 width=18)
|
||||
-> Seq Scan on _hyper_X_X_chunk decompress_tracking_2
|
||||
Filter: (device = 'd1'::text)
|
||||
(10 rows)
|
||||
|
||||
ROLLBACK;
|
||||
DROP TABLE decompress_tracking;
|
||||
|
@ -15,6 +15,8 @@ INSERT INTO decompress_tracking SELECT '2020-01-01'::timestamptz + format('%s ho
|
||||
|
||||
SELECT count(compress_chunk(ch)) FROM show_chunks('decompress_tracking') ch;
|
||||
|
||||
ANALYZE decompress_tracking;
|
||||
|
||||
-- no tracking without analyze
|
||||
:EXPLAIN UPDATE decompress_tracking SET value = value + 3;
|
||||
|
||||
@ -30,6 +32,10 @@ BEGIN; :EXPLAIN_ANALYZE INSERT INTO decompress_tracking (VALUES ('2020-01-01 1:3
|
||||
-- test prepared statements EXPLAIN still works after execution
|
||||
SET plan_cache_mode TO force_generic_plan;
|
||||
PREPARE p1 AS UPDATE decompress_tracking SET value = value + 3 WHERE device = 'd1';
|
||||
BEGIN; EXPLAIN EXECUTE p1; EXECUTE p1; EXPLAIN EXECUTE p1; ROLLBACK;
|
||||
BEGIN;
|
||||
EXPLAIN (COSTS OFF) EXECUTE p1;
|
||||
EXECUTE p1;
|
||||
EXPLAIN (COSTS OFF) EXECUTE p1;
|
||||
ROLLBACK;
|
||||
|
||||
DROP TABLE decompress_tracking;
|
||||
|
@ -18,6 +18,8 @@ FROM generate_series('2000-01-01 0:00:00+0'::timestamptz,'2000-01-10 23:55:00+0'
|
||||
|
||||
SELECT compress_chunk(c) FROM show_chunks('testtable') c;
|
||||
|
||||
ANALYZE testtable;
|
||||
|
||||
-- Pushdown aggregation to the chunk level
|
||||
SELECT count(*), sum(v0), sum(v1), sum(v2), sum(v3) FROM testtable WHERE time >= '2000-01-01 00:00:00+0' AND time <= '2000-02-01 00:00:00+0';
|
||||
|
||||
@ -29,6 +31,8 @@ INSERT INTO testtable(time,device_id,v0,v1,v2,v3)
|
||||
SELECT time, device_id, device_id+1, device_id + 2, device_id + 0.5, NULL
|
||||
FROM generate_series('2000-01-01 0:00:00+0'::timestamptz,'2000-01-10 23:55:00+0','1day') gtime(time), generate_series(1,5,1) gdevice(device_id);
|
||||
|
||||
ANALYZE testtable;
|
||||
|
||||
-- Pushdown aggregation to the chunk level
|
||||
SELECT count(*), sum(v0), sum(v1), sum(v2), sum(v3) FROM testtable WHERE time >= '2000-01-01 00:00:00+0' AND time <= '2000-02-01 00:00:00+0';
|
||||
|
||||
|
@ -542,7 +542,6 @@ SELECT count(*) from stattest;
|
||||
-- Uncompressed chunk table is empty since we just compressed the chunk and moved everything to compressed chunk table.
|
||||
-- reltuples is initially -1 on PG14 before VACUUM/ANALYZE was run
|
||||
SELECT relpages, CASE WHEN reltuples > 0 THEN reltuples ELSE 0 END as reltuples FROM pg_class WHERE relname = :statchunk;
|
||||
SELECT histogram_bounds FROM pg_stats WHERE tablename = :statchunk AND attname = 'c1';
|
||||
|
||||
SELECT compch.table_name as "STAT_COMP_CHUNK_NAME"
|
||||
FROM _timescaledb_catalog.hypertable ht, _timescaledb_catalog.chunk ch
|
||||
@ -553,10 +552,8 @@ FROM _timescaledb_catalog.hypertable ht, _timescaledb_catalog.chunk ch
|
||||
-- reltuples is initially -1 on PG14 before VACUUM/ANALYZE was run
|
||||
SELECT relpages, CASE WHEN reltuples > 0 THEN reltuples ELSE 0 END as reltuples FROM pg_class WHERE relname = :'STAT_COMP_CHUNK_NAME';
|
||||
|
||||
-- Now verify stats are updated on compressed chunk table when we analyze the hypertable.
|
||||
ANALYZE stattest;
|
||||
SELECT histogram_bounds FROM pg_stats WHERE tablename = :statchunk AND attname = 'c1';
|
||||
-- Unfortunately, the stats on the hypertable won't find any rows to sample from the chunk
|
||||
ANALYZE stattest;
|
||||
SELECT histogram_bounds FROM pg_stats WHERE tablename = 'stattest' AND attname = 'c1';
|
||||
SELECT relpages, reltuples FROM pg_class WHERE relname = :statchunk;
|
||||
|
||||
|
@ -524,6 +524,8 @@ SELECT chunk_schema || '.' || chunk_name AS "chunk_table_bugtab"
|
||||
|
||||
SELECT compress_chunk(i) FROM show_chunks('bugtab') i;
|
||||
|
||||
ANALYZE bugtab;
|
||||
|
||||
:PREFIX
|
||||
SELECT "time","hin"::text,"model"::text,"block"::text,"message_name"::text,"signal_name"::text,"signal_numeric_value","signal_string_value"::text FROM :chunk_table_bugtab ORDER BY "time" DESC;
|
||||
|
||||
|
@ -2,8 +2,6 @@
|
||||
-- Please see the included NOTICE for copyright information and
|
||||
-- LICENSE-TIMESCALE for a copy of the license.
|
||||
|
||||
\c :TEST_DBNAME :ROLE_SUPERUSER
|
||||
|
||||
-- github issue 5585
|
||||
create table test (
|
||||
time timestamptz not null,
|
||||
@ -28,8 +26,11 @@ alter table test set (timescaledb.compress, timescaledb.compress_segmentby='a, b
|
||||
select compress_chunk(show_chunks('test'));
|
||||
-- force an index scan
|
||||
set enable_seqscan = 'off';
|
||||
-- disable jit to avoid test flakiness
|
||||
-- make some tweaks to avoid flakiness
|
||||
analyze test;
|
||||
analyze test_copy;
|
||||
set jit = off;
|
||||
set max_parallel_workers_per_gather = 0;
|
||||
|
||||
explain (costs off) with query_params as (
|
||||
select distinct a, b
|
||||
|
Loading…
x
Reference in New Issue
Block a user