mirror of
https://github.com/timescale/timescaledb.git
synced 2025-05-14 17:43:34 +08:00
Use numrows_pre_compression in approx row count
The approximate_row_count function was using the reltuples from compressed chunks and multiplying that with 1000 which is the default batch size. This was leading to a huge skew between the actual row count and the approximate one. We now use the numrows_pre_compression value from the timescaledb catalog which accurately represents the number of rows before the actual compression.
This commit is contained in:
parent
ef030d29c0
commit
293104add2
1
.unreleased/fix_6365
Normal file
1
.unreleased/fix_6365
Normal file
@ -0,0 +1 @@
|
||||
Fixes: #6365 Use numrows_pre_compression in approximate row count
|
@ -453,14 +453,15 @@ BEGIN
|
||||
IF local_compressed_hypertable_id IS NOT NULL THEN
|
||||
uncompressed_row_count = _timescaledb_functions.get_approx_row_count(relation);
|
||||
|
||||
WITH compressed_hypertable AS (SELECT table_name, schema_name FROM _timescaledb_catalog.hypertable ht
|
||||
WHERE ht.id = local_compressed_hypertable_id)
|
||||
SELECT c.oid INTO compressed_hypertable_oid FROM pg_class c
|
||||
INNER JOIN compressed_hypertable h ON (c.relname = h.table_name)
|
||||
INNER JOIN pg_namespace n ON (n.nspname = h.schema_name);
|
||||
-- use the compression_chunk_size stats to fetch precompressed num rows
|
||||
SELECT COALESCE(SUM(numrows_pre_compression), 0) FROM _timescaledb_catalog.chunk srcch,
|
||||
_timescaledb_catalog.compression_chunk_size map, _timescaledb_catalog.hypertable srcht
|
||||
INTO compressed_row_count
|
||||
WHERE map.chunk_id = srcch.id
|
||||
AND srcht.id = srcch.hypertable_id AND srcht.table_name = local_table_name
|
||||
AND srcht.schema_name = local_schema_name;
|
||||
|
||||
compressed_row_count = _timescaledb_functions.get_approx_row_count(compressed_hypertable_oid);
|
||||
RETURN (uncompressed_row_count + (compressed_row_count * max_compressed_row_count));
|
||||
RETURN (uncompressed_row_count + compressed_row_count);
|
||||
ELSE
|
||||
uncompressed_row_count = _timescaledb_functions.get_approx_row_count(relation);
|
||||
RETURN uncompressed_row_count;
|
||||
@ -477,22 +478,24 @@ BEGIN
|
||||
-- 'input is chunk #1';
|
||||
IF is_compressed_chunk IS NULL AND local_compressed_chunk_id IS NOT NULL THEN
|
||||
-- 'Include both uncompressed and compressed chunk #2';
|
||||
WITH compressed_ns_oid AS ( SELECT table_name, oid FROM _timescaledb_catalog.chunk ch INNER JOIN pg_namespace ns ON
|
||||
(ch.id = local_compressed_chunk_id and ch.schema_name = ns.nspname))
|
||||
SELECT c.oid FROM pg_class c INNER JOIN compressed_ns_oid
|
||||
ON ( c.relnamespace = compressed_ns_oid.oid AND c.relname = compressed_ns_oid.table_name)
|
||||
INTO local_compressed_chunk_oid;
|
||||
-- use the compression_chunk_size stats to fetch precompressed num rows
|
||||
SELECT COALESCE(numrows_pre_compression, 0) FROM _timescaledb_catalog.compression_chunk_size
|
||||
INTO compressed_row_count
|
||||
WHERE compressed_chunk_id = local_compressed_chunk_id;
|
||||
|
||||
uncompressed_row_count = _timescaledb_functions.get_approx_row_count(relation);
|
||||
compressed_row_count = _timescaledb_functions.get_approx_row_count(local_compressed_chunk_oid);
|
||||
RETURN uncompressed_row_count + (compressed_row_count * max_compressed_row_count);
|
||||
RETURN (uncompressed_row_count + compressed_row_count);
|
||||
ELSIF is_compressed_chunk IS NULL AND local_compressed_chunk_id IS NULL THEN
|
||||
-- 'input relation is uncompressed chunk #3';
|
||||
uncompressed_row_count = _timescaledb_functions.get_approx_row_count(relation);
|
||||
RETURN uncompressed_row_count;
|
||||
ELSE
|
||||
-- 'compressed chunk only #4';
|
||||
compressed_row_count = _timescaledb_functions.get_approx_row_count(relation) * max_compressed_row_count;
|
||||
-- use the compression_chunk_size stats to fetch precompressed num rows
|
||||
SELECT COALESCE(SUM(numrows_pre_compression), 0) FROM _timescaledb_catalog.chunk srcch,
|
||||
_timescaledb_catalog.compression_chunk_size map INTO compressed_row_count
|
||||
WHERE map.compressed_chunk_id = srcch.id
|
||||
AND srcch.table_name = local_table_name AND srcch.schema_name = local_schema_name;
|
||||
RETURN compressed_row_count;
|
||||
END IF;
|
||||
END IF;
|
||||
|
@ -39,6 +39,20 @@ insert into foo values( 3 , 16 , 20, NULL);
|
||||
insert into foo values( 10 , 10 , 20, NULL);
|
||||
insert into foo values( 20 , 11 , 20, NULL);
|
||||
insert into foo values( 30 , 12 , 20, NULL);
|
||||
analyze foo;
|
||||
-- check that approximate_row_count works with a regular table
|
||||
SELECT approximate_row_count('foo');
|
||||
approximate_row_count
|
||||
-----------------------
|
||||
4
|
||||
(1 row)
|
||||
|
||||
SELECT count(*) from foo;
|
||||
count
|
||||
-------
|
||||
4
|
||||
(1 row)
|
||||
|
||||
alter table foo set (timescaledb.compress, timescaledb.compress_segmentby = 'a,b', timescaledb.compress_orderby = 'c desc, d asc nulls last');
|
||||
--test self-referencing updates
|
||||
SET timescaledb.enable_transparent_decompression to ON;
|
||||
@ -1259,6 +1273,7 @@ SELECT * FROM pg_stats WHERE tablename = :statchunk;
|
||||
(0 rows)
|
||||
|
||||
ALTER TABLE stattest SET (timescaledb.compress);
|
||||
-- check that approximate_row_count works with all normal chunks
|
||||
SELECT approximate_row_count('stattest');
|
||||
approximate_row_count
|
||||
-----------------------
|
||||
@ -1271,10 +1286,18 @@ SELECT compress_chunk(c) FROM show_chunks('stattest') c;
|
||||
_timescaledb_internal._hyper_27_55_chunk
|
||||
(1 row)
|
||||
|
||||
-- check that approximate_row_count works with all compressed chunks
|
||||
SELECT approximate_row_count('stattest');
|
||||
approximate_row_count
|
||||
-----------------------
|
||||
0
|
||||
26
|
||||
(1 row)
|
||||
|
||||
-- actual count should match with the above
|
||||
SELECT count(*) from stattest;
|
||||
count
|
||||
-------
|
||||
26
|
||||
(1 row)
|
||||
|
||||
-- Uncompressed chunk table is empty since we just compressed the chunk and moved everything to compressed chunk table.
|
||||
@ -1330,6 +1353,13 @@ SELECT relpages, reltuples FROM pg_class WHERE relname = :'STAT_COMP_CHUNK_NAME'
|
||||
1 | 1
|
||||
(1 row)
|
||||
|
||||
-- verify that approximate_row_count works fine on a chunk with compressed data
|
||||
SELECT approximate_row_count('_timescaledb_internal.' || :'STAT_COMP_CHUNK_NAME');
|
||||
approximate_row_count
|
||||
-----------------------
|
||||
26
|
||||
(1 row)
|
||||
|
||||
-- Verify partial chunk stats are handled correctly when analyzing
|
||||
-- for both uncompressed and compressed chunk tables
|
||||
INSERT INTO stattest SELECT '2020/02/20 01:00'::TIMESTAMPTZ + ('1 hour'::interval * v), 250 * v FROM generate_series(25,50) v;
|
||||
@ -1362,6 +1392,28 @@ SELECT relpages, reltuples FROM pg_class WHERE relname = :'STAT_COMP_CHUNK_NAME'
|
||||
1 | 1
|
||||
(1 row)
|
||||
|
||||
-- verify that approximate_row_count works fine on a chunk with a mix of uncompressed
|
||||
-- and compressed data
|
||||
SELECT table_name as "STAT_CHUNK_NAME" from temptable \gset
|
||||
SELECT approximate_row_count('_timescaledb_internal.' || :'STAT_CHUNK_NAME');
|
||||
approximate_row_count
|
||||
-----------------------
|
||||
52
|
||||
(1 row)
|
||||
|
||||
-- should match with the result via the hypertable post in-memory decompression
|
||||
SELECT count(*) from stattest;
|
||||
count
|
||||
-------
|
||||
52
|
||||
(1 row)
|
||||
|
||||
SELECT count(*) from show_chunks('stattest');
|
||||
count
|
||||
-------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
-- Verify that decompressing the chunk restores autoanalyze to the hypertable's setting
|
||||
SELECT reloptions FROM pg_class WHERE relname = :statchunk;
|
||||
reloptions
|
||||
@ -1423,7 +1475,6 @@ SELECT relpages, reltuples FROM pg_class WHERE relname = :statchunk;
|
||||
1 | 52
|
||||
(1 row)
|
||||
|
||||
DROP TABLE stattest;
|
||||
--- Test that analyze on compression internal table updates stats on original chunks
|
||||
CREATE TABLE stattest2(time TIMESTAMPTZ NOT NULL, c1 int, c2 int);
|
||||
SELECT create_hypertable('stattest2', 'time', chunk_time_interval=>'1 day'::interval);
|
||||
@ -2626,3 +2677,30 @@ SELECT * FROM sensor_data_compressed ORDER BY time DESC LIMIT 5;
|
||||
(70 rows)
|
||||
|
||||
RESET timescaledb.enable_decompression_sorted_merge;
|
||||
-- create another chunk
|
||||
INSERT INTO stattest SELECT '2021/02/20 01:00'::TIMESTAMPTZ + ('1 hour'::interval * v), 250 * v FROM generate_series(125,140) v;
|
||||
ANALYZE stattest;
|
||||
SELECT count(*) from show_chunks('stattest');
|
||||
count
|
||||
-------
|
||||
2
|
||||
(1 row)
|
||||
|
||||
SELECT table_name INTO TEMPORARY temptable FROM _timescaledb_catalog.chunk WHERE hypertable_id = (SELECT id FROM _timescaledb_catalog.hypertable WHERE table_name = 'stattest') ORDER BY creation_time desc limit 1;
|
||||
SELECT table_name as "STAT_CHUNK2_NAME" FROM temptable \gset
|
||||
-- verify that approximate_row_count works ok on normal chunks
|
||||
SELECT approximate_row_count('_timescaledb_internal.' || :'STAT_CHUNK2_NAME');
|
||||
approximate_row_count
|
||||
-----------------------
|
||||
16
|
||||
(1 row)
|
||||
|
||||
-- verify that approximate_row_count works fine on a hypertable with a mix of uncompressed
|
||||
-- and compressed data
|
||||
SELECT approximate_row_count('stattest');
|
||||
approximate_row_count
|
||||
-----------------------
|
||||
68
|
||||
(1 row)
|
||||
|
||||
DROP TABLE stattest;
|
||||
|
@ -19,6 +19,10 @@ insert into foo values( 3 , 16 , 20, NULL);
|
||||
insert into foo values( 10 , 10 , 20, NULL);
|
||||
insert into foo values( 20 , 11 , 20, NULL);
|
||||
insert into foo values( 30 , 12 , 20, NULL);
|
||||
analyze foo;
|
||||
-- check that approximate_row_count works with a regular table
|
||||
SELECT approximate_row_count('foo');
|
||||
SELECT count(*) from foo;
|
||||
|
||||
alter table foo set (timescaledb.compress, timescaledb.compress_segmentby = 'a,b', timescaledb.compress_orderby = 'c desc, d asc nulls last');
|
||||
|
||||
@ -538,9 +542,13 @@ SELECT table_name INTO TEMPORARY temptable FROM _timescaledb_catalog.chunk WHERE
|
||||
SELECT * FROM pg_stats WHERE tablename = :statchunk;
|
||||
|
||||
ALTER TABLE stattest SET (timescaledb.compress);
|
||||
-- check that approximate_row_count works with all normal chunks
|
||||
SELECT approximate_row_count('stattest');
|
||||
SELECT compress_chunk(c) FROM show_chunks('stattest') c;
|
||||
-- check that approximate_row_count works with all compressed chunks
|
||||
SELECT approximate_row_count('stattest');
|
||||
-- actual count should match with the above
|
||||
SELECT count(*) from stattest;
|
||||
-- Uncompressed chunk table is empty since we just compressed the chunk and moved everything to compressed chunk table.
|
||||
-- reltuples is initially -1 on PG14 before VACUUM/ANALYZE was run
|
||||
SELECT relpages, CASE WHEN reltuples > 0 THEN reltuples ELSE 0 END as reltuples FROM pg_class WHERE relname = :statchunk;
|
||||
@ -564,6 +572,8 @@ SELECT relpages, reltuples FROM pg_class WHERE relname = :statchunk;
|
||||
|
||||
-- verify that corresponding compressed chunk table stats is updated as well.
|
||||
SELECT relpages, reltuples FROM pg_class WHERE relname = :'STAT_COMP_CHUNK_NAME';
|
||||
-- verify that approximate_row_count works fine on a chunk with compressed data
|
||||
SELECT approximate_row_count('_timescaledb_internal.' || :'STAT_COMP_CHUNK_NAME');
|
||||
|
||||
-- Verify partial chunk stats are handled correctly when analyzing
|
||||
-- for both uncompressed and compressed chunk tables
|
||||
@ -577,7 +587,13 @@ SELECT relpages, reltuples FROM pg_class WHERE relname = :statchunk;
|
||||
-- verify that corresponding compressed chunk table stats have not changed since
|
||||
-- we didn't compress anything new.
|
||||
SELECT relpages, reltuples FROM pg_class WHERE relname = :'STAT_COMP_CHUNK_NAME';
|
||||
|
||||
-- verify that approximate_row_count works fine on a chunk with a mix of uncompressed
|
||||
-- and compressed data
|
||||
SELECT table_name as "STAT_CHUNK_NAME" from temptable \gset
|
||||
SELECT approximate_row_count('_timescaledb_internal.' || :'STAT_CHUNK_NAME');
|
||||
-- should match with the result via the hypertable post in-memory decompression
|
||||
SELECT count(*) from stattest;
|
||||
SELECT count(*) from show_chunks('stattest');
|
||||
|
||||
-- Verify that decompressing the chunk restores autoanalyze to the hypertable's setting
|
||||
SELECT reloptions FROM pg_class WHERE relname = :statchunk;
|
||||
@ -597,7 +613,6 @@ SET client_min_messages TO NOTICE;
|
||||
SELECT histogram_bounds FROM pg_stats WHERE tablename = :statchunk and attname = 'c1';
|
||||
SELECT relpages, reltuples FROM pg_class WHERE relname = :statchunk;
|
||||
|
||||
DROP TABLE stattest;
|
||||
|
||||
--- Test that analyze on compression internal table updates stats on original chunks
|
||||
CREATE TABLE stattest2(time TIMESTAMPTZ NOT NULL, c1 int, c2 int);
|
||||
@ -1052,3 +1067,17 @@ SET timescaledb.enable_decompression_sorted_merge = FALSE;
|
||||
:PREFIX
|
||||
SELECT * FROM sensor_data_compressed ORDER BY time DESC LIMIT 5;
|
||||
RESET timescaledb.enable_decompression_sorted_merge;
|
||||
|
||||
-- create another chunk
|
||||
INSERT INTO stattest SELECT '2021/02/20 01:00'::TIMESTAMPTZ + ('1 hour'::interval * v), 250 * v FROM generate_series(125,140) v;
|
||||
ANALYZE stattest;
|
||||
SELECT count(*) from show_chunks('stattest');
|
||||
SELECT table_name INTO TEMPORARY temptable FROM _timescaledb_catalog.chunk WHERE hypertable_id = (SELECT id FROM _timescaledb_catalog.hypertable WHERE table_name = 'stattest') ORDER BY creation_time desc limit 1;
|
||||
SELECT table_name as "STAT_CHUNK2_NAME" FROM temptable \gset
|
||||
-- verify that approximate_row_count works ok on normal chunks
|
||||
SELECT approximate_row_count('_timescaledb_internal.' || :'STAT_CHUNK2_NAME');
|
||||
-- verify that approximate_row_count works fine on a hypertable with a mix of uncompressed
|
||||
-- and compressed data
|
||||
SELECT approximate_row_count('stattest');
|
||||
|
||||
DROP TABLE stattest;
|
||||
|
Loading…
x
Reference in New Issue
Block a user