mirror of
https://github.com/timescale/timescaledb.git
synced 2025-05-15 01:53:41 +08:00
This patch implements SkipScan, an optimization for SELECT DISTINCT ON. Usually for SELECT DISTINCT ON, postgres will plan either a UNIQUE over a sorted path or some form of aggregate. In either case, it needs to scan the entire table, even in cases where there are only a few unique values. A skip scan optimizes this case when we have an ordered index. Instead of scanning the entire table and deduplicating afterwards, the scan remembers the last value returned and searches the index for the next value after that one. This means that for a table with k keys, with u distinct values, a skip scan runs in time u * log(k) as opposed to scanning then deduplicating, which takes time k. We can write the number of unique values u as a function of k by dividing by the number of repeats r, i.e. u = k/r. This means that a skip scan will be faster if each key is repeated more than a logarithmic number of times, i.e. if r > log(k) then u * log(k) < k/log(k) * log(k) = k. Co-authored-by: Joshua Lockerman <josh@timescale.com>
50 lines
1.5 KiB
SQL
50 lines
1.5 KiB
SQL
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.

-- Driver setup for the SkipScan result-comparison test.
-- need superuser to adjust statistics in load script
\c :TEST_DBNAME :ROLE_CLUSTER_SUPERUSER

-- Base name from which the include-file and result-file paths are derived.
\set TEST_BASE_NAME skip_scan

-- Build the load/query include paths and the two result-file paths, then
-- store each output column in a psql variable named after its alias (gset).
SELECT
    format('include/%s_load.sql', :'TEST_BASE_NAME') AS "TEST_LOAD_NAME",
    format('include/%s_query.sql', :'TEST_BASE_NAME') AS "TEST_QUERY_NAME",
    format('%s/results/%s_results_unoptimized.out', :'TEST_OUTPUT_DIR', :'TEST_BASE_NAME') AS "TEST_RESULTS_UNOPTIMIZED",
    format('%s/results/%s_results_optimized.out', :'TEST_OUTPUT_DIR', :'TEST_BASE_NAME') AS "TEST_RESULTS_OPTIMIZED" \gset

-- Pre-build the shell diff command that compares the two result files, so
-- each section below can run it by expanding a single variable.
SELECT format('\! diff -u --label "Unoptimized results" --label "Optimized results" %s %s', :'TEST_RESULTS_UNOPTIMIZED', :'TEST_RESULTS_OPTIMIZED') AS "DIFF_CMD" \gset

-- Include the load script, resolved relative to this file. Its contents are
-- not visible here. It presumably creates the tables exercised below.
\ir :TEST_LOAD_NAME

-- run tests on normal table and diff results
-- TABLE and PREFIX are psql variables. Presumably the included query file
-- reads them to select the relation under test and an optional statement
-- prefix (TODO confirm against the query file).
\set TABLE skip_scan
\set PREFIX ''

-- First pass: run the query file with the skipscan setting left untouched
-- and send query output to the optimized results file.
\o :TEST_RESULTS_OPTIMIZED
\ir :TEST_QUERY_NAME
\o

-- Second pass: run the same query file with SkipScan disabled and send
-- query output to the unoptimized results file, then restore the setting.
SET timescaledb.enable_skipscan TO false;
\o :TEST_RESULTS_UNOPTIMIZED
\ir :TEST_QUERY_NAME
\o
RESET timescaledb.enable_skipscan;

-- compare SkipScan results on normal table
:DIFF_CMD

-- run tests on hypertable and diff results
-- Same two-pass comparison as for the normal table, with TABLE pointing at
-- skip_scan_ht. Presumably the included query file reads TABLE and PREFIX
-- (TODO confirm against the query file).
\set TABLE skip_scan_ht
\set PREFIX ''

-- First pass: run the query file with the skipscan setting left untouched
-- and send query output to the optimized results file (overwriting the
-- file written for the previous section).
\o :TEST_RESULTS_OPTIMIZED
\ir :TEST_QUERY_NAME
\o

-- Second pass: run the same query file with SkipScan disabled and send
-- query output to the unoptimized results file, then restore the setting.
SET timescaledb.enable_skipscan TO false;
\o :TEST_RESULTS_UNOPTIMIZED
\ir :TEST_QUERY_NAME
\o
RESET timescaledb.enable_skipscan;

-- compare SkipScan results on hypertable
:DIFF_CMD