Fix SkipScan path generation with constant DISTINCT column

When a DISTINCT query has a WHERE clause that constifies the
DISTINCT column, the query might use an index that does not
include the DISTINCT column even though it is referenced in the
ORDER BY clause. The SkipScan path generation would error on any
path with such a configuration. This patch changes the path
generation code to skip generating a SkipScan path under these
circumstances.

Fixes #3629
This commit is contained in:
Sven Klemm 2021-10-06 19:05:12 +02:00 committed by Sven Klemm
parent 74ca546565
commit 9e53cbb6d3
6 changed files with 127 additions and 3 deletions

View File

@ -9,6 +9,10 @@ accidentally triggering the load of a previous DB version.**
**Bugfixes** **Bugfixes**
* #3580 Fix memory context bug executing TRUNCATE * #3580 Fix memory context bug executing TRUNCATE
* #3654 Fix index attnum mapping in reorder_chunk * #3654 Fix index attnum mapping in reorder_chunk
* #3661 Fix SkipScan path generation with constant DISTINCT column
**Thanks**
* @binakot and @sebvett for reporting an issue with DISTINCT queries
**Thanks** **Thanks**
* @hardikm10, @DavidPavlicek and @pafiti for reporting bugs on TRUNCATE * @hardikm10, @DavidPavlicek and @pafiti for reporting bugs on TRUNCATE

View File

@ -536,7 +536,25 @@ build_skip_qual(PlannerInfo *root, SkipScanPath *skip_scan_path, IndexPath *inde
Oid column_type = exprType((Node *) var); Oid column_type = exprType((Node *) var);
Oid column_collation = get_typcollation(column_type); Oid column_collation = get_typcollation(column_type);
TypeCacheEntry *tce = lookup_type_cache(column_type, 0); TypeCacheEntry *tce = lookup_type_cache(column_type, 0);
/*
* Skipscan is not applicable for the following case:
* We might have a path with an index that produces the correct pathkeys for the target ordering
* without actually including all the columns of the ORDER BY. If the path uses an index that
* does not include the distinct column, we cannot use it for skipscan and have to discard this
* path from skipscan generation. This happens, for instance, when we have an order by clause
* (like ORDER BY a, b) with constraints in the WHERE clause (like WHERE a = <constant>). "a"
* can now be removed from the Pathkeys (since it is a constant) and the query can be satisfied
* by using an index on just column "b".
*
* Example query:
* SELECT DISTINCT ON (a) * FROM test WHERE a in (2) ORDER BY a ASC, time DESC;
* Since a is always 2 due to the WHERE clause we can create the correct ordering for the
* ORDER BY with an index that does not include the a column and only includes the time column.
*/
int idx_key = get_idx_key(info, var->varattno); int idx_key = get_idx_key(info, var->varattno);
if (idx_key < 0)
return false;
skip_scan_path->distinct_attno = var->varattno; skip_scan_path->distinct_attno = var->varattno;
skip_scan_path->distinct_by_val = tce->typbyval; skip_scan_path->distinct_by_val = tce->typbyval;
@ -586,9 +604,7 @@ get_idx_key(IndexOptInfo *idxinfo, AttrNumber attno)
if (attno == idxinfo->indexkeys[i]) if (attno == idxinfo->indexkeys[i])
return i; return i;
} }
return -1;
elog(ERROR, "column not present in index: %d", attno);
pg_unreachable();
} }
/* Sort quals according to index column order. /* Sort quals according to index column order.

View File

@ -3917,3 +3917,35 @@ EXPLAIN (costs off, timing off, summary off) SELECT DISTINCT ON (dev_name) dev_n
Index Cond: (dev_name > NULL::text) Index Cond: (dev_name > NULL::text)
(15 rows) (15 rows)
-- #3629 skipscan with constant skipscan column in where clause
CREATE TABLE i3629(a int, time timestamptz NOT NULL);
SELECT table_name FROM create_hypertable('i3629', 'time');
table_name
------------
i3629
(1 row)
INSERT INTO i3629 SELECT i, '2020-04-01'::date-10-i from generate_series(1,20) i;
EXPLAIN (SUMMARY OFF, COSTS OFF) SELECT DISTINCT ON (a) * FROM i3629 WHERE a in (2) ORDER BY a ASC, time DESC;
QUERY PLAN
------------------------------------------------
Unique
-> Sort
Sort Key: _hyper_3_6_chunk."time" DESC
-> Append
-> Seq Scan on _hyper_3_6_chunk
Filter: (a = 2)
-> Seq Scan on _hyper_3_7_chunk
Filter: (a = 2)
-> Seq Scan on _hyper_3_8_chunk
Filter: (a = 2)
-> Seq Scan on _hyper_3_9_chunk
Filter: (a = 2)
(12 rows)
SELECT DISTINCT ON (a) * FROM i3629 WHERE a in (2) ORDER BY a ASC, time DESC;
a | time
---+------------------------------
2 | Fri Mar 20 00:00:00 2020 PDT
(1 row)

View File

@ -3909,3 +3909,35 @@ EXPLAIN (costs off, timing off, summary off) SELECT DISTINCT ON (dev_name) dev_n
Index Cond: (dev_name > NULL::text) Index Cond: (dev_name > NULL::text)
(15 rows) (15 rows)
-- #3629 skipscan with constant skipscan column in where clause
CREATE TABLE i3629(a int, time timestamptz NOT NULL);
SELECT table_name FROM create_hypertable('i3629', 'time');
table_name
------------
i3629
(1 row)
INSERT INTO i3629 SELECT i, '2020-04-01'::date-10-i from generate_series(1,20) i;
EXPLAIN (SUMMARY OFF, COSTS OFF) SELECT DISTINCT ON (a) * FROM i3629 WHERE a in (2) ORDER BY a ASC, time DESC;
QUERY PLAN
------------------------------------------------
Unique
-> Sort
Sort Key: _hyper_3_6_chunk."time" DESC
-> Append
-> Seq Scan on _hyper_3_6_chunk
Filter: (a = 2)
-> Seq Scan on _hyper_3_7_chunk
Filter: (a = 2)
-> Seq Scan on _hyper_3_8_chunk
Filter: (a = 2)
-> Seq Scan on _hyper_3_9_chunk
Filter: (a = 2)
(12 rows)
SELECT DISTINCT ON (a) * FROM i3629 WHERE a in (2) ORDER BY a ASC, time DESC;
a | time
---+------------------------------
2 | Fri Mar 20 00:00:00 2020 PDT
(1 row)

View File

@ -3909,3 +3909,35 @@ EXPLAIN (costs off, timing off, summary off) SELECT DISTINCT ON (dev_name) dev_n
Index Cond: (dev_name > NULL::text) Index Cond: (dev_name > NULL::text)
(15 rows) (15 rows)
-- #3629 skipscan with constant skipscan column in where clause
CREATE TABLE i3629(a int, time timestamptz NOT NULL);
SELECT table_name FROM create_hypertable('i3629', 'time');
table_name
------------
i3629
(1 row)
INSERT INTO i3629 SELECT i, '2020-04-01'::date-10-i from generate_series(1,20) i;
EXPLAIN (SUMMARY OFF, COSTS OFF) SELECT DISTINCT ON (a) * FROM i3629 WHERE a in (2) ORDER BY a ASC, time DESC;
QUERY PLAN
------------------------------------------------
Unique
-> Sort
Sort Key: _hyper_3_6_chunk."time" DESC
-> Append
-> Seq Scan on _hyper_3_6_chunk
Filter: (a = 2)
-> Seq Scan on _hyper_3_7_chunk
Filter: (a = 2)
-> Seq Scan on _hyper_3_8_chunk
Filter: (a = 2)
-> Seq Scan on _hyper_3_9_chunk
Filter: (a = 2)
(12 rows)
SELECT DISTINCT ON (a) * FROM i3629 WHERE a in (2) ORDER BY a ASC, time DESC;
a | time
---+------------------------------
2 | Fri Mar 20 00:00:00 2020 PDT
(1 row)

View File

@ -18,3 +18,11 @@
-- try one query with EXPLAIN only for coverage -- try one query with EXPLAIN only for coverage
EXPLAIN (costs off, timing off, summary off) SELECT DISTINCT ON (dev_name) dev_name FROM skip_scan; EXPLAIN (costs off, timing off, summary off) SELECT DISTINCT ON (dev_name) dev_name FROM skip_scan;
EXPLAIN (costs off, timing off, summary off) SELECT DISTINCT ON (dev_name) dev_name FROM skip_scan_ht; EXPLAIN (costs off, timing off, summary off) SELECT DISTINCT ON (dev_name) dev_name FROM skip_scan_ht;
-- #3629 skipscan with constant skipscan column in where clause
CREATE TABLE i3629(a int, time timestamptz NOT NULL);
SELECT table_name FROM create_hypertable('i3629', 'time');
INSERT INTO i3629 SELECT i, '2020-04-01'::date-10-i from generate_series(1,20) i;
EXPLAIN (SUMMARY OFF, COSTS OFF) SELECT DISTINCT ON (a) * FROM i3629 WHERE a in (2) ORDER BY a ASC, time DESC;
SELECT DISTINCT ON (a) * FROM i3629 WHERE a in (2) ORDER BY a ASC, time DESC;