Fix groupby pathkeys for gapfill in PG16

With PG16, group pathkeys can include column which are
not in the GROUP BY clause when dealing with ordered
aggregates. This means we have to exclude those columns
when checking and creating the gapfill custom scan subpath.

Fixes #6396
This commit is contained in:
Ante Kresic 2023-12-12 14:16:32 +01:00 committed by Ante Kresic
parent c914d19fac
commit 0e2ff92ec7
7 changed files with 289 additions and 10 deletions

2
.unreleased/fix_6408 Normal file
View File

@ -0,0 +1,2 @@
Fixes: #6408 Fix groupby pathkeys for gapfill in PG16
Thanks: @MA-MacDonald for reporting an issue with gapfill in PG16

View File

@ -135,7 +135,18 @@ window_function_walker(Node *node, gapfill_walker_context *context)
static bool
gapfill_correct_order(PlannerInfo *root, Path *subpath, FuncExpr *func)
{
if (list_length(subpath->pathkeys) != list_length(root->group_pathkeys))
int num_groupby_pathkeys;
#if PG16_LT
num_groupby_pathkeys = list_length(root->group_pathkeys);
#else
/* In PG16 group_pathkeys can contain additional pathkeys
* used for optimization on ordered aggregates.
* We only want to deal with group by elements only here.
*/
num_groupby_pathkeys = root->num_groupby_pathkeys;
#endif
if (list_length(subpath->pathkeys) != num_groupby_pathkeys)
return false;
if (list_length(subpath->pathkeys) > 0)
@ -147,12 +158,11 @@ gapfill_correct_order(PlannerInfo *root, Path *subpath, FuncExpr *func)
if (BTLessStrategyNumber == pk->pk_strategy && IsA(em->em_expr, FuncExpr) &&
((FuncExpr *) em->em_expr)->funcid == func->funcid)
{
ListCell *lc;
/* check all groups are part of subpath pathkeys */
foreach (lc, root->group_pathkeys)
int i;
/* check all groupby pathkeys are part of subpath pathkeys */
for (i = 0; i < num_groupby_pathkeys; i++)
{
if (!list_member(subpath->pathkeys, lfirst(lc)))
if (!list_member(subpath->pathkeys, list_nth(root->group_pathkeys, i)))
return false;
}
return true;
@ -358,13 +368,22 @@ gapfill_path_create(PlannerInfo *root, Path *subpath, FuncExpr *func)
if (!gapfill_correct_order(root, subpath, func))
{
List *new_order = NIL;
ListCell *lc;
PathKey *pk_func = NULL;
int num_groupby_pathkeys;
#if PG16_LT
num_groupby_pathkeys = list_length(root->group_pathkeys);
#else
/* In PG16 group_pathkeys can contain additional pathkeys
* used for optimization on ordered aggregates.
* We only want to deal with group by elements only here.
*/
num_groupby_pathkeys = root->num_groupby_pathkeys;
#endif
int i;
/* subpath does not have correct order */
foreach (lc, root->group_pathkeys)
for (i = 0; i < num_groupby_pathkeys; i++)
{
PathKey *pk = lfirst(lc);
PathKey *pk = list_nth(root->group_pathkeys, i);
EquivalenceMember *em = linitial(pk->pk_eclass->ec_members);
if (!pk_func && IsA(em->em_expr, FuncExpr) &&

View File

@ -3392,3 +3392,59 @@ GROUP BY 1;
(7 rows)
DROP TABLE month_timezone;
-- Test gapfill with additional group pathkeys added for optimization (#6396)
CREATE TABLE stocks_real_time (
time TIMESTAMPTZ NOT NULL,
symbol TEXT NOT NULL,
price DOUBLE PRECISION NULL,
day_volume INT NULL
);
INSERT INTO stocks_real_time
VALUES
(NOW(), 's1', 70.0, 50),
(NOW(), 's2', 66.5, 60),
(NOW(), 's3', 77.0, 65);
SELECT
time_bucket_gapfill('1 day', time) AS day,
COUNT(DISTINCT symbol) AS symbol_count
FROM stocks_real_time
WHERE time > '2023-12-01'
AND time < '2023-12-10'
GROUP BY 1;
day | symbol_count
------------------------------+--------------
Thu Nov 30 16:00:00 2023 PST |
Fri Dec 01 16:00:00 2023 PST |
Sat Dec 02 16:00:00 2023 PST |
Sun Dec 03 16:00:00 2023 PST |
Mon Dec 04 16:00:00 2023 PST |
Tue Dec 05 16:00:00 2023 PST |
Wed Dec 06 16:00:00 2023 PST |
Thu Dec 07 16:00:00 2023 PST |
Fri Dec 08 16:00:00 2023 PST |
Sat Dec 09 16:00:00 2023 PST |
(10 rows)
SELECT
time_bucket_gapfill('1 day', time) AS day,
COUNT(DISTINCT symbol) AS symbol_count
FROM stocks_real_time
WHERE time > '2023-12-01'
AND time < '2023-12-10'
GROUP BY 1
ORDER BY 1 DESC;
day | symbol_count
------------------------------+--------------
Sat Dec 09 16:00:00 2023 PST |
Fri Dec 08 16:00:00 2023 PST |
Thu Dec 07 16:00:00 2023 PST |
Wed Dec 06 16:00:00 2023 PST |
Tue Dec 05 16:00:00 2023 PST |
Mon Dec 04 16:00:00 2023 PST |
Sun Dec 03 16:00:00 2023 PST |
Sat Dec 02 16:00:00 2023 PST |
Fri Dec 01 16:00:00 2023 PST |
Thu Nov 30 16:00:00 2023 PST |
(10 rows)
DROP TABLE stocks_real_time;

View File

@ -3392,3 +3392,59 @@ GROUP BY 1;
(7 rows)
DROP TABLE month_timezone;
-- Test gapfill with additional group pathkeys added for optimization (#6396)
CREATE TABLE stocks_real_time (
time TIMESTAMPTZ NOT NULL,
symbol TEXT NOT NULL,
price DOUBLE PRECISION NULL,
day_volume INT NULL
);
INSERT INTO stocks_real_time
VALUES
(NOW(), 's1', 70.0, 50),
(NOW(), 's2', 66.5, 60),
(NOW(), 's3', 77.0, 65);
SELECT
time_bucket_gapfill('1 day', time) AS day,
COUNT(DISTINCT symbol) AS symbol_count
FROM stocks_real_time
WHERE time > '2023-12-01'
AND time < '2023-12-10'
GROUP BY 1;
day | symbol_count
------------------------------+--------------
Thu Nov 30 16:00:00 2023 PST |
Fri Dec 01 16:00:00 2023 PST |
Sat Dec 02 16:00:00 2023 PST |
Sun Dec 03 16:00:00 2023 PST |
Mon Dec 04 16:00:00 2023 PST |
Tue Dec 05 16:00:00 2023 PST |
Wed Dec 06 16:00:00 2023 PST |
Thu Dec 07 16:00:00 2023 PST |
Fri Dec 08 16:00:00 2023 PST |
Sat Dec 09 16:00:00 2023 PST |
(10 rows)
SELECT
time_bucket_gapfill('1 day', time) AS day,
COUNT(DISTINCT symbol) AS symbol_count
FROM stocks_real_time
WHERE time > '2023-12-01'
AND time < '2023-12-10'
GROUP BY 1
ORDER BY 1 DESC;
day | symbol_count
------------------------------+--------------
Sat Dec 09 16:00:00 2023 PST |
Fri Dec 08 16:00:00 2023 PST |
Thu Dec 07 16:00:00 2023 PST |
Wed Dec 06 16:00:00 2023 PST |
Tue Dec 05 16:00:00 2023 PST |
Mon Dec 04 16:00:00 2023 PST |
Sun Dec 03 16:00:00 2023 PST |
Sat Dec 02 16:00:00 2023 PST |
Fri Dec 01 16:00:00 2023 PST |
Thu Nov 30 16:00:00 2023 PST |
(10 rows)
DROP TABLE stocks_real_time;

View File

@ -3392,3 +3392,59 @@ GROUP BY 1;
(7 rows)
DROP TABLE month_timezone;
-- Test gapfill with additional group pathkeys added for optimization (#6396)
CREATE TABLE stocks_real_time (
time TIMESTAMPTZ NOT NULL,
symbol TEXT NOT NULL,
price DOUBLE PRECISION NULL,
day_volume INT NULL
);
INSERT INTO stocks_real_time
VALUES
(NOW(), 's1', 70.0, 50),
(NOW(), 's2', 66.5, 60),
(NOW(), 's3', 77.0, 65);
SELECT
time_bucket_gapfill('1 day', time) AS day,
COUNT(DISTINCT symbol) AS symbol_count
FROM stocks_real_time
WHERE time > '2023-12-01'
AND time < '2023-12-10'
GROUP BY 1;
day | symbol_count
------------------------------+--------------
Thu Nov 30 16:00:00 2023 PST |
Fri Dec 01 16:00:00 2023 PST |
Sat Dec 02 16:00:00 2023 PST |
Sun Dec 03 16:00:00 2023 PST |
Mon Dec 04 16:00:00 2023 PST |
Tue Dec 05 16:00:00 2023 PST |
Wed Dec 06 16:00:00 2023 PST |
Thu Dec 07 16:00:00 2023 PST |
Fri Dec 08 16:00:00 2023 PST |
Sat Dec 09 16:00:00 2023 PST |
(10 rows)
SELECT
time_bucket_gapfill('1 day', time) AS day,
COUNT(DISTINCT symbol) AS symbol_count
FROM stocks_real_time
WHERE time > '2023-12-01'
AND time < '2023-12-10'
GROUP BY 1
ORDER BY 1 DESC;
day | symbol_count
------------------------------+--------------
Sat Dec 09 16:00:00 2023 PST |
Fri Dec 08 16:00:00 2023 PST |
Thu Dec 07 16:00:00 2023 PST |
Wed Dec 06 16:00:00 2023 PST |
Tue Dec 05 16:00:00 2023 PST |
Mon Dec 04 16:00:00 2023 PST |
Sun Dec 03 16:00:00 2023 PST |
Sat Dec 02 16:00:00 2023 PST |
Fri Dec 01 16:00:00 2023 PST |
Thu Nov 30 16:00:00 2023 PST |
(10 rows)
DROP TABLE stocks_real_time;

View File

@ -3394,3 +3394,59 @@ GROUP BY 1;
(7 rows)
DROP TABLE month_timezone;
-- Test gapfill with additional group pathkeys added for optimization (#6396)
CREATE TABLE stocks_real_time (
time TIMESTAMPTZ NOT NULL,
symbol TEXT NOT NULL,
price DOUBLE PRECISION NULL,
day_volume INT NULL
);
INSERT INTO stocks_real_time
VALUES
(NOW(), 's1', 70.0, 50),
(NOW(), 's2', 66.5, 60),
(NOW(), 's3', 77.0, 65);
SELECT
time_bucket_gapfill('1 day', time) AS day,
COUNT(DISTINCT symbol) AS symbol_count
FROM stocks_real_time
WHERE time > '2023-12-01'
AND time < '2023-12-10'
GROUP BY 1;
day | symbol_count
------------------------------+--------------
Thu Nov 30 16:00:00 2023 PST |
Fri Dec 01 16:00:00 2023 PST |
Sat Dec 02 16:00:00 2023 PST |
Sun Dec 03 16:00:00 2023 PST |
Mon Dec 04 16:00:00 2023 PST |
Tue Dec 05 16:00:00 2023 PST |
Wed Dec 06 16:00:00 2023 PST |
Thu Dec 07 16:00:00 2023 PST |
Fri Dec 08 16:00:00 2023 PST |
Sat Dec 09 16:00:00 2023 PST |
(10 rows)
SELECT
time_bucket_gapfill('1 day', time) AS day,
COUNT(DISTINCT symbol) AS symbol_count
FROM stocks_real_time
WHERE time > '2023-12-01'
AND time < '2023-12-10'
GROUP BY 1
ORDER BY 1 DESC;
day | symbol_count
------------------------------+--------------
Sat Dec 09 16:00:00 2023 PST |
Fri Dec 08 16:00:00 2023 PST |
Thu Dec 07 16:00:00 2023 PST |
Wed Dec 06 16:00:00 2023 PST |
Tue Dec 05 16:00:00 2023 PST |
Mon Dec 04 16:00:00 2023 PST |
Sun Dec 03 16:00:00 2023 PST |
Sat Dec 02 16:00:00 2023 PST |
Fri Dec 01 16:00:00 2023 PST |
Thu Nov 30 16:00:00 2023 PST |
(10 rows)
DROP TABLE stocks_real_time;

View File

@ -1537,3 +1537,37 @@ FROM
GROUP BY 1;
DROP TABLE month_timezone;
-- Test gapfill with additional group pathkeys added for optimization (#6396)
CREATE TABLE stocks_real_time (
time TIMESTAMPTZ NOT NULL,
symbol TEXT NOT NULL,
price DOUBLE PRECISION NULL,
day_volume INT NULL
);
INSERT INTO stocks_real_time
VALUES
(NOW(), 's1', 70.0, 50),
(NOW(), 's2', 66.5, 60),
(NOW(), 's3', 77.0, 65);
SELECT
time_bucket_gapfill('1 day', time) AS day,
COUNT(DISTINCT symbol) AS symbol_count
FROM stocks_real_time
WHERE time > '2023-12-01'
AND time < '2023-12-10'
GROUP BY 1;
SELECT
time_bucket_gapfill('1 day', time) AS day,
COUNT(DISTINCT symbol) AS symbol_count
FROM stocks_real_time
WHERE time > '2023-12-01'
AND time < '2023-12-10'
GROUP BY 1
ORDER BY 1 DESC;
DROP TABLE stocks_real_time;