Fix overflow in gapfill's interpolate

All integer types must use numeric-based interpolation calculations
since they are multiplied by int64 and this could cause an overflow.
Numerics also interpolate better because the answer is rounded and not
truncated. We can't use float8 because that doesn't handle really big
ints exactly. We can't use the Postgres INT128 implementation because
it doesn't support division.

In the future we can optimize this for cases where overflow doesn't
occur.

Fixes #1491.
This commit is contained in:
Matvey Arye 2019-10-27 14:07:30 -04:00 committed by Matvey Arye
parent f82d83783e
commit bf9eea0595
3 changed files with 99 additions and 9 deletions

View File

@ -10,6 +10,7 @@
#include <utils/builtins.h>
#include <utils/datum.h>
#include <utils/typcache.h>
#include <utils/numeric.h>
#include "compat.h"
#include "nodes/gapfill/interpolate.h"
@ -144,6 +145,25 @@ gapfill_fetch_sample(GapFillState *state, GapFillInterpolateColumnState *column,
DecrTupleDescRefCount(tupdesc);
}
/*
 * Linear interpolation between the points (x0_i, y0) and (x1_i, y1),
 * evaluated at x_i, with every arithmetic step carried out on numerics:
 *
 *     (y0 * (x1 - x) + y1 * (x - x0)) / (x1 - x0)
 *
 * The three int64 x coordinates are converted with int8_numeric here.
 * y0 and y1 are handed to numeric_mul unchanged, so the caller has to
 * pass them in already converted to numeric. The value returned is the
 * datum produced by numeric_div.
 */
static Datum
interpolate_numeric(int64 x_i, int64 x0_i, int64 x1_i, Datum y0, Datum y1)
{
	/* Lift the x coordinates into numeric */
	Datum at = DirectFunctionCall1(int8_numeric, Int64GetDatum(x_i));
	Datum lower = DirectFunctionCall1(int8_numeric, Int64GetDatum(x0_i));
	Datum upper = DirectFunctionCall1(int8_numeric, Int64GetDatum(x1_i));

	/* Distances from the target to each endpoint, and the full span */
	Datum dist_to_upper = DirectFunctionCall2(numeric_sub, upper, at);
	Datum dist_to_lower = DirectFunctionCall2(numeric_sub, at, lower);
	Datum span = DirectFunctionCall2(numeric_sub, upper, lower);

	/* Each y value is weighted by the distance to the opposite endpoint */
	Datum y0_term = DirectFunctionCall2(numeric_mul, y0, dist_to_upper);
	Datum y1_term = DirectFunctionCall2(numeric_mul, y1, dist_to_lower);
	Datum weighted_sum = DirectFunctionCall2(numeric_add, y0_term, y1_term);

	return DirectFunctionCall2(numeric_div, weighted_sum, span);
}
/*
* gapfill_interpolate_calculate gets called for every gapfilled tuple to calculate values
*
@ -178,14 +198,43 @@ gapfill_interpolate_calculate(GapFillInterpolateColumnState *column, GapFillStat
switch (column->base.typid)
{
/* All integer types must use numeric-based interpolation calculations since they are
* multiplied by int64 and this could cause an overflow. Numerics also interpolate better
* because the answer is rounded and not truncated. We can't use float8 because that
* doesn't handle really big ints exactly. We can't use the Postgres INT128 implementation
* because it doesn't support division. */
case INT2OID:
*value = Int16GetDatum(INTERPOLATE(x, x0, x1, DatumGetInt16(y0), DatumGetInt16(y1)));
*value =
DirectFunctionCall1(numeric_int2,
interpolate_numeric(x,
x0,
x1,
DirectFunctionCall1(int2_numeric,
DatumGetInt16(y0)),
DirectFunctionCall1(int2_numeric,
DatumGetInt16(y1))));
break;
case INT4OID:
*value = Int32GetDatum(INTERPOLATE(x, x0, x1, DatumGetInt32(y0), DatumGetInt32(y1)));
*value =
DirectFunctionCall1(numeric_int4,
interpolate_numeric(x,
x0,
x1,
DirectFunctionCall1(int4_numeric,
DatumGetInt32(y0)),
DirectFunctionCall1(int4_numeric,
DatumGetInt32(y1))));
break;
case INT8OID:
*value = Int64GetDatum(INTERPOLATE(x, x0, x1, DatumGetInt64(y0), DatumGetInt64(y1)));
*value =
DirectFunctionCall1(numeric_int8,
interpolate_numeric(x,
x0,
x1,
DirectFunctionCall1(int8_numeric,
DatumGetInt64(y0)),
DirectFunctionCall1(int8_numeric,
DatumGetInt64(y1))));
break;
case FLOAT4OID:
*value = Float4GetDatum(INTERPOLATE(x, x0, x1, DatumGetFloat4(y0), DatumGetFloat4(y1)));

View File

@ -1083,10 +1083,10 @@ GROUP BY 1 ORDER BY 1;
time | smallint | int | bigint | float4 | float8
------+----------+-----+--------+--------+--------
0 | -3 | -3 | -3 | -3 | -3
10 | -1 | -1 | -1 | -1.8 | -1.8
20 | 0 | 0 | 0 | -0.6 | -0.6
30 | 0 | 0 | 0 | 0.6 | 0.6
40 | 1 | 1 | 1 | 1.8 | 1.8
10 | -2 | -2 | -2 | -1.8 | -1.8
20 | -1 | -1 | -1 | -0.6 | -0.6
30 | 1 | 1 | 1 | 0.6 | 0.6
40 | 2 | 2 | 2 | 1.8 | 1.8
50 | 3 | 3 | 3 | 3 | 3
(6 rows)
@ -2683,3 +2683,26 @@ GROUP BY 1,device_id;
4 | Device 2
(10 rows)
--test interpolation with big diifferences in values (test overflows in calculations)
--we use the biggest possible difference in time(x) and the value(y).
--For bigints we also test values of smaller than bigintmax/min to avoid
--the symmetry where x=y (which catches more errors)
SELECT 9223372036854775807 as big_int_max \gset
SELECT -9223372036854775808 as big_int_min \gset
SELECT
time_bucket_gapfill(1,time,0,1) AS time,
interpolate(min(s)) AS "smallint",
interpolate(min(i)) AS "int",
interpolate(min(b)) AS "bigint",
interpolate(min(b2)) AS "bigint2",
interpolate(min(d)) AS "double"
FROM (values (:big_int_min,(-32768)::smallint,(-2147483648)::int,:big_int_min,-2147483648::bigint, '-Infinity'::double precision),
(:big_int_max, 32767::smallint, 2147483647::int,:big_int_max, 2147483647::bigint, 'Infinity'::double precision)) v(time,s,i,b,b2,d)
GROUP BY 1 ORDER BY 1;
time | smallint | int | bigint | bigint2 | double
----------------------+----------+-------------+----------------------+-------------+-----------
-9223372036854775808 | -32768 | -2147483648 | -9223372036854775808 | -2147483648 | -Infinity
0 | 0 | 0 | 0 | 0 | Infinity
9223372036854775807 | 32767 | 2147483647 | 9223372036854775807 | 2147483647 | Infinity
(3 rows)

View File

@ -1456,3 +1456,21 @@ GROUP BY 1,device_id;
--test interpolation with big diifferences in values (test overflows in calculations)
--we use the biggest possible difference in time(x) and the value(y).
--For bigints we also test values of smaller than bigintmax/min to avoid
--the symmetry where x=y (which catches more errors)
SELECT 9223372036854775807 as big_int_max \gset
SELECT -9223372036854775808 as big_int_min \gset
SELECT
time_bucket_gapfill(1,time,0,1) AS time,
interpolate(min(s)) AS "smallint",
interpolate(min(i)) AS "int",
interpolate(min(b)) AS "bigint",
interpolate(min(b2)) AS "bigint2",
interpolate(min(d)) AS "double"
FROM (values (:big_int_min,(-32768)::smallint,(-2147483648)::int,:big_int_min,-2147483648::bigint, '-Infinity'::double precision),
(:big_int_max, 32767::smallint, 2147483647::int,:big_int_max, 2147483647::bigint, 'Infinity'::double precision)) v(time,s,i,b,b2,d)
GROUP BY 1 ORDER BY 1;