mirror of
https://github.com/timescale/timescaledb.git
synced 2025-05-16 02:23:49 +08:00
Fix overflow in gapfill's interpolate
All integer types must use numeric-based interpolation calculations since they are multiplied by int64 and this could cause an overflow. Numerics also interpolate better because the answer is rounded and not truncated. We can't use float8 because that doesn't handle really big ints exactly. We can't use the Postgres INT128 implementation because it doesn't support division. In the future we can optimize this for cases where overflow doesn't occur. Fixes #1491.
This commit is contained in:
parent
f82d83783e
commit
bf9eea0595
@ -10,6 +10,7 @@
|
||||
#include <utils/builtins.h>
|
||||
#include <utils/datum.h>
|
||||
#include <utils/typcache.h>
|
||||
#include <utils/numeric.h>
|
||||
|
||||
#include "compat.h"
|
||||
#include "nodes/gapfill/interpolate.h"
|
||||
@ -144,6 +145,25 @@ gapfill_fetch_sample(GapFillState *state, GapFillInterpolateColumnState *column,
|
||||
DecrTupleDescRefCount(tupdesc);
|
||||
}
|
||||
|
||||
/* Calculate the interpolation using numerics, returning the result as a numeric datum */
|
||||
static Datum
|
||||
interpolate_numeric(int64 x_i, int64 x0_i, int64 x1_i, Datum y0, Datum y1)
|
||||
{
|
||||
Datum x0 = DirectFunctionCall1(int8_numeric, Int64GetDatum(x0_i));
|
||||
Datum x1 = DirectFunctionCall1(int8_numeric, Int64GetDatum(x1_i));
|
||||
Datum x = DirectFunctionCall1(int8_numeric, Int64GetDatum(x_i));
|
||||
|
||||
Datum x1_sub_x = DirectFunctionCall2(numeric_sub, x1, x);
|
||||
Datum x_sub_x0 = DirectFunctionCall2(numeric_sub, x, x0);
|
||||
Datum y0_mul_x1_sub_x = DirectFunctionCall2(numeric_mul, y0, x1_sub_x);
|
||||
Datum y1_mul_x_sub_x0 = DirectFunctionCall2(numeric_mul, y1, x_sub_x0);
|
||||
|
||||
Datum numerator = DirectFunctionCall2(numeric_add, y0_mul_x1_sub_x, y1_mul_x_sub_x0);
|
||||
Datum denominator = DirectFunctionCall2(numeric_sub, x1, x0);
|
||||
|
||||
return DirectFunctionCall2(numeric_div, numerator, denominator);
|
||||
}
|
||||
|
||||
/*
|
||||
* gapfill_interpolate_calculate gets called for every gapfilled tuple to calculate values
|
||||
*
|
||||
@ -178,14 +198,43 @@ gapfill_interpolate_calculate(GapFillInterpolateColumnState *column, GapFillStat
|
||||
|
||||
switch (column->base.typid)
|
||||
{
|
||||
/* All integer types must use numeric-based interpolation calculations since they are
|
||||
* multiplied by int64 and this could cause an overflow. numerics also interpolate better
|
||||
* because the answer is rounded and not truncated. We can't use float8 because that
|
||||
doesn't handle really big ints exactly. We can't use the Postgres INT128 implementation
|
||||
because it doesn't support division. */
|
||||
case INT2OID:
|
||||
*value = Int16GetDatum(INTERPOLATE(x, x0, x1, DatumGetInt16(y0), DatumGetInt16(y1)));
|
||||
*value =
|
||||
DirectFunctionCall1(numeric_int2,
|
||||
interpolate_numeric(x,
|
||||
x0,
|
||||
x1,
|
||||
DirectFunctionCall1(int2_numeric,
|
||||
DatumGetInt16(y0)),
|
||||
DirectFunctionCall1(int2_numeric,
|
||||
DatumGetInt16(y1))));
|
||||
break;
|
||||
case INT4OID:
|
||||
*value = Int32GetDatum(INTERPOLATE(x, x0, x1, DatumGetInt32(y0), DatumGetInt32(y1)));
|
||||
*value =
|
||||
DirectFunctionCall1(numeric_int4,
|
||||
interpolate_numeric(x,
|
||||
x0,
|
||||
x1,
|
||||
DirectFunctionCall1(int4_numeric,
|
||||
DatumGetInt32(y0)),
|
||||
DirectFunctionCall1(int4_numeric,
|
||||
DatumGetInt32(y1))));
|
||||
break;
|
||||
case INT8OID:
|
||||
*value = Int64GetDatum(INTERPOLATE(x, x0, x1, DatumGetInt64(y0), DatumGetInt64(y1)));
|
||||
*value =
|
||||
DirectFunctionCall1(numeric_int8,
|
||||
interpolate_numeric(x,
|
||||
x0,
|
||||
x1,
|
||||
DirectFunctionCall1(int8_numeric,
|
||||
DatumGetInt64(y0)),
|
||||
DirectFunctionCall1(int8_numeric,
|
||||
DatumGetInt64(y1))));
|
||||
break;
|
||||
case FLOAT4OID:
|
||||
*value = Float4GetDatum(INTERPOLATE(x, x0, x1, DatumGetFloat4(y0), DatumGetFloat4(y1)));
|
||||
|
@ -1083,10 +1083,10 @@ GROUP BY 1 ORDER BY 1;
|
||||
time | smallint | int | bigint | float4 | float8
|
||||
------+----------+-----+--------+--------+--------
|
||||
0 | -3 | -3 | -3 | -3 | -3
|
||||
10 | -1 | -1 | -1 | -1.8 | -1.8
|
||||
20 | 0 | 0 | 0 | -0.6 | -0.6
|
||||
30 | 0 | 0 | 0 | 0.6 | 0.6
|
||||
40 | 1 | 1 | 1 | 1.8 | 1.8
|
||||
10 | -2 | -2 | -2 | -1.8 | -1.8
|
||||
20 | -1 | -1 | -1 | -0.6 | -0.6
|
||||
30 | 1 | 1 | 1 | 0.6 | 0.6
|
||||
40 | 2 | 2 | 2 | 1.8 | 1.8
|
||||
50 | 3 | 3 | 3 | 3 | 3
|
||||
(6 rows)
|
||||
|
||||
@ -2683,3 +2683,26 @@ GROUP BY 1,device_id;
|
||||
4 | Device 2
|
||||
(10 rows)
|
||||
|
||||
--test interpolation with big differences in values (test overflows in calculations)
|
||||
--we use the biggest possible difference in time(x) and the value(y).
|
||||
--For bigints we also test values smaller than bigintmax/min to avoid
|
||||
--the symmetry where x=y (which catches more errors)
|
||||
SELECT 9223372036854775807 as big_int_max \gset
|
||||
SELECT -9223372036854775808 as big_int_min \gset
|
||||
SELECT
|
||||
time_bucket_gapfill(1,time,0,1) AS time,
|
||||
interpolate(min(s)) AS "smallint",
|
||||
interpolate(min(i)) AS "int",
|
||||
interpolate(min(b)) AS "bigint",
|
||||
interpolate(min(b2)) AS "bigint2",
|
||||
interpolate(min(d)) AS "double"
|
||||
FROM (values (:big_int_min,(-32768)::smallint,(-2147483648)::int,:big_int_min,-2147483648::bigint, '-Infinity'::double precision),
|
||||
(:big_int_max, 32767::smallint, 2147483647::int,:big_int_max, 2147483647::bigint, 'Infinity'::double precision)) v(time,s,i,b,b2,d)
|
||||
GROUP BY 1 ORDER BY 1;
|
||||
time | smallint | int | bigint | bigint2 | double
|
||||
----------------------+----------+-------------+----------------------+-------------+-----------
|
||||
-9223372036854775808 | -32768 | -2147483648 | -9223372036854775808 | -2147483648 | -Infinity
|
||||
0 | 0 | 0 | 0 | 0 | Infinity
|
||||
9223372036854775807 | 32767 | 2147483647 | 9223372036854775807 | 2147483647 | Infinity
|
||||
(3 rows)
|
||||
|
||||
|
@ -1456,3 +1456,21 @@ GROUP BY 1,device_id;
|
||||
|
||||
|
||||
|
||||
|
||||
--test interpolation with big differences in values (test overflows in calculations)
|
||||
--we use the biggest possible difference in time(x) and the value(y).
|
||||
--For bigints we also test values smaller than bigintmax/min to avoid
|
||||
--the symmetry where x=y (which catches more errors)
|
||||
SELECT 9223372036854775807 as big_int_max \gset
|
||||
SELECT -9223372036854775808 as big_int_min \gset
|
||||
|
||||
SELECT
|
||||
time_bucket_gapfill(1,time,0,1) AS time,
|
||||
interpolate(min(s)) AS "smallint",
|
||||
interpolate(min(i)) AS "int",
|
||||
interpolate(min(b)) AS "bigint",
|
||||
interpolate(min(b2)) AS "bigint2",
|
||||
interpolate(min(d)) AS "double"
|
||||
FROM (values (:big_int_min,(-32768)::smallint,(-2147483648)::int,:big_int_min,-2147483648::bigint, '-Infinity'::double precision),
|
||||
(:big_int_max, 32767::smallint, 2147483647::int,:big_int_max, 2147483647::bigint, 'Infinity'::double precision)) v(time,s,i,b,b2,d)
|
||||
GROUP BY 1 ORDER BY 1;
|
||||
|
Loading…
x
Reference in New Issue
Block a user