mirror of
https://github.com/timescale/timescaledb.git
synced 2025-05-15 01:53:41 +08:00
Fix ubsan failure in gorilla decompression
Also add more tests
This commit is contained in:
parent
936d751037
commit
8ff0648fd0
@ -2063,68 +2063,28 @@ get_compression_algorithm(char *name)
|
||||
return _INVALID_COMPRESSION_ALGORITHM;
|
||||
}
|
||||
|
||||
#define FUNCTION_NAME_HELPER(X, Y) decompress_##X##_##Y
|
||||
#define FUNCTION_NAME(X, Y) FUNCTION_NAME_HELPER(X, Y)
|
||||
|
||||
#define TOSTRING_HELPER(x) #x
|
||||
#define TOSTRING(x) TOSTRING_HELPER(x)
|
||||
|
||||
/* Try to decompress the given compressed data. Used for fuzzing. */
|
||||
#define DECOMPRESS_FN \
|
||||
static int FUNCTION_NAME(ALGO, CTYPE)(const uint8 *Data, size_t Size) \
|
||||
{ \
|
||||
StringInfoData si = { .data = (char *) Data, .len = Size }; \
|
||||
\
|
||||
int algo = pq_getmsgbyte(&si); \
|
||||
\
|
||||
CheckCompressedData(algo > 0 && algo < _END_COMPRESSION_ALGORITHMS); \
|
||||
\
|
||||
if (algo != get_compression_algorithm(TOSTRING(ALGO))) \
|
||||
{ \
|
||||
/* \
|
||||
* It's convenient to fuzz only one algorithm at a time. We specialize \
|
||||
* the fuzz target for one algorithm, so that the fuzzer doesn't waste \
|
||||
* time discovering others from scratch. \
|
||||
*/ \
|
||||
return -1; \
|
||||
} \
|
||||
\
|
||||
Datum compressed_data = definitions[algo].compressed_data_recv(&si); \
|
||||
DecompressionIterator *iter = \
|
||||
definitions[algo].iterator_init_forward(compressed_data, PGTYPE); \
|
||||
\
|
||||
int n = 0; \
|
||||
for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)) \
|
||||
{ \
|
||||
n++; \
|
||||
} \
|
||||
\
|
||||
return n; \
|
||||
}
|
||||
|
||||
#define ALGO gorilla
|
||||
#define CTYPE float8
|
||||
#define PGTYPE FLOAT8OID
|
||||
DECOMPRESS_FN
|
||||
#define DATUM_TO_CTYPE DatumGetFloat8
|
||||
#include "decompress_test_impl.c"
|
||||
#undef ALGO
|
||||
#undef CTYPE
|
||||
#undef PGTYPE
|
||||
#undef DATUM_TO_CTYPE
|
||||
|
||||
#define ALGO deltadelta
|
||||
#define CTYPE int64
|
||||
#define PGTYPE INT8OID
|
||||
DECOMPRESS_FN
|
||||
#define DATUM_TO_CTYPE DatumGetInt64
|
||||
#include "decompress_test_impl.c"
|
||||
#undef ALGO
|
||||
#undef CTYPE
|
||||
#undef PGTYPE
|
||||
#undef DATUM_TO_CTYPE
|
||||
|
||||
#undef TOSTRING
|
||||
#undef TOSTRING_HELPER
|
||||
|
||||
#undef FUNCTION_NAME
|
||||
#undef FUNCTION_NAME_HELPER
|
||||
|
||||
static int (*get_decompress_fn(int algo, Oid type))(const uint8 *Data, size_t Size)
|
||||
static int (*get_decompress_fn(int algo, Oid type))(const uint8 *Data, size_t Size,
|
||||
bool check_compression)
|
||||
{
|
||||
if (algo == COMPRESSION_ALGORITHM_GORILLA && type == FLOAT8OID)
|
||||
{
|
||||
@ -2185,7 +2145,9 @@ ts_read_compressed_data_file(PG_FUNCTION_ARGS)
|
||||
|
||||
Oid type = PG_GETARG_OID(1);
|
||||
|
||||
int res = get_decompress_fn(algo, type)((const uint8 *) string, fsize);
|
||||
int res =
|
||||
get_decompress_fn(algo,
|
||||
type)((const uint8 *) string, fsize, /* check_compression = */ true);
|
||||
|
||||
PG_RETURN_INT32(res);
|
||||
}
|
||||
|
111
tsl/src/compression/decompress_test_impl.c
Normal file
111
tsl/src/compression/decompress_test_impl.c
Normal file
@ -0,0 +1,111 @@
|
||||
/*
|
||||
* This file and its contents are licensed under the Timescale License.
|
||||
* Please see the included NOTICE for copyright information and
|
||||
* LICENSE-TIMESCALE for a copy of the license.
|
||||
*/
|
||||
|
||||
#define FUNCTION_NAME_HELPER(X, Y) decompress_##X##_##Y
|
||||
#define FUNCTION_NAME(X, Y) FUNCTION_NAME_HELPER(X, Y)
|
||||
|
||||
#define TOSTRING_HELPER(x) #x
|
||||
#define TOSTRING(x) TOSTRING_HELPER(x)
|
||||
|
||||
/*
|
||||
* Try to decompress the given compressed data. Used for fuzzing and for checking
|
||||
* the examples found by fuzzing. For fuzzing we don't check that the
|
||||
* recompression result is the same.
|
||||
*/
|
||||
static int
|
||||
FUNCTION_NAME(ALGO, CTYPE)(const uint8 *Data, size_t Size, bool check_compression)
|
||||
{
|
||||
StringInfoData si = { .data = (char *) Data, .len = Size };
|
||||
|
||||
int algo = pq_getmsgbyte(&si);
|
||||
|
||||
CheckCompressedData(algo > 0 && algo < _END_COMPRESSION_ALGORITHMS);
|
||||
|
||||
if (algo != get_compression_algorithm(TOSTRING(ALGO)))
|
||||
{
|
||||
/*
|
||||
* It's convenient to fuzz only one algorithm at a time. We specialize
|
||||
* the fuzz target for one algorithm, so that the fuzzer doesn't waste
|
||||
* time discovering others from scratch.
|
||||
*/
|
||||
return -1;
|
||||
}
|
||||
|
||||
Compressor *compressor = NULL;
|
||||
if (check_compression)
|
||||
{
|
||||
compressor = definitions[algo].compressor_for_type(PGTYPE);
|
||||
}
|
||||
|
||||
Datum compressed_data = definitions[algo].compressed_data_recv(&si);
|
||||
DecompressionIterator *iter = definitions[algo].iterator_init_forward(compressed_data, PGTYPE);
|
||||
|
||||
DecompressResult results[GLOBAL_MAX_ROWS_PER_COMPRESSION];
|
||||
int n = 0;
|
||||
for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter))
|
||||
{
|
||||
if (check_compression)
|
||||
{
|
||||
if (r.is_null)
|
||||
{
|
||||
compressor->append_null(compressor);
|
||||
}
|
||||
else
|
||||
{
|
||||
compressor->append_val(compressor, r.val);
|
||||
}
|
||||
results[n] = r;
|
||||
}
|
||||
|
||||
n++;
|
||||
}
|
||||
|
||||
if (!check_compression || n == 0)
|
||||
{
|
||||
return n;
|
||||
}
|
||||
|
||||
compressed_data = (Datum) compressor->finish(compressor);
|
||||
if (compressed_data == 0)
|
||||
{
|
||||
/* The gorilla compressor returns NULL for all-null input sets. */
|
||||
return n;
|
||||
};
|
||||
|
||||
/*
|
||||
* Check that the result is still the same after we compress and decompress
|
||||
* back.
|
||||
*/
|
||||
iter = definitions[algo].iterator_init_forward(compressed_data, PGTYPE);
|
||||
int nn = 0;
|
||||
for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter))
|
||||
{
|
||||
if (r.is_null != results[nn].is_null)
|
||||
{
|
||||
elog(ERROR, "the decompression result doesn't match");
|
||||
}
|
||||
|
||||
if (!r.is_null && (DATUM_TO_CTYPE(r.val) != DATUM_TO_CTYPE(results[nn].val)))
|
||||
{
|
||||
elog(ERROR, "the decompression result doesn't match");
|
||||
}
|
||||
|
||||
nn++;
|
||||
|
||||
if (nn > n)
|
||||
{
|
||||
elog(ERROR, "the recompression result doesn't match");
|
||||
}
|
||||
}
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
#undef TOSTRING
|
||||
#undef TOSTRING_HELPER
|
||||
|
||||
#undef FUNCTION_NAME
|
||||
#undef FUNCTION_NAME_HELPER
|
@ -647,9 +647,21 @@ gorilla_decompression_iterator_try_next_forward_internal(GorillaDecompressionIte
|
||||
iter->prev_xor_bits_used = num_xor_bits.val;
|
||||
CheckCompressedData(iter->prev_xor_bits_used <= 64);
|
||||
|
||||
/*
|
||||
* More than 64 significant bits don't make sense. Exactly 64 we get for
|
||||
* the first encoded number.
|
||||
*/
|
||||
CheckCompressedData(iter->prev_xor_bits_used + iter->prev_leading_zeroes <= 64);
|
||||
}
|
||||
|
||||
/*
|
||||
* Zero significant bits would mean that the previous number is repeated,
|
||||
* but this should have been encoded with tag0 = 0.
|
||||
* This also might fail if we haven't seen the tag1 = 1 for the first number
|
||||
* and didn't initialize the bit widths.
|
||||
*/
|
||||
CheckCompressedData(iter->prev_xor_bits_used + iter->prev_leading_zeroes > 0);
|
||||
|
||||
xor = bit_array_iter_next(&iter->xors, iter->prev_xor_bits_used);
|
||||
if (iter->prev_leading_zeroes + iter->prev_xor_bits_used < 64)
|
||||
xor <<= 64 - (iter->prev_leading_zeroes + iter->prev_xor_bits_used);
|
||||
|
@ -1550,8 +1550,8 @@ from ts_read_compressed_data_directory('gorilla', 'float8', (:'TEST_INPUT_DIR' |
|
||||
group by 2 order by 1 desc;
|
||||
count | result
|
||||
-------+--------
|
||||
521 | XX001
|
||||
214 | true
|
||||
657 | XX001
|
||||
78 | true
|
||||
72 | 08P01
|
||||
2 | false
|
||||
(4 rows)
|
||||
|
Loading…
x
Reference in New Issue
Block a user