mirror of
https://github.com/timescale/timescaledb.git
synced 2025-05-16 10:33:27 +08:00
Implement bool compression
Reusing the existing Simple8bRLE algorithm for bools. I added a new compression type specifically for this case called 'bool'. A new GUC is introduced so we can revert to the previous, array compression for bools: `timescaledb.enable_bool_compression`. It defaults to `false`. To enable bool compression set the GUC: `timescaledb.enable_bool_compression=true` Fixes #7233
This commit is contained in:
parent
4128feb262
commit
d7a8b4b8e2
1
.unreleased/pr_7701
Normal file
1
.unreleased/pr_7701
Normal file
@ -0,0 +1 @@
|
||||
Implements: #7701 Implement a custom compression algorithm for bool columns. It is experimental and can undergo backwards-incompatible changes. For testing, enable it using timescaledb.enable_bool_compression = on.
|
@ -6,3 +6,6 @@ CREATE FUNCTION _timescaledb_functions.compressed_data_has_nulls(_timescaledb_in
|
||||
RETURNS BOOL
|
||||
LANGUAGE C STRICT IMMUTABLE
|
||||
AS '@MODULE_PATHNAME@', 'ts_update_placeholder';
|
||||
|
||||
INSERT INTO _timescaledb_catalog.compression_algorithm( id, version, name, description) values
|
||||
( 5, 1, 'COMPRESSION_ALGORITHM_BOOL', 'bool');
|
||||
|
@ -6,3 +6,4 @@ ALTER TABLE _timescaledb_internal.bgw_job_stat_history
|
||||
|
||||
DROP FUNCTION IF EXISTS _timescaledb_functions.compressed_data_has_nulls(_timescaledb_internal.compressed_data);
|
||||
|
||||
DELETE FROM _timescaledb_catalog.compression_algorithm WHERE id = 5 AND version = 1 AND name = 'COMPRESSION_ALGORITHM_BOOL';
|
||||
|
@ -77,6 +77,8 @@ CROSSMODULE_WRAPPER(dictionary_compressor_append);
|
||||
CROSSMODULE_WRAPPER(dictionary_compressor_finish);
|
||||
CROSSMODULE_WRAPPER(array_compressor_append);
|
||||
CROSSMODULE_WRAPPER(array_compressor_finish);
|
||||
CROSSMODULE_WRAPPER(bool_compressor_append);
|
||||
CROSSMODULE_WRAPPER(bool_compressor_finish);
|
||||
CROSSMODULE_WRAPPER(create_compressed_chunk);
|
||||
CROSSMODULE_WRAPPER(compress_chunk);
|
||||
CROSSMODULE_WRAPPER(decompress_chunk);
|
||||
@ -419,6 +421,8 @@ TSDLLEXPORT CrossModuleFunctions ts_cm_functions_default = {
|
||||
.dictionary_compressor_finish = error_no_default_fn_pg_community,
|
||||
.array_compressor_append = error_no_default_fn_pg_community,
|
||||
.array_compressor_finish = error_no_default_fn_pg_community,
|
||||
.bool_compressor_append = error_no_default_fn_pg_community,
|
||||
.bool_compressor_finish = error_no_default_fn_pg_community,
|
||||
.hypercore_handler = process_hypercore_handler,
|
||||
.hypercore_proxy_handler = process_hypercore_proxy_handler,
|
||||
.is_compressed_tid = error_no_default_fn_pg_community,
|
||||
|
@ -150,6 +150,8 @@ typedef struct CrossModuleFunctions
|
||||
PGFunction dictionary_compressor_finish;
|
||||
PGFunction array_compressor_append;
|
||||
PGFunction array_compressor_finish;
|
||||
PGFunction bool_compressor_append;
|
||||
PGFunction bool_compressor_finish;
|
||||
PGFunction hypercore_handler;
|
||||
PGFunction hypercore_proxy_handler;
|
||||
PGFunction is_compressed_tid;
|
||||
|
12
src/guc.c
12
src/guc.c
@ -149,6 +149,7 @@ TSDLLEXPORT bool ts_guc_auto_sparse_indexes = true;
|
||||
TSDLLEXPORT bool ts_guc_default_hypercore_use_access_method = false;
|
||||
bool ts_guc_enable_chunk_skipping = false;
|
||||
TSDLLEXPORT bool ts_guc_enable_segmentwise_recompression = true;
|
||||
TSDLLEXPORT bool ts_guc_enable_bool_compression = false;
|
||||
|
||||
/* Enable or disable columnar scans for columnar-oriented storage engines. If
|
||||
* disabled, regular sequence scans will be used instead. */
|
||||
@ -746,6 +747,17 @@ _guc_init(void)
|
||||
NULL,
|
||||
NULL);
|
||||
|
||||
DefineCustomBoolVariable(MAKE_EXTOPTION("enable_bool_compression"),
|
||||
"Enable experimental bool compression functionality",
|
||||
"Enable bool compression",
|
||||
&ts_guc_enable_bool_compression,
|
||||
false,
|
||||
PGC_USERSET,
|
||||
0,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL);
|
||||
|
||||
/*
|
||||
* Define the limit on number of invalidation-based refreshes we allow per
|
||||
* refresh call. If this limit is exceeded, fall back to a single refresh that
|
||||
|
@ -69,6 +69,7 @@ extern TSDLLEXPORT bool ts_guc_enable_delete_after_compression;
|
||||
extern TSDLLEXPORT bool ts_guc_enable_merge_on_cagg_refresh;
|
||||
extern bool ts_guc_enable_chunk_skipping;
|
||||
extern TSDLLEXPORT bool ts_guc_enable_segmentwise_recompression;
|
||||
extern TSDLLEXPORT bool ts_guc_enable_bool_compression;
|
||||
|
||||
#ifdef USE_TELEMETRY
|
||||
typedef enum TelemetryLevel
|
||||
|
@ -67,6 +67,15 @@ structure and does not actually compress it (though TOAST-based compression
|
||||
can be applied on top). It is the compression mechanism used when no other
|
||||
compression mechanism works. It can store any type of data.
|
||||
|
||||
### Bool Compressor
|
||||
|
||||
The bool compressor is a simple compression algorithm that stores boolean values
|
||||
using the simple8b_rle algorithm only, without any additional processing. During
|
||||
decompression it decompresses the data and stores it in memory as a bitmap. The
|
||||
row based iterators then walk through the bitmap. The bool compressor differs from
|
||||
the other compressors in that it stores the last non-null value as a placeholder for
|
||||
the null values. This is done to make vectorization easier.
|
||||
|
||||
# Merging chunks while compressing #
|
||||
|
||||
## Setup ##
|
||||
|
@ -3,5 +3,6 @@ set(SOURCES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/datum_serialize.c
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/deltadelta.c
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/dictionary.c
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/gorilla.c)
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/gorilla.c
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/bool_compress.c)
|
||||
target_sources(${TSL_LIBRARY_NAME} PRIVATE ${SOURCES})
|
||||
|
440
tsl/src/compression/algorithms/bool_compress.c
Normal file
440
tsl/src/compression/algorithms/bool_compress.c
Normal file
@ -0,0 +1,440 @@
|
||||
/*
|
||||
* This file and its contents are licensed under the Timescale License.
|
||||
* Please see the included NOTICE for copyright information and
|
||||
* LICENSE-TIMESCALE for a copy of the license.
|
||||
*/
|
||||
|
||||
#include "bool_compress.h"
|
||||
#include "compression/arrow_c_data_interface.h"
|
||||
#include "compression/compression.h"
|
||||
#include "simple8b_rle.h"
|
||||
#include "simple8b_rle_bitmap.h"
|
||||
|
||||
typedef struct BoolCompressed
|
||||
{
|
||||
CompressedDataHeaderFields;
|
||||
uint8 has_nulls; /* 1 if this has a NULLs bitmap after the values, 0 otherwise */
|
||||
uint8 padding[2]; /* padding added because of Simple8bRleSerialized format */
|
||||
char values[FLEXIBLE_ARRAY_MEMBER];
|
||||
} BoolCompressed;
|
||||
|
||||
typedef struct BoolDecompressionIterator
|
||||
{
|
||||
DecompressionIterator base;
|
||||
Simple8bRleBitmap values;
|
||||
Simple8bRleBitmap validity_bitmap;
|
||||
int32 position;
|
||||
} BoolDecompressionIterator;
|
||||
|
||||
typedef struct BoolCompressor
|
||||
{
|
||||
Simple8bRleCompressor values;
|
||||
Simple8bRleCompressor validity_bitmap;
|
||||
bool has_nulls;
|
||||
bool last_value;
|
||||
} BoolCompressor;
|
||||
|
||||
typedef struct ExtendedCompressor
|
||||
{
|
||||
Compressor base;
|
||||
BoolCompressor *internal;
|
||||
} ExtendedCompressor;
|
||||
|
||||
/*
|
||||
* Local helpers
|
||||
*/
|
||||
static void bool_compressor_append_bool(Compressor *compressor, Datum val);
|
||||
|
||||
static void bool_compressor_append_null_value(Compressor *compressor);
|
||||
|
||||
static void *bool_compressor_finish_and_reset(Compressor *compressor);
|
||||
|
||||
const Compressor bool_compressor_initializer = {
|
||||
.append_val = bool_compressor_append_bool,
|
||||
.append_null = bool_compressor_append_null_value,
|
||||
.finish = bool_compressor_finish_and_reset,
|
||||
};
|
||||
|
||||
static BoolCompressed *bool_compressed_from_parts(Simple8bRleSerialized *values,
|
||||
Simple8bRleSerialized *validity_bitmap);
|
||||
|
||||
static void decompression_iterator_init(BoolDecompressionIterator *iter, void *compressed,
|
||||
Oid element_type, bool forward);
|
||||
|
||||
/*
|
||||
* Compressor framework functions and definitions for the bool_compress algorithm.
|
||||
*/
|
||||
|
||||
extern BoolCompressor *
|
||||
bool_compressor_alloc(void)
|
||||
{
|
||||
BoolCompressor *compressor = palloc0(sizeof(*compressor));
|
||||
simple8brle_compressor_init(&compressor->values);
|
||||
simple8brle_compressor_init(&compressor->validity_bitmap);
|
||||
return compressor;
|
||||
}
|
||||
|
||||
extern void
|
||||
bool_compressor_append_null(BoolCompressor *compressor)
|
||||
{
|
||||
/*
|
||||
* We use parallel bitmaps of same size for validity and values, to support
|
||||
* zero-copy decompression into ArrowArray. When an element is null,
|
||||
* the particular value that goes into the values bitmap doesn't matter, so
|
||||
* we add the last seen value, not to break the RLE sequences.
|
||||
*/
|
||||
compressor->has_nulls = true;
|
||||
simple8brle_compressor_append(&compressor->values, compressor->last_value);
|
||||
simple8brle_compressor_append(&compressor->validity_bitmap, 0);
|
||||
}
|
||||
|
||||
extern void
|
||||
bool_compressor_append_value(BoolCompressor *compressor, bool next_val)
|
||||
{
|
||||
compressor->last_value = next_val;
|
||||
simple8brle_compressor_append(&compressor->values, next_val);
|
||||
simple8brle_compressor_append(&compressor->validity_bitmap, 1);
|
||||
}
|
||||
|
||||
extern void *
|
||||
bool_compressor_finish(BoolCompressor *compressor)
|
||||
{
|
||||
if (compressor == NULL)
|
||||
return NULL;
|
||||
|
||||
Simple8bRleSerialized *values = simple8brle_compressor_finish(&compressor->values);
|
||||
if (values == NULL)
|
||||
return NULL;
|
||||
|
||||
Simple8bRleSerialized *validity_bitmap =
|
||||
simple8brle_compressor_finish(&compressor->validity_bitmap);
|
||||
BoolCompressed *compressed;
|
||||
|
||||
compressed = bool_compressed_from_parts(values, compressor->has_nulls ? validity_bitmap : NULL);
|
||||
/* When only nulls are present, we can return NULL */
|
||||
Assert(compressed == NULL || compressed->compression_algorithm == COMPRESSION_ALGORITHM_BOOL);
|
||||
return compressed;
|
||||
}
|
||||
|
||||
extern bool
|
||||
bool_compressed_has_nulls(const CompressedDataHeader *header)
|
||||
{
|
||||
const BoolCompressed *ddc = (const BoolCompressed *) header;
|
||||
return ddc->has_nulls;
|
||||
}
|
||||
|
||||
extern DecompressResult
|
||||
bool_decompression_iterator_try_next_forward(DecompressionIterator *iter)
|
||||
{
|
||||
Assert(iter->compression_algorithm == COMPRESSION_ALGORITHM_BOOL && iter->forward);
|
||||
Assert(iter->element_type == BOOLOID);
|
||||
|
||||
BoolDecompressionIterator *bool_iter = (BoolDecompressionIterator *) iter;
|
||||
|
||||
if (bool_iter->position >= bool_iter->values.num_elements)
|
||||
return (DecompressResult){
|
||||
.is_done = true,
|
||||
};
|
||||
|
||||
/* check nulls */
|
||||
if (bool_iter->validity_bitmap.num_elements > 0)
|
||||
{
|
||||
bool is_null = !simple8brle_bitmap_get_at(&bool_iter->validity_bitmap, bool_iter->position);
|
||||
if (is_null)
|
||||
{
|
||||
bool_iter->position++;
|
||||
return (DecompressResult){
|
||||
.is_null = true,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
bool val = simple8brle_bitmap_get_at(&bool_iter->values, bool_iter->position);
|
||||
bool_iter->position++;
|
||||
|
||||
return (DecompressResult){
|
||||
.val = BoolGetDatum(val),
|
||||
};
|
||||
}
|
||||
|
||||
extern DecompressionIterator *
|
||||
bool_decompression_iterator_from_datum_forward(Datum bool_compressed, Oid element_type)
|
||||
{
|
||||
BoolDecompressionIterator *iterator = palloc(sizeof(*iterator));
|
||||
decompression_iterator_init(iterator,
|
||||
(void *) PG_DETOAST_DATUM(bool_compressed),
|
||||
element_type,
|
||||
true);
|
||||
return &iterator->base;
|
||||
}
|
||||
|
||||
extern DecompressResult
|
||||
bool_decompression_iterator_try_next_reverse(DecompressionIterator *iter)
|
||||
{
|
||||
Assert(iter->compression_algorithm == COMPRESSION_ALGORITHM_BOOL && !iter->forward);
|
||||
Assert(iter->element_type == BOOLOID);
|
||||
|
||||
BoolDecompressionIterator *bool_iter = (BoolDecompressionIterator *) iter;
|
||||
|
||||
if (bool_iter->position < 0)
|
||||
return (DecompressResult){
|
||||
.is_done = true,
|
||||
};
|
||||
|
||||
/* check nulls */
|
||||
if (bool_iter->validity_bitmap.num_elements > 0)
|
||||
{
|
||||
bool is_null = !simple8brle_bitmap_get_at(&bool_iter->validity_bitmap, bool_iter->position);
|
||||
if (is_null)
|
||||
{
|
||||
bool_iter->position--;
|
||||
return (DecompressResult){
|
||||
.is_null = true,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
bool val = simple8brle_bitmap_get_at(&bool_iter->values, bool_iter->position);
|
||||
bool_iter->position--;
|
||||
|
||||
return (DecompressResult){
|
||||
.val = BoolGetDatum(val),
|
||||
};
|
||||
}
|
||||
|
||||
extern DecompressionIterator *
|
||||
bool_decompression_iterator_from_datum_reverse(Datum bool_compressed, Oid element_type)
|
||||
{
|
||||
BoolDecompressionIterator *iterator = palloc(sizeof(*iterator));
|
||||
decompression_iterator_init(iterator,
|
||||
(void *) PG_DETOAST_DATUM(bool_compressed),
|
||||
element_type,
|
||||
false);
|
||||
return &iterator->base;
|
||||
}
|
||||
|
||||
extern void
|
||||
bool_compressed_send(CompressedDataHeader *header, StringInfo buffer)
|
||||
{
|
||||
const BoolCompressed *data = (BoolCompressed *) header;
|
||||
Assert(header->compression_algorithm == COMPRESSION_ALGORITHM_BOOL);
|
||||
pq_sendbyte(buffer, data->has_nulls);
|
||||
simple8brle_serialized_send(buffer, (Simple8bRleSerialized *) data->values);
|
||||
if (data->has_nulls)
|
||||
{
|
||||
Simple8bRleSerialized *validity_bitmap =
|
||||
(Simple8bRleSerialized *) (((char *) data->values) +
|
||||
simple8brle_serialized_total_size(
|
||||
(Simple8bRleSerialized *) data->values));
|
||||
simple8brle_serialized_send(buffer, validity_bitmap);
|
||||
}
|
||||
}
|
||||
|
||||
extern Datum
|
||||
bool_compressed_recv(StringInfo buffer)
|
||||
{
|
||||
uint8 has_nulls;
|
||||
Simple8bRleSerialized *values;
|
||||
Simple8bRleSerialized *validity_bitmap = NULL;
|
||||
BoolCompressed *compressed;
|
||||
|
||||
has_nulls = pq_getmsgbyte(buffer);
|
||||
CheckCompressedData(has_nulls == 0 || has_nulls == 1);
|
||||
|
||||
values = simple8brle_serialized_recv(buffer);
|
||||
if (has_nulls)
|
||||
validity_bitmap = simple8brle_serialized_recv(buffer);
|
||||
|
||||
compressed = bool_compressed_from_parts(values, validity_bitmap);
|
||||
|
||||
PG_RETURN_POINTER(compressed);
|
||||
}
|
||||
|
||||
extern Compressor *
|
||||
bool_compressor_for_type(Oid element_type)
|
||||
{
|
||||
ExtendedCompressor *compressor = palloc(sizeof(*compressor));
|
||||
switch (element_type)
|
||||
{
|
||||
case BOOLOID:
|
||||
*compressor = (ExtendedCompressor){ .base = bool_compressor_initializer };
|
||||
return &compressor->base;
|
||||
default:
|
||||
elog(ERROR, "invalid type for bool compressor \"%s\"", format_type_be(element_type));
|
||||
}
|
||||
|
||||
pg_unreachable();
|
||||
}
|
||||
|
||||
/*
|
||||
* Cross-module functions for the bool_compress algorithm.
|
||||
*/
|
||||
extern Datum
|
||||
tsl_bool_compressor_append(PG_FUNCTION_ARGS)
|
||||
{
|
||||
MemoryContext old_context;
|
||||
MemoryContext agg_context;
|
||||
BoolCompressor *compressor = (BoolCompressor *) (PG_ARGISNULL(0) ? NULL : PG_GETARG_POINTER(0));
|
||||
|
||||
if (!AggCheckCallContext(fcinfo, &agg_context))
|
||||
{
|
||||
/* cannot be called directly because of internal-type argument */
|
||||
elog(ERROR, "tsl_bool_compressor_append called in non-aggregate context");
|
||||
}
|
||||
|
||||
old_context = MemoryContextSwitchTo(agg_context);
|
||||
|
||||
if (compressor == NULL)
|
||||
{
|
||||
compressor = bool_compressor_alloc();
|
||||
if (PG_NARGS() > 2)
|
||||
elog(ERROR, "append expects two arguments");
|
||||
}
|
||||
|
||||
if (PG_ARGISNULL(1))
|
||||
bool_compressor_append_null(compressor);
|
||||
else
|
||||
{
|
||||
bool next_val = PG_GETARG_BOOL(1);
|
||||
bool_compressor_append_value(compressor, next_val);
|
||||
}
|
||||
|
||||
MemoryContextSwitchTo(old_context);
|
||||
PG_RETURN_POINTER(compressor);
|
||||
}
|
||||
|
||||
extern Datum
|
||||
tsl_bool_compressor_finish(PG_FUNCTION_ARGS)
|
||||
{
|
||||
BoolCompressor *compressor = PG_ARGISNULL(0) ? NULL : (BoolCompressor *) PG_GETARG_POINTER(0);
|
||||
void *compressed;
|
||||
if (compressor == NULL)
|
||||
PG_RETURN_NULL();
|
||||
|
||||
compressed = bool_compressor_finish(compressor);
|
||||
if (compressed == NULL)
|
||||
PG_RETURN_NULL();
|
||||
PG_RETURN_POINTER(compressed);
|
||||
}
|
||||
|
||||
/*
|
||||
* Local helpers
|
||||
*/
|
||||
static void
|
||||
bool_compressor_append_bool(Compressor *compressor, Datum val)
|
||||
{
|
||||
ExtendedCompressor *extended = (ExtendedCompressor *) compressor;
|
||||
if (extended->internal == NULL)
|
||||
extended->internal = bool_compressor_alloc();
|
||||
|
||||
bool_compressor_append_value(extended->internal, DatumGetBool(val) ? true : false);
|
||||
}
|
||||
|
||||
static void
|
||||
bool_compressor_append_null_value(Compressor *compressor)
|
||||
{
|
||||
ExtendedCompressor *extended = (ExtendedCompressor *) compressor;
|
||||
if (extended->internal == NULL)
|
||||
extended->internal = bool_compressor_alloc();
|
||||
|
||||
bool_compressor_append_null(extended->internal);
|
||||
}
|
||||
|
||||
static void *
|
||||
bool_compressor_finish_and_reset(Compressor *compressor)
|
||||
{
|
||||
ExtendedCompressor *extended = (ExtendedCompressor *) compressor;
|
||||
void *compressed = NULL;
|
||||
if (extended != NULL && extended->internal != NULL)
|
||||
{
|
||||
compressed = bool_compressor_finish(extended->internal);
|
||||
pfree(extended->internal);
|
||||
extended->internal = NULL;
|
||||
}
|
||||
return compressed;
|
||||
}
|
||||
|
||||
static BoolCompressed *
|
||||
bool_compressed_from_parts(Simple8bRleSerialized *values, Simple8bRleSerialized *validity_bitmap)
|
||||
{
|
||||
uint32 validity_bitmap_size = 0;
|
||||
Size compressed_size;
|
||||
char *compressed_data;
|
||||
BoolCompressed *compressed;
|
||||
uint32 num_values = values != NULL ? values->num_elements : 0;
|
||||
uint32 values_size = values != NULL ? simple8brle_serialized_total_size(values) : 0;
|
||||
|
||||
if (num_values == 0)
|
||||
return NULL;
|
||||
|
||||
if (validity_bitmap != NULL)
|
||||
validity_bitmap_size = simple8brle_serialized_total_size(validity_bitmap);
|
||||
|
||||
compressed_size = sizeof(BoolCompressed) + values_size + validity_bitmap_size;
|
||||
|
||||
if (!AllocSizeIsValid(compressed_size))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
||||
errmsg("compressed size exceeds the maximum allowed (%d)", (int) MaxAllocSize)));
|
||||
|
||||
compressed_data = palloc(compressed_size);
|
||||
compressed = (BoolCompressed *) compressed_data;
|
||||
SET_VARSIZE(&compressed->vl_len_, compressed_size);
|
||||
|
||||
compressed->compression_algorithm = COMPRESSION_ALGORITHM_BOOL;
|
||||
compressed->has_nulls = validity_bitmap_size != 0 ? 1 : 0;
|
||||
|
||||
compressed_data += sizeof(*compressed);
|
||||
compressed_data = bytes_serialize_simple8b_and_advance(compressed_data, values_size, values);
|
||||
|
||||
if (compressed->has_nulls == 1 && validity_bitmap != NULL)
|
||||
{
|
||||
CheckCompressedData(validity_bitmap->num_elements == num_values);
|
||||
bytes_serialize_simple8b_and_advance(compressed_data,
|
||||
validity_bitmap_size,
|
||||
validity_bitmap);
|
||||
}
|
||||
|
||||
return compressed;
|
||||
}
|
||||
|
||||
static void
|
||||
decompression_iterator_init(BoolDecompressionIterator *iter, void *compressed, Oid element_type,
|
||||
bool forward)
|
||||
{
|
||||
StringInfoData si = { .data = compressed, .len = VARSIZE(compressed) };
|
||||
BoolCompressed *header = consumeCompressedData(&si, sizeof(BoolCompressed));
|
||||
Simple8bRleSerialized *values = bytes_deserialize_simple8b_and_advance(&si);
|
||||
|
||||
Assert(header->has_nulls == 0 || header->has_nulls == 1);
|
||||
Assert(element_type == BOOLOID);
|
||||
|
||||
const bool has_nulls = header->has_nulls == 1;
|
||||
|
||||
CheckCompressedData(has_nulls == 0 || has_nulls == 1);
|
||||
|
||||
*iter = (BoolDecompressionIterator){
|
||||
.base = { .compression_algorithm = COMPRESSION_ALGORITHM_BOOL,
|
||||
.forward = forward,
|
||||
.element_type = element_type,
|
||||
.try_next = (forward ? bool_decompression_iterator_try_next_forward :
|
||||
bool_decompression_iterator_try_next_reverse) },
|
||||
.values = { 0 },
|
||||
.validity_bitmap = { 0 },
|
||||
.position = 0,
|
||||
};
|
||||
|
||||
iter->values = simple8brle_bitmap_decompress(values);
|
||||
|
||||
if (has_nulls)
|
||||
{
|
||||
Simple8bRleSerialized *validity_bitmap = bytes_deserialize_simple8b_and_advance(&si);
|
||||
iter->validity_bitmap = simple8brle_bitmap_decompress(validity_bitmap);
|
||||
CheckCompressedData(iter->validity_bitmap.num_elements == iter->values.num_elements);
|
||||
}
|
||||
|
||||
if (!forward)
|
||||
{
|
||||
iter->position = iter->values.num_elements - 1;
|
||||
}
|
||||
}
|
75
tsl/src/compression/algorithms/bool_compress.h
Normal file
75
tsl/src/compression/algorithms/bool_compress.h
Normal file
@ -0,0 +1,75 @@
|
||||
/*
|
||||
* This file and its contents are licensed under the Timescale License.
|
||||
* Please see the included NOTICE for copyright information and
|
||||
* LICENSE-TIMESCALE for a copy of the license.
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
/*
|
||||
* bool_compress is used to encode boolean values using the simple8b_rle algorithm.
|
||||
*
|
||||
* The bool compressor differs from the other compressors in that it does store a value
|
||||
* even for nulls, which is the last value seen before the null. With this the bool
|
||||
* compressor always creates a compressed block even for nulls only.
|
||||
*
|
||||
* The boolean compressor represents the boolean values in a batch with two parallel
|
||||
* bitmaps, value bitmap and validity bitmap, like in the Arrow representation.
|
||||
* These bitmaps are compressed with our common bit-packing algorithm.
|
||||
*
|
||||
* The validity bitmap stores a 0 for a null value and a 1 for a non-null value as
|
||||
* required by the Arrow specification. This is the opposite of what the other compression
|
||||
* algorithms do in their nulls bitmaps.
|
||||
*/
|
||||
|
||||
#include <postgres.h>
|
||||
#include <fmgr.h>
|
||||
#include <lib/stringinfo.h>
|
||||
|
||||
#include "compression/compression.h"
|
||||
|
||||
typedef struct BoolCompressor BoolCompressor;
|
||||
typedef struct BoolCompressed BoolCompressed;
|
||||
typedef struct BoolDecompressionIterator BoolDecompressionIterator;
|
||||
|
||||
/*
|
||||
* Compressor framework functions and definitions for the bool_compress algorithm.
|
||||
*/
|
||||
|
||||
extern BoolCompressor *bool_compressor_alloc(void);
|
||||
extern void bool_compressor_append_null(BoolCompressor *compressor);
|
||||
extern void bool_compressor_append_value(BoolCompressor *compressor, bool next_val);
|
||||
extern void *bool_compressor_finish(BoolCompressor *compressor);
|
||||
extern bool bool_compressed_has_nulls(const CompressedDataHeader *header);
|
||||
|
||||
extern DecompressResult bool_decompression_iterator_try_next_forward(DecompressionIterator *iter);
|
||||
|
||||
extern DecompressionIterator *bool_decompression_iterator_from_datum_forward(Datum bool_compressed,
|
||||
Oid element_type);
|
||||
|
||||
extern DecompressResult bool_decompression_iterator_try_next_reverse(DecompressionIterator *iter);
|
||||
|
||||
extern DecompressionIterator *bool_decompression_iterator_from_datum_reverse(Datum bool_compressed,
|
||||
Oid element_type);
|
||||
|
||||
extern void bool_compressed_send(CompressedDataHeader *header, StringInfo buffer);
|
||||
|
||||
extern Datum bool_compressed_recv(StringInfo buf);
|
||||
|
||||
extern Compressor *bool_compressor_for_type(Oid element_type);
|
||||
|
||||
/* Initializer for the bool algorithm's entry in the algorithm definitions table. */
#define BOOL_COMPRESS_ALGORITHM_DEFINITION \
	{ \
		.iterator_init_forward = bool_decompression_iterator_from_datum_forward, \
		.iterator_init_reverse = bool_decompression_iterator_from_datum_reverse, \
		.decompress_all = NULL, \
		.compressed_data_send = bool_compressed_send, \
		.compressed_data_recv = bool_compressed_recv, \
		.compressor_for_type = bool_compressor_for_type, \
		.compressed_data_storage = TOAST_STORAGE_EXTERNAL, \
	}
|
||||
|
||||
/*
|
||||
* Cross-module functions for the bool_compress algorithm.
|
||||
*/
|
||||
|
||||
extern Datum tsl_bool_compressor_append(PG_FUNCTION_ARGS);
|
||||
extern Datum tsl_bool_compressor_finish(PG_FUNCTION_ARGS);
|
@ -19,6 +19,7 @@
|
||||
#include "compat/compat.h"
|
||||
|
||||
#include "algorithms/array.h"
|
||||
#include "algorithms/bool_compress.h"
|
||||
#include "algorithms/deltadelta.h"
|
||||
#include "algorithms/dictionary.h"
|
||||
#include "algorithms/gorilla.h"
|
||||
@ -47,6 +48,7 @@ static const CompressionAlgorithmDefinition definitions[_END_COMPRESSION_ALGORIT
|
||||
[COMPRESSION_ALGORITHM_DICTIONARY] = DICTIONARY_ALGORITHM_DEFINITION,
|
||||
[COMPRESSION_ALGORITHM_GORILLA] = GORILLA_ALGORITHM_DEFINITION,
|
||||
[COMPRESSION_ALGORITHM_DELTADELTA] = DELTA_DELTA_ALGORITHM_DEFINITION,
|
||||
[COMPRESSION_ALGORITHM_BOOL] = BOOL_COMPRESS_ALGORITHM_DEFINITION,
|
||||
};
|
||||
|
||||
static NameData compression_algorithm_name[] = {
|
||||
@ -55,6 +57,7 @@ static NameData compression_algorithm_name[] = {
|
||||
[COMPRESSION_ALGORITHM_DICTIONARY] = { "DICTIONARY" },
|
||||
[COMPRESSION_ALGORITHM_GORILLA] = { "GORILLA" },
|
||||
[COMPRESSION_ALGORITHM_DELTADELTA] = { "DELTADELTA" },
|
||||
[COMPRESSION_ALGORITHM_BOOL] = { "BOOL" },
|
||||
};
|
||||
|
||||
Name
|
||||
@ -1802,6 +1805,9 @@ tsl_compressed_data_info(PG_FUNCTION_ARGS)
|
||||
case COMPRESSION_ALGORITHM_ARRAY:
|
||||
has_nulls = array_compressed_has_nulls(header);
|
||||
break;
|
||||
case COMPRESSION_ALGORITHM_BOOL:
|
||||
has_nulls = bool_compressed_has_nulls(header);
|
||||
break;
|
||||
default:
|
||||
elog(ERROR, "unknown compression algorithm %d", header->compression_algorithm);
|
||||
break;
|
||||
@ -1840,6 +1846,9 @@ tsl_compressed_data_has_nulls(PG_FUNCTION_ARGS)
|
||||
case COMPRESSION_ALGORITHM_ARRAY:
|
||||
has_nulls = array_compressed_has_nulls(header);
|
||||
break;
|
||||
case COMPRESSION_ALGORITHM_BOOL:
|
||||
has_nulls = bool_compressed_has_nulls(header);
|
||||
break;
|
||||
default:
|
||||
elog(ERROR, "unknown compression algorithm %d", header->compression_algorithm);
|
||||
break;
|
||||
@ -1884,6 +1893,12 @@ compression_get_default_algorithm(Oid typeoid)
|
||||
case NUMERICOID:
|
||||
return COMPRESSION_ALGORITHM_ARRAY;
|
||||
|
||||
case BOOLOID:
|
||||
if (ts_guc_enable_bool_compression)
|
||||
return COMPRESSION_ALGORITHM_BOOL;
|
||||
else
|
||||
return COMPRESSION_ALGORITHM_ARRAY;
|
||||
|
||||
default:
|
||||
{
|
||||
/* use dictionary if possible, otherwise use array */
|
||||
|
@ -193,6 +193,7 @@ typedef enum CompressionAlgorithm
|
||||
COMPRESSION_ALGORITHM_DICTIONARY,
|
||||
COMPRESSION_ALGORITHM_GORILLA,
|
||||
COMPRESSION_ALGORITHM_DELTADELTA,
|
||||
COMPRESSION_ALGORITHM_BOOL,
|
||||
|
||||
/* When adding an algorithm also add a static assert statement below */
|
||||
/* end of real values */
|
||||
@ -315,13 +316,14 @@ pg_attribute_unused() assert_num_compression_algorithms_sane(void)
|
||||
StaticAssertStmt(COMPRESSION_ALGORITHM_DICTIONARY == 2, "algorithm index has changed");
|
||||
StaticAssertStmt(COMPRESSION_ALGORITHM_GORILLA == 3, "algorithm index has changed");
|
||||
StaticAssertStmt(COMPRESSION_ALGORITHM_DELTADELTA == 4, "algorithm index has changed");
|
||||
StaticAssertStmt(COMPRESSION_ALGORITHM_BOOL == 5, "algorithm index has changed");
|
||||
|
||||
/*
|
||||
* This should change when adding a new algorithm after adding the new
|
||||
* algorithm to the assert list above. This statement prevents adding a
|
||||
* new algorithm without updating the asserts above
|
||||
*/
|
||||
StaticAssertStmt(_END_COMPRESSION_ALGORITHMS == 5,
|
||||
StaticAssertStmt(_END_COMPRESSION_ALGORITHMS == 6,
|
||||
"number of algorithms have changed, the asserts should be updated");
|
||||
}
|
||||
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include "chunk.h"
|
||||
#include "chunk_api.h"
|
||||
#include "compression/algorithms/array.h"
|
||||
#include "compression/algorithms/bool_compress.h"
|
||||
#include "compression/algorithms/deltadelta.h"
|
||||
#include "compression/algorithms/dictionary.h"
|
||||
#include "compression/algorithms/gorilla.h"
|
||||
@ -167,6 +168,8 @@ CrossModuleFunctions tsl_cm_functions = {
|
||||
.dictionary_compressor_finish = tsl_dictionary_compressor_finish,
|
||||
.array_compressor_append = tsl_array_compressor_append,
|
||||
.array_compressor_finish = tsl_array_compressor_finish,
|
||||
.bool_compressor_append = tsl_bool_compressor_append,
|
||||
.bool_compressor_finish = tsl_bool_compressor_finish,
|
||||
.process_compress_table = tsl_process_compress_table,
|
||||
.process_altertable_cmd = tsl_process_altertable_cmd,
|
||||
.process_rename_cmd = tsl_process_rename_cmd,
|
||||
|
@ -1538,6 +1538,217 @@ CREATE TABLE base_texts AS SELECT row_number() OVER() as rn, NULLIF(NULLIF(NULLI
|
||||
(1 row)
|
||||
|
||||
DROP TABLE base_texts;
|
||||
----------------------
|
||||
-- Bool Compression --
|
||||
----------------------
|
||||
SELECT
|
||||
$$
|
||||
select item from base_bools order by rn
|
||||
$$ AS "QUERY"
|
||||
\gset
|
||||
\set TABLE_NAME base_bools
|
||||
\set TYPE boolean
|
||||
\set COMPRESSION_CMD _timescaledb_internal.compress_bool(item)
|
||||
\set DECOMPRESS_FORWARD_CMD _timescaledb_internal.decompress_forward(c::_timescaledb_internal.compressed_data, NULL::boolean)
|
||||
\set DECOMPRESS_REVERSE_CMD _timescaledb_internal.decompress_reverse(c::_timescaledb_internal.compressed_data, NULL::boolean)
|
||||
-- bool test, flipping values betweem true and false, no nulls
|
||||
CREATE TABLE base_bools AS SELECT row_number() OVER() as rn, (item%2=0)::boolean as item FROM (SELECT generate_series(1, 1000) item) sub;
|
||||
\ir include/compression_test.sql
|
||||
-- This file and its contents are licensed under the Timescale License.
|
||||
-- Please see the included NOTICE for copyright information and
|
||||
-- LICENSE-TIMESCALE for a copy of the license.
|
||||
\set ECHO errors
|
||||
algorithm | has_nulls | compressed size
|
||||
-----------+-----------+-----------------
|
||||
BOOL | f | 152
|
||||
(1 row)
|
||||
|
||||
?column? | count
|
||||
-------------------------------------------------------------------------------+-------
|
||||
Number of rows different between original and decompressed forward (expect 0) | 0
|
||||
(1 row)
|
||||
|
||||
?column? | count
|
||||
--------------------------------------------------------------------------------+-------
|
||||
Number of rows different between original and decompressed reversed (expect 0) | 0
|
||||
(1 row)
|
||||
|
||||
?column? | count
|
||||
----------------------------------------------------------------------------------------------------+-------
|
||||
Number of rows different between original, decompressed, and decompressed deserializeed (expect 0) | 0
|
||||
(1 row)
|
||||
|
||||
?column? | ?column?
|
||||
-----------------------------------------------------------------------------------------------------+----------
|
||||
Test that deserialization, decompression, recompression, and serialization results in the same text | t
|
||||
(1 row)
|
||||
|
||||
DROP TABLE base_bools;
|
||||
-- bool test, all true values, no nulls
|
||||
CREATE TABLE base_bools AS SELECT row_number() OVER() as rn, true as item FROM (SELECT generate_series(1, 1000) item) sub;
|
||||
\ir include/compression_test.sql
|
||||
-- This file and its contents are licensed under the Timescale License.
|
||||
-- Please see the included NOTICE for copyright information and
|
||||
-- LICENSE-TIMESCALE for a copy of the license.
|
||||
\set ECHO errors
|
||||
algorithm | has_nulls | compressed size
|
||||
-----------+-----------+-----------------
|
||||
BOOL | f | 29
|
||||
(1 row)
|
||||
|
||||
?column? | count
|
||||
-------------------------------------------------------------------------------+-------
|
||||
Number of rows different between original and decompressed forward (expect 0) | 0
|
||||
(1 row)
|
||||
|
||||
?column? | count
|
||||
--------------------------------------------------------------------------------+-------
|
||||
Number of rows different between original and decompressed reversed (expect 0) | 0
|
||||
(1 row)
|
||||
|
||||
?column? | count
|
||||
----------------------------------------------------------------------------------------------------+-------
|
||||
Number of rows different between original, decompressed, and decompressed deserializeed (expect 0) | 0
|
||||
(1 row)
|
||||
|
||||
?column? | ?column?
|
||||
-----------------------------------------------------------------------------------------------------+----------
|
||||
Test that deserialization, decompression, recompression, and serialization results in the same text | t
|
||||
(1 row)
|
||||
|
||||
DROP TABLE base_bools;
|
||||
-- bool test, all false, no nulls
|
||||
CREATE TABLE base_bools AS SELECT row_number() OVER() as rn, false as item FROM (SELECT generate_series(1, 1000) item) sub;
|
||||
\ir include/compression_test.sql
|
||||
-- This file and its contents are licensed under the Timescale License.
|
||||
-- Please see the included NOTICE for copyright information and
|
||||
-- LICENSE-TIMESCALE for a copy of the license.
|
||||
\set ECHO errors
|
||||
algorithm | has_nulls | compressed size
|
||||
-----------+-----------+-----------------
|
||||
BOOL | f | 29
|
||||
(1 row)
|
||||
|
||||
?column? | count
|
||||
-------------------------------------------------------------------------------+-------
|
||||
Number of rows different between original and decompressed forward (expect 0) | 0
|
||||
(1 row)
|
||||
|
||||
?column? | count
|
||||
--------------------------------------------------------------------------------+-------
|
||||
Number of rows different between original and decompressed reversed (expect 0) | 0
|
||||
(1 row)
|
||||
|
||||
?column? | count
|
||||
----------------------------------------------------------------------------------------------------+-------
|
||||
Number of rows different between original, decompressed, and decompressed deserializeed (expect 0) | 0
|
||||
(1 row)
|
||||
|
||||
?column? | ?column?
|
||||
-----------------------------------------------------------------------------------------------------+----------
|
||||
Test that deserialization, decompression, recompression, and serialization results in the same text | t
|
||||
(1 row)
|
||||
|
||||
DROP TABLE base_bools;
|
||||
-- a single true element
|
||||
CREATE TABLE base_bools AS SELECT row_number() OVER() as rn, true as item FROM (SELECT generate_series(1, 1) item) sub;
|
||||
\ir include/compression_test.sql
|
||||
-- This file and its contents are licensed under the Timescale License.
|
||||
-- Please see the included NOTICE for copyright information and
|
||||
-- LICENSE-TIMESCALE for a copy of the license.
|
||||
\set ECHO errors
|
||||
algorithm | has_nulls | compressed size
|
||||
-----------+-----------+-----------------
|
||||
BOOL | f | 29
|
||||
(1 row)
|
||||
|
||||
?column? | count
|
||||
-------------------------------------------------------------------------------+-------
|
||||
Number of rows different between original and decompressed forward (expect 0) | 0
|
||||
(1 row)
|
||||
|
||||
?column? | count
|
||||
--------------------------------------------------------------------------------+-------
|
||||
Number of rows different between original and decompressed reversed (expect 0) | 0
|
||||
(1 row)
|
||||
|
||||
?column? | count
|
||||
----------------------------------------------------------------------------------------------------+-------
|
||||
Number of rows different between original, decompressed, and decompressed deserializeed (expect 0) | 0
|
||||
(1 row)
|
||||
|
||||
?column? | ?column?
|
||||
-----------------------------------------------------------------------------------------------------+----------
|
||||
Test that deserialization, decompression, recompression, and serialization results in the same text | t
|
||||
(1 row)
|
||||
|
||||
DROP TABLE base_bools;
|
||||
-- all true, except every 43rd value is null
|
||||
CREATE TABLE base_bools AS SELECT row_number() OVER() as rn, ((NULLIF(i, (CASE WHEN i%43=0 THEN i ELSE -1 END)))>0)::boolean item FROM generate_series(1, 1000) i;
|
||||
\ir include/compression_test.sql
|
||||
-- This file and its contents are licensed under the Timescale License.
|
||||
-- Please see the included NOTICE for copyright information and
|
||||
-- LICENSE-TIMESCALE for a copy of the license.
|
||||
\set ECHO errors
|
||||
algorithm | has_nulls | compressed size
|
||||
-----------+-----------+-----------------
|
||||
BOOL | t | 176
|
||||
(1 row)
|
||||
|
||||
?column? | count
|
||||
-------------------------------------------------------------------------------+-------
|
||||
Number of rows different between original and decompressed forward (expect 0) | 0
|
||||
(1 row)
|
||||
|
||||
?column? | count
|
||||
--------------------------------------------------------------------------------+-------
|
||||
Number of rows different between original and decompressed reversed (expect 0) | 0
|
||||
(1 row)
|
||||
|
||||
?column? | count
|
||||
----------------------------------------------------------------------------------------------------+-------
|
||||
Number of rows different between original, decompressed, and decompressed deserializeed (expect 0) | 0
|
||||
(1 row)
|
||||
|
||||
?column? | ?column?
|
||||
-----------------------------------------------------------------------------------------------------+----------
|
||||
Test that deserialization, decompression, recompression, and serialization results in the same text | t
|
||||
(1 row)
|
||||
|
||||
DROP TABLE base_bools;
|
||||
-- all false, except every 29th value is null
|
||||
CREATE TABLE base_bools AS SELECT row_number() OVER() as rn, ((NULLIF(i, (CASE WHEN i%29=0 THEN i ELSE -1 END)))<0)::boolean item FROM generate_series(1, 1000) i;
|
||||
\ir include/compression_test.sql
|
||||
-- This file and its contents are licensed under the Timescale License.
|
||||
-- Please see the included NOTICE for copyright information and
|
||||
-- LICENSE-TIMESCALE for a copy of the license.
|
||||
\set ECHO errors
|
||||
algorithm | has_nulls | compressed size
|
||||
-----------+-----------+-----------------
|
||||
BOOL | t | 176
|
||||
(1 row)
|
||||
|
||||
?column? | count
|
||||
-------------------------------------------------------------------------------+-------
|
||||
Number of rows different between original and decompressed forward (expect 0) | 0
|
||||
(1 row)
|
||||
|
||||
?column? | count
|
||||
--------------------------------------------------------------------------------+-------
|
||||
Number of rows different between original and decompressed reversed (expect 0) | 0
|
||||
(1 row)
|
||||
|
||||
?column? | count
|
||||
----------------------------------------------------------------------------------------------------+-------
|
||||
Number of rows different between original, decompressed, and decompressed deserializeed (expect 0) | 0
|
||||
(1 row)
|
||||
|
||||
?column? | ?column?
|
||||
-----------------------------------------------------------------------------------------------------+----------
|
||||
Test that deserialization, decompression, recompression, and serialization results in the same text | t
|
||||
(1 row)
|
||||
|
||||
DROP TABLE base_bools;
|
||||
-----------------------------------------------
|
||||
-- Interesting corrupt data found by fuzzing --
|
||||
-----------------------------------------------
|
||||
|
204
tsl/test/expected/compression_bools.out
Normal file
204
tsl/test/expected/compression_bools.out
Normal file
@ -0,0 +1,204 @@
|
||||
-- This file and its contents are licensed under the Timescale License.
|
||||
-- Please see the included NOTICE for copyright information and
|
||||
-- LICENSE-TIMESCALE for a copy of the license.
|
||||
--install necessary functions for tests
|
||||
\c :TEST_DBNAME :ROLE_SUPERUSER
|
||||
\ir include/compression_utils.sql
|
||||
-- This file and its contents are licensed under the Timescale License.
|
||||
-- Please see the included NOTICE for copyright information and
|
||||
-- LICENSE-TIMESCALE for a copy of the license.
|
||||
\set ECHO errors
|
||||
\c :TEST_DBNAME :ROLE_DEFAULT_PERM_USER
|
||||
-- Create some data with NULLs and bools
|
||||
CREATE TABLE d (ts int, b bool);
|
||||
INSERT INTO d SELECT g AS ts, NULL AS b FROM generate_series(1, 5000) g;
|
||||
-- set b to true for even ts values and set some values to NULL
|
||||
UPDATE d SET b = (ts % 2 = 0);
|
||||
UPDATE d SET b = NULL WHERE (ts % 10 = 0);
|
||||
-- add some bools that can be RLE compressed
|
||||
INSERT INTO d SELECT g AS ts, true AS b FROM generate_series(5001, 20000) g;
|
||||
-- add a few bool columns
|
||||
CREATE TABLE t (ts int, b1 bool, b2 bool, b3 bool);
|
||||
SELECT create_hypertable('t', 'ts', chunk_time_interval => 5000);
|
||||
NOTICE: adding not-null constraint to column "ts"
|
||||
create_hypertable
|
||||
-------------------
|
||||
(1,public,t,t)
|
||||
(1 row)
|
||||
|
||||
-- explicitly disable bool compression so the test
|
||||
-- doesn't depend on the default setting
|
||||
SET timescaledb.enable_bool_compression = off;
|
||||
INSERT INTO t
|
||||
SELECT
|
||||
d.ts,
|
||||
d.b AS b1, d.b AS b2, d.b AS b3
|
||||
FROM d ORDER BY ts;
|
||||
SELECT max(ts) FROM t;
|
||||
max
|
||||
-------
|
||||
20000
|
||||
(1 row)
|
||||
|
||||
ALTER TABLE t SET (timescaledb.compress, timescaledb.compress_orderby = 'ts');
|
||||
WARNING: there was some uncertainty picking the default segment by for the hypertable: You do not have any indexes on columns that can be used for segment_by and thus we are not using segment_by for compression. Please make sure you are not missing any indexes
|
||||
NOTICE: default segment by for hypertable "t" is set to ""
|
||||
SELECT compress_chunk(show_chunks('t'));
|
||||
compress_chunk
|
||||
----------------------------------------
|
||||
_timescaledb_internal._hyper_1_1_chunk
|
||||
_timescaledb_internal._hyper_1_2_chunk
|
||||
_timescaledb_internal._hyper_1_3_chunk
|
||||
_timescaledb_internal._hyper_1_4_chunk
|
||||
_timescaledb_internal._hyper_1_5_chunk
|
||||
(5 rows)
|
||||
|
||||
CREATE TABLE chunks_done AS SELECT show_chunks('t') AS chunk_name;
|
||||
SELECT * FROM chunks_done;
|
||||
chunk_name
|
||||
----------------------------------------
|
||||
_timescaledb_internal._hyper_1_1_chunk
|
||||
_timescaledb_internal._hyper_1_2_chunk
|
||||
_timescaledb_internal._hyper_1_3_chunk
|
||||
_timescaledb_internal._hyper_1_4_chunk
|
||||
_timescaledb_internal._hyper_1_5_chunk
|
||||
(5 rows)
|
||||
|
||||
SELECT
|
||||
chunk_schema, chunk_name, compression_status,
|
||||
after_compression_total_bytes
|
||||
-- the before compression size differs on platforms, so I dont't
|
||||
-- display it here, and consequently the compression ratio as well
|
||||
FROM
|
||||
chunk_compression_stats('t');
|
||||
chunk_schema | chunk_name | compression_status | after_compression_total_bytes
|
||||
-----------------------+------------------+--------------------+-------------------------------
|
||||
_timescaledb_internal | _hyper_1_1_chunk | Compressed | 65536
|
||||
_timescaledb_internal | _hyper_1_2_chunk | Compressed | 65536
|
||||
_timescaledb_internal | _hyper_1_3_chunk | Compressed | 65536
|
||||
_timescaledb_internal | _hyper_1_4_chunk | Compressed | 65536
|
||||
_timescaledb_internal | _hyper_1_5_chunk | Compressed | 40960
|
||||
(5 rows)
|
||||
|
||||
-- enable bool compression and add more data, so the two compression
|
||||
-- methods will co-exist and we can test both, plus compare the
|
||||
-- compression ratio
|
||||
--
|
||||
SET timescaledb.enable_bool_compression = on;
|
||||
INSERT INTO t
|
||||
SELECT
|
||||
(SELECT max(ts) FROM t)+d.ts,
|
||||
d.b AS b1, d.b AS b2, d.b AS b3
|
||||
FROM d ORDER BY ts;
|
||||
SELECT max(ts) FROM t;
|
||||
max
|
||||
-------
|
||||
40000
|
||||
(1 row)
|
||||
|
||||
SELECT
|
||||
compress_chunk(c)
|
||||
FROM
|
||||
show_chunks('t') c
|
||||
WHERE
|
||||
c NOT IN (SELECT chunk_name FROM chunks_done);
|
||||
compress_chunk
|
||||
-----------------------------------------
|
||||
_timescaledb_internal._hyper_1_11_chunk
|
||||
_timescaledb_internal._hyper_1_12_chunk
|
||||
_timescaledb_internal._hyper_1_13_chunk
|
||||
_timescaledb_internal._hyper_1_14_chunk
|
||||
(4 rows)
|
||||
|
||||
SELECT
|
||||
chunk_schema, chunk_name, compression_status,
|
||||
after_compression_total_bytes
|
||||
-- the before compression size differs on platforms, so I dont't
|
||||
-- display it here, and consequently the compression ratio as well
|
||||
--
|
||||
-- the after compression size should be smaller than it was before
|
||||
-- the bool compression was enabled
|
||||
--
|
||||
FROM
|
||||
chunk_compression_stats('t')
|
||||
WHERE
|
||||
format('%I.%I', chunk_schema, chunk_name)::regclass NOT IN (SELECT chunk_name FROM chunks_done);
|
||||
chunk_schema | chunk_name | compression_status | after_compression_total_bytes
|
||||
-----------------------+-------------------+--------------------+-------------------------------
|
||||
_timescaledb_internal | _hyper_1_11_chunk | Compressed | 40960
|
||||
_timescaledb_internal | _hyper_1_12_chunk | Compressed | 40960
|
||||
_timescaledb_internal | _hyper_1_13_chunk | Compressed | 40960
|
||||
_timescaledb_internal | _hyper_1_14_chunk | Compressed | 40960
|
||||
(4 rows)
|
||||
|
||||
-- check the compression algorithm for the compressed chunks
|
||||
CREATE TABLE compressed_chunks AS
|
||||
SELECT
|
||||
format('%I.%I', comp.schema_name, comp.table_name)::regclass as compressed_chunk,
|
||||
ccs.compressed_heap_size,
|
||||
ccs.compressed_toast_size,
|
||||
ccs.compressed_index_size,
|
||||
ccs.numrows_pre_compression,
|
||||
ccs.numrows_post_compression
|
||||
FROM
|
||||
show_chunks('t') c
|
||||
INNER JOIN _timescaledb_catalog.chunk cat
|
||||
ON (c = format('%I.%I', cat.schema_name, cat.table_name)::regclass)
|
||||
INNER JOIN _timescaledb_catalog.chunk comp
|
||||
ON (cat.compressed_chunk_id = comp.id)
|
||||
INNER JOIN _timescaledb_catalog.compression_chunk_size ccs
|
||||
ON (comp.id = ccs.compressed_chunk_id);
|
||||
CREATE TABLE compression_info (compressed_chunk regclass, result text, compressed_size int, num_rows int);
|
||||
DO $$
|
||||
DECLARE
|
||||
table_ref regclass;
|
||||
BEGIN
|
||||
FOR table_ref IN
|
||||
SELECT compressed_chunk as table_ref FROM compressed_chunks
|
||||
LOOP
|
||||
EXECUTE format(
|
||||
'INSERT INTO compression_info (
|
||||
SELECT
|
||||
%L::regclass as compressed_chunk,
|
||||
(_timescaledb_functions.compressed_data_info(b1))::text as result,
|
||||
sum(pg_column_size(b1)::int) as compressed_size,
|
||||
count(*) as num_rows
|
||||
FROM %s
|
||||
GROUP BY 1,2)',
|
||||
table_ref, table_ref
|
||||
);
|
||||
END LOOP;
|
||||
END;
|
||||
$$;
|
||||
SELECT
|
||||
ci.*,
|
||||
ccs.compressed_toast_size,
|
||||
ccs.numrows_pre_compression,
|
||||
ccs.numrows_post_compression
|
||||
FROM
|
||||
compression_info ci
|
||||
INNER JOIN compressed_chunks ccs
|
||||
ON (ci.compressed_chunk = ccs.compressed_chunk)
|
||||
ORDER BY
|
||||
1,2,3;
|
||||
compressed_chunk | result | compressed_size | num_rows | compressed_toast_size | numrows_pre_compression | numrows_post_compression
|
||||
-------------------------------------------------+-----------+-----------------+----------+-----------------------+-------------------------+--------------------------
|
||||
_timescaledb_internal.compress_hyper_2_6_chunk | (ARRAY,t) | 320 | 5 | 32768 | 4999 | 5
|
||||
_timescaledb_internal.compress_hyper_2_7_chunk | (ARRAY,f) | 172 | 4 | 32768 | 5000 | 5
|
||||
_timescaledb_internal.compress_hyper_2_7_chunk | (ARRAY,t) | 62 | 1 | 32768 | 5000 | 5
|
||||
_timescaledb_internal.compress_hyper_2_8_chunk | (ARRAY,f) | 215 | 5 | 32768 | 5000 | 5
|
||||
_timescaledb_internal.compress_hyper_2_9_chunk | (ARRAY,f) | 215 | 5 | 32768 | 5000 | 5
|
||||
_timescaledb_internal.compress_hyper_2_10_chunk | (ARRAY,f) | 38 | 1 | 8192 | 1 | 1
|
||||
_timescaledb_internal.compress_hyper_2_15_chunk | (BOOL,f) | 116 | 4 | 8192 | 5000 | 5
|
||||
_timescaledb_internal.compress_hyper_2_15_chunk | (BOOL,t) | 69 | 1 | 8192 | 5000 | 5
|
||||
_timescaledb_internal.compress_hyper_2_16_chunk | (BOOL,f) | 145 | 5 | 8192 | 5000 | 5
|
||||
_timescaledb_internal.compress_hyper_2_17_chunk | (BOOL,f) | 145 | 5 | 8192 | 5000 | 5
|
||||
_timescaledb_internal.compress_hyper_2_18_chunk | (BOOL,f) | 29 | 1 | 8192 | 1 | 1
|
||||
(11 rows)
|
||||
|
||||
DROP TABLE t;
|
||||
DROP TABLE d;
|
||||
DROP TABLE chunks_done;
|
||||
DROP TABLE compression_info;
|
||||
-- reset the compression setting
|
||||
SET timescaledb.enable_bool_compression = default;
|
@ -86,6 +86,7 @@ if(CMAKE_BUILD_TYPE MATCHES Debug)
|
||||
chunk_utils_internal.sql
|
||||
compression_algos.sql
|
||||
compression_bgw.sql
|
||||
compression_bools.sql
|
||||
compression_ddl.sql
|
||||
compression_hypertable.sql
|
||||
compression_merge.sql
|
||||
|
@ -370,6 +370,51 @@ CREATE TABLE base_texts AS SELECT row_number() OVER() as rn, NULLIF(NULLIF(NULLI
|
||||
\ir include/compression_test.sql
|
||||
DROP TABLE base_texts;
|
||||
|
||||
----------------------
|
||||
-- Bool Compression --
|
||||
----------------------
|
||||
|
||||
SELECT
|
||||
$$
|
||||
select item from base_bools order by rn
|
||||
$$ AS "QUERY"
|
||||
\gset
|
||||
\set TABLE_NAME base_bools
|
||||
\set TYPE boolean
|
||||
\set COMPRESSION_CMD _timescaledb_internal.compress_bool(item)
|
||||
\set DECOMPRESS_FORWARD_CMD _timescaledb_internal.decompress_forward(c::_timescaledb_internal.compressed_data, NULL::boolean)
|
||||
\set DECOMPRESS_REVERSE_CMD _timescaledb_internal.decompress_reverse(c::_timescaledb_internal.compressed_data, NULL::boolean)
|
||||
|
||||
-- bool test, flipping values betweem true and false, no nulls
|
||||
CREATE TABLE base_bools AS SELECT row_number() OVER() as rn, (item%2=0)::boolean as item FROM (SELECT generate_series(1, 1000) item) sub;
|
||||
\ir include/compression_test.sql
|
||||
DROP TABLE base_bools;
|
||||
|
||||
-- bool test, all true values, no nulls
|
||||
CREATE TABLE base_bools AS SELECT row_number() OVER() as rn, true as item FROM (SELECT generate_series(1, 1000) item) sub;
|
||||
\ir include/compression_test.sql
|
||||
DROP TABLE base_bools;
|
||||
|
||||
-- bool test, all false, no nulls
|
||||
CREATE TABLE base_bools AS SELECT row_number() OVER() as rn, false as item FROM (SELECT generate_series(1, 1000) item) sub;
|
||||
\ir include/compression_test.sql
|
||||
DROP TABLE base_bools;
|
||||
|
||||
-- a single true element
|
||||
CREATE TABLE base_bools AS SELECT row_number() OVER() as rn, true as item FROM (SELECT generate_series(1, 1) item) sub;
|
||||
\ir include/compression_test.sql
|
||||
DROP TABLE base_bools;
|
||||
|
||||
-- all true, except every 43rd value is null
|
||||
CREATE TABLE base_bools AS SELECT row_number() OVER() as rn, ((NULLIF(i, (CASE WHEN i%43=0 THEN i ELSE -1 END)))>0)::boolean item FROM generate_series(1, 1000) i;
|
||||
\ir include/compression_test.sql
|
||||
DROP TABLE base_bools;
|
||||
|
||||
-- all false, except every 29th value is null
|
||||
CREATE TABLE base_bools AS SELECT row_number() OVER() as rn, ((NULLIF(i, (CASE WHEN i%29=0 THEN i ELSE -1 END)))<0)::boolean item FROM generate_series(1, 1000) i;
|
||||
\ir include/compression_test.sql
|
||||
DROP TABLE base_bools;
|
||||
|
||||
-----------------------------------------------
|
||||
-- Interesting corrupt data found by fuzzing --
|
||||
-----------------------------------------------
|
||||
|
143
tsl/test/sql/compression_bools.sql
Normal file
143
tsl/test/sql/compression_bools.sql
Normal file
@ -0,0 +1,143 @@
|
||||
-- This file and its contents are licensed under the Timescale License.
|
||||
-- Please see the included NOTICE for copyright information and
|
||||
-- LICENSE-TIMESCALE for a copy of the license.
|
||||
|
||||
--install necessary functions for tests
|
||||
\c :TEST_DBNAME :ROLE_SUPERUSER
|
||||
\ir include/compression_utils.sql
|
||||
\c :TEST_DBNAME :ROLE_DEFAULT_PERM_USER
|
||||
|
||||
-- Create some data with NULLs and bools
|
||||
CREATE TABLE d (ts int, b bool);
|
||||
INSERT INTO d SELECT g AS ts, NULL AS b FROM generate_series(1, 5000) g;
|
||||
|
||||
-- set b to true for even ts values and set some values to NULL
|
||||
UPDATE d SET b = (ts % 2 = 0);
|
||||
UPDATE d SET b = NULL WHERE (ts % 10 = 0);
|
||||
|
||||
-- add some bools that can be RLE compressed
|
||||
INSERT INTO d SELECT g AS ts, true AS b FROM generate_series(5001, 20000) g;
|
||||
|
||||
-- add a few bool columns
|
||||
CREATE TABLE t (ts int, b1 bool, b2 bool, b3 bool);
|
||||
SELECT create_hypertable('t', 'ts', chunk_time_interval => 5000);
|
||||
|
||||
-- explicitly disable bool compression so the test
|
||||
-- doesn't depend on the default setting
|
||||
SET timescaledb.enable_bool_compression = off;
|
||||
INSERT INTO t
|
||||
SELECT
|
||||
d.ts,
|
||||
d.b AS b1, d.b AS b2, d.b AS b3
|
||||
FROM d ORDER BY ts;
|
||||
SELECT max(ts) FROM t;
|
||||
|
||||
ALTER TABLE t SET (timescaledb.compress, timescaledb.compress_orderby = 'ts');
|
||||
SELECT compress_chunk(show_chunks('t'));
|
||||
|
||||
CREATE TABLE chunks_done AS SELECT show_chunks('t') AS chunk_name;
|
||||
SELECT * FROM chunks_done;
|
||||
|
||||
SELECT
|
||||
chunk_schema, chunk_name, compression_status,
|
||||
after_compression_total_bytes
|
||||
-- the before compression size differs on platforms, so I dont't
|
||||
-- display it here, and consequently the compression ratio as well
|
||||
FROM
|
||||
chunk_compression_stats('t');
|
||||
|
||||
-- enable bool compression and add more data, so the two compression
|
||||
-- methods will co-exist and we can test both, plus compare the
|
||||
-- compression ratio
|
||||
--
|
||||
SET timescaledb.enable_bool_compression = on;
|
||||
INSERT INTO t
|
||||
SELECT
|
||||
(SELECT max(ts) FROM t)+d.ts,
|
||||
d.b AS b1, d.b AS b2, d.b AS b3
|
||||
FROM d ORDER BY ts;
|
||||
SELECT max(ts) FROM t;
|
||||
|
||||
SELECT
|
||||
compress_chunk(c)
|
||||
FROM
|
||||
show_chunks('t') c
|
||||
WHERE
|
||||
c NOT IN (SELECT chunk_name FROM chunks_done);
|
||||
|
||||
SELECT
|
||||
chunk_schema, chunk_name, compression_status,
|
||||
after_compression_total_bytes
|
||||
-- the before compression size differs on platforms, so I dont't
|
||||
-- display it here, and consequently the compression ratio as well
|
||||
--
|
||||
-- the after compression size should be smaller than it was before
|
||||
-- the bool compression was enabled
|
||||
--
|
||||
FROM
|
||||
chunk_compression_stats('t')
|
||||
WHERE
|
||||
format('%I.%I', chunk_schema, chunk_name)::regclass NOT IN (SELECT chunk_name FROM chunks_done);
|
||||
|
||||
|
||||
-- check the compression algorithm for the compressed chunks
|
||||
CREATE TABLE compressed_chunks AS
|
||||
SELECT
|
||||
format('%I.%I', comp.schema_name, comp.table_name)::regclass as compressed_chunk,
|
||||
ccs.compressed_heap_size,
|
||||
ccs.compressed_toast_size,
|
||||
ccs.compressed_index_size,
|
||||
ccs.numrows_pre_compression,
|
||||
ccs.numrows_post_compression
|
||||
FROM
|
||||
show_chunks('t') c
|
||||
INNER JOIN _timescaledb_catalog.chunk cat
|
||||
ON (c = format('%I.%I', cat.schema_name, cat.table_name)::regclass)
|
||||
INNER JOIN _timescaledb_catalog.chunk comp
|
||||
ON (cat.compressed_chunk_id = comp.id)
|
||||
INNER JOIN _timescaledb_catalog.compression_chunk_size ccs
|
||||
ON (comp.id = ccs.compressed_chunk_id);
|
||||
|
||||
CREATE TABLE compression_info (compressed_chunk regclass, result text, compressed_size int, num_rows int);
|
||||
|
||||
DO $$
|
||||
DECLARE
|
||||
table_ref regclass;
|
||||
BEGIN
|
||||
FOR table_ref IN
|
||||
SELECT compressed_chunk as table_ref FROM compressed_chunks
|
||||
LOOP
|
||||
EXECUTE format(
|
||||
'INSERT INTO compression_info (
|
||||
SELECT
|
||||
%L::regclass as compressed_chunk,
|
||||
(_timescaledb_functions.compressed_data_info(b1))::text as result,
|
||||
sum(pg_column_size(b1)::int) as compressed_size,
|
||||
count(*) as num_rows
|
||||
FROM %s
|
||||
GROUP BY 1,2)',
|
||||
table_ref, table_ref
|
||||
);
|
||||
END LOOP;
|
||||
END;
|
||||
$$;
|
||||
|
||||
SELECT
|
||||
ci.*,
|
||||
ccs.compressed_toast_size,
|
||||
ccs.numrows_pre_compression,
|
||||
ccs.numrows_post_compression
|
||||
FROM
|
||||
compression_info ci
|
||||
INNER JOIN compressed_chunks ccs
|
||||
ON (ci.compressed_chunk = ccs.compressed_chunk)
|
||||
ORDER BY
|
||||
1,2,3;
|
||||
|
||||
DROP TABLE t;
|
||||
DROP TABLE d;
|
||||
DROP TABLE chunks_done;
|
||||
DROP TABLE compression_info;
|
||||
|
||||
-- reset the compression setting
|
||||
SET timescaledb.enable_bool_compression = default;
|
@ -74,6 +74,16 @@ CREATE OR REPLACE FUNCTION _timescaledb_internal.array_compressor_finish(interna
|
||||
AS :MODULE_PATHNAME, 'ts_array_compressor_finish'
|
||||
LANGUAGE C IMMUTABLE PARALLEL SAFE STRICT;
|
||||
|
||||
-- State transition function for the bool compression test aggregate
-- (it is the SFUNC of _timescaledb_internal.compress_bool).
-- Arguments: the aggregate's internal state and the next input value.
-- Returns:   the (updated) internal state.
-- Implemented in C by the symbol 'ts_bool_compressor_append'.
-- Unlike bool_compressor_finish this function is not declared STRICT.
-- NOTE(review): presumably so that it is still invoked for NULL inputs and
-- for the initial NULL state -- confirm against the C implementation.
CREATE OR REPLACE FUNCTION _timescaledb_internal.bool_compressor_append(internal, ANYELEMENT)
|
||||
RETURNS internal
|
||||
AS :MODULE_PATHNAME, 'ts_bool_compressor_append'
|
||||
LANGUAGE C IMMUTABLE PARALLEL SAFE;
|
||||
|
||||
-- Final function for the bool compression test aggregate
-- (it is the FINALFUNC of _timescaledb_internal.compress_bool).
-- Argument: the aggregate's internal state.
-- Returns:  the result as _timescaledb_internal.compressed_data.
-- Implemented in C by the symbol 'ts_bool_compressor_finish'.
-- Declared STRICT, so PostgreSQL yields NULL without calling the C code
-- when the state argument is NULL.
CREATE OR REPLACE FUNCTION _timescaledb_internal.bool_compressor_finish(internal)
|
||||
RETURNS _timescaledb_internal.compressed_data
|
||||
AS :MODULE_PATHNAME, 'ts_bool_compressor_finish'
|
||||
LANGUAGE C IMMUTABLE PARALLEL SAFE STRICT;
|
||||
|
||||
CREATE AGGREGATE _timescaledb_internal.compress_deltadelta(BIGINT) (
|
||||
STYPE = internal,
|
||||
SFUNC = _timescaledb_internal.deltadelta_compressor_append,
|
||||
@ -104,4 +114,10 @@ CREATE AGGREGATE _timescaledb_internal.compress_array(ANYELEMENT) (
|
||||
FINALFUNC = _timescaledb_internal.array_compressor_finish
|
||||
);
|
||||
|
||||
-- Test helper aggregate over a boolean input.
-- It keeps an opaque 'internal' state, feeds every input row through
-- bool_compressor_append and produces its result with
-- bool_compressor_finish (declared to return
-- _timescaledb_internal.compressed_data).
CREATE AGGREGATE _timescaledb_internal.compress_bool(boolean) (
|
||||
STYPE = internal,
|
||||
SFUNC = _timescaledb_internal.bool_compressor_append,
|
||||
FINALFUNC = _timescaledb_internal.bool_compressor_finish
|
||||
);
|
||||
|
||||
\set ECHO all
|
||||
|
@ -36,6 +36,10 @@ get_compression_algorithm(char *name)
|
||||
{
|
||||
return COMPRESSION_ALGORITHM_DICTIONARY;
|
||||
}
|
||||
else if (pg_strcasecmp(name, "bool") == 0)
|
||||
{
|
||||
return COMPRESSION_ALGORITHM_BOOL;
|
||||
}
|
||||
|
||||
ereport(ERROR, (errmsg("unknown compression algorithm %s", name)));
|
||||
return _INVALID_COMPRESSION_ALGORITHM;
|
||||
|
@ -10,6 +10,7 @@
|
||||
#include <access/htup_details.h>
|
||||
#include <catalog/pg_type.h>
|
||||
#include <fmgr.h>
|
||||
#include <guc.h>
|
||||
#include <lib/stringinfo.h>
|
||||
#include <libpq/pqformat.h>
|
||||
#include <utils/array.h>
|
||||
@ -24,6 +25,7 @@
|
||||
#include <export.h>
|
||||
|
||||
#include "compression/algorithms/array.h"
|
||||
#include "compression/algorithms/bool_compress.h"
|
||||
#include "compression/algorithms/deltadelta.h"
|
||||
#include "compression/algorithms/dictionary.h"
|
||||
#include "compression/algorithms/float_utils.h"
|
||||
@ -617,6 +619,233 @@ test_delta4(const int32 *values, int n)
|
||||
TestAssertTrue(i == n);
|
||||
}
|
||||
|
||||
static void
|
||||
test_bool_rle(bool nulls, int run_length, int expected_size)
|
||||
{
|
||||
Compressor *compressor = bool_compressor_for_type(BOOLOID);
|
||||
int rlen = run_length;
|
||||
bool val = true;
|
||||
for (int i = 0; i < TEST_ELEMENTS; ++i)
|
||||
{
|
||||
if (rlen == 0)
|
||||
{
|
||||
if (nulls)
|
||||
compressor->append_null(compressor);
|
||||
else
|
||||
compressor->append_val(compressor, BoolGetDatum(val));
|
||||
rlen = run_length;
|
||||
val = !val;
|
||||
}
|
||||
else
|
||||
{
|
||||
compressor->append_val(compressor, BoolGetDatum(val));
|
||||
--rlen;
|
||||
}
|
||||
}
|
||||
|
||||
Datum compressed = (Datum) compressor->finish(compressor);
|
||||
TestAssertTrue(DatumGetPointer(compressed) != NULL);
|
||||
TestAssertInt64Eq(VARSIZE(DatumGetPointer(compressed)), expected_size);
|
||||
|
||||
rlen = run_length;
|
||||
val = true;
|
||||
DecompressionIterator *iter =
|
||||
bool_decompression_iterator_from_datum_forward(compressed, BOOLOID);
|
||||
|
||||
for (int i = 0; i < TEST_ELEMENTS; ++i)
|
||||
{
|
||||
DecompressResult r = bool_decompression_iterator_try_next_forward(iter);
|
||||
TestAssertTrue(!r.is_done);
|
||||
if (rlen == 0)
|
||||
{
|
||||
if (nulls)
|
||||
TestAssertTrue(r.is_null);
|
||||
else
|
||||
TestAssertTrue(DatumGetBool(r.val) == val);
|
||||
rlen = run_length;
|
||||
val = !val;
|
||||
}
|
||||
else
|
||||
{
|
||||
TestAssertTrue(r.is_null == false);
|
||||
TestAssertTrue(DatumGetBool(r.val) == val);
|
||||
--rlen;
|
||||
}
|
||||
}
|
||||
|
||||
DecompressResult r = bool_decompression_iterator_try_next_forward(iter);
|
||||
TestAssertTrue(r.is_done);
|
||||
}
|
||||
|
||||
static void
|
||||
test_bool_array(bool nulls, int run_length, int expected_size)
|
||||
{
|
||||
Compressor *compressor = array_compressor_for_type(BOOLOID);
|
||||
int rlen = run_length;
|
||||
bool val = true;
|
||||
for (int i = 0; i < TEST_ELEMENTS; ++i)
|
||||
{
|
||||
if (rlen == 0)
|
||||
{
|
||||
if (nulls)
|
||||
compressor->append_null(compressor);
|
||||
else
|
||||
compressor->append_val(compressor, BoolGetDatum(val));
|
||||
rlen = run_length;
|
||||
val = !val;
|
||||
}
|
||||
else
|
||||
{
|
||||
compressor->append_val(compressor, BoolGetDatum(val));
|
||||
--rlen;
|
||||
}
|
||||
}
|
||||
|
||||
Datum compressed = (Datum) compressor->finish(compressor);
|
||||
TestAssertTrue(DatumGetPointer(compressed) != NULL);
|
||||
TestAssertInt64Eq(VARSIZE(DatumGetPointer(compressed)), expected_size);
|
||||
|
||||
rlen = run_length;
|
||||
val = true;
|
||||
DecompressionIterator *iter =
|
||||
tsl_array_decompression_iterator_from_datum_forward(compressed, BOOLOID);
|
||||
|
||||
for (int i = 0; i < TEST_ELEMENTS; ++i)
|
||||
{
|
||||
DecompressResult r = array_decompression_iterator_try_next_forward(iter);
|
||||
TestAssertTrue(!r.is_done);
|
||||
if (rlen == 0)
|
||||
{
|
||||
if (nulls)
|
||||
TestAssertTrue(r.is_null);
|
||||
else
|
||||
TestAssertTrue(DatumGetBool(r.val) == val);
|
||||
rlen = run_length;
|
||||
val = !val;
|
||||
}
|
||||
else
|
||||
{
|
||||
TestAssertTrue(r.is_null == false);
|
||||
TestAssertTrue(DatumGetBool(r.val) == val);
|
||||
--rlen;
|
||||
}
|
||||
}
|
||||
|
||||
DecompressResult r = array_decompression_iterator_try_next_forward(iter);
|
||||
TestAssertTrue(r.is_done);
|
||||
}
|
||||
|
||||
static void
|
||||
test_empty_bool_compressor()
|
||||
{
|
||||
Compressor *compressor = bool_compressor_for_type(BOOLOID);
|
||||
Datum compressed = (Datum) compressor->finish(compressor);
|
||||
TestAssertTrue(DatumGetPointer(compressed) == NULL);
|
||||
|
||||
/* further abusing finish: */
|
||||
compressed = (Datum) compressor->finish(NULL);
|
||||
TestAssertTrue(DatumGetPointer(compressed) == NULL);
|
||||
|
||||
/* passing an empty compressor returns NULL */
|
||||
TestEnsureError(DirectFunctionCall1(tsl_bool_compressor_finish, PointerGetDatum(compressor)));
|
||||
|
||||
/* passing a NULL pointer returns NULL */
|
||||
TestEnsureError(DirectFunctionCall1(tsl_bool_compressor_finish, PointerGetDatum(NULL)));
|
||||
|
||||
/* make codecov happy */
|
||||
TestAssertTrue(bool_compressor_finish(NULL) == NULL);
|
||||
TestEnsureError(bool_compressor_for_type(FLOAT4OID));
|
||||
|
||||
bool old_val = ts_guc_enable_bool_compression;
|
||||
ts_guc_enable_bool_compression = true;
|
||||
TestAssertTrue(compression_get_default_algorithm(BOOLOID) == COMPRESSION_ALGORITHM_BOOL);
|
||||
ts_guc_enable_bool_compression = false;
|
||||
TestAssertTrue(compression_get_default_algorithm(BOOLOID) == COMPRESSION_ALGORITHM_ARRAY);
|
||||
ts_guc_enable_bool_compression = old_val;
|
||||
}
|
||||
|
||||
static void
|
||||
test_bool_compressor_extended()
|
||||
{
|
||||
Compressor *compressor = bool_compressor_for_type(BOOLOID);
|
||||
void *finished = compressor->finish(compressor);
|
||||
TestAssertTrue(finished == NULL);
|
||||
|
||||
/* adding a null value should reinitialize the compressor */
|
||||
compressor->append_null(compressor);
|
||||
finished = compressor->finish(compressor);
|
||||
TestAssertTrue(finished != NULL &&
|
||||
"having only nulls should return compressed data because of fake values");
|
||||
|
||||
/* finishing a finished compressor should return NULL */
|
||||
finished = compressor->finish(compressor);
|
||||
TestAssertTrue(finished == NULL && "finishing a finished compressor should return NULL");
|
||||
|
||||
/* adding a non-null value should reinitialize the compressor */
|
||||
compressor->append_val(compressor, BoolGetDatum(true));
|
||||
finished = compressor->finish(compressor);
|
||||
TestAssertTrue(finished != NULL);
|
||||
}
|
||||
|
||||
static uint32
|
||||
bool_compressed_size(int num_values, int flip_nth)
|
||||
{
|
||||
Compressor *compressor = bool_compressor_for_type(BOOLOID);
|
||||
for (int i = 1; i < (num_values + 1); ++i)
|
||||
{
|
||||
if (i % flip_nth == 0)
|
||||
compressor->append_val(compressor, BoolGetDatum(false));
|
||||
else
|
||||
compressor->append_val(compressor, BoolGetDatum(true));
|
||||
}
|
||||
|
||||
Datum compressed = (Datum) compressor->finish(compressor);
|
||||
TestAssertTrue(DatumGetPointer(compressed) != NULL);
|
||||
return VARSIZE(DatumGetPointer(compressed));
|
||||
}
|
||||
|
||||
static void
|
||||
test_bool()
|
||||
{
|
||||
/* code covareage and simple tests */
|
||||
test_empty_bool_compressor();
|
||||
test_bool_compressor_extended();
|
||||
|
||||
/* testing a few RLE configurations with or without nulls: */
|
||||
test_bool_rle(/* nulls = */ false, /* run_length = */ 1, /* expected_size = */ 152);
|
||||
test_bool_rle(/* nulls = */ false, /* run_length = */ 5, /* expected_size = */ 152);
|
||||
test_bool_rle(/* nulls = */ true, /* run_length = */ 19, /* expected_size = */ 296);
|
||||
test_bool_rle(/* nulls = */ false, /* run_length = */ 27, /* expected_size = */ 152);
|
||||
test_bool_rle(/* nulls = */ true, /* run_length = */ 43, /* expected_size = */ 296);
|
||||
test_bool_rle(/* nulls = */ false, /* run_length = */ 61, /* expected_size = */ 152);
|
||||
test_bool_rle(/* nulls = */ false, /* run_length = */ 65, /* expected_size = */ 152);
|
||||
test_bool_rle(/* nulls = */ false, /* run_length = */ 100, /* expected_size = */ 112);
|
||||
test_bool_rle(/* nulls = */ true, /* run_length = */ 97, /* expected_size = */ 256);
|
||||
test_bool_rle(/* nulls = */ false, /* run_length = */ 191, /* expected_size = */ 72);
|
||||
test_bool_rle(/* nulls = */ true, /* run_length = */ 237, /* expected_size = */ 144);
|
||||
test_bool_rle(/* nulls = */ false, /* run_length = */ 600, /* expected_size = */ 40);
|
||||
test_bool_rle(/* nulls = */ true, /* run_length = */ 720, /* expected_size = */ 80);
|
||||
test_bool_rle(/* nulls = */ false,
|
||||
/* run_length = */ TEST_ELEMENTS + 1,
|
||||
/* expected_size = */ 32);
|
||||
/* few select cases for comparison against bool compression: */
|
||||
test_bool_array(/* nulls = */ false, /* run_length = */ 1, /* expected_size = */ 1055);
|
||||
test_bool_array(/* nulls = */ true, /* run_length = */ 19, /* expected_size = */ 1149);
|
||||
test_bool_array(/* nulls = */ false, /* run_length = */ 600, /* expected_size = */ 1055);
|
||||
test_bool_array(/* nulls = */ true, /* run_length = */ 720, /* expected_size = */ 1094);
|
||||
test_bool_array(/* nulls = */ false,
|
||||
/* run_length = */ TEST_ELEMENTS + 1,
|
||||
/* expected_size = */ 1055);
|
||||
|
||||
int baseline = bool_compressed_size(1, 1);
|
||||
int no_rle = bool_compressed_size(64, 2);
|
||||
/* verify that we can pack 64 bits into the same size */
|
||||
TestAssertTrue(no_rle == baseline);
|
||||
int rle_size = bool_compressed_size(65, 66);
|
||||
/* verify that we can RLE 65 bits into the same size */
|
||||
TestAssertTrue(rle_size == baseline);
|
||||
}
|
||||
|
||||
Datum
|
||||
ts_test_compression(PG_FUNCTION_ARGS)
|
||||
{
|
||||
@ -636,6 +865,7 @@ ts_test_compression(PG_FUNCTION_ARGS)
|
||||
test_delta3(/* have_nulls = */ false, /* have_random = */ true);
|
||||
test_delta3(/* have_nulls = */ true, /* have_random = */ false);
|
||||
test_delta3(/* have_nulls = */ true, /* have_random = */ true);
|
||||
test_bool();
|
||||
|
||||
/* Some tests for zig-zag encoding overflowing the original element width. */
|
||||
test_delta4(test_delta4_case1, sizeof(test_delta4_case1) / sizeof(*test_delta4_case1));
|
||||
|
Loading…
x
Reference in New Issue
Block a user