Implement bool compression

Reusing the existing Simple8bRLE algorithm for bools. I added a new
compression type specifically for this case called 'bool'.

A new GUC is introduced so we can revert to the previous, array
compression for bools: `timescaledb.enable_bool_compression`.
It defaults to `false`.

To enable bool compression set the GUC:

`timescaledb.enable_bool_compression=true`

Fixes #7233
This commit is contained in:
David Beck 2025-02-10 18:02:38 +00:00 committed by David Beck
parent 4128feb262
commit d7a8b4b8e2
22 changed files with 1425 additions and 2 deletions

1
.unreleased/pr_7701 Normal file
View File

@ -0,0 +1 @@
Implements: #7701 Implement a custom compression algorithm for bool columns. It is experimental and can undergo backwards-incompatible changes. For testing, enable it using timescaledb.enable_bool_compression = on.

View File

@ -6,3 +6,6 @@ CREATE FUNCTION _timescaledb_functions.compressed_data_has_nulls(_timescaledb_in
RETURNS BOOL
LANGUAGE C STRICT IMMUTABLE
AS '@MODULE_PATHNAME@', 'ts_update_placeholder';
INSERT INTO _timescaledb_catalog.compression_algorithm( id, version, name, description) values
( 5, 1, 'COMPRESSION_ALGORITHM_BOOL', 'bool');

View File

@ -6,3 +6,4 @@ ALTER TABLE _timescaledb_internal.bgw_job_stat_history
DROP FUNCTION IF EXISTS _timescaledb_functions.compressed_data_has_nulls(_timescaledb_internal.compressed_data);
DELETE FROM _timescaledb_catalog.compression_algorithm WHERE id = 5 AND version = 1 AND name = 'COMPRESSION_ALGORITHM_BOOL';

View File

@ -77,6 +77,8 @@ CROSSMODULE_WRAPPER(dictionary_compressor_append);
CROSSMODULE_WRAPPER(dictionary_compressor_finish);
CROSSMODULE_WRAPPER(array_compressor_append);
CROSSMODULE_WRAPPER(array_compressor_finish);
CROSSMODULE_WRAPPER(bool_compressor_append);
CROSSMODULE_WRAPPER(bool_compressor_finish);
CROSSMODULE_WRAPPER(create_compressed_chunk);
CROSSMODULE_WRAPPER(compress_chunk);
CROSSMODULE_WRAPPER(decompress_chunk);
@ -419,6 +421,8 @@ TSDLLEXPORT CrossModuleFunctions ts_cm_functions_default = {
.dictionary_compressor_finish = error_no_default_fn_pg_community,
.array_compressor_append = error_no_default_fn_pg_community,
.array_compressor_finish = error_no_default_fn_pg_community,
.bool_compressor_append = error_no_default_fn_pg_community,
.bool_compressor_finish = error_no_default_fn_pg_community,
.hypercore_handler = process_hypercore_handler,
.hypercore_proxy_handler = process_hypercore_proxy_handler,
.is_compressed_tid = error_no_default_fn_pg_community,

View File

@ -150,6 +150,8 @@ typedef struct CrossModuleFunctions
PGFunction dictionary_compressor_finish;
PGFunction array_compressor_append;
PGFunction array_compressor_finish;
PGFunction bool_compressor_append;
PGFunction bool_compressor_finish;
PGFunction hypercore_handler;
PGFunction hypercore_proxy_handler;
PGFunction is_compressed_tid;

View File

@ -149,6 +149,7 @@ TSDLLEXPORT bool ts_guc_auto_sparse_indexes = true;
TSDLLEXPORT bool ts_guc_default_hypercore_use_access_method = false;
bool ts_guc_enable_chunk_skipping = false;
TSDLLEXPORT bool ts_guc_enable_segmentwise_recompression = true;
TSDLLEXPORT bool ts_guc_enable_bool_compression = false;
/* Enable or disable columnar scans for columnar-oriented storage engines. If
* disabled, regular sequence scans will be used instead. */
@ -746,6 +747,17 @@ _guc_init(void)
NULL,
NULL);
DefineCustomBoolVariable(MAKE_EXTOPTION("enable_bool_compression"),
"Enable experimental bool compression functionality",
"Enable bool compression",
&ts_guc_enable_bool_compression,
false,
PGC_USERSET,
0,
NULL,
NULL,
NULL);
/*
* Define the limit on number of invalidation-based refreshes we allow per
* refresh call. If this limit is exceeded, fall back to a single refresh that

View File

@ -69,6 +69,7 @@ extern TSDLLEXPORT bool ts_guc_enable_delete_after_compression;
extern TSDLLEXPORT bool ts_guc_enable_merge_on_cagg_refresh;
extern bool ts_guc_enable_chunk_skipping;
extern TSDLLEXPORT bool ts_guc_enable_segmentwise_recompression;
extern TSDLLEXPORT bool ts_guc_enable_bool_compression;
#ifdef USE_TELEMETRY
typedef enum TelemetryLevel

View File

@ -67,6 +67,15 @@ structure and does not actually compress it (though TOAST-based compression
can be applied on top). It is the compression mechanism used when no other
compression mechanism works. It can store any type of data.
### Bool Compressor
The bool compressor is a simple compression algorithm that stores boolean values
using the simple8b_rle algorithm only, without any additional processing. During
decompression it decompresses the data and stores it in memory as a bitmap. The
row based iterators then walk through the bitmap. The bool compressor differs from
the other compressors in that it stores the last non-null value as a placeholder for
the null values. This is done to make vectorization easier.
# Merging chunks while compressing #
## Setup ##

View File

@ -3,5 +3,6 @@ set(SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/datum_serialize.c
${CMAKE_CURRENT_SOURCE_DIR}/deltadelta.c
${CMAKE_CURRENT_SOURCE_DIR}/dictionary.c
${CMAKE_CURRENT_SOURCE_DIR}/gorilla.c)
${CMAKE_CURRENT_SOURCE_DIR}/gorilla.c
${CMAKE_CURRENT_SOURCE_DIR}/bool_compress.c)
target_sources(${TSL_LIBRARY_NAME} PRIVATE ${SOURCES})

View File

@ -0,0 +1,440 @@
/*
* This file and its contents are licensed under the Timescale License.
* Please see the included NOTICE for copyright information and
* LICENSE-TIMESCALE for a copy of the license.
*/
#include "bool_compress.h"
#include "compression/arrow_c_data_interface.h"
#include "compression/compression.h"
#include "simple8b_rle.h"
#include "simple8b_rle_bitmap.h"
/*
 * Serialized form of a bool-compressed batch: a fixed-size header followed by
 * a variable-length payload. bool_compressed_from_parts() writes the
 * serialized values stream at the start of `values` and, when has_nulls is 1,
 * the serialized validity stream immediately after it.
 *
 * The field order is part of the serialized layout; do not reorder.
 */
typedef struct BoolCompressed
{
CompressedDataHeaderFields;
uint8 has_nulls; /* 1 if this has a NULLs bitmap after the values, 0 otherwise */
uint8 padding[2]; /* padding added because of Simple8bRleSerialized format */
char values[FLEXIBLE_ARRAY_MEMBER];
} BoolCompressed;
/*
 * Row-by-row iterator state over a decompressed bool batch.
 *
 * `base` has to remain the first member: the try_next callbacks receive a
 * DecompressionIterator pointer and cast it back to this struct.
 */
typedef struct BoolDecompressionIterator
{
DecompressionIterator base;
/* decompressed value bits, one per row (nulls included) */
Simple8bRleBitmap values;
/* 1 = row has a value, 0 = row is null; left zeroed when the batch has no nulls */
Simple8bRleBitmap validity_bitmap;
/* index of the next row to return; counts up going forward, down in reverse */
int32 position;
} BoolDecompressionIterator;
/*
 * In-memory state while a batch of bools is being compressed. Both streams
 * receive exactly one entry per appended row so that they stay parallel.
 */
typedef struct BoolCompressor
{
/* the appended values; a null row gets a copy of last_value as filler */
Simple8bRleCompressor values;
/* 1 for a non-null row, 0 for a null row */
Simple8bRleCompressor validity_bitmap;
/* becomes true once at least one null has been appended */
bool has_nulls;
/* most recently appended non-null value (false until one is seen) */
bool last_value;
} BoolCompressor;
/*
 * Adapter that exposes a BoolCompressor through the generic Compressor
 * callbacks. `internal` is created lazily by the append callbacks and is
 * freed and reset to NULL by the finish callback.
 */
typedef struct ExtendedCompressor
{
Compressor base;
BoolCompressor *internal;
} ExtendedCompressor;
/*
 * Local helpers
 *
 * The first three are the callbacks installed into the generic Compressor
 * interface; they are defined at the bottom of this file.
 */
static void bool_compressor_append_bool(Compressor *compressor, Datum val);
static void bool_compressor_append_null_value(Compressor *compressor);
static void *bool_compressor_finish_and_reset(Compressor *compressor);
/*
 * Template copied into every ExtendedCompressor created by
 * bool_compressor_for_type().
 */
const Compressor bool_compressor_initializer = {
.append_val = bool_compressor_append_bool,
.append_null = bool_compressor_append_null_value,
.finish = bool_compressor_finish_and_reset,
};
/* Builds the serialized BoolCompressed datum from the two finished streams. */
static BoolCompressed *bool_compressed_from_parts(Simple8bRleSerialized *values,
Simple8bRleSerialized *validity_bitmap);
/* Shared setup for the forward and reverse decompression iterators. */
static void decompression_iterator_init(BoolDecompressionIterator *iter, void *compressed,
Oid element_type, bool forward);
/*
* Compressor framework functions and definitions for the bool_compress algorithm.
*/
extern BoolCompressor *
bool_compressor_alloc(void)
{
BoolCompressor *compressor = palloc0(sizeof(*compressor));
simple8brle_compressor_init(&compressor->values);
simple8brle_compressor_init(&compressor->validity_bitmap);
return compressor;
}
/*
 * Append a NULL row.
 *
 * The values and validity streams are kept the same length so they can be
 * used as parallel bitmaps (intended for zero-copy decompression into an
 * ArrowArray). The bit stored in the values stream for a null row is
 * irrelevant to readers, so the last non-null value is repeated in order to
 * keep any ongoing RLE run intact.
 */
extern void
bool_compressor_append_null(BoolCompressor *compressor)
{
	const bool filler = compressor->last_value;

	compressor->has_nulls = true;

	simple8brle_compressor_append(&compressor->values, filler);
	simple8brle_compressor_append(&compressor->validity_bitmap, 0);
}
/*
 * Append a non-null row: record the value in the values stream, mark the row
 * as valid (1) in the validity stream, and remember the value so that it can
 * serve as the filler for subsequent nulls.
 */
extern void
bool_compressor_append_value(BoolCompressor *compressor, bool next_val)
{
	simple8brle_compressor_append(&compressor->values, next_val);
	simple8brle_compressor_append(&compressor->validity_bitmap, 1);

	compressor->last_value = next_val;
}
/*
 * Finish both streams and build the serialized BoolCompressed datum.
 *
 * Returns NULL when `compressor` is NULL, when finishing the values stream
 * yields NULL, or when bool_compressed_from_parts() returns NULL (it does so
 * for zero elements). Null rows are appended to the values stream as well, so
 * a batch consisting only of nulls is not an empty batch here.
 *
 * The validity stream is only included in the result if at least one null was
 * appended.
 */
extern void *
bool_compressor_finish(BoolCompressor *compressor)
{
	Simple8bRleSerialized *serialized_values;
	Simple8bRleSerialized *serialized_validity;
	BoolCompressed *result;

	if (compressor == NULL)
		return NULL;

	serialized_values = simple8brle_compressor_finish(&compressor->values);
	if (serialized_values == NULL)
		return NULL;

	serialized_validity = simple8brle_compressor_finish(&compressor->validity_bitmap);
	if (!compressor->has_nulls)
		serialized_validity = NULL;

	result = bool_compressed_from_parts(serialized_values, serialized_validity);

	Assert(result == NULL || result->compression_algorithm == COMPRESSION_ALGORITHM_BOOL);
	return result;
}
/*
 * Report whether a bool-compressed datum carries a validity bitmap, i.e.
 * whether its has_nulls header flag is non-zero.
 */
extern bool
bool_compressed_has_nulls(const CompressedDataHeader *header)
{
	const BoolCompressed *compressed = (const BoolCompressed *) header;

	return compressed->has_nulls;
}
/*
 * Return the next row of a forward iterator.
 *
 * Yields is_done once the position has reached the number of decompressed
 * values, is_null when a validity bitmap is present and its bit for the row
 * is 0, and the bool value otherwise. The position advances by one for every
 * row returned.
 */
extern DecompressResult
bool_decompression_iterator_try_next_forward(DecompressionIterator *iter)
{
	Assert(iter->compression_algorithm == COMPRESSION_ALGORITHM_BOOL && iter->forward);
	Assert(iter->element_type == BOOLOID);

	BoolDecompressionIterator *state = (BoolDecompressionIterator *) iter;

	if (state->position >= state->values.num_elements)
		return (DecompressResult){
			.is_done = true,
		};

	/* an empty validity bitmap means the batch contains no nulls */
	if (state->validity_bitmap.num_elements > 0 &&
		!simple8brle_bitmap_get_at(&state->validity_bitmap, state->position))
	{
		state->position++;
		return (DecompressResult){
			.is_null = true,
		};
	}

	bool row_value = simple8brle_bitmap_get_at(&state->values, state->position);

	state->position++;
	return (DecompressResult){
		.val = BoolGetDatum(row_value),
	};
}
/*
 * Allocate a forward iterator over the given bool-compressed datum. The datum
 * is detoasted before it is handed to the shared initializer.
 */
extern DecompressionIterator *
bool_decompression_iterator_from_datum_forward(Datum bool_compressed, Oid element_type)
{
	BoolDecompressionIterator *state = palloc(sizeof(BoolDecompressionIterator));
	void *detoasted = (void *) PG_DETOAST_DATUM(bool_compressed);

	decompression_iterator_init(state, detoasted, element_type, true);

	return &state->base;
}
/*
 * Return the next row of a reverse iterator.
 *
 * Yields is_done once the position has dropped below zero, is_null when a
 * validity bitmap is present and its bit for the row is 0, and the bool value
 * otherwise. The position decreases by one for every row returned.
 */
extern DecompressResult
bool_decompression_iterator_try_next_reverse(DecompressionIterator *iter)
{
	Assert(iter->compression_algorithm == COMPRESSION_ALGORITHM_BOOL && !iter->forward);
	Assert(iter->element_type == BOOLOID);

	BoolDecompressionIterator *state = (BoolDecompressionIterator *) iter;

	if (state->position < 0)
		return (DecompressResult){
			.is_done = true,
		};

	/* an empty validity bitmap means the batch contains no nulls */
	if (state->validity_bitmap.num_elements > 0 &&
		!simple8brle_bitmap_get_at(&state->validity_bitmap, state->position))
	{
		state->position--;
		return (DecompressResult){
			.is_null = true,
		};
	}

	bool row_value = simple8brle_bitmap_get_at(&state->values, state->position);

	state->position--;
	return (DecompressResult){
		.val = BoolGetDatum(row_value),
	};
}
/*
 * Allocate a reverse iterator over the given bool-compressed datum. The datum
 * is detoasted before it is handed to the shared initializer.
 */
extern DecompressionIterator *
bool_decompression_iterator_from_datum_reverse(Datum bool_compressed, Oid element_type)
{
	BoolDecompressionIterator *state = palloc(sizeof(BoolDecompressionIterator));
	void *detoasted = (void *) PG_DETOAST_DATUM(bool_compressed);

	decompression_iterator_init(state, detoasted, element_type, false);

	return &state->base;
}
/*
 * Write a bool-compressed datum to `buffer`: the has_nulls byte, then the
 * values stream, then (only when has_nulls is set) the validity stream, which
 * starts right after the values stream in the payload.
 */
extern void
bool_compressed_send(CompressedDataHeader *header, StringInfo buffer)
{
	const BoolCompressed *data = (BoolCompressed *) header;
	Simple8bRleSerialized *values_part = (Simple8bRleSerialized *) data->values;

	Assert(header->compression_algorithm == COMPRESSION_ALGORITHM_BOOL);

	pq_sendbyte(buffer, data->has_nulls);
	simple8brle_serialized_send(buffer, values_part);

	if (!data->has_nulls)
		return;

	/* the validity stream is laid out directly behind the values stream */
	char *validity_start =
		((char *) data->values) + simple8brle_serialized_total_size(values_part);

	simple8brle_serialized_send(buffer, (Simple8bRleSerialized *) validity_start);
}
/*
 * Read a bool-compressed datum from `buffer`, mirroring bool_compressed_send():
 * a has_nulls byte (checked to be 0 or 1), the values stream and, if
 * has_nulls is set, the validity stream. The parts are then reassembled with
 * bool_compressed_from_parts().
 */
extern Datum
bool_compressed_recv(StringInfo buffer)
{
	Simple8bRleSerialized *validity_part = NULL;

	uint8 has_nulls = pq_getmsgbyte(buffer);
	CheckCompressedData(has_nulls == 0 || has_nulls == 1);

	Simple8bRleSerialized *values_part = simple8brle_serialized_recv(buffer);

	if (has_nulls)
		validity_part = simple8brle_serialized_recv(buffer);

	BoolCompressed *result = bool_compressed_from_parts(values_part, validity_part);

	PG_RETURN_POINTER(result);
}
/*
 * Create a generic Compressor for the bool algorithm.
 *
 * Only BOOLOID is accepted; any other element type raises an ERROR. The
 * returned compressor starts with no internal BoolCompressor; one is created
 * on the first append.
 */
extern Compressor *
bool_compressor_for_type(Oid element_type)
{
	ExtendedCompressor *wrapper = palloc(sizeof(ExtendedCompressor));

	if (element_type != BOOLOID)
		elog(ERROR, "invalid type for bool compressor \"%s\"", format_type_be(element_type));

	/* the compound literal leaves `internal` set to NULL */
	*wrapper = (ExtendedCompressor){ .base = bool_compressor_initializer };

	return &wrapper->base;
}
/*
* Cross-module functions for the bool_compress algorithm.
*/
extern Datum
tsl_bool_compressor_append(PG_FUNCTION_ARGS)
{
MemoryContext old_context;
MemoryContext agg_context;
BoolCompressor *compressor = (BoolCompressor *) (PG_ARGISNULL(0) ? NULL : PG_GETARG_POINTER(0));
if (!AggCheckCallContext(fcinfo, &agg_context))
{
/* cannot be called directly because of internal-type argument */
elog(ERROR, "tsl_bool_compressor_append called in non-aggregate context");
}
old_context = MemoryContextSwitchTo(agg_context);
if (compressor == NULL)
{
compressor = bool_compressor_alloc();
if (PG_NARGS() > 2)
elog(ERROR, "append expects two arguments");
}
if (PG_ARGISNULL(1))
bool_compressor_append_null(compressor);
else
{
bool next_val = PG_GETARG_BOOL(1);
bool_compressor_append_value(compressor, next_val);
}
MemoryContextSwitchTo(old_context);
PG_RETURN_POINTER(compressor);
}
extern Datum
tsl_bool_compressor_finish(PG_FUNCTION_ARGS)
{
BoolCompressor *compressor = PG_ARGISNULL(0) ? NULL : (BoolCompressor *) PG_GETARG_POINTER(0);
void *compressed;
if (compressor == NULL)
PG_RETURN_NULL();
compressed = bool_compressor_finish(compressor);
if (compressed == NULL)
PG_RETURN_NULL();
PG_RETURN_POINTER(compressed);
}
/*
* Local helpers
*/
/*
 * Compressor.append_val callback: append a non-null bool, creating the
 * internal BoolCompressor on first use.
 */
static void
bool_compressor_append_bool(Compressor *compressor, Datum val)
{
	ExtendedCompressor *wrapper = (ExtendedCompressor *) compressor;
	const bool value = DatumGetBool(val) ? true : false;

	if (wrapper->internal == NULL)
		wrapper->internal = bool_compressor_alloc();

	bool_compressor_append_value(wrapper->internal, value);
}
/*
 * Compressor.append_null callback: append a NULL row, creating the internal
 * BoolCompressor on first use.
 */
static void
bool_compressor_append_null_value(Compressor *compressor)
{
	ExtendedCompressor *wrapper = (ExtendedCompressor *) compressor;

	if (wrapper->internal == NULL)
		wrapper->internal = bool_compressor_alloc();

	bool_compressor_append_null(wrapper->internal);
}
/*
 * Compressor.finish callback: produce the compressed datum for everything
 * appended so far, then free the internal BoolCompressor and clear the
 * pointer so that the next append starts a new batch.
 *
 * Returns NULL if nothing was appended since the last finish (no internal
 * compressor exists) or if `compressor` itself is NULL.
 */
static void *
bool_compressor_finish_and_reset(Compressor *compressor)
{
	ExtendedCompressor *wrapper = (ExtendedCompressor *) compressor;

	if (wrapper == NULL || wrapper->internal == NULL)
		return NULL;

	void *result = bool_compressor_finish(wrapper->internal);

	pfree(wrapper->internal);
	wrapper->internal = NULL;

	return result;
}
/*
 * Assemble a BoolCompressed datum from a finished values stream and an
 * optional validity stream.
 *
 * values           serialized values stream; NULL or zero elements yields a
 *                  NULL result.
 * validity_bitmap  serialized validity stream, or NULL when the batch has no
 *                  nulls. When given, it must have as many elements as
 *                  `values` (checked with CheckCompressedData).
 *
 * Returns a newly allocated datum, or NULL for an empty batch. Raises an
 * ERROR if the total size is not a valid allocation size.
 */
static BoolCompressed *
bool_compressed_from_parts(Simple8bRleSerialized *values, Simple8bRleSerialized *validity_bitmap)
{
	uint32 validity_bitmap_size = 0;
	Size compressed_size;
	char *compressed_data;
	BoolCompressed *compressed;
	uint32 num_values = values != NULL ? values->num_elements : 0;
	uint32 values_size = values != NULL ? simple8brle_serialized_total_size(values) : 0;

	if (num_values == 0)
		return NULL;

	if (validity_bitmap != NULL)
		validity_bitmap_size = simple8brle_serialized_total_size(validity_bitmap);

	compressed_size = sizeof(BoolCompressed) + values_size + validity_bitmap_size;

	if (!AllocSizeIsValid(compressed_size))
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("compressed size exceeds the maximum allowed (%d)", (int) MaxAllocSize)));

	/*
	 * Zero the allocation: the header's padding bytes are never assigned
	 * below, and this datum is stored/serialized as-is, so they must not
	 * contain leftover memory contents.
	 */
	compressed_data = palloc0(compressed_size);
	compressed = (BoolCompressed *) compressed_data;
	SET_VARSIZE(&compressed->vl_len_, compressed_size);

	compressed->compression_algorithm = COMPRESSION_ALGORITHM_BOOL;
	compressed->has_nulls = validity_bitmap_size != 0 ? 1 : 0;

	/* payload: values stream first, validity stream (if any) right behind it */
	compressed_data += sizeof(*compressed);
	compressed_data = bytes_serialize_simple8b_and_advance(compressed_data, values_size, values);

	if (compressed->has_nulls == 1 && validity_bitmap != NULL)
	{
		CheckCompressedData(validity_bitmap->num_elements == num_values);
		bytes_serialize_simple8b_and_advance(compressed_data,
											 validity_bitmap_size,
											 validity_bitmap);
	}

	return compressed;
}
/*
 * Set up `iter` for walking the bool batch stored in `compressed`.
 *
 * iter          iterator to initialize (fully overwritten).
 * compressed    detoasted BoolCompressed datum; its varlena size bounds all
 *               reads from it.
 * element_type  must be BOOLOID.
 * forward       true to start at the first row and count up, false to start
 *               at the last row and count down.
 *
 * The values stream is always decompressed; the validity stream only when
 * the header says the batch has nulls, in which case both bitmaps must have
 * the same number of elements.
 */
static void
decompression_iterator_init(BoolDecompressionIterator *iter, void *compressed, Oid element_type,
							bool forward)
{
	StringInfoData si = { .data = compressed, .len = VARSIZE(compressed) };
	BoolCompressed *header = consumeCompressedData(&si, sizeof(BoolCompressed));
	Simple8bRleSerialized *values = bytes_deserialize_simple8b_and_advance(&si);

	Assert(element_type == BOOLOID);

	/*
	 * The header comes from stored (possibly corrupt) data, so the raw byte is
	 * validated with a runtime check rather than an Assert, and before it is
	 * collapsed to a bool -- checking the bool itself could never fail.
	 */
	CheckCompressedData(header->has_nulls == 0 || header->has_nulls == 1);
	const bool has_nulls = header->has_nulls == 1;

	*iter = (BoolDecompressionIterator){
		.base = { .compression_algorithm = COMPRESSION_ALGORITHM_BOOL,
				  .forward = forward,
				  .element_type = element_type,
				  .try_next = (forward ? bool_decompression_iterator_try_next_forward :
										 bool_decompression_iterator_try_next_reverse) },
		.values = { 0 },
		.validity_bitmap = { 0 },
		.position = 0,
	};

	iter->values = simple8brle_bitmap_decompress(values);

	if (has_nulls)
	{
		Simple8bRleSerialized *validity_bitmap = bytes_deserialize_simple8b_and_advance(&si);

		iter->validity_bitmap = simple8brle_bitmap_decompress(validity_bitmap);
		CheckCompressedData(iter->validity_bitmap.num_elements == iter->values.num_elements);
	}

	/* reverse iteration starts at the last row */
	if (!forward)
	{
		iter->position = iter->values.num_elements - 1;
	}
}

View File

@ -0,0 +1,75 @@
/*
* This file and its contents are licensed under the Timescale License.
* Please see the included NOTICE for copyright information and
* LICENSE-TIMESCALE for a copy of the license.
*/
#pragma once
/*
* bool_compress is used to encode boolean values using the simple8b_rle algorithm.
*
* The bool compressor differs from the other compressors in that it stores a value
* even for nulls: the last non-null value seen before the null. As a result, the bool
* compressor always creates a compressed block, even for batches that contain only nulls.
*
* The boolean compressor represents the boolean values in a batch with two parallel
* bitmaps, value bitmap and validity bitmap, like in the Arrow representation.
* These bitmaps are compressed with our common bit-packing algorithm.
*
* The validity bitmap stores a 0 for a null value and a 1 for a non-null value as
* required by the Arrow specification. This is the opposite of what the other compression
* algorithms do in their nulls bitmaps.
*/
#include <postgres.h>
#include <fmgr.h>
#include <lib/stringinfo.h>
#include "compression/compression.h"
typedef struct BoolCompressor BoolCompressor;
typedef struct BoolCompressed BoolCompressed;
typedef struct BoolDecompressionIterator BoolDecompressionIterator;
/*
 * Compressor framework functions and definitions for the bool_compress algorithm.
 */
/* Building a compressed batch: allocate, append rows, then finish. */
extern BoolCompressor *bool_compressor_alloc(void);
extern void bool_compressor_append_null(BoolCompressor *compressor);
extern void bool_compressor_append_value(BoolCompressor *compressor, bool next_val);
extern void *bool_compressor_finish(BoolCompressor *compressor);
/* Inspecting a compressed batch. */
extern bool bool_compressed_has_nulls(const CompressedDataHeader *header);
/* Row-by-row decompression in either direction. */
extern DecompressResult bool_decompression_iterator_try_next_forward(DecompressionIterator *iter);
extern DecompressionIterator *bool_decompression_iterator_from_datum_forward(Datum bool_compressed,
Oid element_type);
extern DecompressResult bool_decompression_iterator_try_next_reverse(DecompressionIterator *iter);
extern DecompressionIterator *bool_decompression_iterator_from_datum_reverse(Datum bool_compressed,
Oid element_type);
/* Binary send/receive of a compressed batch. */
extern void bool_compressed_send(CompressedDataHeader *header, StringInfo buffer);
extern Datum bool_compressed_recv(StringInfo buf);
/* Generic Compressor wrapper; only accepts BOOLOID. */
extern Compressor *bool_compressor_for_type(Oid element_type);
/*
 * Initializer for this algorithm's entry in the compression algorithm
 * definitions table. It wires up the iterator constructors, send/recv and
 * the compressor factory declared above; no decompress_all function is
 * provided (the slot is NULL).
 */
#define BOOL_COMPRESS_ALGORITHM_DEFINITION \
{ \
.iterator_init_forward = bool_decompression_iterator_from_datum_forward, \
.iterator_init_reverse = bool_decompression_iterator_from_datum_reverse, \
.decompress_all = NULL, .compressed_data_send = bool_compressed_send, \
.compressed_data_recv = bool_compressed_recv, \
.compressor_for_type = bool_compressor_for_type, \
.compressed_data_storage = TOAST_STORAGE_EXTERNAL, \
}
/*
 * Cross-module functions for the bool_compress algorithm.
 *
 * These are fmgr-callable and meant to be used as an aggregate: append is the
 * transition function (it refuses to run outside an aggregate context) and
 * finish turns the accumulated state into the compressed datum.
 */
extern Datum tsl_bool_compressor_append(PG_FUNCTION_ARGS);
extern Datum tsl_bool_compressor_finish(PG_FUNCTION_ARGS);

View File

@ -19,6 +19,7 @@
#include "compat/compat.h"
#include "algorithms/array.h"
#include "algorithms/bool_compress.h"
#include "algorithms/deltadelta.h"
#include "algorithms/dictionary.h"
#include "algorithms/gorilla.h"
@ -47,6 +48,7 @@ static const CompressionAlgorithmDefinition definitions[_END_COMPRESSION_ALGORIT
[COMPRESSION_ALGORITHM_DICTIONARY] = DICTIONARY_ALGORITHM_DEFINITION,
[COMPRESSION_ALGORITHM_GORILLA] = GORILLA_ALGORITHM_DEFINITION,
[COMPRESSION_ALGORITHM_DELTADELTA] = DELTA_DELTA_ALGORITHM_DEFINITION,
[COMPRESSION_ALGORITHM_BOOL] = BOOL_COMPRESS_ALGORITHM_DEFINITION,
};
static NameData compression_algorithm_name[] = {
@ -55,6 +57,7 @@ static NameData compression_algorithm_name[] = {
[COMPRESSION_ALGORITHM_DICTIONARY] = { "DICTIONARY" },
[COMPRESSION_ALGORITHM_GORILLA] = { "GORILLA" },
[COMPRESSION_ALGORITHM_DELTADELTA] = { "DELTADELTA" },
[COMPRESSION_ALGORITHM_BOOL] = { "BOOL" },
};
Name
@ -1802,6 +1805,9 @@ tsl_compressed_data_info(PG_FUNCTION_ARGS)
case COMPRESSION_ALGORITHM_ARRAY:
has_nulls = array_compressed_has_nulls(header);
break;
case COMPRESSION_ALGORITHM_BOOL:
has_nulls = bool_compressed_has_nulls(header);
break;
default:
elog(ERROR, "unknown compression algorithm %d", header->compression_algorithm);
break;
@ -1840,6 +1846,9 @@ tsl_compressed_data_has_nulls(PG_FUNCTION_ARGS)
case COMPRESSION_ALGORITHM_ARRAY:
has_nulls = array_compressed_has_nulls(header);
break;
case COMPRESSION_ALGORITHM_BOOL:
has_nulls = bool_compressed_has_nulls(header);
break;
default:
elog(ERROR, "unknown compression algorithm %d", header->compression_algorithm);
break;
@ -1884,6 +1893,12 @@ compression_get_default_algorithm(Oid typeoid)
case NUMERICOID:
return COMPRESSION_ALGORITHM_ARRAY;
case BOOLOID:
if (ts_guc_enable_bool_compression)
return COMPRESSION_ALGORITHM_BOOL;
else
return COMPRESSION_ALGORITHM_ARRAY;
default:
{
/* use dictionary if possible, otherwise use array */

View File

@ -193,6 +193,7 @@ typedef enum CompressionAlgorithm
COMPRESSION_ALGORITHM_DICTIONARY,
COMPRESSION_ALGORITHM_GORILLA,
COMPRESSION_ALGORITHM_DELTADELTA,
COMPRESSION_ALGORITHM_BOOL,
/* When adding an algorithm also add a static assert statement below */
/* end of real values */
@ -315,13 +316,14 @@ pg_attribute_unused() assert_num_compression_algorithms_sane(void)
StaticAssertStmt(COMPRESSION_ALGORITHM_DICTIONARY == 2, "algorithm index has changed");
StaticAssertStmt(COMPRESSION_ALGORITHM_GORILLA == 3, "algorithm index has changed");
StaticAssertStmt(COMPRESSION_ALGORITHM_DELTADELTA == 4, "algorithm index has changed");
StaticAssertStmt(COMPRESSION_ALGORITHM_BOOL == 5, "algorithm index has changed");
/*
* This should change when adding a new algorithm after adding the new
* algorithm to the assert list above. This statement prevents adding a
* new algorithm without updating the asserts above
*/
StaticAssertStmt(_END_COMPRESSION_ALGORITHMS == 5,
StaticAssertStmt(_END_COMPRESSION_ALGORITHMS == 6,
"number of algorithms have changed, the asserts should be updated");
}

View File

@ -17,6 +17,7 @@
#include "chunk.h"
#include "chunk_api.h"
#include "compression/algorithms/array.h"
#include "compression/algorithms/bool_compress.h"
#include "compression/algorithms/deltadelta.h"
#include "compression/algorithms/dictionary.h"
#include "compression/algorithms/gorilla.h"
@ -167,6 +168,8 @@ CrossModuleFunctions tsl_cm_functions = {
.dictionary_compressor_finish = tsl_dictionary_compressor_finish,
.array_compressor_append = tsl_array_compressor_append,
.array_compressor_finish = tsl_array_compressor_finish,
.bool_compressor_append = tsl_bool_compressor_append,
.bool_compressor_finish = tsl_bool_compressor_finish,
.process_compress_table = tsl_process_compress_table,
.process_altertable_cmd = tsl_process_altertable_cmd,
.process_rename_cmd = tsl_process_rename_cmd,

View File

@ -1538,6 +1538,217 @@ CREATE TABLE base_texts AS SELECT row_number() OVER() as rn, NULLIF(NULLIF(NULLI
(1 row)
DROP TABLE base_texts;
----------------------
-- Bool Compression --
----------------------
SELECT
$$
select item from base_bools order by rn
$$ AS "QUERY"
\gset
\set TABLE_NAME base_bools
\set TYPE boolean
\set COMPRESSION_CMD _timescaledb_internal.compress_bool(item)
\set DECOMPRESS_FORWARD_CMD _timescaledb_internal.decompress_forward(c::_timescaledb_internal.compressed_data, NULL::boolean)
\set DECOMPRESS_REVERSE_CMD _timescaledb_internal.decompress_reverse(c::_timescaledb_internal.compressed_data, NULL::boolean)
-- bool test, flipping values betweem true and false, no nulls
CREATE TABLE base_bools AS SELECT row_number() OVER() as rn, (item%2=0)::boolean as item FROM (SELECT generate_series(1, 1000) item) sub;
\ir include/compression_test.sql
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
\set ECHO errors
algorithm | has_nulls | compressed size
-----------+-----------+-----------------
BOOL | f | 152
(1 row)
?column? | count
-------------------------------------------------------------------------------+-------
Number of rows different between original and decompressed forward (expect 0) | 0
(1 row)
?column? | count
--------------------------------------------------------------------------------+-------
Number of rows different between original and decompressed reversed (expect 0) | 0
(1 row)
?column? | count
----------------------------------------------------------------------------------------------------+-------
Number of rows different between original, decompressed, and decompressed deserializeed (expect 0) | 0
(1 row)
?column? | ?column?
-----------------------------------------------------------------------------------------------------+----------
Test that deserialization, decompression, recompression, and serialization results in the same text | t
(1 row)
DROP TABLE base_bools;
-- bool test, all true values, no nulls
CREATE TABLE base_bools AS SELECT row_number() OVER() as rn, true as item FROM (SELECT generate_series(1, 1000) item) sub;
\ir include/compression_test.sql
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
\set ECHO errors
algorithm | has_nulls | compressed size
-----------+-----------+-----------------
BOOL | f | 29
(1 row)
?column? | count
-------------------------------------------------------------------------------+-------
Number of rows different between original and decompressed forward (expect 0) | 0
(1 row)
?column? | count
--------------------------------------------------------------------------------+-------
Number of rows different between original and decompressed reversed (expect 0) | 0
(1 row)
?column? | count
----------------------------------------------------------------------------------------------------+-------
Number of rows different between original, decompressed, and decompressed deserializeed (expect 0) | 0
(1 row)
?column? | ?column?
-----------------------------------------------------------------------------------------------------+----------
Test that deserialization, decompression, recompression, and serialization results in the same text | t
(1 row)
DROP TABLE base_bools;
-- bool test, all false, no nulls
CREATE TABLE base_bools AS SELECT row_number() OVER() as rn, false as item FROM (SELECT generate_series(1, 1000) item) sub;
\ir include/compression_test.sql
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
\set ECHO errors
algorithm | has_nulls | compressed size
-----------+-----------+-----------------
BOOL | f | 29
(1 row)
?column? | count
-------------------------------------------------------------------------------+-------
Number of rows different between original and decompressed forward (expect 0) | 0
(1 row)
?column? | count
--------------------------------------------------------------------------------+-------
Number of rows different between original and decompressed reversed (expect 0) | 0
(1 row)
?column? | count
----------------------------------------------------------------------------------------------------+-------
Number of rows different between original, decompressed, and decompressed deserializeed (expect 0) | 0
(1 row)
?column? | ?column?
-----------------------------------------------------------------------------------------------------+----------
Test that deserialization, decompression, recompression, and serialization results in the same text | t
(1 row)
DROP TABLE base_bools;
-- a single true element
CREATE TABLE base_bools AS SELECT row_number() OVER() as rn, true as item FROM (SELECT generate_series(1, 1) item) sub;
\ir include/compression_test.sql
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
\set ECHO errors
algorithm | has_nulls | compressed size
-----------+-----------+-----------------
BOOL | f | 29
(1 row)
?column? | count
-------------------------------------------------------------------------------+-------
Number of rows different between original and decompressed forward (expect 0) | 0
(1 row)
?column? | count
--------------------------------------------------------------------------------+-------
Number of rows different between original and decompressed reversed (expect 0) | 0
(1 row)
?column? | count
----------------------------------------------------------------------------------------------------+-------
Number of rows different between original, decompressed, and decompressed deserializeed (expect 0) | 0
(1 row)
?column? | ?column?
-----------------------------------------------------------------------------------------------------+----------
Test that deserialization, decompression, recompression, and serialization results in the same text | t
(1 row)
DROP TABLE base_bools;
-- all true, except every 43rd value is null
CREATE TABLE base_bools AS SELECT row_number() OVER() as rn, ((NULLIF(i, (CASE WHEN i%43=0 THEN i ELSE -1 END)))>0)::boolean item FROM generate_series(1, 1000) i;
\ir include/compression_test.sql
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
\set ECHO errors
algorithm | has_nulls | compressed size
-----------+-----------+-----------------
BOOL | t | 176
(1 row)
?column? | count
-------------------------------------------------------------------------------+-------
Number of rows different between original and decompressed forward (expect 0) | 0
(1 row)
?column? | count
--------------------------------------------------------------------------------+-------
Number of rows different between original and decompressed reversed (expect 0) | 0
(1 row)
?column? | count
----------------------------------------------------------------------------------------------------+-------
Number of rows different between original, decompressed, and decompressed deserializeed (expect 0) | 0
(1 row)
?column? | ?column?
-----------------------------------------------------------------------------------------------------+----------
Test that deserialization, decompression, recompression, and serialization results in the same text | t
(1 row)
DROP TABLE base_bools;
-- all false, except every 29th value is null
CREATE TABLE base_bools AS SELECT row_number() OVER() as rn, ((NULLIF(i, (CASE WHEN i%29=0 THEN i ELSE -1 END)))<0)::boolean item FROM generate_series(1, 1000) i;
\ir include/compression_test.sql
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
\set ECHO errors
algorithm | has_nulls | compressed size
-----------+-----------+-----------------
BOOL | t | 176
(1 row)
?column? | count
-------------------------------------------------------------------------------+-------
Number of rows different between original and decompressed forward (expect 0) | 0
(1 row)
?column? | count
--------------------------------------------------------------------------------+-------
Number of rows different between original and decompressed reversed (expect 0) | 0
(1 row)
?column? | count
----------------------------------------------------------------------------------------------------+-------
Number of rows different between original, decompressed, and decompressed deserializeed (expect 0) | 0
(1 row)
?column? | ?column?
-----------------------------------------------------------------------------------------------------+----------
Test that deserialization, decompression, recompression, and serialization results in the same text | t
(1 row)
DROP TABLE base_bools;
-----------------------------------------------
-- Interesting corrupt data found by fuzzing --
-----------------------------------------------

View File

@ -0,0 +1,204 @@
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
--install necessary functions for tests
\c :TEST_DBNAME :ROLE_SUPERUSER
\ir include/compression_utils.sql
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
\set ECHO errors
\c :TEST_DBNAME :ROLE_DEFAULT_PERM_USER
-- Create some data with NULLs and bools
CREATE TABLE d (ts int, b bool);
INSERT INTO d SELECT g AS ts, NULL AS b FROM generate_series(1, 5000) g;
-- set b to true for even ts values and set some values to NULL
UPDATE d SET b = (ts % 2 = 0);
UPDATE d SET b = NULL WHERE (ts % 10 = 0);
-- add some bools that can be RLE compressed
INSERT INTO d SELECT g AS ts, true AS b FROM generate_series(5001, 20000) g;
-- add a few bool columns
CREATE TABLE t (ts int, b1 bool, b2 bool, b3 bool);
SELECT create_hypertable('t', 'ts', chunk_time_interval => 5000);
NOTICE: adding not-null constraint to column "ts"
create_hypertable
-------------------
(1,public,t,t)
(1 row)
-- explicitly disable bool compression so the test
-- doesn't depend on the default setting
SET timescaledb.enable_bool_compression = off;
INSERT INTO t
SELECT
d.ts,
d.b AS b1, d.b AS b2, d.b AS b3
FROM d ORDER BY ts;
SELECT max(ts) FROM t;
max
-------
20000
(1 row)
ALTER TABLE t SET (timescaledb.compress, timescaledb.compress_orderby = 'ts');
WARNING: there was some uncertainty picking the default segment by for the hypertable: You do not have any indexes on columns that can be used for segment_by and thus we are not using segment_by for compression. Please make sure you are not missing any indexes
NOTICE: default segment by for hypertable "t" is set to ""
SELECT compress_chunk(show_chunks('t'));
compress_chunk
----------------------------------------
_timescaledb_internal._hyper_1_1_chunk
_timescaledb_internal._hyper_1_2_chunk
_timescaledb_internal._hyper_1_3_chunk
_timescaledb_internal._hyper_1_4_chunk
_timescaledb_internal._hyper_1_5_chunk
(5 rows)
CREATE TABLE chunks_done AS SELECT show_chunks('t') AS chunk_name;
SELECT * FROM chunks_done;
chunk_name
----------------------------------------
_timescaledb_internal._hyper_1_1_chunk
_timescaledb_internal._hyper_1_2_chunk
_timescaledb_internal._hyper_1_3_chunk
_timescaledb_internal._hyper_1_4_chunk
_timescaledb_internal._hyper_1_5_chunk
(5 rows)
SELECT
chunk_schema, chunk_name, compression_status,
after_compression_total_bytes
-- the before compression size differs on platforms, so I dont't
-- display it here, and consequently the compression ratio as well
FROM
chunk_compression_stats('t');
chunk_schema | chunk_name | compression_status | after_compression_total_bytes
-----------------------+------------------+--------------------+-------------------------------
_timescaledb_internal | _hyper_1_1_chunk | Compressed | 65536
_timescaledb_internal | _hyper_1_2_chunk | Compressed | 65536
_timescaledb_internal | _hyper_1_3_chunk | Compressed | 65536
_timescaledb_internal | _hyper_1_4_chunk | Compressed | 65536
_timescaledb_internal | _hyper_1_5_chunk | Compressed | 40960
(5 rows)
-- enable bool compression and add more data, so the two compression
-- methods will co-exist and we can test both, plus compare the
-- compression ratio
--
SET timescaledb.enable_bool_compression = on;
INSERT INTO t
SELECT
(SELECT max(ts) FROM t)+d.ts,
d.b AS b1, d.b AS b2, d.b AS b3
FROM d ORDER BY ts;
SELECT max(ts) FROM t;
max
-------
40000
(1 row)
SELECT
compress_chunk(c)
FROM
show_chunks('t') c
WHERE
c NOT IN (SELECT chunk_name FROM chunks_done);
compress_chunk
-----------------------------------------
_timescaledb_internal._hyper_1_11_chunk
_timescaledb_internal._hyper_1_12_chunk
_timescaledb_internal._hyper_1_13_chunk
_timescaledb_internal._hyper_1_14_chunk
(4 rows)
SELECT
chunk_schema, chunk_name, compression_status,
after_compression_total_bytes
-- the before compression size differs on platforms, so I dont't
-- display it here, and consequently the compression ratio as well
--
-- the after compression size should be smaller than it was before
-- the bool compression was enabled
--
FROM
chunk_compression_stats('t')
WHERE
format('%I.%I', chunk_schema, chunk_name)::regclass NOT IN (SELECT chunk_name FROM chunks_done);
chunk_schema | chunk_name | compression_status | after_compression_total_bytes
-----------------------+-------------------+--------------------+-------------------------------
_timescaledb_internal | _hyper_1_11_chunk | Compressed | 40960
_timescaledb_internal | _hyper_1_12_chunk | Compressed | 40960
_timescaledb_internal | _hyper_1_13_chunk | Compressed | 40960
_timescaledb_internal | _hyper_1_14_chunk | Compressed | 40960
(4 rows)
-- check the compression algorithm for the compressed chunks
CREATE TABLE compressed_chunks AS
SELECT
format('%I.%I', comp.schema_name, comp.table_name)::regclass as compressed_chunk,
ccs.compressed_heap_size,
ccs.compressed_toast_size,
ccs.compressed_index_size,
ccs.numrows_pre_compression,
ccs.numrows_post_compression
FROM
show_chunks('t') c
INNER JOIN _timescaledb_catalog.chunk cat
ON (c = format('%I.%I', cat.schema_name, cat.table_name)::regclass)
INNER JOIN _timescaledb_catalog.chunk comp
ON (cat.compressed_chunk_id = comp.id)
INNER JOIN _timescaledb_catalog.compression_chunk_size ccs
ON (comp.id = ccs.compressed_chunk_id);
CREATE TABLE compression_info (compressed_chunk regclass, result text, compressed_size int, num_rows int);
DO $$
DECLARE
table_ref regclass;
BEGIN
FOR table_ref IN
SELECT compressed_chunk as table_ref FROM compressed_chunks
LOOP
EXECUTE format(
'INSERT INTO compression_info (
SELECT
%L::regclass as compressed_chunk,
(_timescaledb_functions.compressed_data_info(b1))::text as result,
sum(pg_column_size(b1)::int) as compressed_size,
count(*) as num_rows
FROM %s
GROUP BY 1,2)',
table_ref, table_ref
);
END LOOP;
END;
$$;
SELECT
ci.*,
ccs.compressed_toast_size,
ccs.numrows_pre_compression,
ccs.numrows_post_compression
FROM
compression_info ci
INNER JOIN compressed_chunks ccs
ON (ci.compressed_chunk = ccs.compressed_chunk)
ORDER BY
1,2,3;
compressed_chunk | result | compressed_size | num_rows | compressed_toast_size | numrows_pre_compression | numrows_post_compression
-------------------------------------------------+-----------+-----------------+----------+-----------------------+-------------------------+--------------------------
_timescaledb_internal.compress_hyper_2_6_chunk | (ARRAY,t) | 320 | 5 | 32768 | 4999 | 5
_timescaledb_internal.compress_hyper_2_7_chunk | (ARRAY,f) | 172 | 4 | 32768 | 5000 | 5
_timescaledb_internal.compress_hyper_2_7_chunk | (ARRAY,t) | 62 | 1 | 32768 | 5000 | 5
_timescaledb_internal.compress_hyper_2_8_chunk | (ARRAY,f) | 215 | 5 | 32768 | 5000 | 5
_timescaledb_internal.compress_hyper_2_9_chunk | (ARRAY,f) | 215 | 5 | 32768 | 5000 | 5
_timescaledb_internal.compress_hyper_2_10_chunk | (ARRAY,f) | 38 | 1 | 8192 | 1 | 1
_timescaledb_internal.compress_hyper_2_15_chunk | (BOOL,f) | 116 | 4 | 8192 | 5000 | 5
_timescaledb_internal.compress_hyper_2_15_chunk | (BOOL,t) | 69 | 1 | 8192 | 5000 | 5
_timescaledb_internal.compress_hyper_2_16_chunk | (BOOL,f) | 145 | 5 | 8192 | 5000 | 5
_timescaledb_internal.compress_hyper_2_17_chunk | (BOOL,f) | 145 | 5 | 8192 | 5000 | 5
_timescaledb_internal.compress_hyper_2_18_chunk | (BOOL,f) | 29 | 1 | 8192 | 1 | 1
(11 rows)
DROP TABLE t;
DROP TABLE d;
DROP TABLE chunks_done;
DROP TABLE compression_info;
-- reset the compression setting
SET timescaledb.enable_bool_compression = default;

View File

@ -86,6 +86,7 @@ if(CMAKE_BUILD_TYPE MATCHES Debug)
chunk_utils_internal.sql
compression_algos.sql
compression_bgw.sql
compression_bools.sql
compression_ddl.sql
compression_hypertable.sql
compression_merge.sql

View File

@ -370,6 +370,51 @@ CREATE TABLE base_texts AS SELECT row_number() OVER() as rn, NULLIF(NULLIF(NULLI
\ir include/compression_test.sql
DROP TABLE base_texts;
----------------------
-- Bool Compression --
----------------------
SELECT
$$
select item from base_bools order by rn
$$ AS "QUERY"
\gset
\set TABLE_NAME base_bools
\set TYPE boolean
\set COMPRESSION_CMD _timescaledb_internal.compress_bool(item)
\set DECOMPRESS_FORWARD_CMD _timescaledb_internal.decompress_forward(c::_timescaledb_internal.compressed_data, NULL::boolean)
\set DECOMPRESS_REVERSE_CMD _timescaledb_internal.decompress_reverse(c::_timescaledb_internal.compressed_data, NULL::boolean)
-- bool test, flipping values betweem true and false, no nulls
CREATE TABLE base_bools AS SELECT row_number() OVER() as rn, (item%2=0)::boolean as item FROM (SELECT generate_series(1, 1000) item) sub;
\ir include/compression_test.sql
DROP TABLE base_bools;
-- bool test, all true values, no nulls
CREATE TABLE base_bools AS SELECT row_number() OVER() as rn, true as item FROM (SELECT generate_series(1, 1000) item) sub;
\ir include/compression_test.sql
DROP TABLE base_bools;
-- bool test, all false, no nulls
CREATE TABLE base_bools AS SELECT row_number() OVER() as rn, false as item FROM (SELECT generate_series(1, 1000) item) sub;
\ir include/compression_test.sql
DROP TABLE base_bools;
-- a single true element
CREATE TABLE base_bools AS SELECT row_number() OVER() as rn, true as item FROM (SELECT generate_series(1, 1) item) sub;
\ir include/compression_test.sql
DROP TABLE base_bools;
-- all true, except every 43rd value is null
CREATE TABLE base_bools AS SELECT row_number() OVER() as rn, ((NULLIF(i, (CASE WHEN i%43=0 THEN i ELSE -1 END)))>0)::boolean item FROM generate_series(1, 1000) i;
\ir include/compression_test.sql
DROP TABLE base_bools;
-- all false, except every 29th value is null
CREATE TABLE base_bools AS SELECT row_number() OVER() as rn, ((NULLIF(i, (CASE WHEN i%29=0 THEN i ELSE -1 END)))<0)::boolean item FROM generate_series(1, 1000) i;
\ir include/compression_test.sql
DROP TABLE base_bools;
-----------------------------------------------
-- Interesting corrupt data found by fuzzing --
-----------------------------------------------

View File

@ -0,0 +1,143 @@
-- This file and its contents are licensed under the Timescale License.
-- Please see the included NOTICE for copyright information and
-- LICENSE-TIMESCALE for a copy of the license.
--install necessary functions for tests
\c :TEST_DBNAME :ROLE_SUPERUSER
\ir include/compression_utils.sql
\c :TEST_DBNAME :ROLE_DEFAULT_PERM_USER
-- Create some data with NULLs and bools
CREATE TABLE d (ts int, b bool);
INSERT INTO d SELECT g AS ts, NULL AS b FROM generate_series(1, 5000) g;
-- set b to true for even ts values and set some values to NULL
UPDATE d SET b = (ts % 2 = 0);
UPDATE d SET b = NULL WHERE (ts % 10 = 0);
-- add some bools that can be RLE compressed
INSERT INTO d SELECT g AS ts, true AS b FROM generate_series(5001, 20000) g;
-- add a few bool columns
CREATE TABLE t (ts int, b1 bool, b2 bool, b3 bool);
SELECT create_hypertable('t', 'ts', chunk_time_interval => 5000);
-- explicitly disable bool compression so the test
-- doesn't depend on the default setting
SET timescaledb.enable_bool_compression = off;
INSERT INTO t
SELECT
d.ts,
d.b AS b1, d.b AS b2, d.b AS b3
FROM d ORDER BY ts;
SELECT max(ts) FROM t;
ALTER TABLE t SET (timescaledb.compress, timescaledb.compress_orderby = 'ts');
SELECT compress_chunk(show_chunks('t'));
CREATE TABLE chunks_done AS SELECT show_chunks('t') AS chunk_name;
SELECT * FROM chunks_done;
SELECT
chunk_schema, chunk_name, compression_status,
after_compression_total_bytes
-- the before compression size differs on platforms, so I dont't
-- display it here, and consequently the compression ratio as well
FROM
chunk_compression_stats('t');
-- enable bool compression and add more data, so the two compression
-- methods will co-exist and we can test both, plus compare the
-- compression ratio
--
SET timescaledb.enable_bool_compression = on;
INSERT INTO t
SELECT
(SELECT max(ts) FROM t)+d.ts,
d.b AS b1, d.b AS b2, d.b AS b3
FROM d ORDER BY ts;
SELECT max(ts) FROM t;
SELECT
compress_chunk(c)
FROM
show_chunks('t') c
WHERE
c NOT IN (SELECT chunk_name FROM chunks_done);
SELECT
chunk_schema, chunk_name, compression_status,
after_compression_total_bytes
-- the before compression size differs on platforms, so I dont't
-- display it here, and consequently the compression ratio as well
--
-- the after compression size should be smaller than it was before
-- the bool compression was enabled
--
FROM
chunk_compression_stats('t')
WHERE
format('%I.%I', chunk_schema, chunk_name)::regclass NOT IN (SELECT chunk_name FROM chunks_done);
-- check the compression algorithm for the compressed chunks
CREATE TABLE compressed_chunks AS
SELECT
format('%I.%I', comp.schema_name, comp.table_name)::regclass as compressed_chunk,
ccs.compressed_heap_size,
ccs.compressed_toast_size,
ccs.compressed_index_size,
ccs.numrows_pre_compression,
ccs.numrows_post_compression
FROM
show_chunks('t') c
INNER JOIN _timescaledb_catalog.chunk cat
ON (c = format('%I.%I', cat.schema_name, cat.table_name)::regclass)
INNER JOIN _timescaledb_catalog.chunk comp
ON (cat.compressed_chunk_id = comp.id)
INNER JOIN _timescaledb_catalog.compression_chunk_size ccs
ON (comp.id = ccs.compressed_chunk_id);
CREATE TABLE compression_info (compressed_chunk regclass, result text, compressed_size int, num_rows int);
DO $$
DECLARE
table_ref regclass;
BEGIN
FOR table_ref IN
SELECT compressed_chunk as table_ref FROM compressed_chunks
LOOP
EXECUTE format(
'INSERT INTO compression_info (
SELECT
%L::regclass as compressed_chunk,
(_timescaledb_functions.compressed_data_info(b1))::text as result,
sum(pg_column_size(b1)::int) as compressed_size,
count(*) as num_rows
FROM %s
GROUP BY 1,2)',
table_ref, table_ref
);
END LOOP;
END;
$$;
SELECT
ci.*,
ccs.compressed_toast_size,
ccs.numrows_pre_compression,
ccs.numrows_post_compression
FROM
compression_info ci
INNER JOIN compressed_chunks ccs
ON (ci.compressed_chunk = ccs.compressed_chunk)
ORDER BY
1,2,3;
DROP TABLE t;
DROP TABLE d;
DROP TABLE chunks_done;
DROP TABLE compression_info;
-- reset the compression setting
SET timescaledb.enable_bool_compression = default;

View File

@ -74,6 +74,16 @@ CREATE OR REPLACE FUNCTION _timescaledb_internal.array_compressor_finish(interna
AS :MODULE_PATHNAME, 'ts_array_compressor_finish'
LANGUAGE C IMMUTABLE PARALLEL SAFE STRICT;
-- Test-only SQL binding for the C symbol ts_bool_compressor_append.
-- Takes the running aggregate state (internal) plus the next value and returns
-- the updated state; it is the SFUNC of the compress_bool aggregate defined
-- later in this file.
-- Unlike bool_compressor_finish it is not declared STRICT, so it is also
-- invoked when either argument is NULL (the initial state or a NULL input).
CREATE OR REPLACE FUNCTION _timescaledb_internal.bool_compressor_append(internal, ANYELEMENT)
RETURNS internal
AS :MODULE_PATHNAME, 'ts_bool_compressor_append'
LANGUAGE C IMMUTABLE PARALLEL SAFE;
-- Test-only SQL binding for the C symbol ts_bool_compressor_finish.
-- Converts the aggregate state (internal) into a
-- _timescaledb_internal.compressed_data value; it is the FINALFUNC of the
-- compress_bool aggregate defined later in this file.
-- Declared STRICT, so it is skipped (result NULL) when the state is NULL.
CREATE OR REPLACE FUNCTION _timescaledb_internal.bool_compressor_finish(internal)
RETURNS _timescaledb_internal.compressed_data
AS :MODULE_PATHNAME, 'ts_bool_compressor_finish'
LANGUAGE C IMMUTABLE PARALLEL SAFE STRICT;
CREATE AGGREGATE _timescaledb_internal.compress_deltadelta(BIGINT) (
STYPE = internal,
SFUNC = _timescaledb_internal.deltadelta_compressor_append,
@ -104,4 +114,10 @@ CREATE AGGREGATE _timescaledb_internal.compress_array(ANYELEMENT) (
FINALFUNC = _timescaledb_internal.array_compressor_finish
);
-- Test aggregate over boolean input: accumulates values with
-- bool_compressor_append in an internal state and produces the final
-- compressed_data value with bool_compressor_finish.
CREATE AGGREGATE _timescaledb_internal.compress_bool(boolean) (
STYPE = internal,
SFUNC = _timescaledb_internal.bool_compressor_append,
FINALFUNC = _timescaledb_internal.bool_compressor_finish
);
\set ECHO all

View File

@ -36,6 +36,10 @@ get_compression_algorithm(char *name)
{
return COMPRESSION_ALGORITHM_DICTIONARY;
}
else if (pg_strcasecmp(name, "bool") == 0)
{
return COMPRESSION_ALGORITHM_BOOL;
}
ereport(ERROR, (errmsg("unknown compression algorithm %s", name)));
return _INVALID_COMPRESSION_ALGORITHM;

View File

@ -10,6 +10,7 @@
#include <access/htup_details.h>
#include <catalog/pg_type.h>
#include <fmgr.h>
#include <guc.h>
#include <lib/stringinfo.h>
#include <libpq/pqformat.h>
#include <utils/array.h>
@ -24,6 +25,7 @@
#include <export.h>
#include "compression/algorithms/array.h"
#include "compression/algorithms/bool_compress.h"
#include "compression/algorithms/deltadelta.h"
#include "compression/algorithms/dictionary.h"
#include "compression/algorithms/float_utils.h"
@ -617,6 +619,233 @@ test_delta4(const int32 *values, int n)
TestAssertTrue(i == n);
}
/*
 * Round-trip TEST_ELEMENTS booleans through the bool compressor.
 *
 * The input is a sequence of runs: `run_length` copies of the current value
 * followed by one separator element, after which the value flips (starting
 * from true). The separator is a NULL when `nulls` is set, otherwise one more
 * copy of the run value.
 *
 * The varlena size of the compressed datum must equal `expected_size`, and
 * forward decompression must reproduce exactly the appended sequence and then
 * report is_done.
 */
static void
test_bool_rle(bool nulls, int run_length, int expected_size)
{
	Compressor *compressor = bool_compressor_for_type(BOOLOID);
	int rlen = run_length;
	bool val = true;

	for (int i = 0; i < TEST_ELEMENTS; ++i)
	{
		if (rlen == 0)
		{
			/* separator element: NULL or one more copy of the run value */
			if (nulls)
				compressor->append_null(compressor);
			else
				compressor->append_val(compressor, BoolGetDatum(val));
			rlen = run_length;
			val = !val;
		}
		else
		{
			compressor->append_val(compressor, BoolGetDatum(val));
			--rlen;
		}
	}

	Datum compressed = (Datum) compressor->finish(compressor);
	TestAssertTrue(DatumGetPointer(compressed) != NULL);
	TestAssertInt64Eq(VARSIZE(DatumGetPointer(compressed)), expected_size);

	/* replay the same generator while decompressing */
	rlen = run_length;
	val = true;
	DecompressionIterator *iter =
		bool_decompression_iterator_from_datum_forward(compressed, BOOLOID);

	for (int i = 0; i < TEST_ELEMENTS; ++i)
	{
		DecompressResult r = bool_decompression_iterator_try_next_forward(iter);
		TestAssertTrue(!r.is_done);

		if (rlen == 0)
		{
			if (nulls)
			{
				TestAssertTrue(r.is_null);
			}
			else
			{
				/* make sure the element is not NULL before comparing r.val */
				TestAssertTrue(r.is_null == false);
				TestAssertTrue(DatumGetBool(r.val) == val);
			}
			rlen = run_length;
			val = !val;
		}
		else
		{
			TestAssertTrue(r.is_null == false);
			TestAssertTrue(DatumGetBool(r.val) == val);
			--rlen;
		}
	}

	/* the iterator must be exhausted after TEST_ELEMENTS results */
	DecompressResult r = bool_decompression_iterator_try_next_forward(iter);
	TestAssertTrue(r.is_done);
}
/*
 * Round-trip TEST_ELEMENTS booleans through the array compressor, using the
 * same input generator as test_bool_rle so the two compressed sizes can be
 * compared.
 *
 * The input is a sequence of runs: `run_length` copies of the current value
 * followed by one separator element, after which the value flips (starting
 * from true). The separator is a NULL when `nulls` is set, otherwise one more
 * copy of the run value.
 *
 * The varlena size of the compressed datum must equal `expected_size`, and
 * forward decompression must reproduce exactly the appended sequence and then
 * report is_done.
 */
static void
test_bool_array(bool nulls, int run_length, int expected_size)
{
	Compressor *compressor = array_compressor_for_type(BOOLOID);
	int rlen = run_length;
	bool val = true;

	for (int i = 0; i < TEST_ELEMENTS; ++i)
	{
		if (rlen == 0)
		{
			/* separator element: NULL or one more copy of the run value */
			if (nulls)
				compressor->append_null(compressor);
			else
				compressor->append_val(compressor, BoolGetDatum(val));
			rlen = run_length;
			val = !val;
		}
		else
		{
			compressor->append_val(compressor, BoolGetDatum(val));
			--rlen;
		}
	}

	Datum compressed = (Datum) compressor->finish(compressor);
	TestAssertTrue(DatumGetPointer(compressed) != NULL);
	TestAssertInt64Eq(VARSIZE(DatumGetPointer(compressed)), expected_size);

	/* replay the same generator while decompressing */
	rlen = run_length;
	val = true;
	DecompressionIterator *iter =
		tsl_array_decompression_iterator_from_datum_forward(compressed, BOOLOID);

	for (int i = 0; i < TEST_ELEMENTS; ++i)
	{
		DecompressResult r = array_decompression_iterator_try_next_forward(iter);
		TestAssertTrue(!r.is_done);

		if (rlen == 0)
		{
			if (nulls)
			{
				TestAssertTrue(r.is_null);
			}
			else
			{
				/* make sure the element is not NULL before comparing r.val */
				TestAssertTrue(r.is_null == false);
				TestAssertTrue(DatumGetBool(r.val) == val);
			}
			rlen = run_length;
			val = !val;
		}
		else
		{
			TestAssertTrue(r.is_null == false);
			TestAssertTrue(DatumGetBool(r.val) == val);
			--rlen;
		}
	}

	/* the iterator must be exhausted after TEST_ELEMENTS results */
	DecompressResult r = array_decompression_iterator_try_next_forward(iter);
	TestAssertTrue(r.is_done);
}
/*
 * Exercise the bool compressor with empty and NULL compressors, the type check
 * in bool_compressor_for_type(), and the effect of
 * ts_guc_enable_bool_compression on the default algorithm chosen for BOOLOID.
 */
static void
test_empty_bool_compressor(void)
{
	Compressor *compressor = bool_compressor_for_type(BOOLOID);

	/* finishing a compressor that never received a value yields NULL */
	Datum compressed = (Datum) compressor->finish(compressor);
	TestAssertTrue(DatumGetPointer(compressed) == NULL);

	/* further abusing finish: a NULL compressor yields NULL as well */
	compressed = (Datum) compressor->finish(NULL);
	TestAssertTrue(DatumGetPointer(compressed) == NULL);

	/*
	 * Through DirectFunctionCall1 the same two situations are expected to end
	 * in an error rather than a NULL result (presumably because
	 * DirectFunctionCall1 rejects NULL results -- confirm).
	 */
	TestEnsureError(DirectFunctionCall1(tsl_bool_compressor_finish, PointerGetDatum(compressor)));
	TestEnsureError(DirectFunctionCall1(tsl_bool_compressor_finish, PointerGetDatum(NULL)));

	/* make codecov happy */
	TestAssertTrue(bool_compressor_finish(NULL) == NULL);

	/* the bool compressor refuses non-boolean element types */
	TestEnsureError(bool_compressor_for_type(FLOAT4OID));

	/*
	 * Sample the default algorithm under both GUC settings and put the
	 * original setting back BEFORE asserting, so a failing assertion cannot
	 * leave the overridden value behind in this backend.
	 */
	bool old_val = ts_guc_enable_bool_compression;

	ts_guc_enable_bool_compression = true;
	bool enabled_selects_bool =
		compression_get_default_algorithm(BOOLOID) == COMPRESSION_ALGORITHM_BOOL;

	ts_guc_enable_bool_compression = false;
	bool disabled_selects_array =
		compression_get_default_algorithm(BOOLOID) == COMPRESSION_ALGORITHM_ARRAY;

	ts_guc_enable_bool_compression = old_val;

	TestAssertTrue(enabled_selects_bool &&
				   "bool compression enabled should select COMPRESSION_ALGORITHM_BOOL");
	TestAssertTrue(disabled_selects_array &&
				   "bool compression disabled should select COMPRESSION_ALGORITHM_ARRAY");
}
/*
 * Check that a bool compressor can be reused after finish(): finish() on an
 * empty or already finished compressor returns NULL, and appending a NULL or
 * a value afterwards makes the next finish() return data again.
 */
static void
test_bool_compressor_extended(void)
{
	Compressor *compressor = bool_compressor_for_type(BOOLOID);

	/* nothing appended yet, so there is nothing to return */
	void *finished = compressor->finish(compressor);
	TestAssertTrue(finished == NULL);

	/* adding a null value should reinitialize the compressor */
	compressor->append_null(compressor);
	finished = compressor->finish(compressor);
	TestAssertTrue(finished != NULL &&
				   "having only nulls should return compressed data because of fake values");

	/* finishing a finished compressor should return NULL */
	finished = compressor->finish(compressor);
	TestAssertTrue(finished == NULL && "finishing a finished compressor should return NULL");

	/* adding a non-null value should reinitialize the compressor */
	compressor->append_val(compressor, BoolGetDatum(true));
	finished = compressor->finish(compressor);
	TestAssertTrue(finished != NULL);
}
static uint32
bool_compressed_size(int num_values, int flip_nth)
{
Compressor *compressor = bool_compressor_for_type(BOOLOID);
for (int i = 1; i < (num_values + 1); ++i)
{
if (i % flip_nth == 0)
compressor->append_val(compressor, BoolGetDatum(false));
else
compressor->append_val(compressor, BoolGetDatum(true));
}
Datum compressed = (Datum) compressor->finish(compressor);
TestAssertTrue(DatumGetPointer(compressed) != NULL);
return VARSIZE(DatumGetPointer(compressed));
}
static void
test_bool()
{
/* code covareage and simple tests */
test_empty_bool_compressor();
test_bool_compressor_extended();
/* testing a few RLE configurations with or without nulls: */
test_bool_rle(/* nulls = */ false, /* run_length = */ 1, /* expected_size = */ 152);
test_bool_rle(/* nulls = */ false, /* run_length = */ 5, /* expected_size = */ 152);
test_bool_rle(/* nulls = */ true, /* run_length = */ 19, /* expected_size = */ 296);
test_bool_rle(/* nulls = */ false, /* run_length = */ 27, /* expected_size = */ 152);
test_bool_rle(/* nulls = */ true, /* run_length = */ 43, /* expected_size = */ 296);
test_bool_rle(/* nulls = */ false, /* run_length = */ 61, /* expected_size = */ 152);
test_bool_rle(/* nulls = */ false, /* run_length = */ 65, /* expected_size = */ 152);
test_bool_rle(/* nulls = */ false, /* run_length = */ 100, /* expected_size = */ 112);
test_bool_rle(/* nulls = */ true, /* run_length = */ 97, /* expected_size = */ 256);
test_bool_rle(/* nulls = */ false, /* run_length = */ 191, /* expected_size = */ 72);
test_bool_rle(/* nulls = */ true, /* run_length = */ 237, /* expected_size = */ 144);
test_bool_rle(/* nulls = */ false, /* run_length = */ 600, /* expected_size = */ 40);
test_bool_rle(/* nulls = */ true, /* run_length = */ 720, /* expected_size = */ 80);
test_bool_rle(/* nulls = */ false,
/* run_length = */ TEST_ELEMENTS + 1,
/* expected_size = */ 32);
/* few select cases for comparison against bool compression: */
test_bool_array(/* nulls = */ false, /* run_length = */ 1, /* expected_size = */ 1055);
test_bool_array(/* nulls = */ true, /* run_length = */ 19, /* expected_size = */ 1149);
test_bool_array(/* nulls = */ false, /* run_length = */ 600, /* expected_size = */ 1055);
test_bool_array(/* nulls = */ true, /* run_length = */ 720, /* expected_size = */ 1094);
test_bool_array(/* nulls = */ false,
/* run_length = */ TEST_ELEMENTS + 1,
/* expected_size = */ 1055);
int baseline = bool_compressed_size(1, 1);
int no_rle = bool_compressed_size(64, 2);
/* verify that we can pack 64 bits into the same size */
TestAssertTrue(no_rle == baseline);
int rle_size = bool_compressed_size(65, 66);
/* verify that we can RLE 65 bits into the same size */
TestAssertTrue(rle_size == baseline);
}
Datum
ts_test_compression(PG_FUNCTION_ARGS)
{
@ -636,6 +865,7 @@ ts_test_compression(PG_FUNCTION_ARGS)
test_delta3(/* have_nulls = */ false, /* have_random = */ true);
test_delta3(/* have_nulls = */ true, /* have_random = */ false);
test_delta3(/* have_nulls = */ true, /* have_random = */ true);
test_bool();
/* Some tests for zig-zag encoding overflowing the original element width. */
test_delta4(test_delta4_case1, sizeof(test_delta4_case1) / sizeof(*test_delta4_case1));