Bulk decompression of text columns
Implement bulk decompression for text columns. This will allow us to use them in the vectorized computation pipeline.
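For orientation, a minimal sketch of how the new entry points added in this commit fit together (the names come from the diff below; `compressed_datum` stands in for some compressed column value and is hypothetical):

    /* Sketch: look up the bulk decompression function for a compression
     * algorithm and decompress one compressed batch into an Arrow array. */
    DecompressAllFunction decompress_all =
        tsl_get_decompress_all_function(COMPRESSION_ALGORITHM_ARRAY, TEXTOID);
    if (decompress_all != NULL) /* NULL when bulk decompression is unsupported */
    {
        ArrowArray *arrow = decompress_all(compressed_datum, TEXTOID, CurrentMemoryContext);
        /* arrow holds a validity bitmap plus offsets/bodies buffers; see array.c below. */
    }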
commit bf20e5f970
parent 85b27b4f34
.github/workflows/libfuzzer.yaml (vendored, 9 lines changed)

@@ -65,7 +65,7 @@ jobs:
           CC=clang ./configure --prefix=$HOME/$PG_INSTALL_DIR --with-openssl \
             --without-readline --without-zlib --without-libxml --enable-cassert \
             --enable-debug CC=clang \
-            CFLAGS="-DTS_COMPRESSION_FUZZING=1 -fuse-ld=lld -ggdb3 -O2 -fno-omit-frame-pointer"
+            CFLAGS="-fuse-ld=lld -ggdb3 -O2 -fno-omit-frame-pointer"
           make -j$(nproc)

       - name: Install PostgreSQL
@@ -93,7 +93,7 @@ jobs:
           cmake -B build -S . -DASSERTIONS=ON -DLINTER=OFF -DCMAKE_VERBOSE_MAKEFILE=1 \
             -DWARNINGS_AS_ERRORS=1 -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_C_COMPILER=clang \
             -DCMAKE_C_FLAGS="-fsanitize=fuzzer-no-link -lstdc++ -L$LIBFUZZER_PATH -l:libclang_rt.fuzzer_no_main-x86_64.a -static-libsan" \
-            -DPG_PATH=$HOME/$PG_INSTALL_DIR
+            -DCOMPRESSION_FUZZING=1 -DPG_PATH=$HOME/$PG_INSTALL_DIR

           make -C build -j$(nproc) install

@@ -124,7 +124,9 @@ jobs:
           # array has a peculiar recv function that recompresses all input, so
           # fuzzing it is much slower. The dictionary recv also uses it.
           { algo: array     , pgtype: text  , bulk: false, runs: 10000000 },
+          { algo: array     , pgtype: text  , bulk: true , runs: 10000000 },
           { algo: dictionary, pgtype: text  , bulk: false, runs: 100000000 },
+          { algo: dictionary, pgtype: text  , bulk: true , runs: 100000000 },
         ]

     name: Fuzz decompression ${{ matrix.case.algo }} ${{ matrix.case.pgtype }} ${{ matrix.case.bulk && 'bulk' || 'rowbyrow' }}
@@ -288,10 +290,11 @@ jobs:

       - name: Save fuzzer-generated crash cases
         if: always()
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: Crash cases for ${{ steps.config.outputs.name }}
           path: db/crash-*
+          if-no-files-found: ignore

       - name: Save interesting cases
         if: always()
tsl/CMakeLists.txt

@@ -1,2 +1,8 @@
+option(COMPRESSION_FUZZING "Enable fuzzing of compression using Libfuzzer" OFF)
+
+if(COMPRESSION_FUZZING)
+  add_compile_definitions(TS_COMPRESSION_FUZZING=1)
+endif()
+
 add_subdirectory(test)
 add_subdirectory(src)
tsl/src/CMakeLists.txt

@@ -18,7 +18,7 @@ set(TSL_LIBRARY_NAME ${PROJECT_NAME}-tsl)

 include(build-defs.cmake)

-if(CMAKE_BUILD_TYPE MATCHES Debug)
+if(CMAKE_BUILD_TYPE MATCHES Debug OR COMPRESSION_FUZZING)
   add_library(${TSL_LIBRARY_NAME} MODULE
               ${SOURCES} $<TARGET_OBJECTS:${TSL_TESTS_LIB_NAME}>)
 else()
tsl/src/compression/CMakeLists.txt

@@ -3,8 +3,6 @@ set(SOURCES
   ${CMAKE_CURRENT_SOURCE_DIR}/array.c
   ${CMAKE_CURRENT_SOURCE_DIR}/compression.c
   ${CMAKE_CURRENT_SOURCE_DIR}/compression_storage.c
-  ${CMAKE_CURRENT_SOURCE_DIR}/compression_test.c
-  ${CMAKE_CURRENT_SOURCE_DIR}/decompress_text_test_impl.c
   ${CMAKE_CURRENT_SOURCE_DIR}/create.c
   ${CMAKE_CURRENT_SOURCE_DIR}/datum_serialize.c
   ${CMAKE_CURRENT_SOURCE_DIR}/deltadelta.c
tsl/src/compression/array.c

@@ -17,8 +17,11 @@
 #include "compression/array.h"
 #include "compression/compression.h"
 #include "compression/simple8b_rle.h"
+#include "compression/simple8b_rle_bitmap.h"
 #include "datum_serialize.h"

+#include "compression/arrow_c_data_interface.h"
+
 /* A "compressed" array
  *     uint8 has_nulls: 1 iff this has a nulls bitmap stored before the data
  *     Oid element_type: the element stored by this array
@@ -460,6 +463,148 @@ tsl_array_decompression_iterator_from_datum_reverse(Datum compressed_array, Oid
     return &iterator->base;
 }

+#define ELEMENT_TYPE uint32
+#include "simple8b_rle_decompress_all.h"
+#undef ELEMENT_TYPE
+
+ArrowArray *
+tsl_text_array_decompress_all(Datum compressed_array, Oid element_type, MemoryContext dest_mctx)
+{
+    Assert(element_type == TEXTOID);
+    void *compressed_data = PG_DETOAST_DATUM(compressed_array);
+    StringInfoData si = { .data = compressed_data, .len = VARSIZE(compressed_data) };
+    ArrayCompressed *header = consumeCompressedData(&si, sizeof(ArrayCompressed));
+
+    Assert(header->compression_algorithm == COMPRESSION_ALGORITHM_ARRAY);
+    CheckCompressedData(header->element_type == TEXTOID);
+
+    return text_array_decompress_all_serialized_no_header(&si, header->has_nulls, dest_mctx);
+}
+
+ArrowArray *
+text_array_decompress_all_serialized_no_header(StringInfo si, bool has_nulls,
+                                               MemoryContext dest_mctx)
+{
+    Simple8bRleSerialized *nulls_serialized = NULL;
+    if (has_nulls)
+    {
+        nulls_serialized = bytes_deserialize_simple8b_and_advance(si);
+    }
+
+    Simple8bRleSerialized *sizes_serialized = bytes_deserialize_simple8b_and_advance(si);
+
+    uint32 sizes[GLOBAL_MAX_ROWS_PER_COMPRESSION];
+    const uint16 n_notnull =
+        simple8brle_decompress_all_buf_uint32(sizes_serialized,
+                                              sizes,
+                                              sizeof(sizes) / sizeof(sizes[0]));
+    const int n_total = has_nulls ? nulls_serialized->num_elements : n_notnull;
+
+    uint32 *offsets =
+        (uint32 *) MemoryContextAllocZero(dest_mctx,
+                                          pad_to_multiple(64, sizeof(*offsets) * (n_total + 1)));
+    uint8 *arrow_bodies =
+        (uint8 *) MemoryContextAllocZero(dest_mctx, pad_to_multiple(64, si->len - si->cursor));
+
+    uint32 offset = 0;
+    for (int i = 0; i < n_notnull; i++)
+    {
+        void *vardata = consumeCompressedData(si, sizes[i]);
+        /*
+         * Check for potentially corrupt varlena headers since we're reading them
+         * directly from compressed data. We can only have a plain datum
+         * with 1-byte or 4-byte header here, no TOAST or compressed data.
+         */
+        CheckCompressedData(VARATT_IS_4B_U(vardata) ||
+                            (VARATT_IS_1B(vardata) && !VARATT_IS_1B_E(vardata)));
+        /*
+         * Full varsize must be larger or equal than the header size so that the
+         * calculation of size without header doesn't overflow.
+         */
+        CheckCompressedData((VARATT_IS_1B(vardata) && VARSIZE_1B(vardata) >= VARHDRSZ_SHORT) ||
+                            (VARSIZE_4B(vardata) >= VARHDRSZ));
+        /* Varsize must match the size stored in the sizes array for this element. */
+        CheckCompressedData(VARSIZE_ANY(vardata) == sizes[i]);
+
+        const uint32 textlen = VARSIZE_ANY_EXHDR(vardata);
+        memcpy(&arrow_bodies[offset], VARDATA_ANY(vardata), textlen);
+
+        offsets[i] = offset;
+
+        CheckCompressedData(offset <= offset + textlen); /* Check for overflow. */
+        offset += textlen;
+    }
+    offsets[n_notnull] = offset;
+
+    const int validity_bitmap_bytes = sizeof(uint64) * (pad_to_multiple(64, n_total) / 64);
+    uint64 *restrict validity_bitmap = MemoryContextAlloc(dest_mctx, validity_bitmap_bytes);
+    memset(validity_bitmap, 0xFF, validity_bitmap_bytes);
+
+    if (has_nulls)
+    {
+        /*
+         * We have decompressed the data with nulls skipped, reshuffle it
+         * according to the nulls bitmap.
+         */
+        Simple8bRleBitmap nulls = simple8brle_bitmap_decompress(nulls_serialized);
+        CheckCompressedData(n_notnull + simple8brle_bitmap_num_ones(&nulls) == n_total);
+
+        int current_notnull_element = n_notnull - 1;
+        for (int i = n_total - 1; i >= 0; i--)
+        {
+            Assert(i >= current_notnull_element);
+
+            /*
+             * The index of the corresponding offset is higher by one than
+             * the index of the element. The offset[0] is never affected by
+             * this shuffling and is always 0.
+             * Note that unlike the usual null reshuffling in other algorithms,
+             * for offsets, even if all elements are null, the starting offset
+             * is well-defined and we can do this assignment. This case is only
+             * accessible through fuzzing. Through SQL, all-null batches result
+             * in a null compressed value.
+             */
+            Assert(current_notnull_element + 1 >= 0);
+            offsets[i + 1] = offsets[current_notnull_element + 1];
+
+            if (simple8brle_bitmap_get_at(&nulls, i))
+            {
+                arrow_set_row_validity(validity_bitmap, i, false);
+            }
+            else
+            {
+                Assert(current_notnull_element >= 0);
+                current_notnull_element--;
+            }
+        }
+
+        Assert(current_notnull_element == -1);
+    }
+    else
+    {
+        /*
+         * The validity bitmap size is a multiple of 64 bits. Fill the tail bits
+         * with zeros, because the corresponding elements are not valid.
+         */
+        if (n_total % 64)
+        {
+            const uint64 tail_mask = -1ULL >> (64 - n_total % 64);
+            validity_bitmap[n_total / 64] &= tail_mask;
+        }
+    }
+
+    ArrowArray *result = MemoryContextAllocZero(dest_mctx, sizeof(ArrowArray) + sizeof(void *) * 3);
+    const void **buffers = (const void **) &result[1];
+    buffers[0] = validity_bitmap;
+    buffers[1] = offsets;
+    buffers[2] = arrow_bodies;
+    result->n_buffers = 3;
+    result->buffers = buffers;
+    result->length = n_total;
+    result->null_count = n_total - n_notnull;
+    return result;
+}
+
 DecompressResult
 array_decompression_iterator_try_next_reverse(DecompressionIterator *base_iter)
 {
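A small worked example of the Arrow layout that text_array_decompress_all_serialized_no_header produces (hypothetical batch, following the construction above):

    /*
     * Batch of three text values: "foo", NULL, "quux" (n_total = 3, n_notnull = 2).
     *
     * validity bitmap : 0b101 (row 1 is null); tail bits up to 64 are zeroed
     * offsets         : [0, 3, 3, 7]   -- n_total + 1 entries; the null row has
     *                                     zero length (offsets[2] == offsets[1])
     * bodies          : "fooquux"      -- concatenated text, no varlena headers
     *
     * The value of row i is bodies[offsets[i] .. offsets[i + 1]).
     */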
tsl/src/compression/array.h

@@ -64,6 +64,12 @@ extern void array_compressed_send(CompressedDataHeader *header, StringInfo buffe
 extern Datum tsl_array_compressor_append(PG_FUNCTION_ARGS);
 extern Datum tsl_array_compressor_finish(PG_FUNCTION_ARGS);

+ArrowArray *tsl_text_array_decompress_all(Datum compressed_array, Oid element_type,
+                                          MemoryContext dest_mctx);
+
+ArrowArray *text_array_decompress_all_serialized_no_header(StringInfo si, bool has_nulls,
+                                                           MemoryContext dest_mctx);
+
 #define ARRAY_ALGORITHM_DEFINITION \
     { \
         .iterator_init_forward = tsl_array_decompression_iterator_from_datum_forward, \
@@ -72,4 +78,5 @@ extern Datum tsl_array_compressor_finish(PG_FUNCTION_ARGS);
         .compressed_data_recv = array_compressed_recv, \
         .compressor_for_type = array_compressor_for_type, \
         .compressed_data_storage = TOAST_STORAGE_EXTENDED, \
+        .decompress_all = tsl_text_array_decompress_all, \
     }
tsl/src/compression/arrow_c_data_interface.h

@@ -153,3 +153,10 @@ arrow_set_row_validity(uint64 *bitmap, size_t row_number, bool value)

     Assert(arrow_row_is_valid(bitmap, row_number) == value);
 }
+
+/* Increase the `source_value` to be an even multiple of `pad_to`. */
+static inline uint64
+pad_to_multiple(uint64 pad_to, uint64 source_value)
+{
+    return ((source_value + pad_to - 1) / pad_to) * pad_to;
+}
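A few sample values for the new helper:

    pad_to_multiple(64, 100);  /* => 128 */
    pad_to_multiple(64, 128);  /* => 128, already a multiple, unchanged */
    pad_to_multiple(64, 0);    /* => 0 */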
tsl/src/compression/compression.c

@@ -47,7 +47,6 @@
 #include "array.h"
 #include "chunk.h"
 #include "compression.h"
-#include "compression_test.h"
 #include "create.h"
 #include "custom_type_cache.h"
 #include "debug_assert.h"
@@ -133,6 +132,13 @@ tsl_get_decompress_all_function(CompressionAlgorithm algorithm, Oid type)
     if (algorithm >= _END_COMPRESSION_ALGORITHMS)
         elog(ERROR, "invalid compression algorithm %d", algorithm);

+    if (type != TEXTOID &&
+        (algorithm == COMPRESSION_ALGORITHM_DICTIONARY || algorithm == COMPRESSION_ALGORITHM_ARRAY))
+    {
+        /* Bulk decompression of array and dictionary is only supported for text. */
+        return NULL;
+    }
+
     return definitions[algorithm].decompress_all;
 }

tsl/src/compression/compression.h

@@ -404,3 +404,5 @@ consumeCompressedData(StringInfo si, int bytes)
  * We use this limit for sanity checks in case the compressed data is corrupt.
  */
 #define GLOBAL_MAX_ROWS_PER_COMPRESSION 1015
+
+const CompressionAlgorithmDefinition *algorithm_definition(CompressionAlgorithm algo);
tsl/src/compression/dictionary.c

@@ -22,9 +22,11 @@
 #include "compression/compression.h"
 #include "compression/dictionary.h"
 #include "compression/simple8b_rle.h"
+#include "compression/simple8b_rle_bitmap.h"
 #include "compression/array.h"
 #include "compression/dictionary_hash.h"
 #include "compression/datum_serialize.h"
+#include "compression/arrow_c_data_interface.h"

 /*
  * A compression bitmap is stored as
@@ -395,6 +397,117 @@ dictionary_decompression_iterator_init(DictionaryDecompressionIterator *iter, co
     }
     Assert(array_decompression_iterator_try_next_forward(dictionary_iterator).is_done);
 }

+#define ELEMENT_TYPE int16
+#include "simple8b_rle_decompress_all.h"
+#undef ELEMENT_TYPE
+
+ArrowArray *
+tsl_text_dictionary_decompress_all(Datum compressed, Oid element_type, MemoryContext dest_mctx)
+{
+    Assert(element_type == TEXTOID);
+
+    compressed = PointerGetDatum(PG_DETOAST_DATUM(compressed));
+
+    StringInfoData si = { .data = DatumGetPointer(compressed), .len = VARSIZE(compressed) };
+
+    const DictionaryCompressed *header = consumeCompressedData(&si, sizeof(DictionaryCompressed));
+
+    Assert(header->compression_algorithm == COMPRESSION_ALGORITHM_DICTIONARY);
+    CheckCompressedData(header->element_type == TEXTOID);
+
+    Simple8bRleSerialized *indices_serialized = bytes_deserialize_simple8b_and_advance(&si);
+
+    Simple8bRleSerialized *nulls_serialized = NULL;
+    if (header->has_nulls)
+    {
+        nulls_serialized = bytes_deserialize_simple8b_and_advance(&si);
+    }
+
+    const uint16 n_notnull = indices_serialized->num_elements;
+    const uint16 n_total = header->has_nulls ? nulls_serialized->num_elements : n_notnull;
+    const uint16 n_padded =
+        n_total + 63; /* This is the padding requirement of simple8brle_decompress_all. */
+    int16 *restrict indices = MemoryContextAlloc(dest_mctx, sizeof(int16) * n_padded);
+
+    const uint16 n_decompressed =
+        simple8brle_decompress_all_buf_int16(indices_serialized, indices, n_padded);
+    CheckCompressedData(n_decompressed == n_notnull);
+
+    /* Check that the dictionary indices that we've just read are not out of bounds. */
+    CheckCompressedData(header->num_distinct <= GLOBAL_MAX_ROWS_PER_COMPRESSION);
+    CheckCompressedData(header->num_distinct <= INT16_MAX);
+    bool have_incorrect_index = false;
+    for (int i = 0; i < n_notnull; i++)
+    {
+        have_incorrect_index |= indices[i] >= (int16) header->num_distinct;
+    }
+    CheckCompressedData(!have_incorrect_index);
+
+    /* Decompress the actual values in the dictionary. */
+    ArrowArray *dict =
+        text_array_decompress_all_serialized_no_header(&si, /* has_nulls = */ false, dest_mctx);
+    CheckCompressedData(header->num_distinct == dict->length);
+
+    /* Fill validity and indices of the array elements, reshuffling for nulls if needed. */
+    const int validity_bitmap_bytes = sizeof(uint64) * pad_to_multiple(64, n_total) / 64;
+    uint64 *restrict validity_bitmap = MemoryContextAlloc(dest_mctx, validity_bitmap_bytes);
+    memset(validity_bitmap, 0xFF, validity_bitmap_bytes);
+
+    if (header->has_nulls)
+    {
+        /*
+         * We have decompressed the data with nulls skipped, reshuffle it
+         * according to the nulls bitmap.
+         */
+        Simple8bRleBitmap nulls = simple8brle_bitmap_decompress(nulls_serialized);
+        CheckCompressedData(n_notnull + simple8brle_bitmap_num_ones(&nulls) == n_total);
+
+        int current_notnull_element = n_notnull - 1;
+        for (int i = n_total - 1; i >= 0; i--)
+        {
+            Assert(i >= current_notnull_element);
+
+            if (simple8brle_bitmap_get_at(&nulls, i))
+            {
+                arrow_set_row_validity(validity_bitmap, i, false);
+                indices[i] = 0;
+            }
+            else
+            {
+                Assert(current_notnull_element >= 0);
+                indices[i] = indices[current_notnull_element];
+                current_notnull_element--;
+            }
+        }
+
+        Assert(current_notnull_element == -1);
+    }
+    else
+    {
+        /*
+         * The validity bitmap size is a multiple of 64 bits. Fill the tail bits
+         * with zeros, because the corresponding elements are not valid.
+         */
+        if (n_total % 64)
+        {
+            const uint64 tail_mask = -1ULL >> (64 - n_total % 64);
+            validity_bitmap[n_total / 64] &= tail_mask;
+        }
+    }
+
+    ArrowArray *result = MemoryContextAllocZero(dest_mctx, sizeof(ArrowArray) + sizeof(void *) * 2);
+    const void **buffers = (const void **) &result[1];
+    buffers[0] = validity_bitmap;
+    buffers[1] = indices;
+    result->n_buffers = 2;
+    result->buffers = buffers;
+    result->length = n_total;
+    result->null_count = n_total - n_notnull;
+    result->dictionary = dict;
+    return result;
+}
+
 DecompressionIterator *
 tsl_dictionary_decompression_iterator_from_datum_forward(Datum dictionary_compressed,
                                                          Oid element_type)
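A worked example of the dictionary-encoded result (hypothetical batch, following the construction above):

    /*
     * Batch of four text values: "a", "b", NULL, "a" compressed with the
     * dictionary algorithm.
     *
     * result->length     : 4
     * result->buffers[1] : int16 indices [0, 1, 0, 0]  -- null rows get index 0
     * result->dictionary : text array with distinct values "a", "b"
     *                      (offsets [0, 1, 2], bodies "ab")
     *
     * Row i decodes to the dictionary value at indices[i] whenever its
     * validity bit is set; row 2 has its validity bit cleared.
     */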
tsl/src/compression/dictionary.h

@@ -47,6 +47,12 @@ extern Datum dictionary_compressed_recv(StringInfo buf);
 extern Datum tsl_dictionary_compressor_append(PG_FUNCTION_ARGS);
 extern Datum tsl_dictionary_compressor_finish(PG_FUNCTION_ARGS);

+ArrowArray *tsl_text_array_decompress_all(Datum compressed_array, Oid element_type,
+                                          MemoryContext dest_mctx);
+
+ArrowArray *tsl_text_dictionary_decompress_all(Datum compressed, Oid element_type,
+                                               MemoryContext dest_mctx);
+
 #define DICTIONARY_ALGORITHM_DEFINITION \
     { \
         .iterator_init_forward = tsl_dictionary_decompression_iterator_from_datum_forward, \
@@ -55,4 +61,5 @@ extern Datum tsl_dictionary_compressor_finish(PG_FUNCTION_ARGS);
         .compressed_data_recv = dictionary_compressed_recv, \
         .compressor_for_type = dictionary_compressor_for_type, \
         .compressed_data_storage = TOAST_STORAGE_EXTENDED, \
+        .decompress_all = tsl_text_dictionary_decompress_all, \
     }
tsl/src/compression/simple8b_rle_decompress_all.h

@@ -10,6 +10,9 @@
 /*
  * Specialization of bulk simple8brle decompression for a data type specified by
  * ELEMENT_TYPE macro.
+ *
+ * The buffer must have a padding of 63 elements after the last one, because
+ * decompression is performed always in full blocks.
  */
 static uint16
 FUNCTION_NAME(simple8brle_decompress_all_buf,
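A sketch of the allocation pattern this padding rule implies for callers, mirroring the n_padded computation in tsl_text_dictionary_decompress_all above (mctx and serialized are placeholders):

    const uint16 n_padded = n_total + 63; /* room for one full final block */
    int16 *buf = MemoryContextAlloc(mctx, sizeof(int16) * n_padded);
    const uint16 n = simple8brle_decompress_all_buf_int16(serialized, buf, n_padded);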
tsl/src/nodes/decompress_chunk/compressed_batch.c

@@ -77,6 +77,23 @@ make_single_value_arrow(Oid pgtype, Datum datum, bool isnull)
     return arrow;
 }

+static int
+get_max_text_datum_size(ArrowArray *text_array)
+{
+    int maxbytes = 0;
+    uint32 *offsets = (uint32 *) text_array->buffers[1];
+    for (int i = 0; i < text_array->length; i++)
+    {
+        const int curbytes = offsets[i + 1] - offsets[i];
+        if (curbytes > maxbytes)
+        {
+            maxbytes = curbytes;
+        }
+    }
+
+    return maxbytes;
+}
+
 static void
 decompress_column(DecompressContext *dcontext, DecompressBatchState *batch_state, int i)
 {
@@ -170,8 +187,37 @@ decompress_column(DecompressContext *dcontext, DecompressBatchState *batch_state
     }
     else
     {
-        /* No variable-width columns support bulk decompression. */
-        Assert(false);
+        /*
+         * Text column. Pre-allocate memory for its text Datum in the
+         * decompressed scan slot. We can't put direct references to Arrow
+         * memory there, because it doesn't have the varlena headers that
+         * Postgres expects for text.
+         */
+        const int maxbytes =
+            VARHDRSZ + (arrow->dictionary ? get_max_text_datum_size(arrow->dictionary) :
+                                            get_max_text_datum_size(arrow));
+
+        *column_values->output_value =
+            PointerGetDatum(MemoryContextAlloc(batch_state->per_batch_context, maxbytes));
+
+        /*
+         * Set up the datum conversion based on whether we use the dictionary.
+         */
+        if (arrow->dictionary == NULL)
+        {
+            column_values->decompression_type = DT_ArrowText;
+            column_values->buffers[0] = arrow->buffers[0];
+            column_values->buffers[1] = arrow->buffers[1];
+            column_values->buffers[2] = arrow->buffers[2];
+        }
+        else
+        {
+            column_values->decompression_type = DT_ArrowTextDict;
+            column_values->buffers[0] = arrow->buffers[0];
+            column_values->buffers[1] = arrow->dictionary->buffers[1];
+            column_values->buffers[2] = arrow->dictionary->buffers[2];
+            column_values->buffers[3] = arrow->buffers[1];
+        }
     }
 }
@@ -656,6 +702,21 @@ compressed_batch_set_compressed_tuple(DecompressContext *dcontext,
     MemoryContextSwitchTo(old_context);
 }

+static void
+store_text_datum(CompressedColumnValues *column_values, int arrow_row)
+{
+    const uint32 start = ((uint32 *) column_values->buffers[1])[arrow_row];
+    const int32 value_bytes = ((uint32 *) column_values->buffers[1])[arrow_row + 1] - start;
+    Assert(value_bytes >= 0);
+
+    const int total_bytes = value_bytes + VARHDRSZ;
+    Assert(DatumGetPointer(*column_values->output_value) != NULL);
+    SET_VARSIZE(*column_values->output_value, total_bytes);
+    memcpy(VARDATA(*column_values->output_value),
+           &((uint8 *) column_values->buffers[2])[start],
+           value_bytes);
+}
+
 /*
  * Construct the next tuple in the decompressed scan slot.
  * Doesn't check the quals.
@@ -685,39 +746,47 @@ make_next_tuple(DecompressBatchState *batch_state, uint16 arrow_row, int num_com
         *column_values->output_isnull = result.is_null;
         *column_values->output_value = result.val;
     }
-    else if (column_values->decompression_type > 0)
+    else if (column_values->decompression_type > SIZEOF_DATUM)
     {
-        Assert(column_values->decompression_type <= 8);
+        /*
+         * Fixed-width by-reference type that doesn't fit into a Datum.
+         * For now this only happens for 8-byte types on 32-bit systems,
+         * but eventually we could also use it for bigger by-value types
+         * such as UUID.
+         */
         const uint8 value_bytes = column_values->decompression_type;
         const char *restrict src = column_values->buffers[1];
+        *column_values->output_value = PointerGetDatum(&src[value_bytes * arrow_row]);
+        *column_values->output_isnull =
+            !arrow_row_is_valid(column_values->buffers[0], arrow_row);
+    }
+    else if (column_values->decompression_type > 0)
+    {
         /*
+         * Fixed-width by-value type that fits into a Datum.
+         *
          * The conversion of Datum to more narrow types will truncate
          * the higher bytes, so we don't care if we read some garbage
          * into them, and can always read 8 bytes. These are unaligned
          * reads, so technically we have to do memcpy.
          */
-        uint64 value;
-        memcpy(&value, &src[value_bytes * arrow_row], 8);
-#ifdef USE_FLOAT8_BYVAL
-        Datum datum = Int64GetDatum(value);
-#else
-        /*
-         * On 32-bit systems, the data larger than 4 bytes go by
-         * reference, so we have to jump through these hoops.
-         */
-        Datum datum;
-        if (value_bytes <= 4)
-        {
-            datum = Int32GetDatum((uint32) value);
-        }
-        else
-        {
-            datum = Int64GetDatum(value);
-        }
-#endif
-        *column_values->output_value = datum;
+        const uint8 value_bytes = column_values->decompression_type;
+        Assert(value_bytes <= SIZEOF_DATUM);
+        const char *restrict src = column_values->buffers[1];
+        memcpy(column_values->output_value, &src[value_bytes * arrow_row], SIZEOF_DATUM);
+        *column_values->output_isnull =
+            !arrow_row_is_valid(column_values->buffers[0], arrow_row);
+    }
+    else if (column_values->decompression_type == DT_ArrowText)
+    {
+        store_text_datum(column_values, arrow_row);
+        *column_values->output_isnull =
+            !arrow_row_is_valid(column_values->buffers[0], arrow_row);
+    }
+    else if (column_values->decompression_type == DT_ArrowTextDict)
+    {
+        const int16 index = ((int16 *) column_values->buffers[3])[arrow_row];
+        store_text_datum(column_values, index);
         *column_values->output_isnull =
             !arrow_row_is_valid(column_values->buffers[0], arrow_row);
     }
tsl/src/nodes/decompress_chunk/compressed_batch.h

@@ -13,6 +13,8 @@ typedef struct ArrowArray ArrowArray;
 /* How to obtain the decompressed datum for individual row. */
 typedef enum
 {
+    DT_ArrowTextDict = -4,
+    DT_ArrowText = -3,
     DT_Default = -2,
     DT_Iterator = -1,
     DT_Invalid = 0,
@@ -36,8 +38,10 @@ typedef struct CompressedColumnValues
      * Depending on decompression type, they are as follows:
      * iterator: iterator
      * arrow fixed: validity, value
+     * arrow text: validity, uint32* offsets, void* bodies
+     * arrow dict text: validity, uint32* dict offsets, void* dict bodies, int16* indices
      */
-    const void *restrict buffers[2];
+    const void *restrict buffers[4];

    /*
     * The source arrow array, if any. We don't use it for building the
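To make the buffer layout concrete, a sketch of reading one row of a dictionary-compressed text column, following store_text_datum and the DT_ArrowTextDict branch above (row 5 is an arbitrary example):

    /*
     * decompression_type == DT_ArrowTextDict:
     *
     *   valid = arrow_row_is_valid(buffers[0], 5);
     *   index = ((int16 *) buffers[3])[5];            // row -> dictionary entry
     *   start = ((uint32 *) buffers[1])[index];       // dict offsets
     *   len   = ((uint32 *) buffers[1])[index + 1] - start;
     *   body  = &((uint8 *) buffers[2])[start];       // dict bodies, no varlena header
     */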
tsl/src/nodes/decompress_chunk/exec.c

@@ -362,9 +362,14 @@ decompress_chunk_begin(CustomScanState *node, EState *estate, int eflags)
         CompressionColumnDescription *column = &dcontext->template_columns[i];
         if (column->bulk_decompression_supported)
         {
-            /* Values array, with 64 element padding (actually we have less). */
-            batch_memory_context_bytes +=
-                (GLOBAL_MAX_ROWS_PER_COMPRESSION + 64) * column->value_bytes;
+            /*
+             * Values array, with 64 element padding (actually we have less).
+             *
+             * For variable-length types (we only have text) we can't
+             * estimate the width currently.
+             */
+            batch_memory_context_bytes += (GLOBAL_MAX_ROWS_PER_COMPRESSION + 64) *
+                                          (column->value_bytes > 0 ? column->value_bytes : 16);
             /* Nulls bitmap, one uint64 per 64 rows. */
             batch_memory_context_bytes +=
                 ((GLOBAL_MAX_ROWS_PER_COMPRESSION + 63) / 64) * sizeof(uint64);
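With the constants above, the per-batch estimate for a text column works out to (1015 + 64) * 16 = 17264 bytes for the values array (using the placeholder width of 16 bytes) plus ((1015 + 63) / 64) * 8 = 128 bytes for the nulls bitmap.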
tsl/test/CMakeLists.txt

@@ -131,6 +131,6 @@ if(_install_checks)
   add_dependencies(installcheck installcheck-t)
 endif()

-if(CMAKE_BUILD_TYPE MATCHES Debug)
+if(CMAKE_BUILD_TYPE MATCHES Debug OR COMPRESSION_FUZZING)
   add_subdirectory(src)
-endif(CMAKE_BUILD_TYPE MATCHES Debug)
+endif()
tsl/test/expected/compression_algos.out

@@ -1588,34 +1588,37 @@ group by 2, 3 order by 1 desc
 \set algo array
 \set type text
 select count(*)
+  , coalesce((bulk.rows >= 0)::text, bulk.sqlstate) bulk_result
   , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result
-from :fn false) rowbyrow
-group by 2 order by 1 desc
+from :fn true) bulk join :fn false) rowbyrow using (path)
+group by 2, 3 order by 1 desc
 ;
- count | rowbyrow_result
--------+-----------------
-    13 | XX001
-     4 | 08P01
-     3 | true
-     1 | false
-     1 | 22021
-     1 | 3F000
+ count | bulk_result | rowbyrow_result
+-------+-------------+-----------------
+    21 | XX001       | XX001
+     6 | 08P01       | 08P01
+     2 | 3F000       | 3F000
+     2 | true        | true
+     1 | 22021       | 22021
+     1 | false       | false
 (6 rows)

 \set algo dictionary
 \set type text
 select count(*)
+  , coalesce((bulk.rows >= 0)::text, bulk.sqlstate) bulk_result
   , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result
-from :fn false) rowbyrow
-group by 2 order by 1 desc
+from :fn true) bulk join :fn false) rowbyrow using (path)
+group by 2, 3 order by 1 desc
 ;
- count | rowbyrow_result
--------+-----------------
-    22 | XX001
-     4 | 08P01
-     2 | true
-     1 | false
-     1 | 22021
-     1 | 3F000
-(6 rows)
+ count | bulk_result | rowbyrow_result
+-------+-------------+-----------------
+    51 | XX001       | XX001
+     4 | 08P01       | 08P01
+     4 | XX001       | true
+     2 | true        | true
+     2 | 22021       | 22021
+     1 | 3F000       | 3F000
+     1 | false       | false
+(7 rows)

tsl/test/expected/decompress_vector_qual.out

@@ -141,7 +141,7 @@ select * from arithmetic where
 (1 row)

 -- Test columns that don't support bulk decompression.
-alter table vectorqual add column tag text;
+alter table vectorqual add column tag name;
 insert into vectorqual(ts, device, metric2, metric3, metric4, tag) values ('2025-01-01 00:00:00', 5, 52, 53, 54, 'tag5');
 select count(compress_chunk(x, true)) from show_chunks('vectorqual') x;
 NOTICE: chunk "_hyper_1_1_chunk" is already compressed
Fuzzing corpus: 43 binary corpus files changed (binary files not shown), plus one new one-byte corpus file whose content is a single byte that is not valid UTF-8 (rendered as the replacement character in the viewer).
tsl/test/sql/compression_algos.sql

@@ -410,16 +410,18 @@ group by 2, 3 order by 1 desc
 \set algo array
 \set type text
 select count(*)
+  , coalesce((bulk.rows >= 0)::text, bulk.sqlstate) bulk_result
   , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result
-from :fn false) rowbyrow
-group by 2 order by 1 desc
+from :fn true) bulk join :fn false) rowbyrow using (path)
+group by 2, 3 order by 1 desc
 ;

 \set algo dictionary
 \set type text
 select count(*)
+  , coalesce((bulk.rows >= 0)::text, bulk.sqlstate) bulk_result
   , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result
-from :fn false) rowbyrow
-group by 2 order by 1 desc
+from :fn true) bulk join :fn false) rowbyrow using (path)
+group by 2, 3 order by 1 desc
 ;

@ -57,7 +57,7 @@ select * from arithmetic where
|
|||||||
|
|
||||||
|
|
||||||
-- Test columns that don't support bulk decompression.
|
-- Test columns that don't support bulk decompression.
|
||||||
alter table vectorqual add column tag text;
|
alter table vectorqual add column tag name;
|
||||||
insert into vectorqual(ts, device, metric2, metric3, metric4, tag) values ('2025-01-01 00:00:00', 5, 52, 53, 54, 'tag5');
|
insert into vectorqual(ts, device, metric2, metric3, metric4, tag) values ('2025-01-01 00:00:00', 5, 52, 53, 54, 'tag5');
|
||||||
select count(compress_chunk(x, true)) from show_chunks('vectorqual') x;
|
select count(compress_chunk(x, true)) from show_chunks('vectorqual') x;
|
||||||
|
|
||||||
tsl/test/src/CMakeLists.txt

@@ -1,5 +1,6 @@
-set(SOURCES test_chunk_stats.c test_merge_chunk.c test_compression.c
-    test_continuous_agg.c)
+set(SOURCES
+    test_chunk_stats.c test_merge_chunk.c compression_unit_test.c
+    compression_sql_test.c decompress_text_test_impl.c test_continuous_agg.c)

 include(${PROJECT_SOURCE_DIR}/tsl/src/build-defs.cmake)

tsl/test/src/compression_sql_test.c

@@ -11,10 +11,9 @@
 #include <funcapi.h>
 #include <utils/builtins.h>

-#include "compression_test.h"
+#include "compression_sql_test.h"

-#include "compression.h"
-#include "arrow_c_data_interface.h"
+#include "compression/arrow_c_data_interface.h"

 #if !defined(NDEBUG) || defined(TS_COMPRESSION_FUZZING)

@@ -75,7 +74,9 @@ get_compression_algorithm(char *name)
     X(DELTADELTA, INT8, true) \
     X(DELTADELTA, INT8, false) \
     X(ARRAY, TEXT, false) \
-    X(DICTIONARY, TEXT, false)
+    X(ARRAY, TEXT, true) \
+    X(DICTIONARY, TEXT, false) \
+    X(DICTIONARY, TEXT, true)

 static int (*get_decompress_fn(int algo, Oid type))(const uint8 *Data, size_t Size, bool bulk)
 {
tsl/test/src/compression_sql_test.h

@@ -5,7 +5,7 @@
  */
 #pragma once

-#include "compression.h"
+#include "compression/compression.h"

 int decompress_ARRAY_TEXT(const uint8 *Data, size_t Size, bool bulk);

tsl/test/src/compression_unit_test.c

@@ -280,8 +280,8 @@ test_gorilla_float()
     GorillaCompressor *compressor = gorilla_compressor_alloc();
     GorillaCompressed *compressed;
     DecompressionIterator *iter;
-    for (int i = 0.0; i < TEST_ELEMENTS; i++)
-        gorilla_compressor_append_value(compressor, float_get_bits((float) i));
+    for (int x = 0; x < TEST_ELEMENTS; x++)
+        gorilla_compressor_append_value(compressor, float_get_bits((float) x));

     compressed = gorilla_compressor_finish(compressor);
     TestAssertTrue(compressed != NULL);
tsl/test/src/decompress_text_test_impl.c

@@ -7,24 +7,91 @@

 #include <libpq/pqformat.h>

-#include "compression.h"
+#include "compression_sql_test.h"

-#include "compression_test.h"
+#include "compression/arrow_c_data_interface.h"

+static uint32
+arrow_get_str(ArrowArray *arrow, int arrow_row, const char **str)
+{
+    if (!arrow->dictionary)
+    {
+        const uint32 *offsets = (uint32 *) arrow->buffers[1];
+        const char *values = (char *) arrow->buffers[2];
+
+        const uint32 start = offsets[arrow_row];
+        const uint32 end = offsets[arrow_row + 1];
+        const uint32 arrow_len = end - start;
+
+        *str = &values[start];
+        return arrow_len;
+    }
+
+    const int16 dict_row = ((int16 *) arrow->buffers[1])[arrow_row];
+    return arrow_get_str(arrow->dictionary, dict_row, str);
+}
+
+static void
+decompress_generic_text_check_arrow(ArrowArray *arrow, int errorlevel, DecompressResult *results,
+                                    int n)
+{
+    /* Check that both ways of decompression match. */
+    if (n != arrow->length)
+    {
+        ereport(errorlevel,
+                (errcode(ERRCODE_INTERNAL_ERROR),
+                 errmsg("the bulk decompression result does not match"),
+                 errdetail("Expected %d elements, got %d.", n, (int) arrow->length)));
+    }
+
+    for (int i = 0; i < n; i++)
+    {
+        const bool arrow_isnull = !arrow_row_is_valid(arrow->buffers[0], i);
+        if (arrow_isnull != results[i].is_null)
+        {
+            ereport(errorlevel,
+                    (errcode(ERRCODE_INTERNAL_ERROR),
+                     errmsg("the bulk decompression result does not match"),
+                     errdetail("Expected null %d, got %d at row %d.",
+                               results[i].is_null,
+                               arrow_isnull,
+                               i)));
+        }
+
+        if (!results[i].is_null)
+        {
+            const char *arrow_cstring;
+            size_t arrow_len = arrow_get_str(arrow, i, &arrow_cstring);
+
+            const Datum rowbyrow_varlena = results[i].val;
+            const size_t rowbyrow_len = VARSIZE_ANY_EXHDR(rowbyrow_varlena);
+            const char *rowbyrow_cstring = VARDATA_ANY(rowbyrow_varlena);
+
+            if (rowbyrow_len != arrow_len)
+            {
+                ereport(errorlevel,
+                        (errcode(ERRCODE_INTERNAL_ERROR),
+                         errmsg("the bulk decompression result does not match"),
+                         errdetail("At row %d\n", i)));
+            }
+
+            if (strncmp(arrow_cstring, rowbyrow_cstring, rowbyrow_len) != 0)
+            {
+                ereport(errorlevel,
+                        (errcode(ERRCODE_INTERNAL_ERROR),
+                         errmsg("the bulk decompression result does not match"),
+                         errdetail("At row %d\n", i)));
+            }
+        }
+    }
+}
+
 /*
- * Try to decompress the given compressed data. Used for fuzzing and for checking
- * the examples found by fuzzing. For fuzzing we do less checks to keep it
- * faster and the coverage space smaller. This is a generic implementation
- * for arithmetic types.
+ * Try to decompress the given compressed data.
  */
 static int
 decompress_generic_text(const uint8 *Data, size_t Size, bool bulk, int requested_algo)
 {
-    if (bulk)
-    {
-        elog(ERROR, "bulk decompression not supported for text");
-    }
-
     StringInfoData si = { .data = (char *) Data, .len = Size };

     const int data_algo = pq_getmsgbyte(&si);
@@ -40,9 +107,19 @@ decompress_generic_text(const uint8 *Data, size_t Size, bool bulk, int requested
      */
         return -1;
     }

     const CompressionAlgorithmDefinition *def = algorithm_definition(data_algo);
     Datum compressed_data = def->compressed_data_recv(&si);
+    DecompressAllFunction decompress_all = tsl_get_decompress_all_function(data_algo, TEXTOID);
+
+    ArrowArray *arrow = NULL;
+    if (bulk)
+    {
+        /*
+         * Check that the arrow decompression works. Have to do this before the
+         * row-by-row decompression so that it doesn't hide the possible errors.
+         */
+        arrow = decompress_all(compressed_data, TEXTOID, CurrentMemoryContext);
+    }

     /*
      * Test row-by-row decompression.
@@ -60,17 +137,20 @@ decompress_generic_text(const uint8 *Data, size_t Size, bool bulk, int requested
         results[n++] = r;
     }

-    /*
-     * For row-by-row decompression, check that the result is still the same
-     * after we compress and decompress back.
-     * Don't perform this check for other types of tests.
-     */
     if (bulk)
     {
+        /*
+         * Check that the arrow decompression result matches.
+         */
+        decompress_generic_text_check_arrow(arrow, ERROR, results, n);
         return n;
     }

     /*
+     * For row-by-row decompression, check that the result is still the same
+     * after we compress and decompress back.
+     * Don't perform this check for other types of tests.
+     *
      * 1) Compress.
      */
     Compressor *compressor = def->compressor_for_type(TEXTOID);
@@ -136,6 +216,13 @@ decompress_generic_text(const uint8 *Data, size_t Size, bool bulk, int requested
         nn++;
     }

+    /*
+     * 3) The bulk decompression must absolutely work on the correct compressed
+     * data we've just generated.
+     */
+    arrow = decompress_all(compressed_data, TEXTOID, CurrentMemoryContext);
+    decompress_generic_text_check_arrow(arrow, PANIC, results, n);
+
     return n;
 }