From 4f2f6585d311c4a3a6f8b8fd3f49e498b62213b2 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov <36882414+akuzm@users.noreply.github.com> Date: Wed, 20 Dec 2023 11:44:21 +0100 Subject: [PATCH] More tests for decompression Add separate testing for bulk and row-by-row decompression, so that the errors in one don't mask the errors in the other. Also add fuzzing for row-by-row decompression, for text columns as well. --- .github/workflows/libfuzzer.yaml | 207 ++++++-- src/adts/bit_array_impl.h | 6 +- tsl/src/compression/CMakeLists.txt | 2 + tsl/src/compression/array.c | 13 +- tsl/src/compression/compression.c | 365 +------------- tsl/src/compression/compression.h | 11 +- tsl/src/compression/compression_test.c | 459 ++++++++++++++++++ tsl/src/compression/compression_test.h | 14 + tsl/src/compression/datum_serialize.c | 21 +- .../decompress_arithmetic_test_impl.c | 216 +++++++++ tsl/src/compression/decompress_test_impl.c | 204 -------- .../compression/decompress_text_test_impl.c | 152 ++++++ tsl/src/compression/deltadelta_impl.c | 3 + tsl/src/compression/dictionary.c | 33 +- .../compression/simple8b_rle_decompress_all.h | 18 +- tsl/test/expected/compression_algos.out | 99 +++- .../0d699bc41031c7525fa65c0ab267f34f608eef6a | Bin 0 -> 24 bytes .../0dbf553220bcd27478f10999d679d564a11632a1 | 1 + .../13f402104e20e5a38290bdc5fec85a46ae36bd73 | Bin 0 -> 170 bytes .../1641a06baa3defcf9a1b704cb94ea3387f40f2ad | Bin 0 -> 27 bytes .../22e70b0d023eac54b28a067aac0ab8e4eb75887b | Bin 0 -> 25 bytes .../3862930f38ef2ac7387e3e47191234094aee7c0a | Bin 0 -> 41 bytes .../428361124252a8847f1182747c936696bc43543b | Bin 0 -> 85 bytes .../4cd1b3841a01a3abc7f1cec6325130fd109dee84 | Bin 0 -> 24 bytes .../58420143cbcd2fe40fd1409948b6a78d3bf14a32 | Bin 0 -> 14 bytes .../592e2bafa4637d9786e9d14c5f1ca512e0076940 | Bin 0 -> 27 bytes .../5ba93c9db0cff93f52b521d7420e43f6eda2784f | Bin 0 -> 1 bytes .../76023b236d960f02d7fb41c7a1fa4d28dafa7c2d | Bin 0 -> 1051 bytes .../9159cb8bcee7fcb95582f140960cdae72788d326 | 1 + .../a3d453f14af5370aae60089101d659fb12c3aff4 | Bin 0 -> 14 bytes .../a42c6cf1de3abfdea9b95f34687cbbe92b9a7383 | 1 + .../fuzzing/compression/array-text/array1 | Bin 0 -> 15917 bytes .../c13eaced24e2a5039d3fbeef655fc3cf827a2be7 | Bin 0 -> 32 bytes .../c88b988789743b6aad8ef68278fc383847a37ddf | Bin 0 -> 23 bytes ...h-1ac67d1b8ebedeb93c515b5244f6e60613c1af0b | Bin 0 -> 440 bytes ...h-b6cfa8632a8bf28e90198ec167f3f63258880f77 | Bin 0 -> 478 bytes .../f2f014ef49bdaf4ff29d4a7116feff81e7015283 | Bin 0 -> 48 bytes .../f41f46df995dd4c7690f27978152ead25ccd5c75 | Bin 0 -> 48 bytes .../fuzzing/compression/array-text/with-nulls | Bin 0 -> 13613 bytes .../127d1cd7df6314984a355234cad8daee2a0a6f85 | Bin 0 -> 89 bytes .../1a155dbc0885a4ce404a03ccad4f90e8dfb6838b | Bin 0 -> 22 bytes .../1bdd8892fa9cd727ebf1b97101a04456b8ba7bc2 | Bin 0 -> 14 bytes .../27de45122a7e9da969165f24f6942fc131cb17df | Bin 0 -> 56 bytes .../2a0fa91e546f986d25159ed1e7507ec4793df3a4 | Bin 0 -> 50 bytes .../2d79b560f5c1dde8b7841ae6d77d616f26f5b3ab | Bin 0 -> 145 bytes .../2e10aad1b62e9ad833ea94af977cd498ba7da057 | Bin 0 -> 49 bytes .../4e8af02cd72c9425df8c3164b3a14bc1b70c6498 | Bin 0 -> 26 bytes .../559b65125ca556ff1a57f82f9ae55a86b71c6296 | 1 + .../5ba93c9db0cff93f52b521d7420e43f6eda2784f | Bin 0 -> 1 bytes .../5c9409528b92b40afa79d037eadcb73b859e94e6 | Bin 0 -> 49 bytes .../62ca0c60044ab7f33aa33d10a4ae715ee06b7748 | Bin 0 -> 51 bytes .../687464af84370f5d43d25acba442acc7fd14beec | Bin 0 -> 50 bytes .../8c92cd8b3e908dad0b490baa09ee984fdf224b21 | Bin 0 -> 84 bytes 
.../95f1a48e7e1cbe4b91461f1251416245497ff131 | Bin 0 -> 8 bytes .../994cc577406fe37f59e27ea1028a9d0a814af721 | Bin 0 -> 42 bytes .../a42f35cc555c689a38ef471b21fad93692f36a9c | Bin 0 -> 48 bytes .../ba200d8a4886abcdba4174f4b131db56e9128785 | Bin 0 -> 88 bytes .../bf8b4530d8d246dd74ac53a13471bba17941dff7 | 1 + .../c92920944247d80c842eaa65fd01efec1c84c342 | 1 + ...h-49789ae0866d7d630f2075dc26812433f4af1db3 | Bin 0 -> 231 bytes ...h-5eeac6ca5053992914dfb318e02e4c657a65c7cf | Bin 0 -> 174 bytes ...h-707526606a02c72364e1c8ea82357eead6c74f60 | Bin 0 -> 81 bytes ...h-b0db762535226b28c0b55ffe00d5537fd8ef7e39 | Bin 0 -> 52 bytes ...h-bd6e8aa1ebeb96cf8aa644c5ef6eb2214dee0ffc | Bin 0 -> 56 bytes .../d0f63f55c89c3220cd326e9395c12e2f4bd57942 | Bin 0 -> 1353 bytes .../fuzzing/compression/dictionary-text/dict1 | Bin 0 -> 56 bytes .../ecbd22c462813a437898cfe2848a46e5d6a460c5 | Bin 0 -> 49 bytes .../eddf750270b16df6744f3bbfa6ee82271961f573 | Bin 0 -> 26 bytes .../efb13296f8f471aadfdf8083380d1e7ac9a6bbc5 | Bin 0 -> 49 bytes .../compression/dictionary-text/with-nulls | Bin 0 -> 211 bytes tsl/test/sql/compression_algos.sql | 51 +- 71 files changed, 1203 insertions(+), 677 deletions(-) create mode 100644 tsl/src/compression/compression_test.c create mode 100644 tsl/src/compression/compression_test.h create mode 100644 tsl/src/compression/decompress_arithmetic_test_impl.c delete mode 100644 tsl/src/compression/decompress_test_impl.c create mode 100644 tsl/src/compression/decompress_text_test_impl.c create mode 100644 tsl/test/fuzzing/compression/array-text/0d699bc41031c7525fa65c0ab267f34f608eef6a create mode 100644 tsl/test/fuzzing/compression/array-text/0dbf553220bcd27478f10999d679d564a11632a1 create mode 100644 tsl/test/fuzzing/compression/array-text/13f402104e20e5a38290bdc5fec85a46ae36bd73 create mode 100644 tsl/test/fuzzing/compression/array-text/1641a06baa3defcf9a1b704cb94ea3387f40f2ad create mode 100644 tsl/test/fuzzing/compression/array-text/22e70b0d023eac54b28a067aac0ab8e4eb75887b create mode 100644 tsl/test/fuzzing/compression/array-text/3862930f38ef2ac7387e3e47191234094aee7c0a create mode 100644 tsl/test/fuzzing/compression/array-text/428361124252a8847f1182747c936696bc43543b create mode 100644 tsl/test/fuzzing/compression/array-text/4cd1b3841a01a3abc7f1cec6325130fd109dee84 create mode 100644 tsl/test/fuzzing/compression/array-text/58420143cbcd2fe40fd1409948b6a78d3bf14a32 create mode 100644 tsl/test/fuzzing/compression/array-text/592e2bafa4637d9786e9d14c5f1ca512e0076940 create mode 100644 tsl/test/fuzzing/compression/array-text/5ba93c9db0cff93f52b521d7420e43f6eda2784f create mode 100644 tsl/test/fuzzing/compression/array-text/76023b236d960f02d7fb41c7a1fa4d28dafa7c2d create mode 100644 tsl/test/fuzzing/compression/array-text/9159cb8bcee7fcb95582f140960cdae72788d326 create mode 100644 tsl/test/fuzzing/compression/array-text/a3d453f14af5370aae60089101d659fb12c3aff4 create mode 100644 tsl/test/fuzzing/compression/array-text/a42c6cf1de3abfdea9b95f34687cbbe92b9a7383 create mode 100644 tsl/test/fuzzing/compression/array-text/array1 create mode 100644 tsl/test/fuzzing/compression/array-text/c13eaced24e2a5039d3fbeef655fc3cf827a2be7 create mode 100644 tsl/test/fuzzing/compression/array-text/c88b988789743b6aad8ef68278fc383847a37ddf create mode 100644 tsl/test/fuzzing/compression/array-text/crash-1ac67d1b8ebedeb93c515b5244f6e60613c1af0b create mode 100644 tsl/test/fuzzing/compression/array-text/crash-b6cfa8632a8bf28e90198ec167f3f63258880f77 create mode 100644 
tsl/test/fuzzing/compression/array-text/f2f014ef49bdaf4ff29d4a7116feff81e7015283 create mode 100644 tsl/test/fuzzing/compression/array-text/f41f46df995dd4c7690f27978152ead25ccd5c75 create mode 100644 tsl/test/fuzzing/compression/array-text/with-nulls create mode 100644 tsl/test/fuzzing/compression/dictionary-text/127d1cd7df6314984a355234cad8daee2a0a6f85 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/1a155dbc0885a4ce404a03ccad4f90e8dfb6838b create mode 100644 tsl/test/fuzzing/compression/dictionary-text/1bdd8892fa9cd727ebf1b97101a04456b8ba7bc2 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/27de45122a7e9da969165f24f6942fc131cb17df create mode 100644 tsl/test/fuzzing/compression/dictionary-text/2a0fa91e546f986d25159ed1e7507ec4793df3a4 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/2d79b560f5c1dde8b7841ae6d77d616f26f5b3ab create mode 100644 tsl/test/fuzzing/compression/dictionary-text/2e10aad1b62e9ad833ea94af977cd498ba7da057 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/4e8af02cd72c9425df8c3164b3a14bc1b70c6498 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/559b65125ca556ff1a57f82f9ae55a86b71c6296 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/5ba93c9db0cff93f52b521d7420e43f6eda2784f create mode 100644 tsl/test/fuzzing/compression/dictionary-text/5c9409528b92b40afa79d037eadcb73b859e94e6 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/62ca0c60044ab7f33aa33d10a4ae715ee06b7748 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/687464af84370f5d43d25acba442acc7fd14beec create mode 100644 tsl/test/fuzzing/compression/dictionary-text/8c92cd8b3e908dad0b490baa09ee984fdf224b21 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/95f1a48e7e1cbe4b91461f1251416245497ff131 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/994cc577406fe37f59e27ea1028a9d0a814af721 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/a42f35cc555c689a38ef471b21fad93692f36a9c create mode 100644 tsl/test/fuzzing/compression/dictionary-text/ba200d8a4886abcdba4174f4b131db56e9128785 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/bf8b4530d8d246dd74ac53a13471bba17941dff7 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/c92920944247d80c842eaa65fd01efec1c84c342 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/crash-49789ae0866d7d630f2075dc26812433f4af1db3 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/crash-5eeac6ca5053992914dfb318e02e4c657a65c7cf create mode 100644 tsl/test/fuzzing/compression/dictionary-text/crash-707526606a02c72364e1c8ea82357eead6c74f60 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/crash-b0db762535226b28c0b55ffe00d5537fd8ef7e39 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/crash-bd6e8aa1ebeb96cf8aa644c5ef6eb2214dee0ffc create mode 100644 tsl/test/fuzzing/compression/dictionary-text/d0f63f55c89c3220cd326e9395c12e2f4bd57942 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/dict1 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/ecbd22c462813a437898cfe2848a46e5d6a460c5 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/eddf750270b16df6744f3bbfa6ee82271961f573 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/efb13296f8f471aadfdf8083380d1e7ac9a6bbc5 create mode 100644 tsl/test/fuzzing/compression/dictionary-text/with-nulls diff --git a/.github/workflows/libfuzzer.yaml b/.github/workflows/libfuzzer.yaml 
index 883d71113..35f9d87ce 100644 --- a/.github/workflows/libfuzzer.yaml +++ b/.github/workflows/libfuzzer.yaml @@ -9,17 +9,14 @@ name: Libfuzzer - prerelease_test - trigger/libfuzzer pull_request: - paths: .github/workflows/libfuzzer.yaml + paths: + - .github/workflows/libfuzzer.yaml + - 'tsl/test/fuzzing/compression/**' jobs: - fuzz: - strategy: - fail-fast: false - matrix: - case: [ { algo: gorilla, type: float8 }, { algo: deltadelta, type: int8 } ] - - name: Fuzz decompression ${{ matrix.case.algo }} ${{ matrix.case.type }} + build: runs-on: ubuntu-22.04 + name: Build PostgreSQL and TimescaleDB env: PG_SRC_DIR: pgbuild PG_INSTALL_DIR: postgresql @@ -30,7 +27,7 @@ jobs: # Don't add ddebs here because the ddebs mirror is always 503 Service Unavailable. # If needed, install them before opening the core dump. sudo apt-get update - sudo apt-get install clang lld llvm flex bison lcov systemd-coredump gdb libipc-run-perl \ + sudo apt-get install 7zip clang lld llvm flex bison libipc-run-perl \ libtest-most-perl tree - name: Checkout TimescaleDB @@ -68,7 +65,7 @@ jobs: CC=clang ./configure --prefix=$HOME/$PG_INSTALL_DIR --with-openssl \ --without-readline --without-zlib --without-libxml --enable-cassert \ --enable-debug CC=clang \ - CFLAGS="-DTS_COMPRESSION_FUZZING=1 -fuse-ld=lld -ggdb3 -Og -fno-omit-frame-pointer" + CFLAGS="-DTS_COMPRESSION_FUZZING=1 -fuse-ld=lld -ggdb3 -O2 -fno-omit-frame-pointer" make -j$(nproc) - name: Install PostgreSQL @@ -89,13 +86,68 @@ jobs: export LIBFUZZER_PATH=$(dirname "$(find $(llvm-config --libdir) -name libclang_rt.fuzzer_no_main-x86_64.a | head -1)") + # Some pointers for the next time we have linking/undefined symbol problems: + # http://web.archive.org/web/20200926071757/https://github.com/google/sanitizers/issues/111 + # http://web.archive.org/web/20231101091231/https://github.com/cms-sw/cmssw/issues/40680 + cmake -B build -S . -DASSERTIONS=ON -DLINTER=OFF -DCMAKE_VERBOSE_MAKEFILE=1 \ - -DWARNINGS_AS_ERRORS=1 -DCMAKE_BUILD_TYPE=Debug -DCMAKE_C_COMPILER=clang \ + -DWARNINGS_AS_ERRORS=1 -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_C_COMPILER=clang \ -DCMAKE_C_FLAGS="-fsanitize=fuzzer-no-link -lstdc++ -L$LIBFUZZER_PATH -l:libclang_rt.fuzzer_no_main-x86_64.a -static-libsan" \ -DPG_PATH=$HOME/$PG_INSTALL_DIR make -C build -j$(nproc) install + # Incredibly, the upload-artifact action can't preserve executable permissions: + # https://github.com/actions/upload-artifact/issues/38 + # It's also extremely slow. + - name: Compress the installation directory + run: 7z a install.7z $HOME/$PG_INSTALL_DIR + + - name: Save the installation directory + uses: actions/upload-artifact@v3 + with: + name: fuzzing-install-dir + path: install.7z + if-no-files-found: error + retention-days: 1 + + fuzz: + needs: build + strategy: + fail-fast: false + matrix: + case: [ + { algo: gorilla , pgtype: float8, bulk: false, runs: 500000000 }, + { algo: deltadelta, pgtype: int8 , bulk: false, runs: 500000000 }, + { algo: gorilla , pgtype: float8, bulk: true , runs: 1000000000 }, + { algo: deltadelta, pgtype: int8 , bulk: true , runs: 1000000000 }, + # array has a peculiar recv function that recompresses all input, so + # fuzzing it is much slower. The dictionary recv also uses it. 
+ { algo: array , pgtype: text , bulk: false, runs: 10000000 }, + { algo: dictionary, pgtype: text , bulk: false, runs: 100000000 }, + ] + + name: Fuzz decompression ${{ matrix.case.algo }} ${{ matrix.case.pgtype }} ${{ matrix.case.bulk && 'bulk' || 'rowbyrow' }} + runs-on: ubuntu-22.04 + env: + PG_SRC_DIR: pgbuild + PG_INSTALL_DIR: postgresql + + steps: + - name: Install Linux dependencies + run: sudo apt install 7zip systemd-coredump gdb + + - name: Checkout TimescaleDB + uses: actions/checkout@v3 + + - name: Download the installation directory + uses: actions/download-artifact@v3 + with: + name: fuzzing-install-dir + + - name: Unpack the installation directory + run: 7z x -o$HOME install.7z + - name: initdb run: | # Have to do this before initializing the corpus, or initdb will complain. @@ -108,24 +160,37 @@ jobs: initdb echo "shared_preload_libraries = 'timescaledb'" >> $PGDATA/postgresql.conf - - name: Restore the cached fuzzing corpus - id: restore-corpus-cache + - name: Set configuration + id: config + run: | + set -x + echo "cache_prefix=${{ format('libfuzzer-corpus-2-{0}-{1}', matrix.case.algo, matrix.case.pgtype) }}" >> $GITHUB_OUTPUT + echo "name=${{ matrix.case.algo }} ${{ matrix.case.pgtype }} ${{ matrix.case.bulk && 'bulk' || 'rowbyrow' }}" >> $GITHUB_OUTPUT + + - name: Restore the cached fuzzing corpus (bulk) + id: restore-corpus-cache-bulk uses: actions/cache/restore@v3 with: - path: db/corpus - # If the initial corpus changes, probably it was updated by hand with - # some important examples, and it makes sense to start anew from it. - key: "libfuzzer-corpus-2-${{ matrix.case.algo }}-${{ matrix.case.type }}-\ - ${{ hashFiles(format('tsl/test/fuzzing/compression/{0}-{1}', matrix.case.algo, matrix.case.type)) }}" + path: db/corpus-bulk + key: "${{ steps.config.outputs.cache_prefix }}-bulk" + + # We save the row-by-row corpus separately from the bulk corpus, so that + # they don't overwrite each other. Now we are going to combine them. + - name: Restore the cached fuzzing corpus (rowbyrow) + id: restore-corpus-cache-rowbyrow + uses: actions/cache/restore@v3 + with: + path: db/corpus-rowbyrow + key: "${{ steps.config.outputs.cache_prefix }}-rowbyrow" - name: Initialize the fuzzing corpus - # cache-hit is only true for exact key matches, and we use prefix matches. - if: steps.restore-corpus-cache.outputs.cache-matched-key == '' run: | - # Copy the intial corpus files from the repository. The github actions - # cache doesn't follow symlinks. - mkdir -p db/corpus - find "tsl/test/fuzzing/compression/${{ matrix.case.algo }}-${{ matrix.case.type }}" -type f -exec cp -t db/corpus {} + + # Combine the cached corpus from rowbyrow and bulk fuzzing, and from repository. 
+ mkdir -p db/corpus{,-rowbyrow,-bulk} + find "tsl/test/fuzzing/compression/${{ matrix.case.algo }}-${{ matrix.case.pgtype }}" -type f -exec cp -n -t db/corpus {} + + find "db/corpus-rowbyrow" -type f -exec cp -n -t db/corpus {} + + find "db/corpus-bulk" -type f -exec cp -n -t db/corpus {} + + ls db/corpus | wc -l - name: Run libfuzzer for compression run: | @@ -135,25 +200,36 @@ jobs: export PGPORT=5432 export PGDATABASE=postgres export PATH=$HOME/$PG_INSTALL_DIR/bin:$PATH - pg_ctl -l postmaster.log start + pg_ctl -l postgres.log start psql -c "create extension timescaledb;" - # Create the fuzzing function + # Create the fuzzing functions export MODULE_NAME=$(basename $(find $HOME/$PG_INSTALL_DIR -name "timescaledb-tsl-*.so")) - psql -a -c "create or replace function fuzz(algo cstring, type regtype, runs int) returns int as '"$MODULE_NAME"', 'ts_fuzz_compression' language c;" + psql -a -c "create or replace function fuzz(algo cstring, pgtype regtype, + bulk bool, runs int) + returns int as '"$MODULE_NAME"', 'ts_fuzz_compression' language c; + + create or replace function ts_read_compressed_data_directory(algo cstring, + pgtype regtype, path cstring, bulk bool) + returns table(path text, bytes int, rows int, sqlstate text, location text) + as '"$MODULE_NAME"', 'ts_read_compressed_data_directory' language c; + + " # Start more fuzzing processes in the background. We won't even monitor # their progress, because the server will panic if they find an error. for x in {2..$(nproc)} do - psql -v ON_ERROR_STOP=1 -c "select fuzz('${{ matrix.case.algo }}', '${{ matrix.case.type }}', 100000000);" & + psql -v ON_ERROR_STOP=1 -c "select fuzz('${{ matrix.case.algo }}', + '${{ matrix.case.pgtype }}', '${{ matrix.case.bulk }}', ${{ matrix.case.runs }});" & done # Start the one fuzzing process that we will monitor, in foreground. # The LLVM fuzzing driver calls exit(), so we expect to lose the connection. ret=0 - psql -v ON_ERROR_STOP=1 -c "select fuzz('${{ matrix.case.algo }}', '${{ matrix.case.type }}', 100000000);" || ret=$? + psql -v ON_ERROR_STOP=1 -c "select fuzz('${{ matrix.case.algo }}', + '${{ matrix.case.pgtype }}', '${{ matrix.case.bulk }}', ${{ matrix.case.runs }});" || ret=$? if ! [ $ret -eq 2 ] then >&2 echo "Unexpected psql exit code $ret" @@ -163,11 +239,38 @@ jobs: # Check that the server is still alive. psql -c "select 1" + ls db/corpus | wc -l + + fn="ts_read_compressed_data_directory('${{ matrix.case.algo }}', + '${{ matrix.case.pgtype }}', 'corpus', '${{ matrix.case.bulk }}')" + + # Show the statistics about fuzzing corpus + psql -c "select count(*), location, min(sqlstate), min(path) + from $fn + group by location order by count(*) desc + " + + # Save interesting cases because the caches are not available for download from UI + mkdir -p interesting + psql -qtAX -c "select distinct on (location) 'db/' || path from $fn + order by location, bytes + " | xargs cp -t interesting + + # Check that we don't have any internal errors + errors=$(psql -qtAX --set=ON_ERROR_STOP=1 -c "select count(*) + from $fn + where sqlstate = 'XX000'") + echo "Internal program errors: $errors" + [ $errors -eq 0 ] || exit 1 + + + # Shouldn't have any WARNINGS in the log. + ! grep -F "] WARNING: " postgres.log + - name: Collect the logs if: always() id: collectlogs run: | - find . 
-name postmaster.log -exec cat {} + > postgres.log # wait in case there are in-progress coredumps sleep 10 if coredumpctl -q list >/dev/null; then echo "coredumps=true" >>$GITHUB_OUTPUT; fi @@ -178,26 +281,52 @@ jobs: if: always() uses: actions/upload-artifact@v3 with: - name: PostgreSQL log for ${{ matrix.case.algo }} ${{ matrix.case.type }} + name: PostgreSQL log for ${{ steps.config.outputs.name }} path: postgres.log - name: Save fuzzer-generated crash cases if: always() uses: actions/upload-artifact@v3 with: - name: Crash cases for ${{ matrix.case.algo }} ${{ matrix.case.type }} + name: Crash cases for ${{ steps.config.outputs.name }} path: db/crash-* + - name: Save interesting cases + if: always() + uses: actions/upload-artifact@v3 + with: + name: Interesting cases for ${{ steps.config.outputs.name }} + path: interesting/ + # We use separate restore/save actions, because the default action won't - # save the updated folder after the cache hit. We also can't overwrite the - # existing cache, so we add a unique suffix. The cache is matched by key - # prefix, not exact key, and picks the newest matching item, so this works. + # save the updated folder after the cache hit. We also want to save the + # cache after fuzzing errors, and the default action doesn't save after + # errors. + # We can't overwrite the existing cache, so we add a unique suffix. The + # cache is matched by key prefix, not exact key, and picks the newest + # matching item, so this works. + # The caches for rowbyrow and bulk fuzzing are saved separately, otherwise + # the slower job would always overwrite the cache from the faster one. We + # want to combine corpuses from bulk and rowbyrow fuzzing for better + # coverage. + # Note that the cache action cannot be restored on a path different from the + # one it was saved from. To make our lives more interesting, it is not + # directly documented anywhere, but we can deduce it from path influencing + # the version. 
+ - name: Change corpus path to please the 'actions/cache' GitHub Action + if: always() + run: | + rm -rf db/corpus-{bulk,rowbyrow} ||: + mv -fT db/corpus{,-${{ matrix.case.bulk && 'bulk' || 'rowbyrow' }}} + - name: Save fuzzer corpus + if: always() uses: actions/cache/save@v3 with: - path: db/corpus - key: "${{ format('{0}-{1}-{2}', - steps.restore-corpus-cache.outputs.cache-primary-key, + path: db/corpus-${{ matrix.case.bulk && 'bulk' || 'rowbyrow' }} + key: "${{ format('{0}-{1}-{2}-{3}', + steps.config.outputs.cache_prefix, + matrix.case.bulk && 'bulk' || 'rowbyrow', github.run_id, github.run_attempt) }}" - name: Stack trace @@ -224,5 +353,5 @@ jobs: if: always() && steps.collectlogs.outputs.coredumps == 'true' uses: actions/upload-artifact@v3 with: - name: Coredumps for ${{ matrix.case.algo }} ${{ matrix.case.type }} + name: Coredumps for ${{ steps.config.outputs.name }} path: coredumps diff --git a/src/adts/bit_array_impl.h b/src/adts/bit_array_impl.h index c9555613d..8de4c7e84 100644 --- a/src/adts/bit_array_impl.h +++ b/src/adts/bit_array_impl.h @@ -347,8 +347,6 @@ bit_array_append_bucket(BitArray *array, uint8 bits_used, uint64 bucket) static uint64 bit_array_low_bits_mask(uint8 bits_used) { - if (bits_used >= 64) - return PG_UINT64_MAX; - else - return (UINT64CONST(1) << bits_used) - UINT64CONST(1); + Assert(bits_used > 0); + return -1ULL >> (64 - bits_used); } diff --git a/tsl/src/compression/CMakeLists.txt b/tsl/src/compression/CMakeLists.txt index 9e8e97248..647b7884a 100644 --- a/tsl/src/compression/CMakeLists.txt +++ b/tsl/src/compression/CMakeLists.txt @@ -2,6 +2,8 @@ set(SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/api.c ${CMAKE_CURRENT_SOURCE_DIR}/array.c ${CMAKE_CURRENT_SOURCE_DIR}/compression.c + ${CMAKE_CURRENT_SOURCE_DIR}/compression_test.c + ${CMAKE_CURRENT_SOURCE_DIR}/decompress_text_test_impl.c ${CMAKE_CURRENT_SOURCE_DIR}/create.c ${CMAKE_CURRENT_SOURCE_DIR}/datum_serialize.c ${CMAKE_CURRENT_SOURCE_DIR}/deltadelta.c diff --git a/tsl/src/compression/array.c b/tsl/src/compression/array.c index 58535be92..febd6937f 100644 --- a/tsl/src/compression/array.c +++ b/tsl/src/compression/array.c @@ -240,6 +240,7 @@ array_compression_serialization_size(ArrayCompressorSerializationInfo *info) uint32 array_compression_serialization_num_elements(ArrayCompressorSerializationInfo *info) { + CheckCompressedData(info->sizes != NULL); return info->sizes->num_elements; } @@ -405,12 +406,12 @@ array_decompression_iterator_try_next_forward(DecompressionIterator *general_ite .is_done = true, }; - Assert(iter->data_offset + datum_size.val <= iter->num_data_bytes); + CheckCompressedData(iter->data_offset + datum_size.val <= iter->num_data_bytes); start_pointer = iter->data + iter->data_offset; val = bytes_to_datum_and_advance(iter->deserializer, &start_pointer); iter->data_offset += datum_size.val; - Assert(iter->data + iter->data_offset == start_pointer); + CheckCompressedData(iter->data + iter->data_offset == start_pointer); return (DecompressResult){ .val = val, @@ -602,7 +603,6 @@ array_compressed_data_send(StringInfo buffer, const char *_serialized_data, Size Datum array_compressed_recv(StringInfo buffer) { - ArrayCompressorSerializationInfo *data; uint8 has_nulls; Oid element_type; @@ -611,9 +611,12 @@ array_compressed_recv(StringInfo buffer) element_type = binary_string_get_type(buffer); - data = array_compressed_data_recv(buffer, element_type); + ArrayCompressorSerializationInfo *info = array_compressed_data_recv(buffer, element_type); - 
PG_RETURN_POINTER(array_compressed_from_serialization_info(data, element_type)); + CheckCompressedData(info->sizes != NULL); + CheckCompressedData(has_nulls == (info->nulls != NULL)); + + PG_RETURN_POINTER(array_compressed_from_serialization_info(info, element_type)); } void diff --git a/tsl/src/compression/compression.c b/tsl/src/compression/compression.c index 10b4dadff..0e0c239db 100644 --- a/tsl/src/compression/compression.c +++ b/tsl/src/compression/compression.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -42,16 +41,15 @@ #include #include #include -#include #include "compat/compat.h" #include "array.h" #include "chunk.h" #include "compression.h" +#include "compression_test.h" #include "create.h" #include "custom_type_cache.h" -#include "arrow_c_data_interface.h" #include "debug_assert.h" #include "debug_point.h" #include "deltadelta.h" @@ -2219,366 +2217,13 @@ decompress_batches_for_insert(ChunkInsertState *cis, Chunk *chunk, TupleTableSlo table_close(in_rel, NoLock); } -#if !defined(NDEBUG) || defined(TS_COMPRESSION_FUZZING) - -static int -get_compression_algorithm(char *name) +const CompressionAlgorithmDefinition * +algorithm_definition(CompressionAlgorithm algo) { - if (pg_strcasecmp(name, "deltadelta") == 0) - { - return COMPRESSION_ALGORITHM_DELTADELTA; - } - else if (pg_strcasecmp(name, "gorilla") == 0) - { - return COMPRESSION_ALGORITHM_GORILLA; - } - - ereport(ERROR, (errmsg("unknown comrpession algorithm %s", name))); - return _INVALID_COMPRESSION_ALGORITHM; + Assert(algo > 0 && algo < _END_COMPRESSION_ALGORITHMS); + return &definitions[algo]; } -#define ALGO gorilla -#define CTYPE float8 -#define PGTYPE FLOAT8OID -#define DATUM_TO_CTYPE DatumGetFloat8 -#include "decompress_test_impl.c" -#undef ALGO -#undef CTYPE -#undef PGTYPE -#undef DATUM_TO_CTYPE - -#define ALGO deltadelta -#define CTYPE int64 -#define PGTYPE INT8OID -#define DATUM_TO_CTYPE DatumGetInt64 -#include "decompress_test_impl.c" -#undef ALGO -#undef CTYPE -#undef PGTYPE -#undef DATUM_TO_CTYPE - -static int (*get_decompress_fn(int algo, Oid type))(const uint8 *Data, size_t Size, - bool extra_checks) -{ - if (algo == COMPRESSION_ALGORITHM_GORILLA && type == FLOAT8OID) - { - return decompress_gorilla_float8; - } - else if (algo == COMPRESSION_ALGORITHM_DELTADELTA && type == INT8OID) - { - return decompress_deltadelta_int64; - } - - elog(ERROR, - "no decompression function for compression algorithm %d with element type %d", - algo, - type); - pg_unreachable(); -} - -/* - * Read and decompress compressed data from file. Useful for debugging the - * results of fuzzing. - * The out parameter bytes is volatile because we want to fill it even - * if we error out later. - */ -static void -read_compressed_data_file_impl(int algo, Oid type, const char *path, volatile int *bytes, int *rows) -{ - FILE *f = fopen(path, "r"); - - if (!f) - { - elog(ERROR, "could not open the file '%s'", path); - } - - fseek(f, 0, SEEK_END); - const size_t fsize = ftell(f); - fseek(f, 0, SEEK_SET); /* same as rewind(f); */ - - *rows = 0; - *bytes = fsize; - - if (fsize == 0) - { - /* - * Skip empty data, because we'll just get "no data left in message" - * right away. 
- */ - return; - } - - char *string = palloc(fsize + 1); - size_t elements_read = fread(string, fsize, 1, f); - - if (elements_read != 1) - { - elog(ERROR, "failed to read file '%s'", path); - } - - fclose(f); - - string[fsize] = 0; - - *rows = get_decompress_fn(algo, type)((const uint8 *) string, fsize, /* extra_checks = */ true); -} - -TS_FUNCTION_INFO_V1(ts_read_compressed_data_file); - -/* Read and decompress compressed data from file -- SQL-callable wrapper. */ -Datum -ts_read_compressed_data_file(PG_FUNCTION_ARGS) -{ - int rows; - int bytes; - read_compressed_data_file_impl(get_compression_algorithm(PG_GETARG_CSTRING(0)), - PG_GETARG_OID(1), - PG_GETARG_CSTRING(2), - &bytes, - &rows); - PG_RETURN_INT32(rows); -} - -TS_FUNCTION_INFO_V1(ts_read_compressed_data_directory); - -/* - * Read and decomrpess all compressed data files from directory. Useful for - * checking the fuzzing corpuses in the regression tests. - */ -Datum -ts_read_compressed_data_directory(PG_FUNCTION_ARGS) -{ - /* Output columns of this function. */ - enum - { - out_path = 0, - out_bytes, - out_rows, - out_sqlstate, - out_location, - _out_columns - }; - - /* Cross-call context for this set-returning function. */ - struct user_context - { - DIR *dp; - struct dirent *ep; - }; - - char *name = PG_GETARG_CSTRING(2); - const int algo = get_compression_algorithm(PG_GETARG_CSTRING(0)); - - FuncCallContext *funcctx; - struct user_context *c; - MemoryContext call_memory_context = CurrentMemoryContext; - - /* stuff done only on the first call of the function */ - if (SRF_IS_FIRSTCALL()) - { - /* create a function context for cross-call persistence */ - funcctx = SRF_FIRSTCALL_INIT(); - - /* switch to memory context appropriate for multiple function calls */ - MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); - - /* Build a tuple descriptor for our result type */ - if (get_call_result_type(fcinfo, NULL, &funcctx->tuple_desc) != TYPEFUNC_COMPOSITE) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("function returning record called in context " - "that cannot accept type record"))); - - /* - * generate attribute metadata needed later to produce tuples from raw - * C strings - */ - funcctx->attinmeta = TupleDescGetAttInMetadata(funcctx->tuple_desc); - - funcctx->user_fctx = palloc(sizeof(struct user_context)); - c = funcctx->user_fctx; - - c->dp = opendir(name); - - if (!c->dp) - { - elog(ERROR, "could not open directory '%s'", name); - } - - MemoryContextSwitchTo(call_memory_context); - } - - funcctx = SRF_PERCALL_SETUP(); - c = (struct user_context *) funcctx->user_fctx; - - Datum values[_out_columns] = { 0 }; - bool nulls[_out_columns] = { 0 }; - for (int i = 0; i < _out_columns; i++) - { - nulls[i] = true; - } - - while ((c->ep = readdir(c->dp))) - { - if (c->ep->d_name[0] == '.') - { - continue; - } - - char *path = psprintf("%s/%s", name, c->ep->d_name); - - /* The return values are: path, ret, sqlstate, status, location. 
*/ - values[out_path] = PointerGetDatum(cstring_to_text(path)); - nulls[out_path] = false; - - int rows; - volatile int bytes = 0; - PG_TRY(); - { - read_compressed_data_file_impl(algo, PG_GETARG_OID(1), path, &bytes, &rows); - values[out_rows] = Int32GetDatum(rows); - nulls[out_rows] = false; - } - PG_CATCH(); - { - MemoryContextSwitchTo(call_memory_context); - - ErrorData *error = CopyErrorData(); - - values[out_sqlstate] = - PointerGetDatum(cstring_to_text(unpack_sql_state(error->sqlerrcode))); - nulls[out_sqlstate] = false; - - if (error->filename) - { - values[out_location] = PointerGetDatum( - cstring_to_text(psprintf("%s:%d", error->filename, error->lineno))); - nulls[out_location] = false; - } - - FlushErrorState(); - } - PG_END_TRY(); - - values[out_bytes] = Int32GetDatum(bytes); - nulls[out_bytes] = false; - - SRF_RETURN_NEXT(funcctx, - HeapTupleGetDatum(heap_form_tuple(funcctx->tuple_desc, values, nulls))); - } - - (void) closedir(c->dp); - - SRF_RETURN_DONE(funcctx); -} - -#endif - -#ifdef TS_COMPRESSION_FUZZING - -/* - * This is our test function that will be called by the libfuzzer driver. It - * has to catch the postgres exceptions normally produced for corrupt data. - */ -static int -llvm_fuzz_target_generic(int (*target)(const uint8_t *Data, size_t Size, bool extra_checks), - const uint8_t *Data, size_t Size) -{ - MemoryContextReset(CurrentMemoryContext); - - PG_TRY(); - { - CHECK_FOR_INTERRUPTS(); - target(Data, Size, /* extra_checks = */ false); - } - PG_CATCH(); - { - FlushErrorState(); - } - PG_END_TRY(); - - /* We always return 0, and -1 would mean "don't include it into corpus". */ - return 0; -} - -static int -llvm_fuzz_target_gorilla_float8(const uint8_t *Data, size_t Size) -{ - return llvm_fuzz_target_generic(decompress_gorilla_float8, Data, Size); -} -static int -llvm_fuzz_target_deltadelta_int64(const uint8_t *Data, size_t Size) -{ - return llvm_fuzz_target_generic(decompress_deltadelta_int64, Data, Size); -} - -/* - * libfuzzer fuzzing driver that we import from LLVM libraries. It will run our - * test functions with random inputs. - */ -extern int LLVMFuzzerRunDriver(int *argc, char ***argv, - int (*UserCb)(const uint8_t *Data, size_t Size)); - -/* - * The SQL function to perform fuzzing. - */ -TS_FUNCTION_INFO_V1(ts_fuzz_compression); - -Datum -ts_fuzz_compression(PG_FUNCTION_ARGS) -{ - /* - * We use the memory context size larger than default here, so that all data - * allocated by fuzzing fit into the first chunk. The first chunk is not - * deallocated when the memory context is reset, so this reduces overhead - * caused by repeated reallocations. - * The particular value of 8MB is somewhat arbitrary and large. In practice, - * we have inputs of 1k rows max here, which decompress to 8 kB max. 
- */ - MemoryContext fuzzing_context = - AllocSetContextCreate(CurrentMemoryContext, "fuzzing", 0, 8 * 1024 * 1024, 8 * 1024 * 1024); - MemoryContext old_context = MemoryContextSwitchTo(fuzzing_context); - - char *argvdata[] = { "PostgresFuzzer", - "-timeout=1", - "-report_slow_units=1", - // "-use_value_profile=1", - "-reload=1", - //"-print_coverage=1", - //"-print_full_coverage=1", - //"-print_final_stats=1", - //"-help=1", - psprintf("-runs=%d", PG_GETARG_INT32(2)), - "corpus" /* in the database directory */, - NULL }; - char **argv = argvdata; - int argc = sizeof(argvdata) / sizeof(*argvdata) - 1; - - int algo = get_compression_algorithm(PG_GETARG_CSTRING(0)); - Oid type = PG_GETARG_OID(1); - int (*target)(const uint8_t *, size_t); - if (algo == COMPRESSION_ALGORITHM_GORILLA && type == FLOAT8OID) - { - target = llvm_fuzz_target_gorilla_float8; - } - else if (algo == COMPRESSION_ALGORITHM_DELTADELTA && type == INT8OID) - { - target = llvm_fuzz_target_deltadelta_int64; - } - else - { - elog(ERROR, "no llvm fuzz target for compression algorithm %d and type %d", algo, type); - } - - int res = LLVMFuzzerRunDriver(&argc, &argv, target); - - MemoryContextSwitchTo(old_context); - - PG_RETURN_INT32(res); -} - -#endif - #if PG14_GE static BatchFilter * make_batchfilter(char *column_name, StrategyNumber strategy, Oid collation, RegProcedure opcode, diff --git a/tsl/src/compression/compression.h b/tsl/src/compression/compression.h index 94f7455af..cc60a8823 100644 --- a/tsl/src/compression/compression.h +++ b/tsl/src/compression/compression.h @@ -376,22 +376,21 @@ extern enum CompressionAlgorithms compress_get_default_algorithm(Oid typeoid); * to pollute the logs. */ #ifndef TS_COMPRESSION_FUZZING -#define CORRUPT_DATA_MESSAGE \ - (errmsg("the compressed data is corrupt"), errcode(ERRCODE_DATA_CORRUPTED)) +#define CORRUPT_DATA_MESSAGE(X) \ + (errmsg("the compressed data is corrupt"), errdetail(X), errcode(ERRCODE_DATA_CORRUPTED)) #else -#define CORRUPT_DATA_MESSAGE (errcode(ERRCODE_DATA_CORRUPTED)) +#define CORRUPT_DATA_MESSAGE(X) (errcode(ERRCODE_DATA_CORRUPTED)) #endif #define CheckCompressedData(X) \ if (unlikely(!(X))) \ - ereport(ERROR, CORRUPT_DATA_MESSAGE) + ereport(ERROR, CORRUPT_DATA_MESSAGE(#X)) inline static void * consumeCompressedData(StringInfo si, int bytes) { CheckCompressedData(bytes >= 0); - CheckCompressedData(bytes < PG_INT32_MAX / 2); - CheckCompressedData(si->cursor + bytes >= 0); + CheckCompressedData(si->cursor + bytes >= si->cursor); /* Check for overflow. */ CheckCompressedData(si->cursor + bytes <= si->len); void *result = si->data + si->cursor; diff --git a/tsl/src/compression/compression_test.c b/tsl/src/compression/compression_test.c new file mode 100644 index 000000000..55fbc5b03 --- /dev/null +++ b/tsl/src/compression/compression_test.c @@ -0,0 +1,459 @@ +/* + * This file and its contents are licensed under the Timescale License. + * Please see the included NOTICE for copyright information and + * LICENSE-TIMESCALE for a copy of the license. 
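The reworked CheckCompressedData macro above turns any failed validation predicate into an ereport with ERRCODE_DATA_CORRUPTED, attaching the stringified condition as errdetail outside of fuzzing builds. A minimal usage sketch follows; the function and parameter names here are hypothetical, only the macro, the constant, and consumeCompressedData come from this patch:

	static void
	validate_header_sketch(StringInfo si, uint16 num_elements)
	{
		/* Reject element counts that a well-formed batch can never have. */
		CheckCompressedData(num_elements <= GLOBAL_MAX_ROWS_PER_COMPRESSION);

		/* consumeCompressedData raises the same error instead of reading past the end. */
		const char *sizes = consumeCompressedData(si, num_elements * sizeof(uint32));
		(void) sizes;
	}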
+ */ +#include + +#include + +#include +#include +#include + +#include "compression_test.h" + +#include "compression.h" +#include "arrow_c_data_interface.h" + +#if !defined(NDEBUG) || defined(TS_COMPRESSION_FUZZING) + +static int +get_compression_algorithm(char *name) +{ + if (pg_strcasecmp(name, "deltadelta") == 0) + { + return COMPRESSION_ALGORITHM_DELTADELTA; + } + else if (pg_strcasecmp(name, "gorilla") == 0) + { + return COMPRESSION_ALGORITHM_GORILLA; + } + else if (pg_strcasecmp(name, "array") == 0) + { + return COMPRESSION_ALGORITHM_ARRAY; + } + else if (pg_strcasecmp(name, "dictionary") == 0) + { + return COMPRESSION_ALGORITHM_DICTIONARY; + } + + ereport(ERROR, (errmsg("unknown compression algorithm %s", name))); + return _INVALID_COMPRESSION_ALGORITHM; +} + +/* + * Specializations of test functions for arithmetic types. + */ +#define ALGO GORILLA +#define CTYPE float8 +#define PG_TYPE_PREFIX FLOAT8 +#define DATUM_TO_CTYPE DatumGetFloat8 +#include "decompress_arithmetic_test_impl.c" +#undef ALGO +#undef CTYPE +#undef PG_TYPE_PREFIX +#undef DATUM_TO_CTYPE + +#define ALGO DELTADELTA +#define CTYPE int64 +#define PG_TYPE_PREFIX INT8 +#define DATUM_TO_CTYPE DatumGetInt64 +#include "decompress_arithmetic_test_impl.c" +#undef ALGO +#undef CTYPE +#undef PG_TYPE_PREFIX +#undef DATUM_TO_CTYPE + +/* + * The table of the supported testing configurations. We use it to generate + * dispatch tables and specializations of test functions. + */ +#define APPLY_FOR_TYPES(X) \ + X(GORILLA, FLOAT8, true) \ + X(GORILLA, FLOAT8, false) \ + X(DELTADELTA, INT8, true) \ + X(DELTADELTA, INT8, false) \ + X(ARRAY, TEXT, false) \ + X(DICTIONARY, TEXT, false) + +static int (*get_decompress_fn(int algo, Oid type))(const uint8 *Data, size_t Size, bool bulk) +{ +#define DISPATCH(ALGO, PGTYPE, BULK) \ + if (algo == COMPRESSION_ALGORITHM_##ALGO && type == PGTYPE##OID) \ + { \ + return decompress_##ALGO##_##PGTYPE; \ + } + + APPLY_FOR_TYPES(DISPATCH) + + elog(ERROR, + "no decompression function for compression algorithm %d with element type %d", + algo, + type); + pg_unreachable(); +#undef DISPATCH +} + +/* + * Read and decompress compressed data from file. Useful for debugging the + * results of fuzzing. + * The out parameter bytes is volatile because we want to fill it even + * if we error out later. + */ +static void +read_compressed_data_file_impl(int algo, Oid type, const char *path, bool bulk, volatile int *bytes, + int *rows) +{ + FILE *f = fopen(path, "r"); + + if (!f) + { + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FILE), errmsg("could not open the file '%s'", path))); + } + + fseek(f, 0, SEEK_END); + const size_t fsize = ftell(f); + fseek(f, 0, SEEK_SET); /* same as rewind(f); */ + + *rows = 0; + *bytes = fsize; + + if (fsize == 0) + { + /* + * Skip empty data, because we'll just get "no data left in message" + * right away. + */ + return; + } + + char *string = palloc(fsize + 1); + size_t elements_read = fread(string, fsize, 1, f); + + if (elements_read != 1) + { + ereport(ERROR, (errcode(ERRCODE_UNDEFINED_FILE), errmsg("failed to read file '%s'", path))); + } + + fclose(f); + + string[fsize] = 0; + + *rows = get_decompress_fn(algo, type)((const uint8 *) string, fsize, bulk); +} + +TS_FUNCTION_INFO_V1(ts_read_compressed_data_file); + +/* Read and decompress compressed data from file -- SQL-callable wrapper.
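APPLY_FOR_TYPES above is an X-macro: every supported (algorithm, type, bulk) configuration is listed once and then reused to generate both the dispatch code and the fuzz target specializations. As a sketch, the first table row passed through DISPATCH expands to roughly:

	/* Approximate expansion of X(GORILLA, FLOAT8, true) under DISPATCH. */
	if (algo == COMPRESSION_ALGORITHM_GORILLA && type == FLOAT8OID)
	{
		return decompress_GORILLA_FLOAT8;
	}

Note that DISPATCH ignores the BULK column, so the bulk and row-by-row rows for the same algorithm produce two identical branches; the bulk flag is instead passed to the returned function at run time.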
*/ +Datum +ts_read_compressed_data_file(PG_FUNCTION_ARGS) +{ + int rows; + int bytes; + read_compressed_data_file_impl(get_compression_algorithm(PG_GETARG_CSTRING(0)), + PG_GETARG_OID(1), + PG_GETARG_CSTRING(2), + PG_GETARG_BOOL(3), + &bytes, + &rows); + PG_RETURN_INT32(rows); +} + +TS_FUNCTION_INFO_V1(ts_read_compressed_data_directory); + +/* + * Read and decompress all compressed data files from directory. Useful for + * checking the fuzzing corpuses in the regression tests. + */ +Datum +ts_read_compressed_data_directory(PG_FUNCTION_ARGS) +{ + /* Output columns of this function. */ + enum + { + out_path = 0, + out_bytes, + out_rows, + out_sqlstate, + out_location, + _out_columns + }; + + /* Cross-call context for this set-returning function. */ + struct user_context + { + DIR *dp; + struct dirent *ep; + }; + + char *name = PG_GETARG_CSTRING(2); + const int algo = get_compression_algorithm(PG_GETARG_CSTRING(0)); + + FuncCallContext *funcctx; + struct user_context *c; + MemoryContext call_memory_context = CurrentMemoryContext; + + /* stuff done only on the first call of the function */ + if (SRF_IS_FIRSTCALL()) + { + /* create a function context for cross-call persistence */ + funcctx = SRF_FIRSTCALL_INIT(); + + /* switch to memory context appropriate for multiple function calls */ + MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &funcctx->tuple_desc) != TYPEFUNC_COMPOSITE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("function returning record called in context " + "that cannot accept type record"))); + + /* + * generate attribute metadata needed later to produce tuples from raw + * C strings + */ + funcctx->attinmeta = TupleDescGetAttInMetadata(funcctx->tuple_desc); + + funcctx->user_fctx = palloc(sizeof(struct user_context)); + c = funcctx->user_fctx; + + c->dp = opendir(name); + + if (!c->dp) + { + elog(ERROR, "could not open directory '%s'", name); + } + + MemoryContextSwitchTo(call_memory_context); + } + + funcctx = SRF_PERCALL_SETUP(); + c = (struct user_context *) funcctx->user_fctx; + + Datum values[_out_columns] = { 0 }; + bool nulls[_out_columns] = { 0 }; + for (int i = 0; i < _out_columns; i++) + { + nulls[i] = true; + } + + while ((c->ep = readdir(c->dp))) + { + if (c->ep->d_name[0] == '.') + { + continue; + } + + char *path = psprintf("%s/%s", name, c->ep->d_name); + + /* The return values are: path, ret, sqlstate, status, location.
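ts_read_compressed_data_directory follows PostgreSQL's standard set-returning-function protocol: one-time setup under SRF_IS_FIRSTCALL, per-call state recovered with SRF_PERCALL_SETUP, and one SRF_RETURN_NEXT per emitted row. A stripped-down sketch of that skeleton, with a hypothetical counter as the cross-call state:

	Datum
	srf_skeleton(PG_FUNCTION_ARGS)
	{
		FuncCallContext *funcctx;

		if (SRF_IS_FIRSTCALL())
		{
			funcctx = SRF_FIRSTCALL_INIT();
			/* Cross-call state must live in the multi-call memory context. */
			MemoryContext old = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
			funcctx->user_fctx = palloc0(sizeof(int));
			MemoryContextSwitchTo(old);
		}

		funcctx = SRF_PERCALL_SETUP();
		int *rows_emitted = (int *) funcctx->user_fctx;

		if (*rows_emitted < 3)
			SRF_RETURN_NEXT(funcctx, Int32GetDatum((*rows_emitted)++));

		SRF_RETURN_DONE(funcctx);
	}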
*/ + values[out_path] = PointerGetDatum(cstring_to_text(path)); + nulls[out_path] = false; + + int rows; + volatile int bytes = 0; + PG_TRY(); + { + read_compressed_data_file_impl(algo, + PG_GETARG_OID(1), + path, + PG_GETARG_BOOL(3), + &bytes, + &rows); + values[out_rows] = Int32GetDatum(rows); + nulls[out_rows] = false; + } + PG_CATCH(); + { + MemoryContextSwitchTo(call_memory_context); + + ErrorData *error = CopyErrorData(); + + values[out_sqlstate] = + PointerGetDatum(cstring_to_text(unpack_sql_state(error->sqlerrcode))); + nulls[out_sqlstate] = false; + + if (error->filename) + { + values[out_location] = PointerGetDatum( + cstring_to_text(psprintf("%s:%d", error->filename, error->lineno))); + nulls[out_location] = false; + } + + FlushErrorState(); + } + PG_END_TRY(); + + values[out_bytes] = Int32GetDatum(bytes); + nulls[out_bytes] = false; + + SRF_RETURN_NEXT(funcctx, + HeapTupleGetDatum(heap_form_tuple(funcctx->tuple_desc, values, nulls))); + } + + (void) closedir(c->dp); + + SRF_RETURN_DONE(funcctx); +} + +#endif + +#ifdef TS_COMPRESSION_FUZZING + +/* + * Fuzzing target for all supported types. + */ +static int +target_generic(const uint8 *Data, size_t Size, CompressionAlgorithm requested_algo, Oid pg_type, + bool bulk) +{ + StringInfoData si = { .data = (char *) Data, .len = Size }; + + const CompressionAlgorithm data_algo = pq_getmsgbyte(&si); + + CheckCompressedData(data_algo > 0 && data_algo < _END_COMPRESSION_ALGORITHMS); + + if (data_algo != requested_algo) + { + /* + * It's convenient to fuzz only one algorithm at a time. We specialize + * the fuzz target for one algorithm, so that the fuzzer doesn't waste + * time discovering others from scratch. + */ + return -1; + } + + const CompressionAlgorithmDefinition *def = algorithm_definition(data_algo); + Datum compressed_data = def->compressed_data_recv(&si); + + if (bulk) + { + DecompressAllFunction decompress_all = tsl_get_decompress_all_function(data_algo, pg_type); + decompress_all(compressed_data, pg_type, CurrentMemoryContext); + return 0; + } + + DecompressionIterator *iter = def->iterator_init_forward(compressed_data, pg_type); + for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)) + ; + return 0; +} + +/* + * This is a wrapper for the fuzzing target. It will be called by the libfuzzer driver. + * It has to catch the postgres exceptions normally produced for corrupt data. + */ +static int +target_wrapper(const uint8_t *Data, size_t Size, CompressionAlgorithm requested_algo, Oid pg_type, + bool bulk) +{ + MemoryContextReset(CurrentMemoryContext); + + int res = 0; + PG_TRY(); + { + CHECK_FOR_INTERRUPTS(); + res = target_generic(Data, Size, requested_algo, pg_type, bulk); + } + PG_CATCH(); + { + /* EmitErrorReport(); */ + FlushErrorState(); + } + PG_END_TRY(); + + /* + * -1 means "don't include it into corpus", return it if the test function + * says so, otherwise return 0. Some test functions also return the number + * of rows for the correct data; the fuzzer doesn't understand these values. + */ + return res == -1 ? -1 : 0; +} + +/* + * Specializations of fuzzing targets for supported types that will be directly + * called by the fuzzing driver. + */ +#define DECLARE_TARGET(ALGO, PGTYPE, BULK) \ + static int target_##ALGO##_##PGTYPE##_##BULK(const uint8_t *D, size_t S) \ + { \ + return target_wrapper(D, S, COMPRESSION_ALGORITHM_##ALGO, PGTYPE##OID, BULK); \ + } + +APPLY_FOR_TYPES(DECLARE_TARGET) + +#undef DECLARE_TARGET + +/* + * libfuzzer fuzzing driver that we import from LLVM libraries.
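APPLY_FOR_TYPES(DECLARE_TARGET) above stamps out one libfuzzer entry point per table row. For instance, the (GORILLA, FLOAT8, true) row expands to approximately:

	static int
	target_GORILLA_FLOAT8_true(const uint8_t *D, size_t S)
	{
		return target_wrapper(D, S, COMPRESSION_ALGORITHM_GORILLA, FLOAT8OID, true);
	}

This is the specialization that ts_fuzz_compression later selects through the same table and hands to LLVMFuzzerRunDriver.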
It will run our + * test functions with random inputs. + */ +extern int LLVMFuzzerRunDriver(int *argc, char ***argv, + int (*UserCb)(const uint8_t *Data, size_t Size)); + +/* + * The SQL function to perform fuzzing. + */ +TS_FUNCTION_INFO_V1(ts_fuzz_compression); + +Datum +ts_fuzz_compression(PG_FUNCTION_ARGS) +{ + /* + * We use the memory context size larger than default here, so that all data + * allocated by fuzzing fit into the first chunk. The first chunk is not + * deallocated when the memory context is reset, so this reduces overhead + * caused by repeated reallocations. + * The particular value of 8MB is somewhat arbitrary and large. In practice, + * we have inputs of 1k rows max here, which decompress to 8 kB max. + */ + MemoryContext fuzzing_context = + AllocSetContextCreate(CurrentMemoryContext, "fuzzing", 0, 8 * 1024 * 1024, 8 * 1024 * 1024); + MemoryContext old_context = MemoryContextSwitchTo(fuzzing_context); + + char *argvdata[] = { "PostgresFuzzer", + "-timeout=1", + "-report_slow_units=1", + // "-use_value_profile=1", + "-reload=1", + //"-print_coverage=1", + //"-print_full_coverage=1", + //"-print_final_stats=1", + //"-help=1", + psprintf("-runs=%d", PG_GETARG_INT32(3)), + "corpus" /* in the database directory */, + NULL }; + char **argv = argvdata; + int argc = sizeof(argvdata) / sizeof(*argvdata) - 1; + + int algo = get_compression_algorithm(PG_GETARG_CSTRING(0)); + Oid type = PG_GETARG_OID(1); + bool bulk = PG_GETARG_BOOL(2); + + int (*target)(const uint8_t *, size_t) = NULL; + +#define DISPATCH(ALGO, PGTYPE, BULK) \ + if (algo == COMPRESSION_ALGORITHM_##ALGO && type == PGTYPE##OID && bulk == BULK) \ + { \ + target = target_##ALGO##_##PGTYPE##_##BULK; \ + } + + APPLY_FOR_TYPES(DISPATCH) +#undef DISPATCH + + if (target == NULL) + { + elog(ERROR, "no llvm fuzz target for compression algorithm %d and type %d", algo, type); + } + + int res = LLVMFuzzerRunDriver(&argc, &argv, target); + + MemoryContextSwitchTo(old_context); + + PG_RETURN_INT32(res); +} + +#endif diff --git a/tsl/src/compression/compression_test.h b/tsl/src/compression/compression_test.h new file mode 100644 index 000000000..f10402eee --- /dev/null +++ b/tsl/src/compression/compression_test.h @@ -0,0 +1,14 @@ +/* + * This file and its contents are licensed under the Timescale License. + * Please see the included NOTICE for copyright information and + * LICENSE-TIMESCALE for a copy of the license. + */ +#pragma once + +#include "compression.h" + +int decompress_ARRAY_TEXT(const uint8 *Data, size_t Size, bool bulk); + +int decompress_DICTIONARY_TEXT(const uint8 *Data, size_t Size, bool bulk); + +const CompressionAlgorithmDefinition *algorithm_definition(CompressionAlgorithm algo); diff --git a/tsl/src/compression/datum_serialize.c b/tsl/src/compression/datum_serialize.c index c2ac0c06b..d22ca1c0d 100644 --- a/tsl/src/compression/datum_serialize.c +++ b/tsl/src/compression/datum_serialize.c @@ -20,6 +20,8 @@ #include "datum_serialize.h" #include "compat/compat.h" +#include "compression.h" + typedef struct DatumSerializer { Oid type_oid; @@ -305,6 +307,22 @@ bytes_to_datum_and_advance(DatumDeserializer *deserializer, const char **ptr) *ptr = (Pointer) att_align_pointer(*ptr, deserializer->type_align, deserializer->type_len, *ptr); + if (deserializer->type_len == -1) + { + /* + * Check for potentially corrupt varlena headers since we're reading them + * directly from compressed data. We can only have a plain datum + * with 1-byte or 4-byte header here, no TOAST or compressed data. 
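The CheckCompressedData calls that follow guard against corrupt varlena headers read straight out of compressed data: only a plain unaligned 4-byte header or a short 1-byte header is acceptable, never a TOAST pointer, and the stored length must not be smaller than the header itself. The same predicate restated as a standalone sketch, using the stock PostgreSQL varatt macros (the helper name is ours):

	static bool
	varlena_header_looks_sane(const char *p)
	{
		if (VARATT_IS_1B_E(p))
			return false; /* TOAST pointer, never valid inside compressed data */
		if (VARATT_IS_1B(p))
			return VARSIZE_1B(p) >= VARHDRSZ_SHORT; /* short 1-byte header */
		return VARATT_IS_4B_U(p) && VARSIZE_4B(p) > VARHDRSZ; /* plain 4-byte header */
	}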
+ */ + CheckCompressedData(VARATT_IS_4B_U(*ptr) || (VARATT_IS_1B(*ptr) && !VARATT_IS_1B_E(*ptr))); + + /* + * Full varsize must be larger than or equal to the header size so that the + * calculation of size without header doesn't overflow. + */ + CheckCompressedData((VARATT_IS_1B(*ptr) && VARSIZE_1B(*ptr) >= VARHDRSZ_SHORT) || + (VARSIZE_4B(*ptr) > VARHDRSZ)); + } res = fetch_att(*ptr, deserializer->type_by_val, deserializer->type_len); *ptr = att_addlength_pointer(*ptr, deserializer->type_len, *ptr); return res; @@ -343,8 +361,7 @@ binary_string_get_type(StringInfo buffer) Anum_pg_type_oid, PointerGetDatum(element_type_name), ObjectIdGetDatum(namespace_oid)); - if (!OidIsValid(type_oid)) - elog(ERROR, "could not find type %s.%s", element_type_namespace, element_type_name); + CheckCompressedData(OidIsValid(type_oid)); return type_oid; } diff --git a/tsl/src/compression/decompress_arithmetic_test_impl.c b/tsl/src/compression/decompress_arithmetic_test_impl.c new file mode 100644 index 000000000..c9a207d14 --- /dev/null +++ b/tsl/src/compression/decompress_arithmetic_test_impl.c @@ -0,0 +1,216 @@ +/* + * This file and its contents are licensed under the Timescale License. + * Please see the included NOTICE for copyright information and + * LICENSE-TIMESCALE for a copy of the license. + */ + +#define FUNCTION_NAME_HELPER3(X, Y, Z) X##_##Y##_##Z +#define FUNCTION_NAME3(X, Y, Z) FUNCTION_NAME_HELPER3(X, Y, Z) +#define FUNCTION_NAME_HELPER2(X, Y) X##_##Y +#define FUNCTION_NAME2(X, Y) FUNCTION_NAME_HELPER2(X, Y) + +#define PG_TYPE_OID_HELPER(X) X##OID +#define PG_TYPE_OID_HELPER2(X) PG_TYPE_OID_HELPER(X) +#define PG_TYPE_OID PG_TYPE_OID_HELPER2(PG_TYPE_PREFIX) + +static void +FUNCTION_NAME2(check_arrow, CTYPE)(ArrowArray *arrow, int error_type, DecompressResult *results, + int n) +{ + if (n != arrow->length) + { + ereport(error_type, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("the bulk decompression result does not match"), + errdetail("Expected %d elements, got %d.", n, (int) arrow->length))); + } + + for (int i = 0; i < n; i++) + { + const bool arrow_isnull = !arrow_row_is_valid(arrow->buffers[0], i); + if (arrow_isnull != results[i].is_null) + { + ereport(error_type, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("the bulk decompression result does not match"), + errdetail("Expected null %d, got %d at row %d.", + results[i].is_null, + arrow_isnull, + i))); + } + + if (!results[i].is_null) + { + const CTYPE arrow_value = ((CTYPE *) arrow->buffers[1])[i]; + const CTYPE rowbyrow_value = DATUM_TO_CTYPE(results[i].val); + + /* + * Floats can also be NaN/infinite and the comparison doesn't + * work in that case. + */ + if (isfinite((double) arrow_value) != isfinite((double) rowbyrow_value)) + { + ereport(error_type, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("the bulk decompression result does not match"), + errdetail("At row %d\n", i))); + } + + if (isfinite((double) arrow_value) && arrow_value != rowbyrow_value) + { + ereport(error_type, + (errcode(ERRCODE_INTERNAL_ERROR), + errmsg("the bulk decompression result does not match"), + errdetail("At row %d\n", i))); + } + } + } +} + +/* + * Try to decompress the given compressed data. Used for fuzzing and for checking + * the examples found by fuzzing. For fuzzing we do fewer checks to keep it + * faster and the coverage space smaller. This is a generic implementation + * for arithmetic types.
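check_arrow cannot compare floating-point values with plain inequality, because NaN != NaN would flag spurious mismatches; it therefore compares finiteness first and the values themselves only when both are finite. The same predicate in isolation (the helper name is ours):

	#include <math.h>

	/* True when a row-by-row value and a bulk value should count as equal. */
	static bool
	values_match(double a, double b)
	{
		if (isfinite(a) != isfinite(b))
			return false;
		return !isfinite(a) || a == b;
	}

One consequence is that any two non-finite values compare as equal here, so a NaN-versus-Inf discrepancy would pass unnoticed.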
+ */ +static int +FUNCTION_NAME3(decompress, ALGO, PG_TYPE_PREFIX)(const uint8 *Data, size_t Size, bool bulk) +{ + StringInfoData si = { .data = (char *) Data, .len = Size }; + + const int data_algo = pq_getmsgbyte(&si); + + CheckCompressedData(data_algo > 0 && data_algo < _END_COMPRESSION_ALGORITHMS); + + if (data_algo != FUNCTION_NAME2(COMPRESSION_ALGORITHM, ALGO)) + { + /* + * It's convenient to fuzz only one algorithm at a time. We specialize + * the fuzz target for one algorithm, so that the fuzzer doesn't waste + * time discovering others from scratch. + */ + return -1; + } + + const CompressionAlgorithmDefinition *def = algorithm_definition(data_algo); + Datum compressed_data = def->compressed_data_recv(&si); + + DecompressAllFunction decompress_all = tsl_get_decompress_all_function(data_algo, PG_TYPE_OID); + + ArrowArray *arrow = NULL; + if (bulk) + { + /* + * Test bulk decompression. Have to do this before row-by-row decompression + * so that the latter doesn't hide the errors. + */ + arrow = decompress_all(compressed_data, PG_TYPE_OID, CurrentMemoryContext); + } + + /* + * Test row-by-row decompression. + */ + DecompressionIterator *iter = def->iterator_init_forward(compressed_data, PG_TYPE_OID); + DecompressResult results[GLOBAL_MAX_ROWS_PER_COMPRESSION]; + int n = 0; + for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)) + { + if (n >= GLOBAL_MAX_ROWS_PER_COMPRESSION) + { + elog(ERROR, "too many compressed rows"); + } + + results[n++] = r; + } + + /* Check that both ways of decompression match. */ + if (bulk) + { + FUNCTION_NAME2(check_arrow, CTYPE)(arrow, ERROR, results, n); + return n; + } + + /* + * For row-by-row decompression, check that the result is still the same + * after we compress and decompress back. + * + * 1) Compress. + */ + Compressor *compressor = def->compressor_for_type(PG_TYPE_OID); + + for (int i = 0; i < n; i++) + { + if (results[i].is_null) + { + compressor->append_null(compressor); + } + else + { + compressor->append_val(compressor, results[i].val); + } + } + + compressed_data = (Datum) compressor->finish(compressor); + if (compressed_data == 0) + { + /* The gorilla compressor returns NULL for all-null input sets. */ + return n; + }; + + /* + * 2) Decompress and check that it's the same. + */ + iter = def->iterator_init_forward(compressed_data, PG_TYPE_OID); + int nn = 0; + for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)) + { + if (r.is_null != results[nn].is_null) + { + elog(ERROR, "the repeated decompression result doesn't match"); + } + + if (!r.is_null) + { + CTYPE old_value = DATUM_TO_CTYPE(results[nn].val); + CTYPE new_value = DATUM_TO_CTYPE(r.val); + /* + * Floats can also be NaN/infinite and the comparison doesn't + * work in that case. + */ + if (isfinite((double) old_value) != isfinite((double) new_value)) + { + elog(ERROR, "the repeated decompression result doesn't match"); + } + + if (isfinite((double) old_value) && old_value != new_value) + { + elog(ERROR, "the repeated decompression result doesn't match"); + } + } + + nn++; + + if (nn > n) + { + elog(ERROR, "the repeated recompression result doesn't match"); + } + } + + /* + * 3) The bulk decompression must absolutely work on the correct compressed + * data we've just generated. 
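The error level passed to check_arrow encodes severity: the first bulk-versus-row-by-row comparison uses ERROR, because possibly-corrupt fuzzer input is allowed to fail, while the final check after recompression uses PANIC, because data the test just compressed itself must always decompress correctly. In the generated float8 specialization the two call sites expand to:

	/* Possibly-corrupt input: a mismatch is reported and unwound normally. */
	check_arrow_float8(arrow, ERROR, results, n);

	/* Our own freshly compressed data: a mismatch is an invariant violation. */
	check_arrow_float8(arrow, PANIC, results, n);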
+ */ + arrow = decompress_all(compressed_data, PG_TYPE_OID, CurrentMemoryContext); + FUNCTION_NAME2(check_arrow, CTYPE)(arrow, PANIC, results, n); + + return n; +} + +#undef FUNCTION_NAME3 +#undef FUNCTION_NAME_HELPER3 +#undef FUNCTION_NAME2 +#undef FUNCTION_NAME_HELPER2 + +#undef PG_TYPE_OID +#undef PG_TYPE_OID_HELPER +#undef PG_TYPE_OID_HELPER2 diff --git a/tsl/src/compression/decompress_test_impl.c b/tsl/src/compression/decompress_test_impl.c deleted file mode 100644 index c452b1733..000000000 --- a/tsl/src/compression/decompress_test_impl.c +++ /dev/null @@ -1,204 +0,0 @@ -/* - * This file and its contents are licensed under the Timescale License. - * Please see the included NOTICE for copyright information and - * LICENSE-TIMESCALE for a copy of the license. - */ - -#define FUNCTION_NAME_HELPER(X, Y) decompress_##X##_##Y -#define FUNCTION_NAME(X, Y) FUNCTION_NAME_HELPER(X, Y) - -#define TOSTRING_HELPER(x) #x -#define TOSTRING(x) TOSTRING_HELPER(x) - -/* - * Try to decompress the given compressed data. Used for fuzzing and for checking - * the examples found by fuzzing. For fuzzing we do less checks to keep it - * faster and the coverage space smaller. - */ -static int -FUNCTION_NAME(ALGO, CTYPE)(const uint8 *Data, size_t Size, bool extra_checks) -{ - StringInfoData si = { .data = (char *) Data, .len = Size }; - - const int algo = pq_getmsgbyte(&si); - - CheckCompressedData(algo > 0 && algo < _END_COMPRESSION_ALGORITHMS); - - if (algo != get_compression_algorithm(TOSTRING(ALGO))) - { - /* - * It's convenient to fuzz only one algorithm at a time. We specialize - * the fuzz target for one algorithm, so that the fuzzer doesn't waste - * time discovering others from scratch. - */ - return -1; - } - - Datum compressed_data = definitions[algo].compressed_data_recv(&si); - - if (!extra_checks) - { - /* - * For routine fuzzing, we only run bulk decompression to make it faster - * and the coverage space smaller. - */ - DecompressAllFunction decompress_all = tsl_get_decompress_all_function(algo, PGTYPE); - decompress_all(compressed_data, PGTYPE, CurrentMemoryContext); - return 0; - } - - /* - * Test bulk decompression. This might hide some errors in the row-by-row - * decompression, but testing both is significantly more complicated, and - * the row-by-row is old and stable. - */ - ArrowArray *arrow = NULL; - DecompressAllFunction decompress_all = tsl_get_decompress_all_function(algo, PGTYPE); - if (decompress_all) - { - arrow = decompress_all(compressed_data, PGTYPE, CurrentMemoryContext); - } - - /* - * Test row-by-row decompression. - */ - DecompressionIterator *iter = definitions[algo].iterator_init_forward(compressed_data, PGTYPE); - DecompressResult results[GLOBAL_MAX_ROWS_PER_COMPRESSION]; - int n = 0; - for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter)) - { - if (n >= GLOBAL_MAX_ROWS_PER_COMPRESSION) - { - elog(ERROR, "too many compressed rows"); - } - - results[n++] = r; - } - - /* Check that both ways of decompression match. 
*/
-	if (arrow)
-	{
-		if (n != arrow->length)
-		{
-			ereport(ERROR,
-					(errcode(ERRCODE_INTERNAL_ERROR),
-					 errmsg("the bulk decompression result does not match"),
-					 errdetail("Expected %d elements, got %d.", n, (int) arrow->length)));
-		}
-
-		for (int i = 0; i < n; i++)
-		{
-			const bool arrow_isnull = !arrow_row_is_valid(arrow->buffers[0], i);
-			if (arrow_isnull != results[i].is_null)
-			{
-				ereport(ERROR,
-						(errcode(ERRCODE_INTERNAL_ERROR),
-						 errmsg("the bulk decompression result does not match"),
-						 errdetail("Expected null %d, got %d at row %d.",
-								   results[i].is_null,
-								   arrow_isnull,
-								   i)));
-			}
-
-			if (!results[i].is_null)
-			{
-				const CTYPE arrow_value = ((CTYPE *) arrow->buffers[1])[i];
-				const CTYPE rowbyrow_value = DATUM_TO_CTYPE(results[i].val);
-
-				/*
-				 * Floats can also be NaN/infinite and the comparison doesn't
-				 * work in that case.
-				 */
-				if (isfinite((double) arrow_value) != isfinite((double) rowbyrow_value))
-				{
-					ereport(ERROR,
-							(errcode(ERRCODE_INTERNAL_ERROR),
-							 errmsg("the bulk decompression result does not match"),
-							 errdetail("At row %d\n", i)));
-				}
-
-				if (isfinite((double) arrow_value) && arrow_value != rowbyrow_value)
-				{
-					ereport(ERROR,
-							(errcode(ERRCODE_INTERNAL_ERROR),
-							 errmsg("the bulk decompression result does not match"),
-							 errdetail("At row %d\n", i)));
-				}
-			}
-		}
-	}
-
-	/*
-	 * Check that the result is still the same after we compress and decompress
-	 * back.
-	 *
-	 * 1) Compress.
-	 */
-	Compressor *compressor = definitions[algo].compressor_for_type(PGTYPE);
-
-	for (int i = 0; i < n; i++)
-	{
-		if (results[i].is_null)
-		{
-			compressor->append_null(compressor);
-		}
-		else
-		{
-			compressor->append_val(compressor, results[i].val);
-		}
-	}
-
-	compressed_data = (Datum) compressor->finish(compressor);
-	if (compressed_data == 0)
-	{
-		/* The gorilla compressor returns NULL for all-null input sets. */
-		return n;
-	};
-
-	/*
-	 * 2) Decompress and check that it's the same.
-	 */
-	iter = definitions[algo].iterator_init_forward(compressed_data, PGTYPE);
-	int nn = 0;
-	for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter))
-	{
-		if (r.is_null != results[nn].is_null)
-		{
-			elog(ERROR, "the repeated decompression result doesn't match");
-		}
-
-		if (!r.is_null)
-		{
-			CTYPE old_value = DATUM_TO_CTYPE(results[nn].val);
-			CTYPE new_value = DATUM_TO_CTYPE(r.val);
-			/*
-			 * Floats can also be NaN/infinite and the comparison doesn't
-			 * work in that case.
-			 */
-			if (isfinite((double) old_value) != isfinite((double) new_value))
-			{
-				elog(ERROR, "the repeated decompression result doesn't match");
-			}
-
-			if (isfinite((double) old_value) && old_value != new_value)
-			{
-				elog(ERROR, "the repeated decompression result doesn't match");
-			}
-		}
-
-		nn++;
-
-		if (nn > n)
-		{
-			elog(ERROR, "the repeated recompression result doesn't match");
-		}
-	}
-
-	return n;
-}
-
-#undef TOSTRING
-#undef TOSTRING_HELPER
-
-#undef FUNCTION_NAME
-#undef FUNCTION_NAME_HELPER
diff --git a/tsl/src/compression/decompress_text_test_impl.c b/tsl/src/compression/decompress_text_test_impl.c
new file mode 100644
index 000000000..d90ed30ce
--- /dev/null
+++ b/tsl/src/compression/decompress_text_test_impl.c
@@ -0,0 +1,152 @@
+/*
+ * This file and its contents are licensed under the Timescale License.
+ * Please see the included NOTICE for copyright information and
+ * LICENSE-TIMESCALE for a copy of the license.
+ */
+#include <postgres.h>
+
+#include <libpq/pqformat.h>
+
+#include "compression.h"
+
+#include "compression_test.h"
+
+/*
+ * Try to decompress the given compressed data.
+ * Used for fuzzing and for checking the examples found by fuzzing. For
+ * fuzzing we do fewer checks to keep it faster and the coverage space
+ * smaller. This is the implementation for text columns.
+ */
+static int
+decompress_generic_text(const uint8 *Data, size_t Size, bool bulk, int requested_algo)
+{
+	if (bulk)
+	{
+		elog(ERROR, "bulk decompression not supported for text");
+	}
+
+	StringInfoData si = { .data = (char *) Data, .len = Size };
+
+	const int data_algo = pq_getmsgbyte(&si);
+
+	CheckCompressedData(data_algo > 0 && data_algo < _END_COMPRESSION_ALGORITHMS);
+
+	if (data_algo != requested_algo)
+	{
+		/*
+		 * It's convenient to fuzz only one algorithm at a time. We specialize
+		 * the fuzz target for one algorithm, so that the fuzzer doesn't waste
+		 * time discovering others from scratch.
+		 */
+		return -1;
+	}
+
+	const CompressionAlgorithmDefinition *def = algorithm_definition(data_algo);
+	Datum compressed_data = def->compressed_data_recv(&si);
+
+	/*
+	 * Test row-by-row decompression.
+	 */
+	DecompressionIterator *iter = def->iterator_init_forward(compressed_data, TEXTOID);
+	DecompressResult results[GLOBAL_MAX_ROWS_PER_COMPRESSION];
+	int n = 0;
+	for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter))
+	{
+		if (n >= GLOBAL_MAX_ROWS_PER_COMPRESSION)
+		{
+			elog(ERROR, "too many compressed rows");
+		}
+
+		results[n++] = r;
+	}
+
+	/*
+	 * For row-by-row decompression, check that the result is still the same
+	 * after we compress and decompress back. Don't perform this check for
+	 * the other types of tests.
+	 */
+	if (bulk)
+	{
+		return n;
+	}
+
+	/*
+	 * 1) Compress.
+	 */
+	Compressor *compressor = def->compressor_for_type(TEXTOID);
+
+	for (int i = 0; i < n; i++)
+	{
+		if (results[i].is_null)
+		{
+			compressor->append_null(compressor);
+		}
+		else
+		{
+			compressor->append_val(compressor, results[i].val);
+		}
+	}
+
+	compressed_data = (Datum) compressor->finish(compressor);
+	if (compressed_data == 0)
+	{
+		/* Some compressors return NULL when all rows are null. */
+		return n;
+	}
+
+	/*
+	 * 2) Decompress and check that it's the same.
+	 */
+	iter = def->iterator_init_forward(compressed_data, TEXTOID);
+	int nn = 0;
+	for (DecompressResult r = iter->try_next(iter); !r.is_done; r = iter->try_next(iter))
+	{
+		if (r.is_null != results[nn].is_null)
+		{
+			elog(ERROR, "the repeated decompression result doesn't match");
+		}
+
+		if (!r.is_null)
+		{
+			const Datum old_value = results[nn].val;
+			const Datum new_value = r.val;
+
+			/*
+			 * The decompressed values are text varlenas, so compare their
+			 * length and contents instead of the float comparison used for
+			 * the arithmetic types.
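+			 * VARSIZE_ANY_EXHDR is the data length without the varlena
+			 * header, so checking the lengths first ensures that the
+			 * strncmp below compares contents of equal size.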
+ */ + if (VARSIZE_ANY_EXHDR(old_value) != VARSIZE_ANY_EXHDR(new_value)) + { + elog(ERROR, "the repeated decompression result doesn't match"); + } + + if (strncmp(VARDATA_ANY(old_value), + VARDATA_ANY(new_value), + VARSIZE_ANY_EXHDR(new_value))) + { + elog(ERROR, "the repeated decompression result doesn't match"); + } + } + + nn++; + + if (nn > n) + { + elog(ERROR, "the repeated recompression result doesn't match"); + } + } + + return n; +} + +int +decompress_ARRAY_TEXT(const uint8 *Data, size_t Size, bool bulk) +{ + return decompress_generic_text(Data, Size, bulk, COMPRESSION_ALGORITHM_ARRAY); +} + +int +decompress_DICTIONARY_TEXT(const uint8 *Data, size_t Size, bool bulk) +{ + return decompress_generic_text(Data, Size, bulk, COMPRESSION_ALGORITHM_DICTIONARY); +} diff --git a/tsl/src/compression/deltadelta_impl.c b/tsl/src/compression/deltadelta_impl.c index 48562aaec..0a27b7b53 100644 --- a/tsl/src/compression/deltadelta_impl.c +++ b/tsl/src/compression/deltadelta_impl.c @@ -75,6 +75,9 @@ FUNCTION_NAME(delta_delta_decompress_all, ELEMENT_TYPE)(Datum compressed, Memory * * Also tried using SIMD prefix sum from here twice: * https://en.algorithmica.org/hpc/algorithms/prefix/, it's slower. + * + * Also tried zig-zag decoding in a separate loop, seems to be slightly + * slower, around the noise threshold. */ #define INNER_LOOP_SIZE 8 Assert(n_notnull_padded % INNER_LOOP_SIZE == 0); diff --git a/tsl/src/compression/dictionary.c b/tsl/src/compression/dictionary.c index 06fa9c6d5..0ad9b600d 100644 --- a/tsl/src/compression/dictionary.c +++ b/tsl/src/compression/dictionary.c @@ -450,7 +450,7 @@ dictionary_decompression_iterator_try_next_forward(DecompressionIterator *iter_b .is_done = true, }; - Assert(result.val < iter->compressed->num_distinct); + CheckCompressedData(result.val < iter->compressed->num_distinct); return (DecompressResult){ .val = iter->values[result.val], .is_null = false, @@ -596,7 +596,7 @@ dictionary_compressed_send(CompressedDataHeader *header, StringInfo buffer) Datum dictionary_compressed_recv(StringInfo buffer) { - DictionaryCompressorSerializationInfo data = { 0 }; + DictionaryCompressorSerializationInfo info = { 0 }; uint8 has_nulls; Oid element_type; @@ -604,27 +604,30 @@ dictionary_compressed_recv(StringInfo buffer) CheckCompressedData(has_nulls == 0 || has_nulls == 1); element_type = binary_string_get_type(buffer); - data.dictionary_compressed_indexes = simple8brle_serialized_recv(buffer); - data.bitmaps_size = simple8brle_serialized_total_size(data.dictionary_compressed_indexes); - data.total_size = MAXALIGN(sizeof(DictionaryCompressed)) + data.bitmaps_size; + info.dictionary_compressed_indexes = simple8brle_serialized_recv(buffer); + info.bitmaps_size = simple8brle_serialized_total_size(info.dictionary_compressed_indexes); + info.total_size = MAXALIGN(sizeof(DictionaryCompressed)) + info.bitmaps_size; if (has_nulls) { - data.compressed_nulls = simple8brle_serialized_recv(buffer); - data.nulls_size = simple8brle_serialized_total_size(data.compressed_nulls); - data.total_size += data.nulls_size; + info.compressed_nulls = simple8brle_serialized_recv(buffer); + info.nulls_size = simple8brle_serialized_total_size(info.compressed_nulls); + info.total_size += info.nulls_size; } - data.dictionary_serialization_info = array_compressed_data_recv(buffer, element_type); - data.dictionary_size = array_compression_serialization_size(data.dictionary_serialization_info); - data.total_size += data.dictionary_size; - data.num_distinct = - 
array_compression_serialization_num_elements(data.dictionary_serialization_info); + info.dictionary_serialization_info = array_compressed_data_recv(buffer, element_type); - if (!AllocSizeIsValid(data.total_size)) + CheckCompressedData(info.dictionary_serialization_info != NULL); + + info.dictionary_size = array_compression_serialization_size(info.dictionary_serialization_info); + info.total_size += info.dictionary_size; + info.num_distinct = + array_compression_serialization_num_elements(info.dictionary_serialization_info); + + if (!AllocSizeIsValid(info.total_size)) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("compressed size exceeds the maximum allowed (%d)", (int) MaxAllocSize))); - return PointerGetDatum(dictionary_compressed_from_serialization_info(data, element_type)); + return PointerGetDatum(dictionary_compressed_from_serialization_info(info, element_type)); } diff --git a/tsl/src/compression/simple8b_rle_decompress_all.h b/tsl/src/compression/simple8b_rle_decompress_all.h index 48a168fb5..e20c43639 100644 --- a/tsl/src/compression/simple8b_rle_decompress_all.h +++ b/tsl/src/compression/simple8b_rle_decompress_all.h @@ -56,10 +56,13 @@ FUNCTION_NAME(simple8brle_decompress_all_buf, const uint16 n_block_values = simple8brle_rledata_repeatcount(block_data); CheckCompressedData(decompressed_index + n_block_values <= n_buffer_elements); - const ELEMENT_TYPE repeated_value = simple8brle_rledata_value(block_data); + const uint64 repeated_value_raw = simple8brle_rledata_value(block_data); + const ELEMENT_TYPE repeated_value_converted = repeated_value_raw; + CheckCompressedData(repeated_value_raw == (uint64) repeated_value_converted); + for (uint16 i = 0; i < n_block_values; i++) { - decompressed_values[decompressed_index + i] = repeated_value; + decompressed_values[decompressed_index + i] = repeated_value_converted; } decompressed_index += n_block_values; @@ -77,7 +80,7 @@ FUNCTION_NAME(simple8brle_decompress_all_buf, * produces, which is easier for testing. \ */ \ const uint8 bits_per_value = SIMPLE8B_BIT_LENGTH[X]; \ - CheckCompressedData(bits_per_value / 8 <= sizeof(ELEMENT_TYPE)); \ + CheckCompressedData(bits_per_value <= sizeof(ELEMENT_TYPE) * 8); \ \ /* \ * The last block might have less values than normal, but we have \ @@ -86,7 +89,7 @@ FUNCTION_NAME(simple8brle_decompress_all_buf, * might be incorrect. \ */ \ const uint16 n_block_values = SIMPLE8B_NUM_ELEMENTS[X]; \ - CheckCompressedData(decompressed_index + n_block_values < n_buffer_elements); \ + CheckCompressedData(decompressed_index + n_block_values <= n_buffer_elements); \ \ const uint64 bitmask = simple8brle_selector_get_bitmask(X); \ \ @@ -155,10 +158,11 @@ FUNCTION_NAME(simple8brle_decompress_all, ELEMENT_TYPE)(Simple8bRleSerialized *c Assert(n_total_values <= GLOBAL_MAX_ROWS_PER_COMPRESSION); /* - * We need a significant padding of 64 elements, not bytes, here, because we - * work in Simple8B blocks which can contain up to 64 elements. + * We need a quite significant padding of 63 elements, not bytes, after the + * last element, because we work in Simple8B blocks which can contain up to + * 64 elements. 
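+ * For example, even a single-element input needs a 64-element buffer,
+ * because the one Simple8B block that holds this element can notionally
+ * decode to up to 64 values.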
*/ - const uint16 n_buffer_elements = ((n_total_values + 63) / 64 + 1) * 64; + const uint16 n_buffer_elements = n_total_values + 63; ELEMENT_TYPE *restrict decompressed_values = palloc(sizeof(ELEMENT_TYPE) * n_buffer_elements); diff --git a/tsl/test/expected/compression_algos.out b/tsl/test/expected/compression_algos.out index d2a8a6a56..4335e7913 100644 --- a/tsl/test/expected/compression_algos.out +++ b/tsl/test/expected/compression_algos.out @@ -1542,31 +1542,80 @@ DROP TABLE base_texts; -- Interesting corrupt data found by fuzzing -- ----------------------------------------------- \c :TEST_DBNAME :ROLE_SUPERUSER -create or replace function ts_read_compressed_data_directory(cstring, regtype, cstring) -returns table(path text, bytes int, rows int, sqlstate text, location text) -as :TSL_MODULE_PATHNAME, 'ts_read_compressed_data_directory' language c; -select count(*), coalesce((rows >= 0)::text, sqlstate) result -from ts_read_compressed_data_directory('gorilla', 'float8', (:'TEST_INPUT_DIR' || '/fuzzing/compression/gorilla-float8')::cstring) -group by 2 order by 1 desc; - count | result --------+-------- - 224 | XX001 - 55 | true - 23 | 08P01 -(3 rows) - -select count(*), coalesce((rows >= 0)::text, sqlstate) result -from ts_read_compressed_data_directory('deltadelta', 'int8', (:'TEST_INPUT_DIR' || '/fuzzing/compression/deltadelta-int8')::cstring) -group by 2 order by 1 desc; - count | result --------+-------- - 157 | XX001 - 80 | true - 13 | 08P01 - 1 | false -(4 rows) - -create or replace function ts_read_compressed_data_file(cstring, regtype, cstring) returns int +create or replace function ts_read_compressed_data_file(cstring, regtype, cstring, bool = true) returns int as :TSL_MODULE_PATHNAME, 'ts_read_compressed_data_file' language c; +\set ON_ERROR_STOP 0 select ts_read_compressed_data_file('gorilla', 'float8', '--nonexistent'); ERROR: could not open the file '--nonexistent' +\set ON_ERROR_STOP 1 +create or replace function ts_read_compressed_data_directory(cstring, regtype, cstring, bool) +returns table(path text, bytes int, rows int, sqlstate text, location text) +as :TSL_MODULE_PATHNAME, 'ts_read_compressed_data_directory' language c; +\set fn 'ts_read_compressed_data_directory(:''algo'', :''type'', format(''%s/fuzzing/compression/%s-%s'', :''TEST_INPUT_DIR'', :''algo'', :''type'')::cstring, ' +\set algo gorilla +\set type float8 +select count(*) + , coalesce((bulk.rows >= 0)::text, bulk.sqlstate) bulk_result + , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result +from :fn true) bulk join :fn false) rowbyrow using (path) +group by 2, 3 order by 1 desc +; + count | bulk_result | rowbyrow_result +-------+-------------+----------------- + 142 | XX001 | true + 82 | XX001 | XX001 + 55 | true | true + 23 | 08P01 | 08P01 +(4 rows) + +\set algo deltadelta +\set type int8 +select count(*) + , coalesce((bulk.rows >= 0)::text, bulk.sqlstate) bulk_result + , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result +from :fn true) bulk join :fn false) rowbyrow using (path) +group by 2, 3 order by 1 desc +; + count | bulk_result | rowbyrow_result +-------+-------------+----------------- + 106 | XX001 | XX001 + 69 | true | true + 62 | XX001 | true + 13 | 08P01 | 08P01 + 1 | false | false +(5 rows) + +\set algo array +\set type text +select count(*) + , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result +from :fn false) rowbyrow +group by 2 order by 1 desc +; + count | rowbyrow_result +-------+----------------- + 13 | XX001 + 4 | 08P01 + 3 
| true + 1 | false + 1 | 22021 + 1 | 3F000 +(6 rows) + +\set algo dictionary +\set type text +select count(*) + , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result +from :fn false) rowbyrow +group by 2 order by 1 desc +; + count | rowbyrow_result +-------+----------------- + 22 | XX001 + 4 | 08P01 + 2 | true + 1 | false + 1 | 22021 + 1 | 3F000 +(6 rows) + diff --git a/tsl/test/fuzzing/compression/array-text/0d699bc41031c7525fa65c0ab267f34f608eef6a b/tsl/test/fuzzing/compression/array-text/0d699bc41031c7525fa65c0ab267f34f608eef6a new file mode 100644 index 0000000000000000000000000000000000000000..10d1dbde63f8f5ea6d02dea14051cfefe4fc4fca GIT binary patch literal 24 dcmZQ%C`gY_PAo~x$xmk}Nv$YhU|?hb0svSF1=#=q literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/array-text/0dbf553220bcd27478f10999d679d564a11632a1 b/tsl/test/fuzzing/compression/array-text/0dbf553220bcd27478f10999d679d564a11632a1 new file mode 100644 index 000000000..2791843a6 --- /dev/null +++ b/tsl/test/fuzzing/compression/array-text/0dbf553220bcd27478f10999d679d564a11632a1 @@ -0,0 +1 @@ +e \ No newline at end of file diff --git a/tsl/test/fuzzing/compression/array-text/13f402104e20e5a38290bdc5fec85a46ae36bd73 b/tsl/test/fuzzing/compression/array-text/13f402104e20e5a38290bdc5fec85a46ae36bd73 new file mode 100644 index 0000000000000000000000000000000000000000..1dc4c2a2edb10da51f8f08994c496e2d50de44cd GIT binary patch literal 170 zcmZQ%EJ%+}PAo~x$xmk}Nv$YhU}Rume!;-NAb>2)laiU1mRgjW_q4-*A{$^Y*JZn0cQt4PZ$?N65PAViW*NM{gZaBp=yvkSL7k|7`uO*6 zfBEY#-+%eXzkd7p{_VftKR$l?`1r&BK0ZGF{I|dV`s=sRt zeL>$ReV_Dw()UTLTH`ectK<;kJF=>f+$GectNA;Jv*pxQ)+ST@u{3&s$v%+_ukKT?*W` z&s$vvytfwtxAA$a3xM19d8_OBzipqlx*q@A_Ia!8>A!8Cx4Itw_x5`CZ{zb;*QU!#L+vlyWhyJ#G-s*bhZ`;?c lety5AkLlI)-n|a>o?b=o>2;>}^y+y}uOq#uSIv8h{{tyD3_Sn< literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/array-text/c13eaced24e2a5039d3fbeef655fc3cf827a2be7 b/tsl/test/fuzzing/compression/array-text/c13eaced24e2a5039d3fbeef655fc3cf827a2be7 new file mode 100644 index 0000000000000000000000000000000000000000..bd3dcdc9fe2bba58d58d61f99f0392c7b0f7538b GIT binary patch literal 32 jcmZQ%C`gY_PAo~x$xmk}Nv$YhU}OLxAdnE2P0a%Ui7^KC literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/array-text/c88b988789743b6aad8ef68278fc383847a37ddf b/tsl/test/fuzzing/compression/array-text/c88b988789743b6aad8ef68278fc383847a37ddf new file mode 100644 index 0000000000000000000000000000000000000000..f1abc3f4398eca8058ace24a84409dc20980faba GIT binary patch literal 23 ecmZQ%C`gY_PAo~x$xmk}Nv$YhXl!g0&I15g%?I27 literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/array-text/crash-1ac67d1b8ebedeb93c515b5244f6e60613c1af0b b/tsl/test/fuzzing/compression/array-text/crash-1ac67d1b8ebedeb93c515b5244f6e60613c1af0b new file mode 100644 index 0000000000000000000000000000000000000000..305bfd2a1814bfc7e8effbc8a29c55cdd9daab65 GIT binary patch literal 440 zcmZQ%C`gY_PAo~x$xmk}Nv$Yh0E4u&)S}e95)*S%AeSd46V5k5;~S&$?ahrK0ts*( zhNvpc(fDSTsPbkOXnb=tz8M-H>1J>Gw5>A7vHm#5lpeh4L<`~#gY4G3hk~}5<>m>y}$4l3y z_VySl#aALK$Y!_Ib@kTWroRqDoMQjyO!1yVE!t4d4aN*yd6u(db=bSSoaS>C(mH9} z^&m&!18@xtdIEX^dhGQA2j9nEJ^L{AWAJ0}WANi2zxDEK7RqFiOcuyw&&k9eklu%a literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/array-text/f2f014ef49bdaf4ff29d4a7116feff81e7015283 b/tsl/test/fuzzing/compression/array-text/f2f014ef49bdaf4ff29d4a7116feff81e7015283 new file mode 100644 index 0000000000000000000000000000000000000000..d8b0a00476546c8bf7dae491565e357ffb582a28 GIT binary patch literal 48 
vcmZQ%EJ%+}PAo~x$xmk}Nv$YhU}Rume!;*1qCud5fnh(GVa>oWGej2v5FQCi literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/array-text/f41f46df995dd4c7690f27978152ead25ccd5c75 b/tsl/test/fuzzing/compression/array-text/f41f46df995dd4c7690f27978152ead25ccd5c75 new file mode 100644 index 0000000000000000000000000000000000000000..637bb34ce31ae11fa3ff0de4683a7536d889122c GIT binary patch literal 48 xcmZQ%EJ%+}PAo~x$xmk}Nv$YhU}Rume!;*1qV*UA{{R2aP!Hq*!FQk#3jjE?3w!_o literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/array-text/with-nulls b/tsl/test/fuzzing/compression/array-text/with-nulls new file mode 100644 index 0000000000000000000000000000000000000000..a006fd327dc1ee650c6b65005bfdb88563f10a54 GIT binary patch literal 13613 zcmc)ML29N&5P;!7cmZ9grx-}Oy1Kd!;#vmHj00gtG@?iL03N{njmz&@n_VcXzn26a z>fzznk3atO{`LLKmyfsC=f7TW54YP_f8TDmufO^4=ZF94=dIy<`7P+-pSNGW{rS^} z56{0ne|~-U=cm_S-hKY#<>mJ$4MKy|pfu>W!FQJ*zkmIDa{v7GU! zmNO2S*IUO-jGS@Q#FjITo7YcoT2ICtmumK7aVbez)@4?5!|*{w4&3BPAfXC=(M8KicTv!t?0C((~3?jI<4rmqSJ~_D>|*{ zw4&3BPAfXC=(M8KicTv!t?0C((~3?jI<4rmqSJ~_D>|*{w4&3BPAfXC=(M8KicTv! zt!P@&w4!N6(~71QO)HvKG_7b_(X^szMbnC=6-_IeRy3_>TG6zkX+_hDrWH*qnpQNe zXj;* zTG6zk%Ze^5x~%B3qRWadE4r-cvZBk1E-SjM=(3{AiY_a0x~=H8 zqT7mYE4r=dwxZjLZY#R2=(eKUif${qt?0I*+lp>0x~=H8qT7mYE4r=dwxZjLZY#R2 z=(eKUif${qt?0I*+lp>0x~=H8qT7nL6>TfpR? zqHRUninbMPE814Ht!P`(wxVrC+lsaoZ7bSVw5@1c(YB&(McazD6>TfpR?qHRUninbN)?tcMV3?K{u literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/127d1cd7df6314984a355234cad8daee2a0a6f85 b/tsl/test/fuzzing/compression/dictionary-text/127d1cd7df6314984a355234cad8daee2a0a6f85 new file mode 100644 index 0000000000000000000000000000000000000000..db0d03a4eaad2f45bd3f283d42065695d8e53b3a GIT binary patch literal 89 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEh17+C&80Hc@=!#AAR literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/1a155dbc0885a4ce404a03ccad4f90e8dfb6838b b/tsl/test/fuzzing/compression/dictionary-text/1a155dbc0885a4ce404a03ccad4f90e8dfb6838b new file mode 100644 index 0000000000000000000000000000000000000000..d22df394c03b0fa0a9a43a4db406f6654341bbfd GIT binary patch literal 22 dcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?im002yG1=;`r literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/1bdd8892fa9cd727ebf1b97101a04456b8ba7bc2 b/tsl/test/fuzzing/compression/dictionary-text/1bdd8892fa9cd727ebf1b97101a04456b8ba7bc2 new file mode 100644 index 0000000000000000000000000000000000000000..c54e63b4f91b0f8f63a1572583df40e7ffeec09d GIT binary patch literal 14 VcmZQ#C`gY_PAo~x$xmls001731QY-O literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/27de45122a7e9da969165f24f6942fc131cb17df b/tsl/test/fuzzing/compression/dictionary-text/27de45122a7e9da969165f24f6942fc131cb17df new file mode 100644 index 0000000000000000000000000000000000000000..a0938d30ec118bd2414f44b579b243a6d8e3caf5 GIT binary patch literal 56 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEh1AyHUIzs literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/2d79b560f5c1dde8b7841ae6d77d616f26f5b3ab b/tsl/test/fuzzing/compression/dictionary-text/2d79b560f5c1dde8b7841ae6d77d616f26f5b3ab new file mode 100644 index 0000000000000000000000000000000000000000..674c892c76c9286bff06e619e40f7c514ee70d32 GIT binary patch literal 145 zcmZQ#EJ%+}PAo~x$xmk}Nv$YhU|?YU0wRDI1o#;k2)1_n+b&BegL#JCCu1Q{V( oVP*hzSwYPKF@fqK<^WC6aA1A`l?S~5 literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/4e8af02cd72c9425df8c3164b3a14bc1b70c6498 
b/tsl/test/fuzzing/compression/dictionary-text/4e8af02cd72c9425df8c3164b3a14bc1b70c6498 new file mode 100644 index 0000000000000000000000000000000000000000..4a9f44897c69d60cd01eaadccd2a0caeb8273333 GIT binary patch literal 26 fcmZQ#EJ%+}PAo~x$xmk}Nv$YhU|?YU0wfp!XtD;+ literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/559b65125ca556ff1a57f82f9ae55a86b71c6296 b/tsl/test/fuzzing/compression/dictionary-text/559b65125ca556ff1a57f82f9ae55a86b71c6296 new file mode 100644 index 000000000..d0cd44bd3 --- /dev/null +++ b/tsl/test/fuzzing/compression/dictionary-text/559b65125ca556ff1a57f82f9ae55a86b71c6296 @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/tsl/test/fuzzing/compression/dictionary-text/5ba93c9db0cff93f52b521d7420e43f6eda2784f b/tsl/test/fuzzing/compression/dictionary-text/5ba93c9db0cff93f52b521d7420e43f6eda2784f new file mode 100644 index 0000000000000000000000000000000000000000..f76dd238ade08917e6712764a16a22005a50573d GIT binary patch literal 1 IcmZPo000310RR91 literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/5c9409528b92b40afa79d037eadcb73b859e94e6 b/tsl/test/fuzzing/compression/dictionary-text/5c9409528b92b40afa79d037eadcb73b859e94e6 new file mode 100644 index 0000000000000000000000000000000000000000..591d905c1da3e3f4ba69bb004b2c456996c57235 GIT binary patch literal 49 tcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEiK7zFqk4D1@z8o+FjC;%6R2x$NS literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/62ca0c60044ab7f33aa33d10a4ae715ee06b7748 b/tsl/test/fuzzing/compression/dictionary-text/62ca0c60044ab7f33aa33d10a4ae715ee06b7748 new file mode 100644 index 0000000000000000000000000000000000000000..403168ba05e9b360d62e79fc2c311290886340b5 GIT binary patch literal 51 tcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEh1{zCvKkOKt#4qysM005_w4i*3a literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/687464af84370f5d43d25acba442acc7fd14beec b/tsl/test/fuzzing/compression/dictionary-text/687464af84370f5d43d25acba442acc7fd14beec new file mode 100644 index 0000000000000000000000000000000000000000..225c0114040ff659cf0f67624e2ef7226720f32e GIT binary patch literal 50 rcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEiK7zBVcD})6inHU%V3r+^j literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/8c92cd8b3e908dad0b490baa09ee984fdf224b21 b/tsl/test/fuzzing/compression/dictionary-text/8c92cd8b3e908dad0b490baa09ee984fdf224b21 new file mode 100644 index 0000000000000000000000000000000000000000..51520764e495d3caaf82e904d1f4bd0fef12bcc5 GIT binary patch literal 84 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEh17+C%T0powM|NnIuzUedo1q~q*%upJr M!0P{hpcpCu01WRGNB{r; literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/95f1a48e7e1cbe4b91461f1251416245497ff131 b/tsl/test/fuzzing/compression/dictionary-text/95f1a48e7e1cbe4b91461f1251416245497ff131 new file mode 100644 index 0000000000000000000000000000000000000000..aeef6e030439354e9b41baf40c724cd8a9126630 GIT binary patch literal 8 PcmZQ#OvvG8U|;|M1g8Mo literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/994cc577406fe37f59e27ea1028a9d0a814af721 b/tsl/test/fuzzing/compression/dictionary-text/994cc577406fe37f59e27ea1028a9d0a814af721 new file mode 100644 index 0000000000000000000000000000000000000000..511199a8bb6f937c28b86eaa98fae7102bbb2594 GIT binary patch literal 42 scmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VKE?7+79_cy>U{z`(%(0O-^RbpQYW literal 0 HcmV?d00001 diff --git 
a/tsl/test/fuzzing/compression/dictionary-text/a42f35cc555c689a38ef471b21fad93692f36a9c b/tsl/test/fuzzing/compression/dictionary-text/a42f35cc555c689a38ef471b21fad93692f36a9c new file mode 100644 index 0000000000000000000000000000000000000000..ac547d9c5236d04fc071c167226b3868cf26bec6 GIT binary patch literal 48 tcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEi~7)0_jaN0EhX(J$J000J;2hRWi literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/ba200d8a4886abcdba4174f4b131db56e9128785 b/tsl/test/fuzzing/compression/dictionary-text/ba200d8a4886abcdba4174f4b131db56e9128785 new file mode 100644 index 0000000000000000000000000000000000000000..e28432a6e458707f35b9cb5cc2b8752035b268ed GIT binary patch literal 88 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=03;Y07+C&80Hc@=!#AAH!SFv+qyzv7?i%s{ literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/bf8b4530d8d246dd74ac53a13471bba17941dff7 b/tsl/test/fuzzing/compression/dictionary-text/bf8b4530d8d246dd74ac53a13471bba17941dff7 new file mode 100644 index 000000000..6b2aaa764 --- /dev/null +++ b/tsl/test/fuzzing/compression/dictionary-text/bf8b4530d8d246dd74ac53a13471bba17941dff7 @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/tsl/test/fuzzing/compression/dictionary-text/c92920944247d80c842eaa65fd01efec1c84c342 b/tsl/test/fuzzing/compression/dictionary-text/c92920944247d80c842eaa65fd01efec1c84c342 new file mode 100644 index 000000000..03afaa5de --- /dev/null +++ b/tsl/test/fuzzing/compression/dictionary-text/c92920944247d80c842eaa65fd01efec1c84c342 @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/tsl/test/fuzzing/compression/dictionary-text/crash-49789ae0866d7d630f2075dc26812433f4af1db3 b/tsl/test/fuzzing/compression/dictionary-text/crash-49789ae0866d7d630f2075dc26812433f4af1db3 new file mode 100644 index 0000000000000000000000000000000000000000..cf31c012e9210d5588c44ed71abbebed662689a3 GIT binary patch literal 231 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEiK7zFqk80;Dt8o+EY4V)PIuulNdL^vA& literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/crash-5eeac6ca5053992914dfb318e02e4c657a65c7cf b/tsl/test/fuzzing/compression/dictionary-text/crash-5eeac6ca5053992914dfb318e02e4c657a65c7cf new file mode 100644 index 0000000000000000000000000000000000000000..8c347f8548e293f9c51b3e3213fa7f2f02863ae8 GIT binary patch literal 174 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEh17+C&80Hc@=!#AADGhr%V0O^A0 K1u|#`5=#JO!5Urw literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/crash-707526606a02c72364e1c8ea82357eead6c74f60 b/tsl/test/fuzzing/compression/dictionary-text/crash-707526606a02c72364e1c8ea82357eead6c74f60 new file mode 100644 index 0000000000000000000000000000000000000000..2061e2e7aad1a533bf4e22a2ec38d64897318540 GIT binary patch literal 81 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEh17+4q>1R4G_FgS|oFnqIX0LcI`6GMS| W{N#Dvzd>ps%0Z+Z5Hm1vFaQAMFAv-R literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/crash-b0db762535226b28c0b55ffe00d5537fd8ef7e39 b/tsl/test/fuzzing/compression/dictionary-text/crash-b0db762535226b28c0b55ffe00d5537fd8ef7e39 new file mode 100644 index 0000000000000000000000000000000000000000..a75b22fb44f6f6f52a8e2183159a8aced13ee384 GIT binary patch literal 52 ucmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEi~7)0_jaN0FMMHq?`b5j8*NC`v$ literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/crash-bd6e8aa1ebeb96cf8aa644c5ef6eb2214dee0ffc b/tsl/test/fuzzing/compression/dictionary-text/crash-bd6e8aa1ebeb96cf8aa644c5ef6eb2214dee0ffc new file mode 100644 index 
0000000000000000000000000000000000000000..8262f92c11513080a09cb9e6e0b8b455d952fdcd GIT binary patch literal 56 tcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?YW1SA-N7zFq&AT&aRr8qG+6#zeF2*CgV literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/d0f63f55c89c3220cd326e9395c12e2f4bd57942 b/tsl/test/fuzzing/compression/dictionary-text/d0f63f55c89c3220cd326e9395c12e2f4bd57942 new file mode 100644 index 0000000000000000000000000000000000000000..4932a19bda51d3a96bf3134b4b892973d12fba66 GIT binary patch literal 1353 zcmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEWG_!c9OfC7F721W)BGcz+iAO-@MIG7ed z;W2Qcav{7r28KWsagYsW1_qYq7Ut##=7t79^Fas%0+1|*h+rXE;GlqH0Mv@bM3RN5 mV+zUE4!8!qfu}Lx&Kq^WXb6xN0tOh$IWR;J0uBf!oCN?mJtTYp literal 0 HcmV?d00001 diff --git a/tsl/test/fuzzing/compression/dictionary-text/dict1 b/tsl/test/fuzzing/compression/dictionary-text/dict1 new file mode 100644 index 0000000000000000000000000000000000000000..08ae62cb6b1d9b958cf4342fe295478b58187fef GIT binary patch literal 56 ycmZQ#C`gY_PAo~x$xmk}Nv$YhU|?W=0VEiK7zFqk80;FrERYBYvlJ)hrUC#*m2)2Idz)T0jsARuKt8!ypPn t(ZH%zOTi*U8nX(dI18v4=mxMpunR%1= 0)::text, sqlstate) result -from ts_read_compressed_data_directory('gorilla', 'float8', (:'TEST_INPUT_DIR' || '/fuzzing/compression/gorilla-float8')::cstring) -group by 2 order by 1 desc; +\set fn 'ts_read_compressed_data_directory(:''algo'', :''type'', format(''%s/fuzzing/compression/%s-%s'', :''TEST_INPUT_DIR'', :''algo'', :''type'')::cstring, ' -select count(*), coalesce((rows >= 0)::text, sqlstate) result -from ts_read_compressed_data_directory('deltadelta', 'int8', (:'TEST_INPUT_DIR' || '/fuzzing/compression/deltadelta-int8')::cstring) -group by 2 order by 1 desc; +\set algo gorilla +\set type float8 +select count(*) + , coalesce((bulk.rows >= 0)::text, bulk.sqlstate) bulk_result + , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result +from :fn true) bulk join :fn false) rowbyrow using (path) +group by 2, 3 order by 1 desc +; -create or replace function ts_read_compressed_data_file(cstring, regtype, cstring) returns int -as :TSL_MODULE_PATHNAME, 'ts_read_compressed_data_file' language c; +\set algo deltadelta +\set type int8 +select count(*) + , coalesce((bulk.rows >= 0)::text, bulk.sqlstate) bulk_result + , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result +from :fn true) bulk join :fn false) rowbyrow using (path) +group by 2, 3 order by 1 desc +; + +\set algo array +\set type text +select count(*) + , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result +from :fn false) rowbyrow +group by 2 order by 1 desc +; + +\set algo dictionary +\set type text +select count(*) + , coalesce((rowbyrow.rows >= 0)::text, rowbyrow.sqlstate) rowbyrow_result +from :fn false) rowbyrow +group by 2 order by 1 desc +; -select ts_read_compressed_data_file('gorilla', 'float8', '--nonexistent'); \ No newline at end of file
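+-- Note that :fn deliberately ends with an open parenthesis and a trailing
+-- comma: each call site appends the final boolean argument (bulk or
+-- row-by-row) and the closing parenthesis, as in ':fn true) bulk'.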