mirror of
https://github.com/apple/foundationdb.git
synced 2025-05-17 11:22:20 +08:00
* Enable ZSTD compression filter. Description: diff-4: Randomize knob compression filter selection; diff-3: Minor refactoring; diff-2: Limit ZSTD availability to the Clang compiler; diff-1: Add ZSTD compression option to BlobGranule tests. Major changes include: 1. Update FDB CMake to download, install and build Boost with ZSTD compatibility. 2. Update CompressionUtils to enable the boost::iostreams::zstd compression filter. Testing: CompressionUtilsUnit.toml, BlobGranuleCorrectness/BlobGranuleCorrectnessClean, devRunCorrectness - 100K (in-progress).
268 lines
7.7 KiB
C++
268 lines
7.7 KiB
C++
/*
|
|
* CompressionUtils.cpp
|
|
*
|
|
* This source file is part of the FoundationDB open source project
|
|
*
|
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include "flow/CompressionUtils.h"
|
|
|
|
#include "flow/Arena.h"
|
|
#include "flow/Error.h"
|
|
#include "flow/IRandom.h"
|
|
#include "flow/UnitTest.h"
|
|
|
|
#include <boost/iostreams/copy.hpp>
|
|
#ifdef ZLIB_LIB_SUPPORTED
|
|
#include <boost/iostreams/filter/gzip.hpp>
|
|
#endif
|
|
#include <boost/iostreams/filtering_streambuf.hpp>
|
|
#ifdef ZSTD_LIB_SUPPORTED
|
|
#include <boost/iostreams/filter/zstd.hpp>
|
|
#endif
|
|
#include <sstream>
|
|
|
|
namespace {
|
|
std::unordered_set<CompressionFilter> getSupportedFilters() {
|
|
std::unordered_set<CompressionFilter> filters;
|
|
|
|
filters.insert(CompressionFilter::NONE);
|
|
#ifdef ZLIB_LIB_SUPPORTED
|
|
filters.insert(CompressionFilter::GZIP);
|
|
#endif
|
|
#ifdef ZSTD_LIB_SUPPORTED
|
|
filters.insert(CompressionFilter::ZSTD);
|
|
#endif
|
|
ASSERT_GE(filters.size(), 1);
|
|
return filters;
|
|
}
|
|
} // namespace
|
|
|
|
// Definition of the static set of filters usable in this build; it is filled once, during static
// initialization, from getSupportedFilters() (NONE plus whichever of GZIP/ZSTD were compiled in).
std::unordered_set<CompressionFilter> CompressionUtils::supportedFilters = getSupportedFilters();
|
|
|
|
// Compresses `data` with `filter`, using the compression library's own
// `default_compression` level, and returns the result built against `arena`.
//
// - NONE returns StringRef(arena, data) without any transformation.
// - GZIP / ZSTD delegate to the level-taking overload; each is compiled in only
//   when the matching *_LIB_SUPPORTED macro is defined.
// - Any other value reaching the end raises internal_error().
//
// checkFilterSupported(filter) runs first for every input.
StringRef CompressionUtils::compress(const CompressionFilter filter, const StringRef& data, Arena& arena) {
    checkFilterSupported(filter);

    namespace bio = boost::iostreams;
    switch (filter) {
    case CompressionFilter::NONE:
        return StringRef(arena, data);
#ifdef ZLIB_LIB_SUPPORTED
    case CompressionFilter::GZIP:
        return CompressionUtils::compress(filter, data, bio::gzip::default_compression, arena);
#endif
#ifdef ZSTD_LIB_SUPPORTED
    case CompressionFilter::ZSTD:
        return CompressionUtils::compress(filter, data, bio::zstd::default_compression, arena);
#endif
    default:
        break;
    }

    throw internal_error(); // We should never get here
}
|
|
|
|
// Compresses `data` with `filter` at the explicit compression `level` and
// returns the compressed bytes built against `arena`.
//
// NONE ignores `level` and returns StringRef(arena, data). For GZIP / ZSTD the
// input is streamed through the matching boost::iostreams compressor.
// checkFilterSupported(filter) runs first for every input.
StringRef CompressionUtils::compress(const CompressionFilter filter, const StringRef& data, int level, Arena& arena) {
    checkFilterSupported(filter);

    const bool passThrough = (filter == CompressionFilter::NONE);
    if (passThrough) {
        return StringRef(arena, data);
    }

    namespace bio = boost::iostreams;

    // Both streams are declared before the filter chain so that they outlive it.
    std::stringstream compressedSink;
    std::stringstream rawSource(data.toString());

    // Input-mode chain: reading from it pulls bytes from `rawSource` through the compressor.
    bio::filtering_streambuf<bio::input> chain;
#ifdef ZLIB_LIB_SUPPORTED
    if (filter == CompressionFilter::GZIP) {
        const bio::gzip_params gzipOptions(level);
        chain.push(bio::gzip_compressor(gzipOptions));
    }
#endif
#ifdef ZSTD_LIB_SUPPORTED
    if (filter == CompressionFilter::ZSTD) {
        const bio::zstd_params zstdOptions(level);
        chain.push(bio::zstd_compressor(zstdOptions));
    }
#endif
    chain.push(rawSource);

    // Drain the chain into the sink, then build the result from the sink's contents.
    bio::copy(chain, compressedSink);
    return StringRef(arena, compressedSink.str());
}
|
|
|
|
// Reverses compress(): decodes `data` with `filter` and returns the decoded
// bytes built against `arena`.
//
// NONE returns StringRef(arena, data) unchanged. For GZIP / ZSTD the input is
// streamed through the matching boost::iostreams decompressor.
// checkFilterSupported(filter) runs first for every input.
StringRef CompressionUtils::decompress(const CompressionFilter filter, const StringRef& data, Arena& arena) {
    checkFilterSupported(filter);

    const bool passThrough = (filter == CompressionFilter::NONE);
    if (passThrough) {
        return StringRef(arena, data);
    }

    namespace bio = boost::iostreams;

    // Both streams are declared before the filter chain so that they outlive it.
    std::stringstream compressedSource(data.toString());
    std::stringstream plainSink;

    // Input-mode chain: reading from it pulls bytes from `compressedSource` through the decompressor.
    bio::filtering_streambuf<bio::input> chain;
#ifdef ZLIB_LIB_SUPPORTED
    if (filter == CompressionFilter::GZIP) {
        chain.push(bio::gzip_decompressor());
    }
#endif
#ifdef ZSTD_LIB_SUPPORTED
    if (filter == CompressionFilter::ZSTD) {
        chain.push(bio::zstd_decompressor());
    }
#endif
    chain.push(compressedSource);

    // Drain the chain into the sink, then build the result from the sink's contents.
    bio::copy(chain, plainSink);
    return StringRef(arena, plainSink.str());
}
|
|
|
|
// Returns the compression level this project prefers for `filter`.
//
// - NONE yields -1.
// - GZIP / ZSTD yield the respective boost::iostreams `best_speed` constant.
// - Any other value reaching the end raises internal_error().
//
// checkFilterSupported(filter) runs first for every input.
int CompressionUtils::getDefaultCompressionLevel(CompressionFilter filter) {
    checkFilterSupported(filter);

    switch (filter) {
    case CompressionFilter::NONE:
        return -1;
#ifdef ZLIB_LIB_SUPPORTED
    case CompressionFilter::GZIP:
        // Favor speed: according to benchmarks, larger levels have a high CPU cost without much
        // compression-ratio improvement. Alternatives considered:
        //   boost::iostreams::gzip::default_compression
        //   boost::iostreams::gzip::best_compression
        return boost::iostreams::gzip::best_speed;
#endif
#ifdef ZSTD_LIB_SUPPORTED
    case CompressionFilter::ZSTD:
        // Favor speed: according to benchmarks, larger levels have a high CPU cost without much
        // compression-ratio improvement. Alternatives considered:
        //   boost::iostreams::zstd::default_compression
        //   boost::iostreams::zstd::best_compression
        return boost::iostreams::zstd::best_speed;
#endif
    default:
        break;
    }

    throw internal_error(); // We should never get here
}
|
|
|
|
// Picks one of the filters in `supportedFilters` using deterministicRandom().
// When only one filter is available it is returned directly and the random
// generator is not consulted.
CompressionFilter CompressionUtils::getRandomFilter() {
    ASSERT_GE(supportedFilters.size(), 1);

    // Snapshot the set into an indexable container.
    const std::vector<CompressionFilter> candidates(CompressionUtils::supportedFilters.begin(),
                                                    CompressionUtils::supportedFilters.end());
    ASSERT_GE(candidates.size(), 1);

    const CompressionFilter picked = (candidates.size() == 1)
                                         ? candidates[0]
                                         : candidates[deterministicRandom()->randomInt(0, candidates.size())];

    // Sanity check: the chosen value must be a member of the supported set.
    ASSERT(supportedFilters.find(picked) != supportedFilters.end());
    return picked;
}
|
|
|
|
// Only used to link unit tests
|
|
// Intentionally empty. NOTE(review): presumably referenced from another translation unit so that
// the linker keeps this object file and the TEST_CASEs defined in it - confirm against the caller.
void forceLinkCompressionUtilsTest() {}
|
|
|
|
namespace {
|
|
void testCompression(CompressionFilter filter) {
|
|
Arena arena;
|
|
const int size = deterministicRandom()->randomInt(512, 1024);
|
|
Standalone<StringRef> uncompressed = makeString(size);
|
|
deterministicRandom()->randomBytes(mutateString(uncompressed), size);
|
|
|
|
Standalone<StringRef> compressed = CompressionUtils::compress(filter, uncompressed, arena);
|
|
ASSERT_NE(compressed.compare(uncompressed), 0);
|
|
|
|
StringRef verify = CompressionUtils::decompress(filter, compressed, arena);
|
|
ASSERT_EQ(verify.compare(uncompressed), 0);
|
|
}
|
|
|
|
void testCompression2(CompressionFilter filter) {
|
|
Arena arena;
|
|
const int size = deterministicRandom()->randomInt(512, 1024);
|
|
std::string s(size, 'x');
|
|
Standalone<StringRef> uncompressed = Standalone<StringRef>(StringRef(s));
|
|
printf("Size before: %d\n", (int)uncompressed.size());
|
|
|
|
Standalone<StringRef> compressed = CompressionUtils::compress(filter, uncompressed, arena);
|
|
ASSERT_NE(compressed.compare(uncompressed), 0);
|
|
printf("Size after: %d\n", (int)compressed.size());
|
|
// Assert compressed size is less than half.
|
|
ASSERT(compressed.size() * 2 < uncompressed.size());
|
|
|
|
StringRef verify = CompressionUtils::decompress(filter, compressed, arena);
|
|
ASSERT_EQ(verify.compare(uncompressed), 0);
|
|
}
|
|
|
|
} // namespace
|
|
|
|
// With the NONE filter, both compress() and decompress() must hand back bytes
// equal to the (random) input.
TEST_CASE("/CompressionUtils/noCompression") {
    Arena arena;

    const int payloadLen = deterministicRandom()->randomInt(512, 1024);
    Standalone<StringRef> original = makeString(payloadLen);
    deterministicRandom()->randomBytes(mutateString(original), payloadLen);

    const CompressionFilter passThrough = CompressionFilter::NONE;

    Standalone<StringRef> packed = CompressionUtils::compress(passThrough, original, arena);
    ASSERT_EQ(packed.compare(original), 0);

    StringRef roundTripped = CompressionUtils::decompress(passThrough, packed, arena);
    ASSERT_EQ(roundTripped.compare(original), 0);

    TraceEvent("NoCompressionDone");
    return Void();
}
|
|
|
|
#ifdef ZLIB_LIB_SUPPORTED
|
|
// GZIP round trip over random bytes (see testCompression).
TEST_CASE("/CompressionUtils/gzipCompression") {
    const CompressionFilter filterUnderTest = CompressionFilter::GZIP;
    testCompression(filterUnderTest);

    TraceEvent("GzipCompressionDone");
    return Void();
}
|
|
|
|
// GZIP round trip over repetitive input, including the size-reduction check
// (see testCompression2).
TEST_CASE("/CompressionUtils/gzipCompression2") {
    const CompressionFilter filterUnderTest = CompressionFilter::GZIP;
    testCompression2(filterUnderTest);

    TraceEvent("GzipCompression2Done");
    return Void();
}
|
|
#endif
|
|
|
|
#ifdef ZSTD_LIB_SUPPORTED
|
|
// ZSTD round trip over random bytes (see testCompression).
TEST_CASE("/CompressionUtils/zstdCompression") {
    const CompressionFilter filterUnderTest = CompressionFilter::ZSTD;
    testCompression(filterUnderTest);

    TraceEvent("ZstdCompressionDone");
    return Void();
}
|
|
|
|
// ZSTD round trip over repetitive input, including the size-reduction check
// (see testCompression2).
TEST_CASE("/CompressionUtils/zstdCompression2") {
    const CompressionFilter filterUnderTest = CompressionFilter::ZSTD;
    testCompression2(filterUnderTest);

    TraceEvent("ZstdCompression2Done");
    return Void();
}
|
|
#endif
|