Skip to content

Commit

Permalink
Introduce a helper method UncompressData (facebook#7434)
Browse files Browse the repository at this point in the history
Summary:
The patch introduces a helper function in `util/compression.h` called `UncompressData`
that dispatches to the correct uncompression routine based on the compression type, and changes
`UncompressBlockContentsForCompressionType` and `Benchmark::Uncompress` in
`db_bench` so that they are implemented in terms of the new helper. This eliminates
some code duplication. (`Benchmark::Compress` is also updated to use the previously
introduced `CompressData` helper.)

In addition, the patch brings the implementation of `Snappy_Uncompress` into sync with
the other uncompression functions by making it compute the buffer size and allocate
the buffer itself. Finally, the patch eliminates some potentially risky back-and-forth conversions
between signed and unsigned integer types by reporting the uncompressed size through a
`size_t` out-parameter instead of an `int`.

Pull Request resolved: facebook#7434

Test Plan:
`make check`
`./db_bench -benchmarks=compress,uncompress --compression_type ...`

Reviewed By: riversand963

Differential Revision: D23900011

Pulled By: ltamasi

fbshipit-source-id: b25df63ceec4639889be94acb22eb53e530c54e0
  • Loading branch information
ltamasi authored and facebook-github-bot committed Sep 25, 2020
1 parent 9a63bbd commit 30fb9dd
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 217 deletions.
15 changes: 3 additions & 12 deletions port/win/xpress_win.cc
Original file line number Diff line number Diff line change
Expand Up @@ -129,10 +129,9 @@ bool Compress(const char* input, size_t length, std::string* output) {
}

char* Decompress(const char* input_data, size_t input_length,
int* decompress_size) {

size_t* uncompressed_size) {
assert(input_data != nullptr);
assert(decompress_size != nullptr);
assert(uncompressed_size != nullptr);

if (input_length == 0) {
return nullptr;
Expand Down Expand Up @@ -185,14 +184,6 @@ char* Decompress(const char* input_data, size_t input_length,

assert(decompressedBufferSize > 0);

// On Windows we are limited to a 32-bit int for the
// output data size argument
// so we hopefully never get here
if (decompressedBufferSize > std::numeric_limits<int>::max()) {
assert(false);
return nullptr;
}

// The callers are deallocating using delete[]
// thus we must allocate with new[]
std::unique_ptr<char[]> outputBuffer(new char[decompressedBufferSize]);
Expand All @@ -216,7 +207,7 @@ char* Decompress(const char* input_data, size_t input_length,
return nullptr;
}

*decompress_size = static_cast<int>(decompressedDataSize);
*uncompressed_size = decompressedDataSize;

// Return the raw buffer to the caller supporting the tradition
return outputBuffer.release();
Expand Down
3 changes: 1 addition & 2 deletions port/win/xpress_win.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@ namespace xpress {
bool Compress(const char* input, size_t length, std::string* output);

char* Decompress(const char* input_data, size_t input_length,
int* decompress_size);

size_t* uncompressed_size);
}
}
} // namespace ROCKSDB_NAMESPACE
96 changes: 10 additions & 86 deletions table/format.cc
Original file line number Diff line number Diff line change
Expand Up @@ -347,100 +347,24 @@ Status UncompressBlockContentsForCompressionType(
BlockContents* contents, uint32_t format_version,
const ImmutableCFOptions& ioptions, MemoryAllocator* allocator) {
Status ret = Status::OK();
CacheAllocationPtr ubuf;

assert(uncompression_info.type() != kNoCompression &&
"Invalid compression type");

StopWatchNano timer(ioptions.env, ShouldReportDetailedTime(
ioptions.env, ioptions.statistics));
int decompress_size = 0;
switch (uncompression_info.type()) {
case kSnappyCompression: {
size_t ulength = 0;
static char snappy_corrupt_msg[] =
"Snappy not supported or corrupted Snappy compressed block contents";
if (!Snappy_GetUncompressedLength(data, n, &ulength)) {
return Status::Corruption(snappy_corrupt_msg);
}
ubuf = AllocateBlock(ulength, allocator);
if (!Snappy_Uncompress(data, n, ubuf.get())) {
return Status::Corruption(snappy_corrupt_msg);
}
*contents = BlockContents(std::move(ubuf), ulength);
break;
}
case kZlibCompression:
ubuf = Zlib_Uncompress(uncompression_info, data, n, &decompress_size,
GetCompressFormatForVersion(format_version),
allocator);
if (!ubuf) {
static char zlib_corrupt_msg[] =
"Zlib not supported or corrupted Zlib compressed block contents";
return Status::Corruption(zlib_corrupt_msg);
}
*contents = BlockContents(std::move(ubuf), decompress_size);
break;
case kBZip2Compression:
ubuf = BZip2_Uncompress(data, n, &decompress_size,
GetCompressFormatForVersion(format_version),
allocator);
if (!ubuf) {
static char bzip2_corrupt_msg[] =
"Bzip2 not supported or corrupted Bzip2 compressed block contents";
return Status::Corruption(bzip2_corrupt_msg);
}
*contents = BlockContents(std::move(ubuf), decompress_size);
break;
case kLZ4Compression:
ubuf = LZ4_Uncompress(uncompression_info, data, n, &decompress_size,
GetCompressFormatForVersion(format_version),
allocator);
if (!ubuf) {
static char lz4_corrupt_msg[] =
"LZ4 not supported or corrupted LZ4 compressed block contents";
return Status::Corruption(lz4_corrupt_msg);
}
*contents = BlockContents(std::move(ubuf), decompress_size);
break;
case kLZ4HCCompression:
ubuf = LZ4_Uncompress(uncompression_info, data, n, &decompress_size,
GetCompressFormatForVersion(format_version),
allocator);
if (!ubuf) {
static char lz4hc_corrupt_msg[] =
"LZ4HC not supported or corrupted LZ4HC compressed block contents";
return Status::Corruption(lz4hc_corrupt_msg);
}
*contents = BlockContents(std::move(ubuf), decompress_size);
break;
case kXpressCompression:
// XPRESS allocates memory internally, thus no support for custom
// allocator.
ubuf.reset(XPRESS_Uncompress(data, n, &decompress_size));
if (!ubuf) {
static char xpress_corrupt_msg[] =
"XPRESS not supported or corrupted XPRESS compressed block "
"contents";
return Status::Corruption(xpress_corrupt_msg);
}
*contents = BlockContents(std::move(ubuf), decompress_size);
break;
case kZSTD:
case kZSTDNotFinalCompression:
ubuf = ZSTD_Uncompress(uncompression_info, data, n, &decompress_size,
allocator);
if (!ubuf) {
static char zstd_corrupt_msg[] =
"ZSTD not supported or corrupted ZSTD compressed block contents";
return Status::Corruption(zstd_corrupt_msg);
}
*contents = BlockContents(std::move(ubuf), decompress_size);
break;
default:
return Status::Corruption("bad block type");
size_t uncompressed_size = 0;
CacheAllocationPtr ubuf =
UncompressData(uncompression_info, data, n, &uncompressed_size,
GetCompressFormatForVersion(format_version), allocator);
if (!ubuf) {
return Status::Corruption(
"Unsupported compression method or corrupted compressed block contents",
CompressionTypeToString(uncompression_info.type()));
}

*contents = BlockContents(std::move(ubuf), uncompressed_size);

if (ShouldReportDetailedTime(ioptions.env, ioptions.statistics)) {
RecordTimeToHistogram(ioptions.statistics, DECOMPRESSION_TIMES_NANOS,
timer.ElapsedNanos());
Expand Down
96 changes: 12 additions & 84 deletions tools/db_bench_tool.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2443,40 +2443,10 @@ class Benchmark {

inline bool CompressSlice(const CompressionInfo& compression_info,
const Slice& input, std::string* compressed) {
bool ok = true;
switch (FLAGS_compression_type_e) {
case ROCKSDB_NAMESPACE::kSnappyCompression:
ok = Snappy_Compress(compression_info, input.data(), input.size(),
compressed);
break;
case ROCKSDB_NAMESPACE::kZlibCompression:
ok = Zlib_Compress(compression_info, 2, input.data(), input.size(),
compressed);
break;
case ROCKSDB_NAMESPACE::kBZip2Compression:
ok = BZip2_Compress(compression_info, 2, input.data(), input.size(),
compressed);
break;
case ROCKSDB_NAMESPACE::kLZ4Compression:
ok = LZ4_Compress(compression_info, 2, input.data(), input.size(),
compressed);
break;
case ROCKSDB_NAMESPACE::kLZ4HCCompression:
ok = LZ4HC_Compress(compression_info, 2, input.data(), input.size(),
compressed);
break;
case ROCKSDB_NAMESPACE::kXpressCompression:
ok = XPRESS_Compress(input.data(),
input.size(), compressed);
break;
case ROCKSDB_NAMESPACE::kZSTD:
ok = ZSTD_Compress(compression_info, input.data(), input.size(),
compressed);
break;
default:
ok = false;
}
return ok;
constexpr uint32_t compress_format_version = 2;

return CompressData(input, compression_info, compress_format_version,
compressed);
}

void PrintHeader() {
Expand Down Expand Up @@ -3601,57 +3571,15 @@ class Benchmark {

bool ok = CompressSlice(compression_info, input, &compressed);
int64_t bytes = 0;
int decompress_size;
size_t uncompressed_size = 0;
while (ok && bytes < 1024 * 1048576) {
CacheAllocationPtr uncompressed;
switch (FLAGS_compression_type_e) {
case ROCKSDB_NAMESPACE::kSnappyCompression: {
// get size and allocate here to make comparison fair
size_t ulength = 0;
if (!Snappy_GetUncompressedLength(compressed.data(),
compressed.size(), &ulength)) {
ok = false;
break;
}
uncompressed = AllocateBlock(ulength, nullptr);
ok = Snappy_Uncompress(compressed.data(), compressed.size(),
uncompressed.get());
break;
}
case ROCKSDB_NAMESPACE::kZlibCompression:
uncompressed =
Zlib_Uncompress(uncompression_info, compressed.data(),
compressed.size(), &decompress_size, 2);
ok = uncompressed.get() != nullptr;
break;
case ROCKSDB_NAMESPACE::kBZip2Compression:
uncompressed = BZip2_Uncompress(compressed.data(), compressed.size(),
&decompress_size, 2);
ok = uncompressed.get() != nullptr;
break;
case ROCKSDB_NAMESPACE::kLZ4Compression:
uncompressed = LZ4_Uncompress(uncompression_info, compressed.data(),
compressed.size(), &decompress_size, 2);
ok = uncompressed.get() != nullptr;
break;
case ROCKSDB_NAMESPACE::kLZ4HCCompression:
uncompressed = LZ4_Uncompress(uncompression_info, compressed.data(),
compressed.size(), &decompress_size, 2);
ok = uncompressed.get() != nullptr;
break;
case ROCKSDB_NAMESPACE::kXpressCompression:
uncompressed.reset(XPRESS_Uncompress(
compressed.data(), compressed.size(), &decompress_size));
ok = uncompressed.get() != nullptr;
break;
case ROCKSDB_NAMESPACE::kZSTD:
uncompressed = ZSTD_Uncompress(uncompression_info, compressed.data(),
compressed.size(), &decompress_size);
ok = uncompressed.get() != nullptr;
break;
default:
ok = false;
}
constexpr uint32_t compress_format_version = 2;

CacheAllocationPtr uncompressed = UncompressData(
uncompression_info, compressed.data(), compressed.size(),
&uncompressed_size, compress_format_version);

ok = uncompressed.get() != nullptr;
bytes += input.size();
thread->stats.FinishedOps(nullptr, nullptr, 1, kUncompress);
}
Expand Down
Loading

0 comments on commit 30fb9dd

Please sign in to comment.