|
| 1 | +#include "compression.h" |
| 2 | + |
| 3 | +std::vector<uint8_t> mongodb_zstd::compress(const std::vector<uint8_t>& data, |
| 4 | + size_t compression_level) { |
| 5 | + size_t output_buffer_size = ZSTD_compressBound(data.size()); |
| 6 | + std::vector<uint8_t> output(output_buffer_size); |
| 7 | + |
| 8 | + size_t result_code = |
| 9 | + ZSTD_compress(output.data(), output.size(), data.data(), data.size(), compression_level); |
| 10 | + |
| 11 | + if (ZSTD_isError(result_code)) { |
| 12 | + throw std::runtime_error(ZSTD_getErrorName(result_code)); |
| 13 | + } |
| 14 | + |
| 15 | + output.resize(result_code); |
| 16 | + |
| 17 | + return output; |
| 18 | +} |
| 19 | + |
| 20 | +std::vector<uint8_t> mongodb_zstd::decompress(const std::vector<uint8_t>& compressed) { |
| 21 | + std::vector<uint8_t> decompressed; |
| 22 | + |
| 23 | + using DCTX_Deleter = void (*)(ZSTD_DCtx*); |
| 24 | + |
| 25 | + std::unique_ptr<ZSTD_DCtx, DCTX_Deleter> decompression_context( |
| 26 | + ZSTD_createDCtx(), [](ZSTD_DCtx* ctx) { ZSTD_freeDCtx(ctx); }); |
| 27 | + |
| 28 | + ZSTD_inBuffer input = {compressed.data(), compressed.size(), 0}; |
| 29 | + std::vector<uint8_t> output_buffer(ZSTD_DStreamOutSize()); |
| 30 | + ZSTD_outBuffer output = {output_buffer.data(), output_buffer.size(), 0}; |
| 31 | + |
| 32 | + // Source: https://facebook.github.io/zstd/zstd_manual.html#Chapter9 |
| 33 | + // |
| 34 | + // Use ZSTD_decompressStream() repetitively to consume your input. |
| 35 | + // The function will update both `pos` fields. |
| 36 | + // If `input.pos < input.size`, some input has not been consumed. |
| 37 | + // It's up to the caller to present again remaining data. |
| 38 | + // The function tries to flush all data decoded immediately, respecting output buffer size. |
| 39 | + // If `output.pos < output.size`, decoder has flushed everything it could. |
| 40 | + // But if `output.pos == output.size`, there might be some data left within internal buffers., |
| 41 | + // In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. |
| 42 | + // Note : with no additional input provided, amount of data flushed is necessarily <= |
| 43 | + // ZSTD_BLOCKSIZE_MAX. |
| 44 | + // @return : 0 when a frame is completely decoded and fully flushed, |
| 45 | + // or an error code, which can be tested using ZSTD_isError(), |
| 46 | + // or any other value > 0, which means there is still some decoding or flushing to do to |
| 47 | + // complete current frame : |
| 48 | + // the return value is a suggested next input size (just a hint |
| 49 | + // for better latency) that will never request more than the |
| 50 | + // remaining frame size. |
| 51 | + auto inputRemains = [](const ZSTD_inBuffer& input) { return input.pos < input.size; }; |
| 52 | + auto isOutputBufferFlushed = [](const ZSTD_outBuffer& output) { |
| 53 | + return output.pos < output.size; |
| 54 | + }; |
| 55 | + |
| 56 | + while (inputRemains(input) || !isOutputBufferFlushed(output)) { |
| 57 | + size_t const ret = ZSTD_decompressStream(decompression_context.get(), &output, &input); |
| 58 | + if (ZSTD_isError(ret)) { |
| 59 | + throw std::runtime_error(ZSTD_getErrorName(ret)); |
| 60 | + } |
| 61 | + |
| 62 | + size_t decompressed_size = decompressed.size(); |
| 63 | + decompressed.resize(decompressed_size + output.pos); |
| 64 | + std::copy(output_buffer.data(), |
| 65 | + output_buffer.data() + output.pos, |
| 66 | + decompressed.data() + decompressed_size); |
| 67 | + |
| 68 | + // move the position back go 0, to indicate that we are ready for more data |
| 69 | + output.pos = 0; |
| 70 | + } |
| 71 | + |
| 72 | + return decompressed; |
| 73 | +} |
0 commit comments