forked from netty/netty
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Motivation: Bzip2Decoder provides receiving data compressed in bzip2 format. Modifications: Added classes: - Bzip2Decoder - Bzip2Constants - Bzip2BlockDecompressor - Bzip2HuffmanStageDecoder - Bzip2MoveToFrontTable - Bzip2Rand - Crc32 - Bzip2DecoderTest Result: Implemented and tested new decoder which can uncompress incoming data in bzip2 format.
- Loading branch information
1 parent
12a3e23
commit f9021a6
Showing
12 changed files
with
1,516 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
320 changes: 320 additions & 0 deletions
320
codec/src/main/java/io/netty/handler/codec/compression/Bzip2BlockDecompressor.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,320 @@ | ||
/* | ||
* Copyright 2014 The Netty Project | ||
* | ||
* The Netty Project licenses this file to you under the Apache License, | ||
* version 2.0 (the "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at: | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT | ||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the | ||
* License for the specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
package io.netty.handler.codec.compression; | ||
|
||
import io.netty.buffer.ByteBuf; | ||
|
||
import static io.netty.handler.codec.compression.Bzip2Constants.*; | ||
|
||
final class Bzip2BlockDecompressor { | ||
/** | ||
* Calculates the block CRC from the fully decoded bytes of the block. | ||
*/ | ||
private final Crc32 crc = new Crc32(); | ||
|
||
/** | ||
* The CRC of the current block as read from the block header. | ||
*/ | ||
private final int blockCRC; | ||
|
||
/** | ||
* {@code true} if the current block is randomised, otherwise {@code false}. | ||
*/ | ||
private final boolean blockRandomised; | ||
|
||
/* Huffman Decoding stage */ | ||
/** | ||
* The end-of-block Huffman symbol. Decoding of the block ends when this is encountered. | ||
*/ | ||
int huffmanEndOfBlockSymbol; | ||
|
||
/** | ||
* Bitmap, of ranges of 16 bytes, present/not present. | ||
*/ | ||
int huffmanInUse16; | ||
|
||
/** | ||
* A map from Huffman symbol index to output character. Some types of data (e.g. ASCII text) | ||
* may contain only a limited number of byte values; Huffman symbols are only allocated to | ||
* those values that actually occur in the uncompressed data. | ||
*/ | ||
final byte[] huffmanSymbolMap = new byte[256]; | ||
|
||
/* Move To Front stage */ | ||
/** | ||
* Counts of each byte value within the {@link Bzip2BlockDecompressor#huffmanSymbolMap} data. | ||
* Collected at the Move To Front stage, consumed by the Inverse Burrows Wheeler Transform stage. | ||
*/ | ||
private final int[] bwtByteCounts = new int[256]; | ||
|
||
/** | ||
* The Burrows-Wheeler Transform processed data. Read at the Move To Front stage, consumed by the | ||
* Inverse Burrows Wheeler Transform stage. | ||
*/ | ||
private final byte[] bwtBlock; | ||
|
||
/** | ||
* Starting pointer into BWT for after untransform. | ||
*/ | ||
private final int bwtStartPointer; | ||
|
||
/* Inverse Burrows-Wheeler Transform stage */ | ||
/** | ||
* At each position contains the union of :- | ||
* An output character (8 bits) | ||
* A pointer from each position to its successor (24 bits, left shifted 8 bits) | ||
* As the pointer cannot exceed the maximum block size of 900k, 24 bits is more than enough to | ||
* hold it; Folding the character data into the spare bits while performing the inverse BWT, | ||
* when both pieces of information are available, saves a large number of memory accesses in | ||
* the final decoding stages. | ||
*/ | ||
private int[] bwtMergedPointers; | ||
|
||
/** | ||
* The current merged pointer into the Burrow-Wheeler Transform array. | ||
*/ | ||
private int bwtCurrentMergedPointer; | ||
|
||
/** | ||
* The actual length in bytes of the current block at the Inverse Burrows Wheeler Transform | ||
* stage (before final Run-Length Decoding). | ||
*/ | ||
private int bwtBlockLength; | ||
|
||
/** | ||
* The number of output bytes that have been decoded up to the Inverse Burrows Wheeler Transform stage. | ||
*/ | ||
private int bwtBytesDecoded; | ||
|
||
/* Run-Length Encoding and Random Perturbation stage */ | ||
/** | ||
* The most recently RLE decoded byte. | ||
*/ | ||
private int rleLastDecodedByte = -1; | ||
|
||
/** | ||
* The number of previous identical output bytes decoded. After 4 identical bytes, the next byte | ||
* decoded is an RLE repeat count. | ||
*/ | ||
private int rleAccumulator; | ||
|
||
/** | ||
* The RLE repeat count of the current decoded byte. When this reaches zero, a new byte is decoded. | ||
*/ | ||
private int rleRepeat; | ||
|
||
/** | ||
* If the current block is randomised, the position within the RNUMS randomisation array. | ||
*/ | ||
private int randomIndex; | ||
|
||
/** | ||
* If the current block is randomised, the remaining count at the current RNUMS position. | ||
*/ | ||
private int randomCount = Bzip2Rand.rNums(0) - 1; | ||
|
||
/** | ||
* Table for Move To Front transformations. | ||
*/ | ||
final Bzip2MoveToFrontTable symbolMTF = new Bzip2MoveToFrontTable(); | ||
|
||
int repeatCount; | ||
int repeatIncrement = 1; | ||
int mtfValue; | ||
|
||
Bzip2BlockDecompressor(int blockSize, int blockCRC, boolean blockRandomised, int bwtStartPointer) { | ||
bwtBlock = new byte[blockSize]; | ||
|
||
this.blockCRC = blockCRC; | ||
this.blockRandomised = blockRandomised; | ||
this.bwtStartPointer = bwtStartPointer; | ||
} | ||
|
||
/** | ||
* Reads the Huffman encoded data from the input stream, performs Run-Length Decoding and | ||
* applies the Move To Front transform to reconstruct the Burrows-Wheeler Transform array. | ||
*/ | ||
boolean decodeHuffmanData(final Bzip2HuffmanStageDecoder huffmanDecoder, ByteBuf in) { | ||
final byte[] bwtBlock = this.bwtBlock; | ||
final byte[] huffmanSymbolMap = this.huffmanSymbolMap; | ||
final int streamBlockSize = this.bwtBlock.length; | ||
final int huffmanEndOfBlockSymbol = this.huffmanEndOfBlockSymbol; | ||
final int[] bwtByteCounts = this.bwtByteCounts; | ||
final Bzip2MoveToFrontTable symbolMTF = this.symbolMTF; | ||
|
||
int bwtBlockLength = this.bwtBlockLength; | ||
int repeatCount = this.repeatCount; | ||
int repeatIncrement = this.repeatIncrement; | ||
int mtfValue = this.mtfValue; | ||
|
||
for (;;) { | ||
if (in.readableBytes() < 3) { // 3 = (HUFFMAN_DECODE_MAX_CODE_LENGTH + 1) bits / 8 | ||
this.bwtBlockLength = bwtBlockLength; | ||
this.repeatCount = repeatCount; | ||
this.repeatIncrement = repeatIncrement; | ||
this.mtfValue = mtfValue; | ||
return false; | ||
} | ||
final int nextSymbol = huffmanDecoder.nextSymbol(in); | ||
|
||
if (nextSymbol == HUFFMAN_SYMBOL_RUNA) { | ||
repeatCount += repeatIncrement; | ||
repeatIncrement <<= 1; | ||
} else if (nextSymbol == HUFFMAN_SYMBOL_RUNB) { | ||
repeatCount += repeatIncrement << 1; | ||
repeatIncrement <<= 1; | ||
} else { | ||
if (repeatCount > 0) { | ||
if (bwtBlockLength + repeatCount > streamBlockSize) { | ||
throw new DecompressionException("block exceeds declared block size"); | ||
} | ||
final byte nextByte = huffmanSymbolMap[mtfValue]; | ||
bwtByteCounts[nextByte & 0xff] += repeatCount; | ||
while (--repeatCount >= 0) { | ||
bwtBlock[bwtBlockLength++] = nextByte; | ||
} | ||
|
||
repeatCount = 0; | ||
repeatIncrement = 1; | ||
} | ||
|
||
if (nextSymbol == huffmanEndOfBlockSymbol) { | ||
break; | ||
} | ||
|
||
if (bwtBlockLength >= streamBlockSize) { | ||
throw new DecompressionException("block exceeds declared block size"); | ||
} | ||
|
||
mtfValue = symbolMTF.indexToFront(nextSymbol - 1) & 0xff; | ||
|
||
final byte nextByte = huffmanSymbolMap[mtfValue]; | ||
bwtByteCounts[nextByte & 0xff]++; | ||
bwtBlock[bwtBlockLength++] = nextByte; | ||
} | ||
} | ||
this.bwtBlockLength = bwtBlockLength; | ||
initialiseInverseBWT(); | ||
return true; | ||
} | ||
|
||
/** | ||
* Set up the Inverse Burrows-Wheeler Transform merged pointer array. | ||
*/ | ||
private void initialiseInverseBWT() { | ||
final int bwtStartPointer = this.bwtStartPointer; | ||
final byte[] bwtBlock = this.bwtBlock; | ||
final int[] bwtMergedPointers = new int[bwtBlockLength]; | ||
final int[] characterBase = new int[256]; | ||
|
||
if (bwtStartPointer < 0 || bwtStartPointer >= bwtBlockLength) { | ||
throw new DecompressionException("start pointer invalid"); | ||
} | ||
|
||
// Cumulative character counts | ||
System.arraycopy(bwtByteCounts, 0, characterBase, 1, 255); | ||
for (int i = 2; i <= 255; i++) { | ||
characterBase[i] += characterBase[i - 1]; | ||
} | ||
|
||
// Merged-Array Inverse Burrows-Wheeler Transform | ||
// Combining the output characters and forward pointers into a single array here, where we | ||
// have already read both of the corresponding values, cuts down on memory accesses in the | ||
// final walk through the array | ||
for (int i = 0; i < bwtBlockLength; i++) { | ||
int value = bwtBlock[i] & 0xff; | ||
bwtMergedPointers[characterBase[value]++] = (i << 8) + value; | ||
} | ||
|
||
this.bwtMergedPointers = bwtMergedPointers; | ||
bwtCurrentMergedPointer = bwtMergedPointers[bwtStartPointer]; | ||
} | ||
|
||
/** | ||
* Decodes a byte from the final Run-Length Encoding stage, pulling a new byte from the | ||
* Burrows-Wheeler Transform stage when required. | ||
* @return The decoded byte, or -1 if there are no more bytes | ||
*/ | ||
public int read() { | ||
while (rleRepeat < 1) { | ||
if (bwtBytesDecoded == bwtBlockLength) { | ||
return -1; | ||
} | ||
|
||
int nextByte = decodeNextBWTByte(); | ||
if (nextByte != rleLastDecodedByte) { | ||
// New byte, restart accumulation | ||
rleLastDecodedByte = nextByte; | ||
rleRepeat = 1; | ||
rleAccumulator = 1; | ||
crc.updateCRC(nextByte); | ||
} else { | ||
if (++rleAccumulator == 4) { | ||
// Accumulation complete, start repetition | ||
int rleRepeat = decodeNextBWTByte() + 1; | ||
this.rleRepeat = rleRepeat; | ||
rleAccumulator = 0; | ||
crc.updateCRC(nextByte, rleRepeat); | ||
} else { | ||
rleRepeat = 1; | ||
crc.updateCRC(nextByte); | ||
} | ||
} | ||
} | ||
rleRepeat--; | ||
|
||
return rleLastDecodedByte; | ||
} | ||
|
||
/** | ||
* Decodes a byte from the Burrows-Wheeler Transform stage. If the block has randomisation | ||
* applied, reverses the randomisation. | ||
* @return The decoded byte | ||
*/ | ||
private int decodeNextBWTByte() { | ||
int mergedPointer = bwtCurrentMergedPointer; | ||
int nextDecodedByte = mergedPointer & 0xff; | ||
bwtCurrentMergedPointer = bwtMergedPointers[mergedPointer >>> 8]; | ||
|
||
if (blockRandomised) { | ||
if (--randomCount == 0) { | ||
nextDecodedByte ^= 1; | ||
randomIndex = (randomIndex + 1) % 512; | ||
randomCount = Bzip2Rand.rNums(randomIndex); | ||
} | ||
} | ||
bwtBytesDecoded++; | ||
|
||
return nextDecodedByte; | ||
} | ||
|
||
public int blockLength() { | ||
return bwtBlockLength; | ||
} | ||
|
||
/** | ||
* Verify and return the block CRC. This method may only be called | ||
* after all of the block's bytes have been read. | ||
* @return The block CRC | ||
*/ | ||
int checkCRC() { | ||
final int computedBlockCRC = crc.getCRC(); | ||
if (blockCRC != computedBlockCRC) { | ||
throw new DecompressionException("block CRC error"); | ||
} | ||
return computedBlockCRC; | ||
} | ||
} |
Oops, something went wrong.