Skip to content

Commit

Permalink
bsc 3.1.8
Browse files Browse the repository at this point in the history
  • Loading branch information
IlyaGrebnov committed Aug 18, 2021
1 parent 1af0917 commit 9c09560
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 69 deletions.
3 changes: 3 additions & 0 deletions CHANGES
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
Changes in 3.1.8 (August, 18 2021)
- Slightly improved compression performance.

Changes in 3.1.7 (August, 15 2021)
- Slightly improved compression performance.

Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.1.7
3.1.8
2 changes: 1 addition & 1 deletion bsc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -835,7 +835,7 @@ void ProcessCommandline(int argc, char * argv[])

int main(int argc, char * argv[])
{
fprintf(stdout, "This is bsc, Block Sorting Compressor. Version 3.1.7. 15 August 2021.\n");
fprintf(stdout, "This is bsc, Block Sorting Compressor. Version 3.1.8. 18 August 2021.\n");
fprintf(stdout, "Copyright (c) 2009-2021 Ilya Grebnov <[email protected]>.\n\n");

#if defined(_OPENMP) && defined(__INTEL_COMPILER)
Expand Down
25 changes: 0 additions & 25 deletions libbsc/coder/common/tables.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,18 +35,6 @@ See also the bsc and libbsc web site:

#include "../../platform/platform.h"

/*
 * Integer base-2 logarithm lookup for a single byte value.
 * For 1 <= n <= 255 the entry at index n is floor(log2(n)), i.e. the position
 * of the highest set bit of n; the entry at index 0 is the sentinel -1.
 */
static const int bsc_log2_table[256] =
{
-1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
};

static const short bsc_stretch_table[4097] =
{
-2047,-2047,-1952,-1848,-1774,-1717,-1670,-1631,-1597,-1566,-1539,-1515,-1492,-1472,-1453,-1435,
Expand Down Expand Up @@ -1857,19 +1845,6 @@ static const unsigned char model_run_state_table[8192] =
85, 170, 219, 171, 0, 0, 0, 0, 94, 170, 219, 1, 0, 0, 0, 0, 94, 170, 219, 140, 0, 0, 0, 0, 94, 170, 219, 248, 0, 0, 0, 0,
};

/*
 * Looks up the bsc_log2_table entry for n.
 * n is used directly as the table index with no range check, so the caller
 * must pass a value in 0..255.
 */
static INLINE int bsc_log2_256(const int n)
{
    const int * const entry = bsc_log2_table + n;

    return *entry;
}

/*
 * Integer base-2 logarithm of n, computed one byte at a time: pick the byte
 * that holds the highest set bit, look that byte up in bsc_log2_table and add
 * the byte's bit offset (0, 8, 16 or 24).
 *
 * For n == 0 the result is the table's sentinel for index 0.
 * NOTE(review): negative n falls into the lowest range and would index the
 * table out of bounds; callers are expected to pass non-negative values.
 */
static INLINE int bsc_log2(const int n)
{
    int shift;

    if (n >= 0x1000000)    { shift = 24; }
    else if (n >= 0x10000) { shift = 16; }
    else if (n >= 0x100)   { shift = 8;  }
    else                   { shift = 0;  }

    return shift + bsc_log2_table[n >> shift];
}

static INLINE int bsc_stretch(const int p)
{
return bsc_stretch_table[p];
Expand Down
133 changes: 91 additions & 42 deletions libbsc/coder/qlfc/qlfc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,20 +86,24 @@ See also the bsc and libbsc web site:

#if LIBBSC_CPU_FEATURE == LIBBSC_CPU_FEATURE_AVX2
#define QLFC_TRANSFORM_FUNCTION_NAME bsc_qlfc_transform_avx2
#define QLFC_TRANSFORM_SCAN_FUNCTION_NAME bsc_qlfc_transform_scan_avx2
#elif LIBBSC_CPU_FEATURE == LIBBSC_CPU_FEATURE_AVX
#define QLFC_TRANSFORM_FUNCTION_NAME bsc_qlfc_transform_avx
#define QLFC_TRANSFORM_SCAN_FUNCTION_NAME bsc_qlfc_transform_scan_avx
#define QLFC_ADAPTIVE_DECODE_FUNCTION_NAME bsc_qlfc_adaptive_decode_avx
#define QLFC_STATIC_DECODE_FUNCTION_NAME bsc_qlfc_static_decode_avx
#elif LIBBSC_CPU_FEATURE == LIBBSC_CPU_FEATURE_SSE41
#define QLFC_ADAPTIVE_DECODE_FUNCTION_NAME bsc_qlfc_adaptive_decode_sse41
#define QLFC_STATIC_DECODE_FUNCTION_NAME bsc_qlfc_static_decode_sse41
#elif LIBBSC_CPU_FEATURE == LIBBSC_CPU_FEATURE_SSE2
#define QLFC_TRANSFORM_FUNCTION_NAME bsc_qlfc_transform_sse2
#define QLFC_TRANSFORM_SCAN_FUNCTION_NAME bsc_qlfc_transform_scan_sse2
#define QLFC_ADAPTIVE_DECODE_FUNCTION_NAME bsc_qlfc_adaptive_decode_sse2
#define QLFC_STATIC_DECODE_FUNCTION_NAME bsc_qlfc_static_decode_sse2
#define QLFC_TRANSFORM_FUNCTION_NAME bsc_qlfc_transform_sse2
#endif
#else
#define QLFC_TRANSFORM_FUNCTION_NAME bsc_qlfc_transform
#define QLFC_TRANSFORM_SCAN_FUNCTION_NAME bsc_qlfc_transform_scan
#define QLFC_ADAPTIVE_DECODE_FUNCTION_NAME bsc_qlfc_adaptive_decode
#define QLFC_STATIC_DECODE_FUNCTION_NAME bsc_qlfc_static_decode
#endif
Expand All @@ -108,6 +112,29 @@ See also the bsc and libbsc web site:

#if LIBBSC_CPU_FEATURE >= LIBBSC_CPU_FEATURE_SSE2

/*
 * Scans backwards from position i over a run of bytes equal to currentChar.
 *
 * The byte at input[i] itself is never examined (the caller has just read it);
 * the scan covers input[i - 1] down to input[0]. Returns the index of the
 * nearest lower position whose byte differs from currentChar, or -1 when every
 * byte below i matches.
 *
 * With SSE2/AVX2 enabled the run is first consumed in 16/32-byte chunks using
 * unaligned loads; the remaining bytes (fewer than one chunk) are handled by
 * the scalar loop at the end.
 */
INLINE ptrdiff_t QLFC_TRANSFORM_SCAN_FUNCTION_NAME (const unsigned char * RESTRICT input, ptrdiff_t i, unsigned char currentChar)
{
#if LIBBSC_CPU_FEATURE >= LIBBSC_CPU_FEATURE_AVX2
    __m256i needle = _mm256_set1_epi8(currentChar);

    for (; i >= 32; )
    {
        i -= 32;

        /* One mask bit per byte of input[i .. i + 31]; a set bit means "equals currentChar". */
        int equalMask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(_mm256_loadu_si256((const __m256i *)(input + i)), needle));
        if (equalMask == (int)0xffffffff) { continue; }

        /* The highest clear bit is the mismatching byte closest to the starting position. */
        return i + bsc_bit_scan_reverse(((unsigned int)(~equalMask)));
    }
#elif LIBBSC_CPU_FEATURE >= LIBBSC_CPU_FEATURE_SSE2
    __m128i needle = _mm_set1_epi8(currentChar);

    for (; i >= 16; )
    {
        i -= 16;

        /* One mask bit per byte of input[i .. i + 15]; a set bit means "equals currentChar". */
        int equalMask = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128((const __m128i *)(input + i)), needle));
        if (equalMask == 0xffff) { continue; }

        /* Only the low 16 bits are meaningful, so flip just those before locating the highest mismatch. */
        return i + bsc_bit_scan_reverse((unsigned int)(equalMask ^ 0xffff));
    }
#endif

    /* Scalar tail: step below the current position first, then keep walking while the run continues. */
    for (--i; (i >= 0) && (input[i] == currentChar); --i) { }

    return i;
}

unsigned char * QLFC_TRANSFORM_FUNCTION_NAME (const unsigned char * RESTRICT input, unsigned char * RESTRICT buffer, int n, unsigned char * RESTRICT MTFTable)
{
signed char ALIGNED(64) ranks[ALPHABET_SIZE];
Expand All @@ -118,10 +145,10 @@ unsigned char * QLFC_TRANSFORM_FUNCTION_NAME (const unsigned char * RESTRICT inp

ptrdiff_t i = (ptrdiff_t)n - 1, j = n; signed char nSymbols = 0;

for (; i >= 0;)
for (; i >= 0; )
{
unsigned char currentChar1 = input[i]; do {} while ((--i >= 0) && (input[i] == currentChar1)); if (i < 0) { i = 0; break; }
unsigned char currentChar2 = input[i]; do {} while ((--i >= 0) && (input[i] == currentChar2));
unsigned char currentChar1 = input[i]; i = QLFC_TRANSFORM_SCAN_FUNCTION_NAME(input, i, currentChar1); if (i < 0) { i = 0; break; }
unsigned char currentChar2 = input[i]; i = QLFC_TRANSFORM_SCAN_FUNCTION_NAME(input, i, currentChar2);

signed char rank1 = ranks[currentChar1], rank2 = ranks[currentChar2]; rank2 += rank1 > rank2;

Expand Down Expand Up @@ -294,14 +321,17 @@ int bsc_qlfc_adaptive_encode(const unsigned char * input, unsigned char * output

if (currentChar == prevChar)
{
maxRank = bsc_log2_256(rank - 1);
maxRank = bsc_bit_scan_reverse(rank - 1);
break;
}

prevChar = currentChar; usedChar[currentChar] = 1;
}

for (const unsigned char * inputEnd = input + inputSize; input < inputEnd;)
const unsigned char * inputEnd = input + inputSize;
const unsigned char * rankArrayEnd = buffer + inputSize;

for (; rankArray < rankArrayEnd; )
{
if (coder.CheckEOB())
{
Expand All @@ -311,22 +341,30 @@ int bsc_qlfc_adaptive_encode(const unsigned char * input, unsigned char * output
int currentChar = *input, runSize;
{
const unsigned char * inputStart = input++;
while (true)

if (rankArray >= rankArrayEnd - 16)
{
if (input <= inputEnd - 4)
{
if (input[0] != currentChar) { input += 0; break; }
if (input[1] != currentChar) { input += 1; break; }
if (input[2] != currentChar) { input += 2; break; }
if (input[3] != currentChar) { input += 3; break; }
while ((input < inputEnd) && (*input == currentChar)) { input++; }
}
else
{
#if LIBBSC_CPU_FEATURE >= LIBBSC_CPU_FEATURE_SSE2
__m128i v = _mm_set1_epi8(currentChar);

input += 4;
}
else
while (true)
{
while ((input < inputEnd) && (*input == currentChar)) ++input;
break;
int m = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128((const __m128i *)input), v));
if (m != 0xffff)
{
input += bsc_bit_scan_forward((unsigned int)(~m));
break;
}

input += 16;
}
#else
while (*input == currentChar) { input++; }
#endif
}

runSize = (int)(input - inputStart);
Expand Down Expand Up @@ -367,7 +405,7 @@ int bsc_qlfc_adaptive_encode(const unsigned char * input, unsigned char * output
coder.EncodeBit1(mixer->MixupAndUpdateBit1(probability0, probability1, probability2, M_RANK_TM_LR0, M_RANK_TM_LR1, M_RANK_TM_LR2, M_RANK_TM_TH1, M_RANK_TM_AR1));
}

int bitRankSize = bsc_log2_256(rank); rankHistory[currentChar] = bitRankSize;
int bitRankSize = bsc_bit_scan_reverse(rank); rankHistory[currentChar] = bitRankSize;

statePredictor = & model->Rank.Exponent.StateModel[state][0];
charPredictor = & model->Rank.Exponent.CharModel[currentChar][0];
Expand Down Expand Up @@ -433,7 +471,7 @@ int bsc_qlfc_adaptive_encode(const unsigned char * input, unsigned char * output
}
else
{
rankHistory[currentChar] = bsc_log2_256(rank);
rankHistory[currentChar] = (unsigned char)bsc_bit_scan_reverse(rank);

statePredictor = & model->Rank.Escape.StateModel[state][0];
charPredictor = & model->Rank.Escape.CharModel[currentChar][0];
Expand Down Expand Up @@ -503,7 +541,7 @@ int bsc_qlfc_adaptive_encode(const unsigned char * input, unsigned char * output
coder.EncodeBit1(mixer->MixupAndUpdateBit1(probability0, probability1, probability2, M_RUN_TM_LR0, M_RUN_TM_LR1, M_RUN_TM_LR2, M_RUN_TM_TH1, M_RUN_TM_AR1));
}

int bitRunSize = bsc_log2(runSize); runHistory[currentChar] = (runHistory[currentChar] + 3 * bitRunSize + 3) >> 2;
int bitRunSize = bsc_bit_scan_reverse(runSize); runHistory[currentChar] = (runHistory[currentChar] + 3 * bitRunSize + 3) >> 2;

statePredictor = & model->Run.Exponent.StateModel[state][0];
charPredictor = & model->Run.Exponent.CharModel[currentChar][0];
Expand Down Expand Up @@ -631,14 +669,17 @@ int bsc_qlfc_static_encode(const unsigned char * input, unsigned char * output,

if (currentChar == prevChar)
{
maxRank = bsc_log2_256(rank - 1);
maxRank = bsc_bit_scan_reverse(rank - 1);
break;
}

prevChar = currentChar; usedChar[currentChar] = 1;
}

for (const unsigned char * inputEnd = input + inputSize; input < inputEnd;)
const unsigned char * inputEnd = input + inputSize;
const unsigned char * rankArrayEnd = buffer + inputSize;

for (; rankArray < rankArrayEnd; )
{
if (coder.CheckEOB())
{
Expand All @@ -648,22 +689,30 @@ int bsc_qlfc_static_encode(const unsigned char * input, unsigned char * output,
int currentChar = *input, runSize;
{
const unsigned char * inputStart = input++;
while (true)

if (rankArray >= rankArrayEnd - 16)
{
if (input <= inputEnd - 4)
{
if (input[0] != currentChar) { input += 0; break; }
if (input[1] != currentChar) { input += 1; break; }
if (input[2] != currentChar) { input += 2; break; }
if (input[3] != currentChar) { input += 3; break; }
while ((input < inputEnd) && (*input == currentChar)) { input++; }
}
else
{
#if LIBBSC_CPU_FEATURE >= LIBBSC_CPU_FEATURE_SSE2
__m128i v = _mm_set1_epi8(currentChar);

input += 4;
}
else
while (true)
{
while ((input < inputEnd) && (*input == currentChar)) ++input;
break;
int m = _mm_movemask_epi8(_mm_cmpeq_epi8(_mm_loadu_si128((const __m128i *)input), v));
if (m != 0xffff)
{
input += bsc_bit_scan_forward((unsigned int)(~m));
break;
}

input += 16;
}
#else
while (*input == currentChar) { input++; }
#endif
}

runSize = (int)(input - inputStart);
Expand Down Expand Up @@ -703,7 +752,7 @@ int bsc_qlfc_static_encode(const unsigned char * input, unsigned char * output,
coder.EncodeBit1(probability);
}

int bitRankSize = bsc_log2_256(rank); rankHistory[currentChar] = bitRankSize;
int bitRankSize = bsc_bit_scan_reverse(rank); rankHistory[currentChar] = bitRankSize;

statePredictor = & model->Rank.Exponent.StateModel[state][0];
charPredictor = & model->Rank.Exponent.CharModel[currentChar][0];
Expand Down Expand Up @@ -759,7 +808,7 @@ int bsc_qlfc_static_encode(const unsigned char * input, unsigned char * output,
}
else
{
rankHistory[currentChar] = bsc_log2_256(rank);
rankHistory[currentChar] = (unsigned char)bsc_bit_scan_reverse(rank);

statePredictor = & model->Rank.Escape.StateModel[state][0];
charPredictor = & model->Rank.Escape.CharModel[currentChar][0];
Expand Down Expand Up @@ -820,7 +869,7 @@ int bsc_qlfc_static_encode(const unsigned char * input, unsigned char * output,
coder.EncodeBit1(probability);
}

int bitRunSize = bsc_log2(runSize); runHistory[currentChar] = (runHistory[currentChar] + 3 * bitRunSize + 3) >> 2;
int bitRunSize = bsc_bit_scan_reverse(runSize); runHistory[currentChar] = (runHistory[currentChar] + 3 * bitRunSize + 3) >> 2;

statePredictor = & model->Run.Exponent.StateModel[state][0];
charPredictor = & model->Run.Exponent.CharModel[currentChar][0];
Expand Down Expand Up @@ -970,7 +1019,7 @@ int QLFC_ADAPTIVE_DECODE_FUNCTION_NAME (const unsigned char * input, unsigned ch

if (currentChar == prevChar)
{
maxRank = bsc_log2_256(rank - 1);
maxRank = bsc_bit_scan_reverse(rank - 1);
break;
}

Expand Down Expand Up @@ -1091,7 +1140,7 @@ int QLFC_ADAPTIVE_DECODE_FUNCTION_NAME (const unsigned char * input, unsigned ch
}
}

rankHistory[currentChar] = bsc_log2_256(rank);
rankHistory[currentChar] = (unsigned char)bsc_bit_scan_reverse(rank);
}

{
Expand Down Expand Up @@ -1268,7 +1317,7 @@ int QLFC_STATIC_DECODE_FUNCTION_NAME (const unsigned char * input, unsigned char

if (currentChar == prevChar)
{
maxRank = bsc_log2_256(rank - 1);
maxRank = bsc_bit_scan_reverse(rank - 1);
break;
}

Expand Down Expand Up @@ -1375,7 +1424,7 @@ int QLFC_STATIC_DECODE_FUNCTION_NAME (const unsigned char * input, unsigned char
}
}

rankHistory[currentChar] = bsc_log2_256(rank);
rankHistory[currentChar] = (unsigned char)bsc_bit_scan_reverse(rank);
}

{
Expand Down
22 changes: 22 additions & 0 deletions libbsc/platform/platform.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,28 @@ See also the bsc and libbsc web site:
#define RESTRICT /* */
#endif

/*
 * Bit-scan helpers for 32-bit values.
 *
 *   bsc_bit_scan_reverse(x) - index (0..31) of the most significant set bit of x
 *   bsc_bit_scan_forward(x) - index (0..31) of the least significant set bit of x
 *
 * x must be non-zero: __builtin_clz / __builtin_ctz are undefined for 0, and
 * _BitScanReverse / _BitScanForward do not produce a usable index when no bit
 * is set. Callers are responsible for excluding 0.
 */
#if defined(__GNUC__) || defined(__clang__)
    /* clz counts leading zeros of a 32-bit value; XOR with 31 turns that count into a bit index. */
    #define bsc_bit_scan_reverse(x) (__builtin_clz(x) ^ 31)
    #define bsc_bit_scan_forward(x) (__builtin_ctz(x))
#elif defined(_MSC_VER)
    #pragma intrinsic(_BitScanReverse)
    #pragma intrinsic(_BitScanForward)

    static inline __forceinline unsigned long bsc_bit_scan_reverse(unsigned long x)
    {
        unsigned long index;
        _BitScanReverse(&index, x);
        return index;
    }

    static inline __forceinline unsigned long bsc_bit_scan_forward(unsigned long x)
    {
        unsigned long index;
        _BitScanForward(&index, x);
        return index;
    }
#else
    /*
     * Portable fallback for compilers that provide neither the GCC builtins
     * nor the MSVC intrinsics, so that code using these helpers still builds.
     * Zero is outside the contract; both functions return 0 for it rather
     * than looping forever.
     */
    static inline int bsc_bit_scan_reverse(unsigned int x)
    {
        int index = 0;
        while ((x >>= 1) != 0) { index++; }
        return index;
    }

    static inline int bsc_bit_scan_forward(unsigned int x)
    {
        int index = 0;
        if (x == 0) { return 0; }
        while ((x & 1u) == 0) { x >>= 1; index++; }
        return index;
    }
#endif

#define ALPHABET_SIZE (256)

#define LIBBSC_CPU_FEATURE_NONE 0
Expand Down

0 comments on commit 9c09560

Please sign in to comment.