Skip to content

Commit

Permalink
add test code and sample table builder script
Browse files Browse the repository at this point in the history
support setting a non-zero base for freq. table


git-svn-id: http://svn.coderepos.org/share/lang/cplusplus/range_coder@7156 d0d07461-0603-4401-acd4-de1884942a52
  • Loading branch information
kazuho committed Feb 26, 2008
1 parent 2094770 commit 16d09fd
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 15 deletions.
83 changes: 83 additions & 0 deletions bench.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
extern "C" {
#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
}
#include <algorithm>
#include "range_coder.hpp"

#include "table.c"

/*
 * Minimal output-iterator style sink over a caller-owned char buffer.
 * Assigning a char stores it at the caller's cursor and advances that cursor;
 * dereference and increment are no-ops that return the writer itself.
 */
class writer_t {
public:
  /* Thrown by operator=(char) when the cursor has already reached the limit. */
  struct overrun_t {};

  /* _p: address of the caller's write cursor; _max: one past the last
     writable byte. */
  writer_t(char **_p, char *_max) : pos(_p), limit(_max) {}

  /* Store one byte through the shared cursor, or throw if the buffer is full. */
  writer_t &operator=(char c) {
    char *&cursor = *pos;
    if (cursor != limit) {
      *cursor = c;
      ++cursor;
      return *this;
    }
    throw overrun_t();
  }

  /* Iterator plumbing: all of these simply hand back the writer. */
  writer_t &operator*() { return *this; }
  writer_t &operator++() { return *this; }
  writer_t &operator++(int) { return *this; }

private:
  char **pos, *limit;
};

/* Offset subtracted from freq[] entries before they are handed to the coder.
   NOTE(review): presumably mirrors the -0x8000 bias build_table.pl applies
   when printing the table -- confirm table.c is generated by that script. */
#define FREQ_BASE SHRT_MIN
/* Number of times main() repeats the compression pass and the decompression
   pass. */
#define LOOP_CNT 1024

/*
 * Benchmark driver.
 *
 * Reads up to sizeof(buf) - 1 bytes from stdin, range-encodes them LOOP_CNT
 * times using the cumulative table `freq`, decodes the result LOOP_CNT times,
 * prints the rdtsc() delta of each phase (scaled down by 1024 * 1024), and
 * finally compares the decoded bytes with the input.
 *
 * Returns 0 when the round trip matches; exits with status 99 otherwise.
 */
int main(int argc, char **argv)
{
  /* static storage: three 1 MiB automatic arrays can exceed small default
     stack limits */
  static char buf[1024 * 1024], cbuf[1024 * 1024], rbuf[1024 * 1024];
  size_t buflen, cbuflen = 0;
  unsigned long long start;
  int i;

  (void)argc;
  (void)argv;

  /* read */
  buflen = fread(buf, 1, sizeof(buf) - 1, stdin);
  /* compress */
  start = rdtsc();
  for (i = 0; i < LOOP_CNT; i++) {
    char *cbufpt = cbuf;
    rc_encoder_t<writer_t> enc(writer_t(&cbufpt, cbuf + sizeof(cbuf)));
    for (const char *p = buf, *e = buf + buflen; p != e; p++) {
      unsigned ch = (unsigned char)*p;
#ifdef USE_ORDERED_TABLE
      ch = to_ordered[ch];
#endif
      /* a symbol with an empty interval cannot be encoded */
      assert(freq[ch] != freq[ch + 1]);
      enc.encode(freq[ch] - FREQ_BASE, freq[ch + 1] - FREQ_BASE,
                 freq[256] - FREQ_BASE);
    }
    enc.final();
    /* the writer advanced cbufpt; its distance from cbuf is the output size */
    cbuflen = cbufpt - cbuf;
  }
  /* %lu expects unsigned long, so cast to exactly that type */
  printf("compression: %lu Mticks\n",
         (unsigned long)((rdtsc() - start) / 1024 / 1024));
  /* decompress */
  start = rdtsc();
  for (i = 0; i < LOOP_CNT; i++) {
    rc_decoder_t<const char*, rc_decoder_search_t<short, 256, FREQ_BASE> >
      dec(cbuf, cbuf + cbuflen);
    for (char *p = rbuf, *e = rbuf + buflen; p != e; p++) {
      unsigned ch = dec.decode(freq[256] - FREQ_BASE, freq);
#ifdef USE_ORDERED_TABLE
      ch = from_ordered[ch];
#endif
      *p = ch;
    }
  }
  printf("decompression: %lu Mticks\n",
         (unsigned long)((rdtsc() - start) / 1024 / 1024));
  /* check result */
  if (memcmp(buf, rbuf, buflen) != 0) {
    fprintf(stderr, "original data and decompressed data does not match.\n");
    exit(99);
  }

  return 0;
}
42 changes: 42 additions & 0 deletions build_table.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#! /usr/bin/perl

use strict;
use warnings;

use Getopt::Long;
use List::Util qw/sum/;

# Builds a cumulative frequency table (C source on stdout) from the bytes of
# the input files / stdin.  With --ordered, also emits byte<->rank mapping
# tables with the most frequent byte first.
my ($do_ordered);

GetOptions(
    'ordered' => \$do_ordered,
);

# occurrence count for every possible byte value
my @cnt = (0) x 256;

while (my $line = <>) {
    foreach my $c (split '', $line) {
        $cnt[ord $c]++;
    }
}

# total number of bytes seen; the sum is unaffected by the reordering below
my $cc = sum @cnt;
die "no input data: cannot build a frequency table\n" unless $cc;

if ($do_ordered) {
    # byte values sorted by descending count
    my @order = sort { $cnt[$b] <=> $cnt[$a] } 0..255;
    print "#define USE_ORDERED_TABLE 1\n";
    print "static unsigned char from_ordered[] = {", join(',', @order), "};\n";
    # inverse permutation: byte value => rank
    my %r = map { $order[$_] => $_ } 0..255;
    print "static unsigned char to_ordered[] = {", join(',', map { $r{$_} } 0..255), "};\n";
    @cnt = map { $cnt[$order[$_]] } 0..255;
}

# freq[i] is the running total before symbol i; freq[256] is the grand total
my @freq;
my $acc = 0;
for (my $i = 0; $i < 256; $i++) {
    push @freq, $acc;
    my $width = int(($cnt[$i] / $cc) * 0xfe00);
    # A byte that occurs must get a non-empty interval, otherwise it cannot be
    # encoded.  At most 256 such bumps on top of 0xfe00 stay below 0x10000, so
    # the biased values printed below still fit in a signed short.
    $width = 1 if $width == 0 && $cnt[$i] > 0;
    $acc += $width;
}
push @freq, $acc;

# values are biased by -0x8000 so that they fit in a signed short
print "static short freq[] __attribute__((aligned(16))) = {", join(',', map { $_ - 0x8000 } @freq), "};\n";
30 changes: 15 additions & 15 deletions range_coder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,35 +89,34 @@ template <class Iter> class rc_encoder_t : public rc_type_t {
uint counter;
};

template <typename FreqType, unsigned _N> struct rc_decoder_search_traits_t : public rc_type_t {
template <typename FreqType, unsigned _N, int _BASE> struct rc_decoder_search_traits_t : public rc_type_t {
typedef FreqType freq_type;
enum {
N = _N
N = _N,
BASE = _BASE
};
};

template <typename FreqType, unsigned _N> struct rc_decoder_search_t : public rc_decoder_search_traits_t<FreqType, _N> {
static uint get_index(const FreqType *freq, uint pos) {
template <typename FreqType, unsigned _N, int _BASE = 0> struct rc_decoder_search_t : public rc_decoder_search_traits_t<FreqType, _N, _BASE> {
static uint get_index(const FreqType *freq, FreqType pos) {
uint left = 0;
uint right = _N;
while(left < right) {
uint mid = (left+right)/2;
if (static_cast<uint>(freq[mid+1]) <= pos)
left = mid+1;
else
right = mid;
if (freq[mid+1] <= pos) left = mid+1;
else right = mid;
}
return left;
}
};

#ifdef RANGE_CODER_USE_SSE

template<> struct rc_decoder_search_t<short, 256> : public rc_decoder_search_traits_t<short, 256> {
static uint get_index(const freq_type *freq, uint pos) {
template<int _BASE> struct rc_decoder_search_t<short, 256, _BASE> : public rc_decoder_search_traits_t<short, 256, _BASE> {
static uint get_index(const short *freq, short pos) {
__m128i v = _mm_set1_epi16(pos);
unsigned i, mask = 0;
for (i = 0; i < N; i += 16) {
for (i = 0; i < 256; i += 16) {
__m128i x = *reinterpret_cast<const __m128i*>(freq + i);
__m128i y = *reinterpret_cast<const __m128i*>(freq + i + 8);
__m128i a = _mm_cmplt_epi16(v, x);
Expand Down Expand Up @@ -147,12 +146,13 @@ template <class Iterator, class SearchType> class rc_decoder_t : public rc_type_
}
uint decode(const uint total, const freq_type* cumFreq) {
const uint r = R / total;
const uint targetPos = std::min(total-1, D / r);
const int targetPos = std::min(total-1, D / r);

//find target s.t. cumFreq[target] <= targetPos < cumFreq[target+1]
const uint target = search_type::get_index(cumFreq, targetPos);
const uint low = cumFreq[target];
const uint high = cumFreq[target+1];
const uint target =
search_type::get_index(cumFreq, targetPos + search_type::BASE);
const uint low = cumFreq[target] - search_type::BASE;
const uint high = cumFreq[target+1] - search_type::BASE;

D -= r * low;
if (high != total) {
Expand Down

0 comments on commit 16d09fd

Please sign in to comment.