Skip to content

Commit

Permalink
First Version
Browse files Browse the repository at this point in the history
  • Loading branch information
dkedyk committed Dec 7, 2020
1 parent 95adfe4 commit d177a47
Show file tree
Hide file tree
Showing 287 changed files with 150,434 additions and 0 deletions.
41 changes: 41 additions & 0 deletions AutoRegressionTest/test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#include "../Utils/UtilsTestAuto.h"
#include "../Sorting/SortTestAuto.h"
#include "../RandomTreap/DynamicSortedSequenceTestAuto.h"
#include "../HashTable/HashTableTestAuto.h"
#include "../Heaps/HeapTestAuto.h"
#include "../Graphs/GraphsTestAuto.h"
#include "../ExternalMemoryAlgorithms/ExternalMemoryAlgorithmsTestAuto.h"
#include "../StringAlgorithms/StringAlgorithmsTestAuto.h"
#include "../Compression/CompressionTestAuto.h"
#include "../MiscAlgs/MiscAlgsTestAuto.h"
#include "../Optimization/OptTestAuto.h"
#include "../LargeNumbers/LargeNumberTestAuto.h"
#include "../ComputationalGeometry/ComputationalGeometryTestAuto.h"
#include "../ErrorCorrectingCodes/ErrorCorrectingCodesTestAuto.h"
#include "../Cryptography/CryptographyTestAuto.h"
#include "../NumericalMethods/NumericalMethodsTestAuto.h"

using namespace igmdk;

//Entry point of the automated regression suite: calls each module's
//testAllAuto* driver one after another, bracketed by two DEBUG markers, and
//returns 0 once the last driver has returned.
//NOTE(review): ../LargeNumbers/LargeNumberTestAuto.h is included by this file
//but no large-number driver is called below - confirm whether that omission
//is intentional
int main()
{
DEBUG("All Tests Auto");
testAllAutoUtils();
testAllAutoSort();
testAllAutoDynamicSortedSequence();
testAllAutoHashTable();
testAllAutoHeaps();
testAllAutoGraphs();
testAllAutoExternalMemoryAlgorithms();
testAllAutoStringAlgorithms();
testAllAutoCompression();
testAllAutoMiscAlgorithms();
testAllAutoOpt();
testAllAutoComputationalGeometry();
testAllAutoErrorCorrectingCodes();
testAllAutoCryptography();
testAllAutoNumericalMethods();
//only reached if every driver above returned
DEBUG("All Tests Auto passed");

return 0;
}
131 changes: 131 additions & 0 deletions Compression/Compression.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
#ifndef IGMDK_COMPRESSION_H
#define IGMDK_COMPRESSION_H
#include "../StringAlgorithms/SuffixArray.h"
#include "Stream.h"
#include "StaticCodes.h"
#include "HuffmanTree.h"
#include "LZW.h"
#include <cstdlib>
namespace igmdk{

//Byte values reserved by the run-length coder:
//RLE_E1 is the largest value an unsigned char can hold; RLECompress writes it
//in front of a repeat count
//RLE_E2 is the value just below it; RLECompress writes the pair
//(RLE_E1, RLE_E2) for a lone data byte equal to RLE_E1
enum {RLE_E1 = (1 << numeric_limits<unsigned char>::digits) - 1,
RLE_E2 = RLE_E1 - 1};
//Run-length encodes byteArray.
//Every input byte is first written literally. If at least two further copies
//follow it (or at least one, when the byte equals RLE_E1), the marker RLE_E1
//and the number of further copies are written and those copies are skipped.
//A byte equal to RLE_E1 with no copy after it is written as (RLE_E1, RLE_E2).
//Returns the encoded bytes; the input is not modified.
Vector<unsigned char> RLECompress(Vector<unsigned char>const& byteArray)
{
    Vector<unsigned char> result;
    int const total = byteArray.getSize();
    int pos = 0;
    while(pos < total)
    {
        unsigned char const current = byteArray[pos++];
        result.append(current);
        //count the copies that follow; the cap of RLE_E2 - 1 keeps the
        //stored count distinct from both reserved values
        int repeats = 0;
        for(; repeats < RLE_E2 - 1; ++repeats)
        {
            if(pos + repeats >= total) break;
            if(byteArray[pos + repeats] != current) break;
        }
        bool const isMarkerByte = (current == RLE_E1);
        bool const worthEncoding = repeats > 1 ||
            (isMarkerByte && repeats == 1);
        if(worthEncoding)
        {
            result.append(RLE_E1);
            result.append(repeats);
            pos += repeats;
        }
        else if(isMarkerByte)
        {//lone marker byte must be escaped
            result.append(RLE_E2);
        }
    }
    return result;
}
//Reverses RLECompress.
//A byte RLE_E1 followed by RLE_E2 decodes to one literal RLE_E1; RLE_E1
//followed by any other value that is not RLE_E1 repeats the previously
//decoded byte that many times; every other byte is copied through.
//Malformed input is tolerated instead of reading out of bounds: an RLE_E1 at
//the very end of the input is copied as a literal, and a repeat marker that
//arrives before any byte has been decoded is skipped.
//Returns the decoded bytes; the input is not modified.
Vector<unsigned char> RLEUncompress(Vector<unsigned char>const& byteArray)
{
    Vector<unsigned char> result;
    int const size = byteArray.getSize();
    for(int i = 0; i < size;)
    {
        unsigned char byte = byteArray[i++];
        //the bounds test must come before peeking at the next byte
        if(byte == RLE_E1 && i < size && byteArray[i] != RLE_E1)
        {
            unsigned char count = byteArray[i++];
            if(count == RLE_E2) result.append(byte);//escaped literal RLE_E1
            else if(result.getSize() > 0)
            {//copy the byte first as append may reallocate the vector
                unsigned char const repeated = result.lastItem();
                while(count--) result.append(repeated);
            }
            //else: nothing decoded yet to repeat, ignore the bad marker
        }
        else result.append(byte);
    }
    return result;
}

//Move-to-front coding over the byte alphabet.
//A table starting as the identity permutation of all byte values is kept;
//after every processed byte the symbol involved is moved to the table front.
//compress == true: each input byte is replaced by its current table position.
//compress == false: each input byte is taken as a table position and replaced
//by the symbol stored there.
//Returns the transformed bytes, one per input byte.
Vector<unsigned char> MoveToFrontTransform(bool compress,
    Vector<unsigned char>const& byteArray)
{
    enum{ALPHABET = 1 << numeric_limits<unsigned char>::digits};
    unsigned char table[ALPHABET];
    for(int k = 0; k < ALPHABET; ++k) table[k] = k;
    Vector<unsigned char> resultArray;
    for(int i = 0; i < byteArray.getSize(); ++i)
    {
        unsigned char const input = byteArray[i];
        unsigned char rank, symbol;
        if(compress)
        {//symbol -> rank; the table is a permutation so the scan terminates
            symbol = input;
            rank = 0;
            while(table[rank] != symbol) ++rank;
            resultArray.append(rank);
        }
        else
        {//rank -> symbol
            rank = input;
            symbol = table[rank];
            resultArray.append(symbol);
        }
        //shift the entries in front of rank back by one, then put symbol first
        for(int k = rank; k > 0; --k) table[k] = table[k - 1];
        table[0] = symbol;
    }
    return resultArray;
}

//Forward Burrows-Wheeler transform.
//Uses the order returned by suffixArray<BWTRank> (presumably the sorted
//order of the input's rotations - confirm against SuffixArray.h): for each
//entry the byte just before that start position is output, wrapping to the
//last byte for start position 0. The row at which start position 0 appears
//is appended as 4 bytes produced by ReinterpretEncode.
//Returns the transformed text followed by that 4-byte trailer.
Vector<unsigned char> BurrowsWheelerTransform(
    Vector<unsigned char> const& byteArray)
{
    int size = byteArray.getSize();
    Vector<int> sortedStarts = suffixArray<BWTRank>(byteArray.getArray(),
        size);
    Vector<unsigned char> result;
    int originalRow = 0;
    for(int row = 0; row < size; ++row)
    {
        int const start = sortedStarts[row];
        bool const isOriginal = (start == 0);
        if(isOriginal) originalRow = row;//remember where the input itself is
        //preceding byte, wrapping around without a modulo
        result.append(byteArray[isOriginal ? size - 1 : start - 1]);
    }
    //assume that 4 bytes is enough for the row index
    Vector<unsigned char> trailer = ReinterpretEncode(originalRow, 4);
    for(int k = 0; k < trailer.getSize(); ++k) result.append(trailer[k]);
    return result;
}

//Inverse of BurrowsWheelerTransform.
//byteArray must be the transformed text followed by the 4-byte row index
//written by BurrowsWheelerTransform. The text is rebuilt from its last byte
//to its first by repeatedly stepping from the current row to
//(number of earlier occurrences of its byte) + (first row of that byte).
//Returns the original text. Malformed input - fewer than 4 bytes, or a
//decoded row index outside the text - yields an empty vector instead of an
//out-of-bounds access.
Vector<unsigned char> BurrowsWheelerReverseTransform(
    Vector<unsigned char> const& byteArray)
{
    enum{M = 1 << numeric_limits<unsigned char>::digits, INDEX_BYTES = 4};
    int textSize = byteArray.getSize() - INDEX_BYTES;
    if(textSize < 0) return Vector<unsigned char>();//trailer is missing
    int counts[M], firstPositions[M];
    for(int i = 0; i < M; ++i) counts[i] = 0;
    //ranks[i] = how many times byteArray[i] occurred before position i
    Vector<int> ranks(textSize);
    for(int i = 0; i < textSize; ++i) ranks[i] = counts[byteArray[i]]++;
    //firstPositions[c] = number of text bytes smaller than c
    firstPositions[0] = 0;
    for(int i = 0; i < M - 1; ++i)
        firstPositions[i + 1] = firstPositions[i] + counts[i];
    //extract the row of the original rotation from the trailer
    Vector<unsigned char> index, result(textSize);
    for(int i = 0; i < INDEX_BYTES; ++i)
        index.append(byteArray[i + textSize]);
    int ix = ReinterpretDecode(index);
    //only the starting row needs checking: each later row is a rank plus a
    //first position and so stays inside the text
    if(textSize > 0 && (ix < 0 || ix >= textSize))
        return Vector<unsigned char>();
    //construct in reverse order
    for(int i = textSize - 1; i >= 0; --i)
    {
        unsigned char const symbol = byteArray[ix];
        result[i] = symbol;
        ix = ranks[ix] + firstPositions[symbol];
    }
    return result;
}

//Compression pipeline: Burrows-Wheeler transform, then move-to-front coding,
//then run-length encoding, then Huffman coding.
//Returns the compressed bytes.
Vector<unsigned char> BWTCompress(Vector<unsigned char>const& byteArray)
{
    Vector<unsigned char> const sorted = BurrowsWheelerTransform(byteArray);
    Vector<unsigned char> const ranked = MoveToFrontTransform(true, sorted);
    Vector<unsigned char> const runs = RLECompress(ranked);
    return HuffmanCompress(runs);
}
//Decompression pipeline, the stages of BWTCompress undone in reverse order:
//Huffman decoding, run-length decoding, inverse move-to-front, inverse
//Burrows-Wheeler transform.
//Returns the restored bytes.
Vector<unsigned char> BWTUncompress(Vector<unsigned char>const& byteArray)
{
    Vector<unsigned char> const runs = HuffmanUncompress(byteArray);
    Vector<unsigned char> const ranked = RLEUncompress(runs);
    Vector<unsigned char> const sorted = MoveToFrontTransform(false, ranked);
    return BurrowsWheelerReverseTransform(sorted);
}

}//end namespace
#endif
80 changes: 80 additions & 0 deletions Compression/CompressionTestAuto.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#ifndef IGMDK_COMPRESSION_TEST_AUTO_H
#define IGMDK_COMPRESSION_TEST_AUTO_H
#include <string>
using namespace std;
#include "Compression.h"

namespace igmdk{

//Round-trip test of the gamma code: encodes 1..999 into one BitStream and
//asserts that decoding the stream yields the same values in the same order
void testGammaCodeAuto()
{
    DEBUG("testGammaCodeAuto");
    BitStream result;
    int value = 1;
    while(value < 1000)
    {
        GammaEncode(value, result);
        ++value;
    }
    //the decode call stays inside assert, exactly as before
    for(int expected = 1; expected < 1000; ++expected)
    {
        assert(GammaDecode(result) == expected);
    }
    DEBUG("testGammaCodeAuto passed");
}

//Round-trip test of the Fibonacci code: encodes 1..999 into one BitStream and
//asserts that decoding the stream yields the same values in the same order
void testFibonacciCodeAuto()
{
    DEBUG("testFibonacciCodeAuto");
    BitStream result;
    int value = 1;
    while(value < 1000)
    {
        FibonacciEncode(value, result);
        ++value;
    }
    //the decode call stays inside assert, exactly as before
    for(int expected = 1; expected < 1000; ++expected)
    {
        assert(FibonacciDecode(result) == expected);
    }
    DEBUG("testFibonacciCodeAuto passed");
}

//Round-trip test of the byte code: encodes 0..999 into one BitStream and
//asserts that decoding the stream yields the same values in the same order.
//The progress messages now name this test; they previously said
//"testGammaCodeAuto" because of a copy-paste slip.
void testByteCodeAuto()
{
    DEBUG("testByteCodeAuto");
    BitStream result;
    for(int i = 0; i < 1000; ++i) byteEncode(i, result);
    for(int i = 0; i < 1000; ++i) assert(byteDecode(result) == i);
    DEBUG("testByteCodeAuto passed");
}

//Builds a vector of n bytes (10000 by default), each assigned from a
//GlobalRNG().next() draw
Vector<unsigned char> getRandomBytes(int n = 10000)
{
    Vector<unsigned char> bytes(n, 0);
    int filled = 0;
    while(filled < n)
    {
        bytes[filled] = GlobalRNG().next();
        ++filled;
    }
    return bytes;
}
void testBWTCompressAuto()
{
DEBUG("testBWTCompressAuto");
Vector<unsigned char> byteArray = getRandomBytes();
assert(byteArray == BWTUncompress(BWTCompress(byteArray)));
DEBUG("testBWTCompressAuto passed");
}

void testLZWAuto()
{
DEBUG("testLZWAuto");
Vector<unsigned char> byteArray = getRandomBytes(), code;
{
BitStream in(byteArray);
BitStream out;
LZWCompress(in, out);
code = ExtraBitsCompress(out.bitset);
}
{
BitStream in(ExtraBitsUncompress(code));
BitStream out;
LZWUncompress(in, out);
assert(byteArray == out.bitset.getStorage());
}
DEBUG("testLZWAuto passed");
}

//Runs every automated compression test defined in this header: the three
//integer-code round trips first, then the BWT and LZW round trips
void testAllAutoCompression()
{
DEBUG("testAllAutoCompression");
testGammaCodeAuto();
testFibonacciCodeAuto();
testByteCodeAuto();
testBWTCompressAuto();
testLZWAuto();
}

}//end namespace
#endif
126 changes: 126 additions & 0 deletions Compression/Compressor.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#include "../ExternalMemoryAlgorithms/File.h"
#include "../ExternalMemoryAlgorithms/CSV.h"
#include "../Utils/Debug.h"
#include <string>
#include "Compression.h"
using namespace std;

using namespace igmdk;

//Compresses or uncompresses the rest of file in and appends the result to
//file out.
//in: read from its current position to its end
//out: receives the produced bytes via append
//compress: true to compress, false to uncompress
//smethod: "Huffman", "BWT" or "LZW"
//Returns out.getSize() after the append, or 0 - without touching either
//file - when smethod is none of the three names.
int compressor(File& in, File&out, bool compress, string const& smethod)
{
    enum Method{HUF, BWT, LZW};
    Method method;
    if(smethod == "Huffman") method = HUF;
    else if(smethod == "BWT") method = BWT;
    else if(smethod == "LZW") method = LZW;
    else
    {
        DEBUG("Method Unknown");
        return 0;
    }

    //load the whole input in chunks of at most N bytes; a chunk shorter than
    //N means the end of the file was reached
    enum{N = 8096};
    unsigned char buffer[N];
    Vector<unsigned char> original, v;
    int chunkSize;
    do
    {
        chunkSize = min<long long>(N, in.bytesToEnd());
        in.read(buffer, chunkSize);
        for(int k = 0; k < chunkSize; ++k) original.append(buffer[k]);
    }while(chunkSize == N);

    switch(method)
    {
    case LZW:
        if(compress)
        {
            BitStream packed;
            BitStream source(original);
            LZWCompress(source, packed);
            v = ExtraBitsCompress(packed.bitset);
        }
        else
        {
            BitStream source(ExtraBitsUncompress(original));
            BitStream unpacked;
            LZWUncompress(source, unpacked);
            v = unpacked.bitset.getStorage();
        }
        break;
    case BWT:
        if(compress) v = BWTCompress(original);
        else v = BWTUncompress(original);
        break;
    case HUF:
        if(compress) v = HuffmanCompress(original);
        else v = HuffmanUncompress(original);
        break;
    }
    out.append(v.getArray(), v.getSize());
    return out.getSize();
}

void testAllMethods()
{
//AAR decomp has bug for all
string methods[] = {"Huf", "BWT", "LZW"}, files[] = {"a.txt", "bible.txt",
"dickens.txt", "ecoli.txt", "mobydick.txt", "pi10mm.txt",//
"world192.txt"};
Vector<Vector<string> > matrix;
Vector<string> titles;
titles.append("File");
titles.append("Size");
for(int j = 0; j < sizeof(methods)/sizeof(methods[0]); ++j)
titles.append(methods[j]);
matrix.append(titles);
for(int i = 0; i < sizeof(files)/sizeof(files[0]); ++i)
{
File in(files[i].c_str(), false);
Vector<string> row;
DEBUG(files[i]);
row.append(files[i]);
int oriSize = in.getSize();
row.append(to_string(oriSize));
for(int j = 0; j < sizeof(methods)/sizeof(methods[0]); ++j)
{
in.setPosition(0);
DEBUG(methods[j]);
int size;
string outName = files[i] + "." + methods[j],
backName = outName + ".ori";
{
File out(outName.c_str(), true);
int start = clock();
size = compressor(in, out, true, methods[j]);
row.append(to_string(size));
int elapsed = clock()-start;
}
{
File out(outName.c_str(), false), back(backName.c_str(), true);
int start = clock();
int size2 = compressor(out, back, false, methods[j]);
assert(oriSize == size2);
int elapsed = clock()-start;
}
File::remove(outName.c_str());
File::remove(backName.c_str());
}
matrix.append(row);
}
createCSV(matrix, "CompressionResult.csv");
}

//Program entry point: runs the compression benchmark over the corpus files.
//Command-line arguments are ignored.
int main(int, char*[])
{
    testAllMethods();
    return 0;
}
Loading

0 comments on commit d177a47

Please sign in to comment.