forked from dkedyk/ImplementingUsefulAlgorithms
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
287 changed files
with
150,434 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
#include "../Utils/UtilsTestAuto.h" | ||
#include "../Sorting/SortTestAuto.h" | ||
#include "../RandomTreap/DynamicSortedSequenceTestAuto.h" | ||
#include "../HashTable/HashTableTestAuto.h" | ||
#include "../Heaps/HeapTestAuto.h" | ||
#include "../Graphs/GraphsTestAuto.h" | ||
#include "../ExternalMemoryAlgorithms/ExternalMemoryAlgorithmsTestAuto.h" | ||
#include "../StringAlgorithms/StringAlgorithmsTestAuto.h" | ||
#include "../Compression/CompressionTestAuto.h" | ||
#include "../MiscAlgs/MiscAlgsTestAuto.h" | ||
#include "../Optimization/OptTestAuto.h" | ||
#include "../LargeNumbers/LargeNumberTestAuto.h" | ||
#include "../ComputationalGeometry/ComputationalGeometryTestAuto.h" | ||
#include "../ErrorCorrectingCodes/ErrorCorrectingCodesTestAuto.h" | ||
#include "../Cryptography/CryptographyTestAuto.h" | ||
#include "../NumericalMethods/NumericalMethodsTestAuto.h" | ||
|
||
using namespace igmdk; | ||
|
||
//Entry point of the automated test driver: logs a start banner, calls each
//module's testAllAuto* suite in the order listed, logs a final banner once
//every call has returned, and returns 0.
//NOTE(review): "../LargeNumbers/LargeNumberTestAuto.h" is included above but
//no suite from that header is called here - confirm whether that is intended.
int main() | ||
{ | ||
DEBUG("All Tests Auto"); | ||
testAllAutoUtils(); | ||
testAllAutoSort(); | ||
testAllAutoDynamicSortedSequence(); | ||
testAllAutoHashTable(); | ||
testAllAutoHeaps(); | ||
testAllAutoGraphs(); | ||
testAllAutoExternalMemoryAlgorithms(); | ||
testAllAutoStringAlgorithms(); | ||
testAllAutoCompression(); | ||
testAllAutoMiscAlgorithms(); | ||
testAllAutoOpt(); | ||
testAllAutoComputationalGeometry(); | ||
testAllAutoErrorCorrectingCodes(); | ||
testAllAutoCryptography(); | ||
testAllAutoNumericalMethods(); | ||
//only reached after every suite above has returned
DEBUG("All Tests Auto passed"); | ||
|
||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
#ifndef IGMDK_COMPRESSION_H | ||
#define IGMDK_COMPRESSION_H | ||
#include "../StringAlgorithms/SuffixArray.h" | ||
#include "Stream.h" | ||
#include "StaticCodes.h" | ||
#include "HuffmanTree.h" | ||
#include "LZW.h" | ||
#include <cstdlib> | ||
namespace igmdk{ | ||
|
||
//Marker values of the run-length codec below.
//RLE_E1 is the largest value an unsigned char can hold; RLECompress writes it
//in front of a repeat count. RLE_E2 is one less; RLECompress writes it after
//a lone RLE_E1 data byte.
enum {RLE_E1 = (1 << numeric_limits<unsigned char>::digits) - 1, | ||
RLE_E2 = RLE_E1 - 1}; | ||
//Run-length encodes byteArray and returns the encoded bytes.
//Every input byte is first copied to the output. If at least two further
//copies of it follow (or exactly one, when the byte equals RLE_E1), the
//output continues with RLE_E1 and the number of further copies, and those
//copies are skipped. A lone RLE_E1 byte is followed by RLE_E2 instead.
//The repeat count is capped at RLE_E2 - 1, so it never equals either marker.
Vector<unsigned char> RLECompress(Vector<unsigned char>const& byteArray)
{
    int const n = byteArray.getSize();
    Vector<unsigned char> encoded;
    int pos = 0;
    while(pos < n)
    {
        unsigned char const symbol = byteArray[pos++];
        encoded.append(symbol);
        //count the copies of symbol that directly follow, up to the cap
        int repeats = 0;
        for(; repeats < RLE_E2 - 1 && pos + repeats < n; ++repeats)
            if(byteArray[pos + repeats] != symbol) break;
        bool const isMarker = symbol == RLE_E1;
        if(repeats > 1 || (isMarker && repeats == 1))
        {//worth a run record
            encoded.append(RLE_E1);
            encoded.append(repeats);
            pos += repeats;
        }
        else if(isMarker) encoded.append(RLE_E2);//tag a lone marker byte
    }
    return encoded;
}
//Decodes the byte format written by RLECompress and returns the result.
//A byte equal to RLE_E1 whose successor is not RLE_E1 starts a record: the
//successor is either RLE_E2 (emit one RLE_E1 byte) or a repeat count (emit
//that many copies of the last byte already produced). Any other byte is
//copied through unchanged.
//Note: the byte after an RLE_E1 is read without checking that it exists, so
//the input is expected to be complete RLECompress output.
Vector<unsigned char> RLEUncompress(Vector<unsigned char>const& byteArray)
{
    Vector<unsigned char> decoded;
    int const n = byteArray.getSize();
    for(int pos = 0; pos < n;)
    {
        unsigned char symbol = byteArray[pos++];
        bool const startsRecord = symbol == RLE_E1 &&
            byteArray[pos] != RLE_E1;
        if(!startsRecord)
        {
            decoded.append(symbol);
            continue;
        }
        unsigned char repeats = byteArray[pos++];
        if(repeats == RLE_E2) repeats = 1;//lone marker byte
        //copy the last item into a local first, append may reallocate
        else symbol = decoded.lastItem();
        for(; repeats > 0; --repeats) decoded.append(symbol);
    }
    return decoded;
}
|
||
//Applies the move-to-front transform (compress == true) or undoes it
//(compress == false) and returns the transformed bytes.
//A table of all unsigned char values starts out in increasing order.
//Forward: each input byte is replaced by its current position in the table.
//Reverse: each input byte is taken as a position and replaced by the table
//entry found there.
//In both directions the byte involved is then moved to the table front.
Vector<unsigned char> MoveToFrontTransform(bool compress,
    Vector<unsigned char>const& byteArray)
{
    enum{ALPHABET_SIZE = 1 << numeric_limits<unsigned char>::digits};
    unsigned char order[ALPHABET_SIZE];
    for(int k = 0; k < ALPHABET_SIZE; ++k) order[k] = k;
    Vector<unsigned char> output;
    int const n = byteArray.getSize();
    for(int pos = 0; pos < n; ++pos)
    {
        unsigned char const input = byteArray[pos];
        unsigned char symbol;
        int rank = 0;
        if(compress)
        {//byte to rank; the table holds every value so the scan terminates
            symbol = input;
            while(order[rank] != symbol) ++rank;
            output.append(rank);
        }
        else
        {//rank to byte
            rank = input;
            symbol = order[rank];
            output.append(symbol);
        }
        //shift the entries in front of rank back by one, freeing slot 0
        while(rank > 0)
        {
            order[rank] = order[rank - 1];
            --rank;
        }
        order[0] = symbol;
    }
    return output;
}
|
||
//Computes the Burrows-Wheeler transform of byteArray.
//For every entry of suffixArray<BWTRank>(...), in order, the output holds the
//input byte just before that entry's index, wrapping to the last byte when
//the index is 0. The position of the entry equal to 0 is then appended as 4
//bytes produced by ReinterpretEncode (4 bytes are assumed to be enough).
Vector<unsigned char> BurrowsWheelerTransform(
    Vector<unsigned char> const& byteArray)
{
    int const n = byteArray.getSize();
    Vector<int> sortedStarts = suffixArray<BWTRank>(byteArray.getArray(), n);
    Vector<unsigned char> transformed;
    int originalRow = 0;
    for(int row = 0; row < n; ++row)
    {
        int const start = sortedStarts[row];
        if(start == 0) originalRow = row;//this row is the original string
        //index 0 wraps around to the last byte, no % needed
        int const preceding = (start == 0 ? n : start) - 1;
        transformed.append(byteArray[preceding]);
    }
    Vector<unsigned char> trailer = ReinterpretEncode(originalRow, 4);
    for(int k = 0; k < trailer.getSize(); ++k)
        transformed.append(trailer[k]);
    return transformed;
}
|
||
//Inverts BurrowsWheelerTransform and returns the original bytes.
//The last 4 bytes of byteArray are decoded with ReinterpretDecode into the
//starting row; everything before them is the transformed text.
//The text is rebuilt from its last byte to its first: each step stores the
//byte at the current row, then moves to the row given by that byte's rank
//among equal bytes plus the number of smaller bytes in the text.
//Note: the input length minus 4 is used as the text size without validation.
Vector<unsigned char> BurrowsWheelerReverseTransform(
    Vector<unsigned char> const& byteArray)
{
    enum{ALPHABET_SIZE = 1 << numeric_limits<unsigned char>::digits,
        TRAILER_SIZE = 4};
    int const n = byteArray.getSize() - TRAILER_SIZE;
    //rank of a position = number of earlier occurrences of the same byte
    int occurrences[ALPHABET_SIZE];
    for(int s = 0; s < ALPHABET_SIZE; ++s) occurrences[s] = 0;
    Vector<int> ranks(n);
    for(int pos = 0; pos < n; ++pos)
    {
        unsigned char const symbol = byteArray[pos];
        ranks[pos] = occurrences[symbol];
        ++occurrences[symbol];
    }
    //first row of each byte value = count of all smaller byte values
    int firstRows[ALPHABET_SIZE];
    firstRows[0] = 0;
    for(int s = 1; s < ALPHABET_SIZE; ++s)
        firstRows[s] = firstRows[s - 1] + occurrences[s - 1];
    //extract the row of the original rotation
    Vector<unsigned char> trailer;
    for(int k = 0; k < TRAILER_SIZE; ++k) trailer.append(byteArray[n + k]);
    Vector<unsigned char> text(n);
    int row = ReinterpretDecode(trailer);
    //construct in reverse order
    for(int pos = n - 1; pos >= 0; --pos)
    {
        unsigned char const symbol = byteArray[row];
        text[pos] = symbol;
        row = ranks[row] + firstRows[symbol];
    }
    return text;
}
|
||
//Compresses byteArray by chaining, in this order: BurrowsWheelerTransform,
//MoveToFrontTransform (forward), RLECompress, HuffmanCompress.
Vector<unsigned char> BWTCompress(Vector<unsigned char>const& byteArray)
{
    Vector<unsigned char> const sorted = BurrowsWheelerTransform(byteArray);
    Vector<unsigned char> const ranked = MoveToFrontTransform(true, sorted);
    Vector<unsigned char> const packed = RLECompress(ranked);
    return HuffmanCompress(packed);
}
//Undoes BWTCompress by chaining, in this order: HuffmanUncompress,
//RLEUncompress, MoveToFrontTransform (reverse),
//BurrowsWheelerReverseTransform.
Vector<unsigned char> BWTUncompress(Vector<unsigned char>const& byteArray)
{
    Vector<unsigned char> const packed = HuffmanUncompress(byteArray);
    Vector<unsigned char> const ranked = RLEUncompress(packed);
    Vector<unsigned char> const sorted = MoveToFrontTransform(false, ranked);
    return BurrowsWheelerReverseTransform(sorted);
}
|
||
}//end namespace | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
#ifndef IGMDK_COMPRESSION_TEST_AUTO_H | ||
#define IGMDK_COMPRESSION_TEST_AUTO_H | ||
#include <string> | ||
using namespace std; | ||
#include "Compression.h" | ||
|
||
namespace igmdk{ | ||
|
||
void testGammaCodeAuto() | ||
{ | ||
DEBUG("testGammaCodeAuto"); | ||
BitStream result; | ||
for(int i = 1; i < 1000; ++i) GammaEncode(i, result); | ||
for(int i = 1; i < 1000; ++i) assert(GammaDecode(result) == i); | ||
DEBUG("testGammaCodeAuto passed"); | ||
} | ||
|
||
void testFibonacciCodeAuto() | ||
{ | ||
DEBUG("testFibonacciCodeAuto"); | ||
BitStream result; | ||
for(int i = 1; i < 1000; ++i) FibonacciEncode(i, result); | ||
for(int i = 1; i < 1000; ++i) assert(FibonacciDecode(result) == i); | ||
DEBUG("testFibonacciCodeAuto passed"); | ||
} | ||
|
||
void testByteCodeAuto() | ||
{ | ||
DEBUG("testGammaCodeAuto"); | ||
BitStream result; | ||
for(int i = 0; i < 1000; ++i) byteEncode(i, result); | ||
for(int i = 0; i < 1000; ++i) assert(byteDecode(result) == i); | ||
DEBUG("testGammaCodeAuto passed"); | ||
} | ||
|
||
//Returns a vector of n bytes (default 10000), each assigned from a separate
//GlobalRNG().next() call and converted to unsigned char by the assignment.
Vector<unsigned char> getRandomBytes(int n = 10000)
{
    Vector<unsigned char> bytes(n, 0);
    int filled = 0;
    while(filled < n)
    {
        bytes[filled] = GlobalRNG().next();
        ++filled;
    }
    return bytes;
}
void testBWTCompressAuto() | ||
{ | ||
DEBUG("testBWTCompressAuto"); | ||
Vector<unsigned char> byteArray = getRandomBytes(); | ||
assert(byteArray == BWTUncompress(BWTCompress(byteArray))); | ||
DEBUG("testBWTCompressAuto passed"); | ||
} | ||
|
||
void testLZWAuto() | ||
{ | ||
DEBUG("testLZWAuto"); | ||
Vector<unsigned char> byteArray = getRandomBytes(), code; | ||
{ | ||
BitStream in(byteArray); | ||
BitStream out; | ||
LZWCompress(in, out); | ||
code = ExtraBitsCompress(out.bitset); | ||
} | ||
{ | ||
BitStream in(ExtraBitsUncompress(code)); | ||
BitStream out; | ||
LZWUncompress(in, out); | ||
assert(byteArray == out.bitset.getStorage()); | ||
} | ||
DEBUG("testLZWAuto passed"); | ||
} | ||
|
||
void testAllAutoCompression() | ||
{ | ||
DEBUG("testAllAutoCompression"); | ||
testGammaCodeAuto(); | ||
testFibonacciCodeAuto(); | ||
testByteCodeAuto(); | ||
testBWTCompressAuto(); | ||
testLZWAuto(); | ||
} | ||
|
||
}//end namespace | ||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
#include "../ExternalMemoryAlgorithms/File.h" | ||
#include "../ExternalMemoryAlgorithms/CSV.h" | ||
#include "../Utils/Debug.h" | ||
#include <string> | ||
#include "Compression.h" | ||
using namespace std; | ||
|
||
using namespace igmdk; | ||
|
||
//Reads `in` from its current position to the end, compresses or uncompresses
//those bytes with the selected method, and appends the result to `out`.
//  in       - source file, read through bytesToEnd()/read()
//  out      - destination file, written through append()
//  compress - true to compress, false to uncompress
//  smethod  - "Huffman" (or its short label "Huf"), "BWT" or "LZW"
//Returns out.getSize() after the append. For any other smethod it logs
//"Method Unknown" and returns 0 without reading or writing anything.
int compressor(File& in, File&out, bool compress, string const& smethod)
{
    enum{HUF, BWT, LZW};
    char method;
    //testAllMethods passes the label "Huf"; with only "Huffman" recognised,
    //that column always ended in "Method Unknown" and a size of 0
    if(smethod == "Huffman" || smethod == "Huf") method = HUF;
    else if(smethod == "BWT") method = BWT;
    else if(smethod == "LZW") method = LZW;
    else{DEBUG("Method Unknown"); return 0;}

    //load the rest of the input, at most N bytes per read
    enum{N = 8096};
    unsigned char buffer[N];
    Vector<unsigned char> original, v;
    for(;;)
    {
        int size = min<long long>(N, in.bytesToEnd());
        in.read(buffer, size);
        for(int i = 0; i < size; ++i)
        {
            original.append(buffer[i]);
        }
        if(size < N) break;//fewer than N bytes were left, so this was the end
    }
    if(compress)
    {
        if(method == LZW)
        {
            //locals named so that they do not shadow the File parameters
            BitStream source(original);
            BitStream result;
            LZWCompress(source, result);
            v = ExtraBitsCompress(result.bitset);
        }
        else if(method == BWT)
        {
            v = BWTCompress(original);
        }
        else if(method == HUF)
        {
            v = HuffmanCompress(original);
        }
    }
    else
    {
        if(method == LZW)
        {
            BitStream source(ExtraBitsUncompress(original));
            BitStream result;
            LZWUncompress(source, result);
            v = result.bitset.getStorage();
        }
        else if(method == BWT)
        {
            v = BWTUncompress(original);
        }
        else if(method == HUF)
        {
            v = HuffmanUncompress(original);
        }
    }
    out.append(v.getArray(), v.getSize());
    return out.getSize();
}
|
||
void testAllMethods() | ||
{ | ||
//AAR decomp has bug for all | ||
string methods[] = {"Huf", "BWT", "LZW"}, files[] = {"a.txt", "bible.txt", | ||
"dickens.txt", "ecoli.txt", "mobydick.txt", "pi10mm.txt",// | ||
"world192.txt"}; | ||
Vector<Vector<string> > matrix; | ||
Vector<string> titles; | ||
titles.append("File"); | ||
titles.append("Size"); | ||
for(int j = 0; j < sizeof(methods)/sizeof(methods[0]); ++j) | ||
titles.append(methods[j]); | ||
matrix.append(titles); | ||
for(int i = 0; i < sizeof(files)/sizeof(files[0]); ++i) | ||
{ | ||
File in(files[i].c_str(), false); | ||
Vector<string> row; | ||
DEBUG(files[i]); | ||
row.append(files[i]); | ||
int oriSize = in.getSize(); | ||
row.append(to_string(oriSize)); | ||
for(int j = 0; j < sizeof(methods)/sizeof(methods[0]); ++j) | ||
{ | ||
in.setPosition(0); | ||
DEBUG(methods[j]); | ||
int size; | ||
string outName = files[i] + "." + methods[j], | ||
backName = outName + ".ori"; | ||
{ | ||
File out(outName.c_str(), true); | ||
int start = clock(); | ||
size = compressor(in, out, true, methods[j]); | ||
row.append(to_string(size)); | ||
int elapsed = clock()-start; | ||
} | ||
{ | ||
File out(outName.c_str(), false), back(backName.c_str(), true); | ||
int start = clock(); | ||
int size2 = compressor(out, back, false, methods[j]); | ||
assert(oriSize == size2); | ||
int elapsed = clock()-start; | ||
} | ||
File::remove(outName.c_str()); | ||
File::remove(backName.c_str()); | ||
} | ||
matrix.append(row); | ||
} | ||
createCSV(matrix, "CompressionResult.csv"); | ||
} | ||
|
||
//Program entry point of the compression benchmark: runs testAllMethods() and
//returns 0. The command-line arguments are not used.
int main(int argc, char *argv[]) | ||
{ | ||
testAllMethods(); | ||
return 0; | ||
} |
Oops, something went wrong.