First Version

ahmedkirmani · Dec 7, 2020 · d177a47 · d177a47
1 parent 95adfe4
commit d177a47
Show file tree

Hide file tree

Showing 287 changed files with 150,434 additions and 0 deletions.
diff --git a/AutoRegressionTest/test.cpp b/AutoRegressionTest/test.cpp
@@ -0,0 +1,41 @@
+#include "../Utils/UtilsTestAuto.h"
+#include "../Sorting/SortTestAuto.h"
+#include "../RandomTreap/DynamicSortedSequenceTestAuto.h"
+#include "../HashTable/HashTableTestAuto.h"
+#include "../Heaps/HeapTestAuto.h"
+#include "../Graphs/GraphsTestAuto.h"
+#include "../ExternalMemoryAlgorithms/ExternalMemoryAlgorithmsTestAuto.h"
+#include "../StringAlgorithms/StringAlgorithmsTestAuto.h"
+#include "../Compression/CompressionTestAuto.h"
+#include "../MiscAlgs/MiscAlgsTestAuto.h"
+#include "../Optimization/OptTestAuto.h"
+#include "../LargeNumbers/LargeNumberTestAuto.h"
+#include "../ComputationalGeometry/ComputationalGeometryTestAuto.h"
+#include "../ErrorCorrectingCodes/ErrorCorrectingCodesTestAuto.h"
+#include "../Cryptography/CryptographyTestAuto.h"
+#include "../NumericalMethods/NumericalMethodsTestAuto.h"
+
+using namespace igmdk;
+
+int main()//entry point of the regression binary: runs each automated suite once, in a fixed order
+{
+    DEBUG("All Tests Auto");
+    testAllAutoUtils();
+    testAllAutoSort();
+    testAllAutoDynamicSortedSequence();
+    testAllAutoHashTable();
+    testAllAutoHeaps();
+    testAllAutoGraphs();
+    testAllAutoExternalMemoryAlgorithms();
+    testAllAutoStringAlgorithms();
+    testAllAutoCompression();
+    testAllAutoMiscAlgorithms();
+    testAllAutoOpt();//NOTE(review): LargeNumberTestAuto.h is included by this file but no large-number suite is called here - confirm this is intended
+    testAllAutoComputationalGeometry();
+    testAllAutoErrorCorrectingCodes();
+    testAllAutoCryptography();
+    testAllAutoNumericalMethods();
+    DEBUG("All Tests Auto passed");//only reached if every suite above returned
+
+	return 0;
+}
diff --git a/Compression/Compression.h b/Compression/Compression.h
@@ -0,0 +1,131 @@
+#ifndef IGMDK_COMPRESSION_H
+#define IGMDK_COMPRESSION_H
+#include "../StringAlgorithms/SuffixArray.h"
+#include "Stream.h"
+#include "StaticCodes.h"
+#include "HuffmanTree.h"
+#include "LZW.h"
+#include <cstdlib>
+namespace igmdk{
+
+//Markers of the run-length code: RLE_E1 is the largest byte value and
+//introduces a repeat count, RLE_E2 (one below it) follows a lone literal
+//RLE_E1 in place of a count
+enum {RLE_E1 = (1 << numeric_limits<unsigned char>::digits) - 1,
+    RLE_E2 = RLE_E1 - 1};
+//Run-length encodes byteArray. Every byte is written once; if more than one
+//copy follows it (or exactly one copy when the byte is RLE_E1) the copies are
+//replaced by the pair (RLE_E1, count). A lone RLE_E1 is written as
+//(RLE_E1, RLE_E2). Returns the encoded bytes.
+Vector<unsigned char> RLECompress(Vector<unsigned char>const& byteArray)
+{
+    Vector<unsigned char> encoded;
+    int const n = byteArray.getSize();
+    int next = 0;
+    while(next < n)
+    {
+        unsigned char const current = byteArray[next++];
+        encoded.append(current);
+        //count the copies of current that follow; the bound keeps the count
+        //below the RLE_E2 marker value
+        int repeats = 0;
+        for(; repeats < RLE_E2 - 1 && next + repeats < n; ++repeats)
+            if(byteArray[next + repeats] != current) break;
+        bool const isEscapeByte = current == RLE_E1;
+        if(repeats > 1 || (isEscapeByte && repeats == 1))
+        {//replace the copies by escape + count
+            encoded.append(RLE_E1);
+            encoded.append(repeats);
+            next += repeats;
+        }
+        else if(isEscapeByte) encoded.append(RLE_E2);//lone literal RLE_E1
+    }
+    return encoded;
+}
+//Decodes a byte sequence produced by RLECompress and returns the expanded
+//bytes. An RLE_E1 not followed by another RLE_E1 is an escape: the next byte
+//is either RLE_E2 (emit one literal RLE_E1) or a count of extra copies of
+//the most recently emitted byte.
+Vector<unsigned char> RLEUncompress(Vector<unsigned char>const& byteArray)
+{
+    Vector<unsigned char> decoded;
+    int pos = 0;
+    while(pos < byteArray.getSize())
+    {
+        unsigned char symbol = byteArray[pos++];
+        //assumes well-formed input: the byte after an RLE_E1 is read
+        //without checking that it exists
+        bool const isEscape = symbol == RLE_E1 && byteArray[pos] != RLE_E1;
+        if(!isEscape)
+        {
+            decoded.append(symbol);
+            continue;
+        }
+        unsigned char repeats = byteArray[pos++];
+        if(repeats == RLE_E2) repeats = 1;//lone literal RLE_E1
+        else symbol = decoded.lastItem();//copy first, append may reallocate
+        for(; repeats > 0; --repeats) decoded.append(symbol);
+    }
+    return decoded;
+}
+
+//Move-to-front coding over the byte alphabet. With compress == true every
+//input byte is replaced by its current position in a recency list; with
+//compress == false every input byte is taken as such a position and replaced
+//by the byte stored there. In both directions the byte is then moved to the
+//front of the list. Returns the transformed bytes.
+Vector<unsigned char> MoveToFrontTransform(bool compress,
+    Vector<unsigned char>const& byteArray)
+{
+    enum{ALPHABET_SIZE = 1 << numeric_limits<unsigned char>::digits};
+    unsigned char order[ALPHABET_SIZE];
+    for(int k = 0; k < ALPHABET_SIZE; ++k) order[k] = k;//identity order
+    Vector<unsigned char> resultArray;
+    for(int i = 0; i < byteArray.getSize(); ++i)
+    {
+        unsigned char rank, symbol;
+        if(compress)
+        {//byte to rank: linear search, order always holds every byte value
+            symbol = byteArray[i];
+            for(rank = 0; order[rank] != symbol; ++rank);
+            resultArray.append(rank);
+        }
+        else
+        {//rank to byte
+            rank = byteArray[i];
+            symbol = order[rank];
+            resultArray.append(symbol);
+        }
+        //shift the entries in front of rank back by one, then put the
+        //symbol first
+        while(rank > 0)
+        {
+            order[rank] = order[rank - 1];
+            --rank;
+        }
+        order[0] = symbol;
+    }
+    return resultArray;
+}
+
+//Builds the Burrows-Wheeler output for byteArray: for every entry of the
+//array returned by suffixArray<BWTRank>, taken in order, the byte cyclically
+//preceding that start position is appended. The row whose start position is
+//0 is remembered and appended after the text as a 4-byte code from
+//ReinterpretEncode, so the result is 4 bytes longer than the input.
+Vector<unsigned char> BurrowsWheelerTransform(
+    Vector<unsigned char> const& byteArray)
+{
+    int n = byteArray.getSize(), originalRow = 0;
+    Vector<int> sortedStarts = suffixArray<BWTRank>(byteArray.getArray(), n);
+    Vector<unsigned char> result;
+    for(int row = 0; row < n; ++row)
+    {
+        int const start = sortedStarts[row];
+        if(start == 0) originalRow = row;//this row is the original string
+        //start 0 wraps around to the last byte, which avoids a % n
+        int const previous = (start == 0 ? n : start) - 1;
+        result.append(byteArray[previous]);
+    }
+    //the row index is stored in 4 bytes, assumed to be enough
+    Vector<unsigned char> code = ReinterpretEncode(originalRow, 4);
+    for(int k = 0; k < code.getSize(); ++k) result.append(code[k]);
+    return result;
+}
+
+//Inverts BurrowsWheelerTransform. The last 4 bytes of byteArray hold the
+//encoded row of the original string, everything before them is the
+//transformed text. Returns the reconstructed text.
+Vector<unsigned char> BurrowsWheelerReverseTransform(
+     Vector<unsigned char> const& byteArray)
+{
+    enum{M = 1 << numeric_limits<unsigned char>::digits, INDEX_BYTES = 4};
+    int textSize = byteArray.getSize() - INDEX_BYTES;
+    //ranks[i] = number of earlier occurrences of the byte at position i
+    int counts[M];
+    for(int c = 0; c < M; ++c) counts[c] = 0;
+    Vector<int> ranks(textSize);
+    for(int i = 0; i < textSize; ++i)
+    {
+        ranks[i] = counts[byteArray[i]];
+        ++counts[byteArray[i]];
+    }
+    //firstPositions[c] = number of bytes in the text smaller than c
+    int firstPositions[M];
+    firstPositions[0] = 0;
+    for(int c = 1; c < M; ++c)
+        firstPositions[c] = firstPositions[c - 1] + counts[c - 1];
+    //decode the row of the original string from the trailing bytes
+    Vector<unsigned char> index;
+    for(int k = 0; k < INDEX_BYTES; ++k)
+        index.append(byteArray[textSize + k]);
+    int row = ReinterpretDecode(index);
+    //fill the text from its last byte to its first
+    Vector<unsigned char> result(textSize);
+    for(int i = textSize - 1; i >= 0; --i)
+    {
+        unsigned char const symbol = byteArray[row];
+        result[i] = symbol;
+        row = ranks[row] + firstPositions[symbol];
+    }
+    return result;
+}
+
+//Compression pipeline: Burrows-Wheeler transform, then move-to-front, then
+//run-length coding, then Huffman coding. Returns the Huffman output.
+Vector<unsigned char> BWTCompress(Vector<unsigned char>const& byteArray)
+{
+    Vector<unsigned char> const bwt = BurrowsWheelerTransform(byteArray);
+    Vector<unsigned char> const mtf = MoveToFrontTransform(true, bwt);
+    Vector<unsigned char> const rle = RLECompress(mtf);
+    return HuffmanCompress(rle);
+}
+//Undoes BWTCompress by applying the inverse stages in the opposite order:
+//Huffman, run-length, move-to-front, Burrows-Wheeler.
+Vector<unsigned char> BWTUncompress(Vector<unsigned char>const& byteArray)
+{
+    Vector<unsigned char> const rle = HuffmanUncompress(byteArray);
+    Vector<unsigned char> const mtf = RLEUncompress(rle);
+    Vector<unsigned char> const bwt = MoveToFrontTransform(false, mtf);
+    return BurrowsWheelerReverseTransform(bwt);
+}
+
+}//end namespace
+#endif
diff --git a/Compression/CompressionTestAuto.h b/Compression/CompressionTestAuto.h
@@ -0,0 +1,80 @@
+#ifndef IGMDK_COMPRESSION_TEST_AUTO_H
+#define IGMDK_COMPRESSION_TEST_AUTO_H
+#include <string>
+using namespace std;
+#include "Compression.h"
+
+namespace igmdk{
+
+//Writes 1..999 into one BitStream with GammaEncode, then checks that
+//GammaDecode reads the same values back in the same order
+void testGammaCodeAuto()
+{
+    DEBUG("testGammaCodeAuto");
+    enum{LIMIT = 1000};
+    BitStream stream;
+    for(int value = 1; value < LIMIT; ++value) GammaEncode(value, stream);
+    for(int expected = 1; expected < LIMIT; ++expected)
+    {
+        assert(GammaDecode(stream) == expected);
+    }
+    DEBUG("testGammaCodeAuto passed");
+}
+
+//Writes 1..999 into one BitStream with FibonacciEncode, then checks that
+//FibonacciDecode reads the same values back in the same order
+void testFibonacciCodeAuto()
+{
+    DEBUG("testFibonacciCodeAuto");
+    enum{LIMIT = 1000};
+    BitStream stream;
+    for(int value = 1; value < LIMIT; ++value)
+        FibonacciEncode(value, stream);
+    for(int expected = 1; expected < LIMIT; ++expected)
+    {
+        assert(FibonacciDecode(stream) == expected);
+    }
+    DEBUG("testFibonacciCodeAuto passed");
+}
+
+//Writes 0..999 into one BitStream with byteEncode, then checks that
+//byteDecode reads the same values back in the same order. Unlike the gamma
+//and Fibonacci tests the range starts at 0.
+void testByteCodeAuto()
+{
+    //the messages name this test; they previously said "testGammaCodeAuto"
+    DEBUG("testByteCodeAuto");
+    BitStream result;
+    for(int i = 0; i < 1000; ++i) byteEncode(i, result);
+    for(int i = 0; i < 1000; ++i) assert(byteDecode(result) == i);
+    DEBUG("testByteCodeAuto passed");
+}
+
+//Returns a vector of n bytes (10000 by default), each assigned from
+//GlobalRNG().next()
+Vector<unsigned char> getRandomBytes(int n = 10000)
+{
+    Vector<unsigned char> bytes(n, 0);
+    for(int position = 0; position < n; ++position)
+    {
+        bytes[position] = GlobalRNG().next();
+    }
+    return bytes;
+}
+//Round-trip check: random bytes pushed through BWTCompress and then
+//BWTUncompress must come back unchanged
+void testBWTCompressAuto()
+{
+    DEBUG("testBWTCompressAuto");
+    Vector<unsigned char> original = getRandomBytes();
+    Vector<unsigned char> roundTrip = BWTUncompress(BWTCompress(original));
+    assert(original == roundTrip);
+    DEBUG("testBWTCompressAuto passed");
+}
+
+//Round-trip check for LZW: random bytes are compressed with LZWCompress and
+//packed with ExtraBitsCompress, then unpacked with ExtraBitsUncompress and
+//expanded with LZWUncompress; the storage of the output stream must equal
+//the original bytes
+void testLZWAuto()
+{
+    DEBUG("testLZWAuto");
+    Vector<unsigned char> original = getRandomBytes();
+    Vector<unsigned char> packed;
+    {//compression direction
+        BitStream source(original);
+        BitStream compressed;
+        LZWCompress(source, compressed);
+        packed = ExtraBitsCompress(compressed.bitset);
+    }
+    {//decompression direction
+        BitStream source(ExtraBitsUncompress(packed));
+        BitStream restored;
+        LZWUncompress(source, restored);
+        assert(original == restored.bitset.getStorage());
+    }
+    DEBUG("testLZWAuto passed");
+}
+
+//Runs every automated compression test of this header, in declaration order
+void testAllAutoCompression()
+{
+    DEBUG("testAllAutoCompression");
+    typedef void (*TestCase)();
+    TestCase const cases[] = {testGammaCodeAuto, testFibonacciCodeAuto,
+        testByteCodeAuto, testBWTCompressAuto, testLZWAuto};
+    int const nCases = sizeof(cases)/sizeof(cases[0]);
+    for(int k = 0; k < nCases; ++k) cases[k]();
+}
+
+}//end namespace
+#endif
diff --git a/Compression/Compressor.cpp b/Compression/Compressor.cpp
@@ -0,0 +1,126 @@
+#include "../ExternalMemoryAlgorithms/File.h"
+#include "../ExternalMemoryAlgorithms/CSV.h"
+#include "../Utils/Debug.h"
+#include <string>
+#include "Compression.h"
+using namespace std;
+
+using namespace igmdk;
+
+//Reads the rest of in (from its current position) into memory, compresses
+//it (compress == true) or uncompresses it (compress == false) with the
+//method named by smethod, and appends the outcome to out.
+//smethod must be "Huffman", "BWT" or "LZW"; for any other name
+//"Method Unknown" is logged and 0 is returned without touching the files.
+//Otherwise returns out.getSize() after the append.
+int compressor(File& in, File&out, bool compress, string const& smethod)
+{
+    enum Method{HUF, BWT, LZW};
+    Method method;
+    if(smethod == "LZW") method = LZW;
+    else if(smethod == "BWT") method = BWT;
+    else if(smethod == "Huffman") method = HUF;
+    else
+    {
+        DEBUG("Method Unknown");
+        return 0;
+    }
+
+    //pull the input in chunks of at most N bytes; a short chunk is the last
+    enum{N = 8096};
+    unsigned char buffer[N];
+    Vector<unsigned char> original, v;
+    int size;
+    do
+    {
+        size = min<long long>(N, in.bytesToEnd());
+        in.read(buffer, size);
+        for(int i = 0; i < size; ++i) original.append(buffer[i]);
+    }while(size >= N);
+
+    switch(method)
+    {
+    case LZW:
+        if(compress)
+        {
+            BitStream source(original);
+            BitStream result;
+            LZWCompress(source, result);
+            v = ExtraBitsCompress(result.bitset);
+        }
+        else
+        {
+            BitStream source(ExtraBitsUncompress(original));
+            BitStream result;
+            LZWUncompress(source, result);
+            v = result.bitset.getStorage();
+        }
+        break;
+    case BWT:
+        v = compress ? BWTCompress(original) : BWTUncompress(original);
+        break;
+    case HUF:
+        v = compress ? HuffmanCompress(original) :
+            HuffmanUncompress(original);
+        break;
+    }
+    out.append(v.getArray(), v.getSize());
+    return out.getSize();
+}
+
+//Compresses every listed file with every method, uncompresses the outcome
+//again, asserts that the round trip restores the original size, and writes
+//a table (file name, original size, compressed size per method) to
+//CompressionResult.csv. The temporary compressed and restored files are
+//removed after each round trip. The input files are opened by relative
+//name.
+void testAllMethods()
+{
+    //AAR decomp has bug for all
+    //the names must be the ones compressor() recognizes: for an unknown
+    //name it returns 0, which makes the size assertion below fail
+    string methods[] = {"Huffman", "BWT", "LZW"}, files[] = {"a.txt",
+        "bible.txt", "dickens.txt", "ecoli.txt", "mobydick.txt",
+        "pi10mm.txt", "world192.txt"};
+    int const nMethods = sizeof(methods)/sizeof(methods[0]),
+        nFiles = sizeof(files)/sizeof(files[0]);
+    Vector<Vector<string> > matrix;
+    Vector<string> titles;
+    titles.append("File");
+    titles.append("Size");
+    for(int j = 0; j < nMethods; ++j) titles.append(methods[j]);
+    matrix.append(titles);
+    for(int i = 0; i < nFiles; ++i)
+    {
+        File in(files[i].c_str(), false);
+        Vector<string> row;
+        DEBUG(files[i]);
+        row.append(files[i]);
+        int oriSize = in.getSize();
+        row.append(to_string(oriSize));
+        for(int j = 0; j < nMethods; ++j)
+        {
+            in.setPosition(0);//every method reads the input from the start
+            DEBUG(methods[j]);
+            string outName = files[i] + "." + methods[j],
+                backName = outName + ".ori";
+            {//compress into outName; the File objects are scoped so that
+                //they are destroyed before the file is opened again
+                File out(outName.c_str(), true);
+                int size = compressor(in, out, true, methods[j]);
+                row.append(to_string(size));
+            }
+            {//uncompress outName into backName and compare sizes
+                File out(outName.c_str(), false), back(backName.c_str(), true);
+                int size2 = compressor(out, back, false, methods[j]);
+                assert(oriSize == size2);
+            }
+            File::remove(outName.c_str());
+            File::remove(backName.c_str());
+        }
+        matrix.append(row);
+    }
+    createCSV(matrix, "CompressionResult.csv");
+}
+
+int main(int argc, char *argv[])//entry point of the compressor benchmark; argc and argv are not used
+{
+    testAllMethods();//runs the round trips and writes CompressionResult.csv
+    return 0;
+}