Added RLE algorithm (keon#524)

* Init rle_compression file
* Added encoding rle method
* Added decoding rle method
* Fixed typo and a bug where count would reset
* RLE encode/decode unit tests
* Added rle compression in README
debugspy · Sep 23, 2019 · 9cebd7a · 9cebd7a
1 parent 6f1fc9f
commit 9cebd7a
Show file tree

Hide file tree

Showing 3 changed files with 70 additions and 0 deletions.
diff --git a/README.md b/README.md
@@ -115,6 +115,7 @@ If you want to uninstall algorithms, it is as simple as:
     - [math_parser](algorithms/calculator/math_parser.py)
 - [compression](algorithms/compression)
     - [huffman_coding](algorithms/compression/huffman_coding.py)
+    - [rle_compression](algorithms/compression/rle_compression.py)
 - [dfs](algorithms/dfs)
     - [all_factors](algorithms/dfs/all_factors.py)
     - [count_islands](algorithms/dfs/count_islands.py)

diff --git a/algorithms/compression/rle_compression.py b/algorithms/compression/rle_compression.py
@@ -0,0 +1,58 @@
+"""
+Run-length encoding (RLE) is a simple compression algorithm
+that takes a stream of data as input and replaces each run of
+consecutive identical values with a count followed by the value.
+Decompressing the result recovers the original data exactly, because
+RLE is a lossless compression scheme.
+"""
+
+def encode_rle(input):
+    """
+    Compress data with run-length encoding (RLE).
+
+    Each maximal run of one repeated character is written as its
+    decimal length followed by the character, e.g. 'aaab' -> '3a1b'.
+
+    Note: decimal digits inside the data make the result ambiguous,
+    because the count and the data can no longer be told apart.
+
+    :param input: The data to be encoded.
+    :return: The encoded string ('' when input is empty or None).
+    """
+    # Imported here so the function does not rely on module imports.
+    from itertools import groupby
+
+    if not input:
+        return ''
+
+    # groupby yields one (character, run) pair per maximal run of equal
+    # neighbours; counting the run's items gives its length. Joining the
+    # pieces once avoids repeated string concatenation in a loop.
+    return ''.join(
+        str(sum(1 for _ in run)) + ch
+        for ch, run in groupby(input)
+    )
+
+
+def decode_rle(input):
+    """
+    Expand a run-length encoded string back into the original data.
+
+    The input is read as a sequence of <count><character> pairs, where
+    the count is one or more decimal digits, e.g. '3a1b' -> 'aaab'.
+
+    :param input: The data to be decoded.
+    :return: The decoded string.
+    :raises ValueError: If a character has no count in front of it, or
+        if the input ends with a count that has no character after it.
+    """
+    pieces = []
+    count = ''
+
+    for ch in input:
+        # isdecimal() rather than isdigit(): isdigit() also accepts
+        # characters such as superscript digits that int() cannot parse.
+        if ch.isdecimal():
+            # Still inside the count, keep collecting its digits
+            count += ch
+        elif count:
+            # A data character closes the pair, expand the run
+            pieces.append(ch * int(count))
+            count = ''
+        else:
+            raise ValueError('missing run length before %r' % ch)
+
+    # Leftover digits mean the final pair was cut off; dropping them
+    # silently would hide truncated input.
+    if count:
+        raise ValueError('run length %r has no character after it' % count)
+
+    return ''.join(pieces)
diff --git a/tests/test_compression.py b/tests/test_compression.py
@@ -1,4 +1,5 @@
 from algorithms.compression.huffman_coding import HuffmanCoding
+from algorithms.compression.rle_compression import (decode_rle, encode_rle)
 
 import unittest
 
@@ -33,6 +34,16 @@ def tearDown(self):
         os.remove(self.file_out_bin_name)
         os.remove(self.file_out_name)
 
+class TestRLECompression(unittest.TestCase):
+    # Checks encode_rle and decode_rle against one known pair of
+    # plain text and its run-length encoded form.
+
+    def test_encode_rle(self):
+        # Encoding the plain text must give the count-prefixed form.
+        plain = 'WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW'
+        packed = '12W1B12W3B24W1B14W'
+        self.assertEqual(packed, encode_rle(plain))
+
+    def test_decode_rle(self):
+        # Decoding the count-prefixed form must give back the plain text.
+        plain = 'WWWWWWWWWWWWBWWWWWWWWWWWWBBBWWWWWWWWWWWWWWWWWWWWWWWWBWWWWWWWWWWWWWW'
+        packed = '12W1B12W3B24W1B14W'
+        self.assertEqual(plain, decode_rle(packed))
+
 
 # Run every test case in this module when the file is executed directly.
 if __name__ == "__main__":
     unittest.main()