Skip to content

Commit

Permalink
Add misra-gries streaming algorithm (keon#765)
Browse files Browse the repository at this point in the history
* feat:(first draft for the misra gries algorithm) keon#1

* feat:(Added examples and changed to correct name) keon#1

* feat:(Added init file for testing) keon#2

* test:(Added tests for misras_gries function) keon#2

* add misra-gries reference

* add correct reference to misra_gries

* add misra_gries import

Co-authored-by: Anders Renström <[email protected]>
Co-authored-by: Keon <[email protected]>
  • Loading branch information
3 people authored Mar 8, 2021
1 parent 3074384 commit e685f8d
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 3 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ If you want to uninstall algorithms, it is as simple as:
- [is_sorted](algorithms/stack/is_sorted.py)
- [streaming](algorithms/streaming)
- [1-sparse-recovery](algorithms/streaming/one_sparse_recovery.py)
- [misra-gries](algorithms/streaming/misra_gries.py)
- [strings](algorithms/strings)
- [fizzbuzz](algorithms/strings/fizzbuzz.py)
- [delete_reoccurring](algorithms/strings/delete_reoccurring.py)
Expand Down
3 changes: 2 additions & 1 deletion algorithms/streaming/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .one_sparse_recovery import *
from .one_sparse_recovery import *
from .misra_gries import *
49 changes: 49 additions & 0 deletions algorithms/streaming/misra_gries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@

"""
Implementation of the Misra-Gries algorithm.
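Given a list of items and a value k, it returns every item that survives as a candidate and appears at least n/k times, where n is the length of the list. Every item that appears more than n/k times is guaranteed to be reported; an item that appears exactly n/k times may be missed.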
By default, k is set to 2, solving the majority problem.
For the majority problem, this algorithm only guarantees that if there is an element that appears more than n/2 times, it will be output. If there
is no such element, an arbitrary element is returned by the algorithm. Therefore, we need to iterate through the list again at the end. But since we have filtered
out the suspects, the memory complexity is significantly lower than it would be to create a counter for every element in the list.
For example:
Input misras_gries([1,4,4,4,5,4,4])
Output {'4':5}
Input misras_gries([0,0,0,1,1,1,1])
Output {'1':4}
Input misras_gries([0,0,0,0,1,1,1,2,2],3)
Output {'0':4,'1':3}
Input misras_gries([0,0,0,1,1,1])
Output None
"""
def misras_gries(array, k=2):
    """Find the frequent items of ``array`` with the Misra-Gries summary.

    A first pass keeps at most ``k - 1`` counters and yields a small set of
    candidates; a second pass counts the candidates exactly and keeps those
    occurring at least ``len(array) / k`` times.

    Elements are identified by their ``str()`` form, so the result keys are
    strings and elements need not be integers (strings, floats, ... work).

    Args:
        array: Sequence of items to analyse.
        k: Frequency divisor; the default of 2 solves the majority problem.

    Returns:
        A dict mapping ``str(item)`` to its exact number of occurrences for
        every reported item, or ``None`` when no item qualifies.
    """
    keys = {}
    for item in array:
        val = str(item)
        if val in keys:
            keys[val] += 1
        elif len(keys) < k - 1:
            keys[val] = 1
        else:
            # No free counter: decrement every counter and drop the ones
            # that reach zero. Iterate over a copy because entries are
            # deleted during the loop.
            for key in list(keys):
                keys[key] -= 1
                if keys[key] == 0:
                    del keys[key]

    if not keys:
        return None

    # Second pass: the counters above are only lower bounds, so recount the
    # surviving candidates exactly. Matching on the same str() form used for
    # the keys avoids converting the keys back with int(), which raised
    # ValueError for any non-integer element.
    exact = dict.fromkeys(keys, 0)
    for item in array:
        val = str(item)
        if val in exact:
            exact[val] += 1

    # keys is non-empty here, which implies k >= 2, so the division is safe.
    threshold = len(array) / k
    frequencies = {
        suspect: freq for suspect, freq in exact.items() if freq >= threshold
    }
    return frequencies if frequencies else None


def _count_frequency(array,element):
return array.count(element)


19 changes: 17 additions & 2 deletions tests/test_streaming.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,22 @@
from algorithms.streaming.misra_gries import (
misras_gries,
)
from algorithms.streaming import (
one_sparse
)
import unittest
import unittest


class TestMisraGreis(unittest.TestCase):
    """Checks misras_gries on streams with and without frequent items."""

    def test_misra_correct(self):
        # Each entry pairs the expected result with the positional
        # arguments passed to misras_gries.
        scenarios = (
            ({'4': 5}, ([1, 4, 4, 4, 5, 4, 4],)),
            ({'1': 4}, ([0, 0, 0, 1, 1, 1, 1],)),
            ({'0': 4, '1': 3}, ([0, 0, 0, 0, 1, 1, 1, 2, 2], 3)),
        )
        for expected, arguments in scenarios:
            self.assertEqual(expected, misras_gries(*arguments))

    def test_misra_incorrect(self):
        # Streams in which no element is reported, so None is expected.
        streams = (
            [1, 2, 5, 4, 5, 4, 4, 5, 4, 4, 5],
            [0, 0, 0, 2, 1, 1, 1],
            [0, 0, 0, 1, 1, 1],
        )
        for stream in streams:
            self.assertEqual(None, misras_gries(stream))

class TestOneSparse(unittest.TestCase):
def test_one_sparse_correct(self):
Expand All @@ -12,4 +27,4 @@ def test_one_sparse_correct(self):
def test_one_sparse_incorrect(self):
self.assertEqual(None,one_sparse([(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'+'),(1,'+')])) #Two values remaining
self.assertEqual(None,one_sparse([(2,'+'),(2,'+'),(2,'+'),(2,'+'),(2,'-'),(2,'-'),(2,'-'),(2,'-')])) # No values remaining
self.assertEqual(None,one_sparse([(2,'+'),(2,'+'),(4,'+'),(4,'+')])) # Bitsum sum of sign is inccorect
self.assertEqual(None,one_sparse([(2,'+'),(2,'+'),(4,'+'),(4,'+')])) # Bitsum sum of sign is inccorect

0 comments on commit e685f8d

Please sign in to comment.