Skip to content

Commit

Permalink
simpler intersection
Browse files (browse the repository at this point in the history)
  • Loading branch information
fgregg committed Jun 17, 2018
1 parent cbbd3d9 commit 221ef1a
Showing 1 changed file with 6 additions and 22 deletions.
28 changes: 6 additions & 22 deletions dedupe/training.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -422,18 +422,6 @@ def sort_key(x):
@functools.total_ordering
class Counter(collections.Counter):

def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
if kwargs.get('cache', True):
self.ones = set()
self.multiples = {}
for k, v in self.items():
if v == 1:
self.ones.add(k)
else:
self.multiples[k] = v
self._key_set = set(self.multiples.keys())

def __le__(self, other):
return self.keys() <= other.keys()

Expand All @@ -445,18 +433,14 @@ def __eq__(self, other):

def __and__(self, other):

common = dict.fromkeys(self.ones, 1)

both_multiple = ((k, self.multiples[k] * other.multiples[k])
for k in self._key_set & other._key_set)
other_ones = ((k, self[k]) for k in self._key_set & other.ones)
self_ones = ((k, other[k]) for k in self.ones & other._key_set)
if len(self) <= len(other):
smaller, larger = self, other
else:
smaller, larger = other, self

common.update(itertools.chain(both_multiple,
other_ones,
self_ones))
common = {k: v * larger[k] for k, v in smaller.items() if k in larger}

return common
return Counter(common)



Expand Down

0 comments on commit 221ef1a

Please sign in to comment.