Skip to content

Commit

Permalink
improve typing coverage
Browse files Browse the repository at this point in the history
  • Loading branch information
fgregg committed Aug 13, 2020
1 parent 6c85d53 commit 054d30e
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 12 deletions.
4 changes: 2 additions & 2 deletions dedupe/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ class TrainingData(TypedDict):


 class Classifier(Protocol):
-    def fit(self, Any) -> None:
+    def fit(self, X: object, y: object) -> None:
         ...

-    def predict_proba(self, Any) -> Any:
+    def predict_proba(self, X: object) -> Any:
         ...
19 changes: 12 additions & 7 deletions dedupe/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -483,7 +483,7 @@ def join(self,
             links = self.one_to_one(pair_scores, threshold)
         elif constraint == 'many-to-one':
             links = self.many_to_one(pair_scores, threshold)
-        elif constraint == 'many-to-many':
+        else:
             links = pair_scores[pair_scores['score'] > threshold]

         links = list(links)
Expand Down Expand Up @@ -880,10 +880,15 @@ def _format_search_results(self,
         seen: Set[RecordID] = set()

         for result in results:
-            a = None
-            prepared_result = []
+            a: Optional[RecordID] = None
+            b: RecordID
+            score: float
+            prepared_result: List[Tuple[RecordID, float]] = []
             for (a, b), score in result: # type: ignore
                 prepared_result.append((b, score))
+
+            assert a is not None
+
             yield a, tuple(prepared_result)
             seen.add(a)

Expand Down Expand Up @@ -1085,13 +1090,13 @@ def _writeIndices(self, file_obj: BinaryIO) -> None:
         canopies = {}
         for full_predicate in self.predicates:
             for predicate in full_predicate:
-                if hasattr(predicate, 'index') and predicate.index:
-                    doc_to_ids[predicate] = dict(predicate.index._doc_to_id)
+                if hasattr(predicate, 'index') and predicate.index: # type: ignore
+                    doc_to_ids[predicate] = dict(predicate.index._doc_to_id) # type: ignore
                     if hasattr(predicate, "canopy"):
-                        canopies[predicate] = predicate.canopy
+                        canopies[predicate] = predicate.canopy # type: ignore
                     else:
                         try:
-                            indices[predicate] = predicate.index._index
+                            indices[predicate] = predicate.index._index # type: ignore
                         except AttributeError:
                             pass

Expand Down
11 changes: 8 additions & 3 deletions dedupe/training.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,22 @@
 import logging
 import collections
 import functools
+from abc import ABC, abstractmethod

 from . import blocking, predicates, core

 logger = logging.getLogger(__name__)


-class BlockLearner(object):
+class BlockLearner(ABC):
     def learn(self, matches, recall):
         '''
         Takes in a set of training pairs and predicates and tries to find
         a good set of blocking rules.
         '''
-        comparison_count = self.comparison_count
+        comparison_count = self.comparison_count # type: ignore

-        dupe_cover = Cover(self.blocker.predicates, matches)
+        dupe_cover = Cover(self.blocker.predicates, matches) # type: ignore
         dupe_cover.compound(2)
         dupe_cover.intersection_update(comparison_count)

Expand Down Expand Up @@ -105,6 +106,10 @@ def __call__(self, compound_predicate):

         return a_cover & self.cover[b]

+    @abstractmethod
+    def estimate(self, comparisons):
+        ...
+

 class DedupeBlockLearner(BlockLearner):

Expand Down

0 comments on commit 054d30e

Please sign in to comment.