Skip to content

Commit

Permalink
Merge pull request seatgeek#27 from olethanh/docstrings
Browse files Browse the repository at this point in the history
[enh] Add docstrings to process.py
  • Loading branch information
josegonzalez committed Jun 15, 2013
2 parents e032ed7 + f36e2e0 commit 1ab66a3
Showing 1 changed file with 26 additions and 28 deletions.
54 changes: 26 additions & 28 deletions fuzzywuzzy/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,20 +29,20 @@
import itertools
import utils

########################################
# Find Best Matches In List Of Choices #
########################################

def extract(query, choices, processor=None, scorer=None, limit=5):

# choices = a list of objects we are attempting to extract values from
# query = an object representing the thing we want to find
# scorer f(OBJ, QUERY) --> INT. We will return the objects with the highest score
# by default, we use score.WRatio() and both OBJ and QUERY should be strings
# processor f(OBJ_A) --> OBJ_B, where the output is an input to scorer
# for example, "processor = lambda x: x[0]" would return the first element in a collection x (of, say, strings)
# this would then be used in the scoring collection

"""Find best matches in a list of choices, return a list of tuples containing the match and its score.
Arguments:
query -- an object representing the thing we want to find
choices -- a list of objects we are attempting to extract values from
scorer -- f(OBJ, QUERY) --> INT. We will return the objects with the highest score
by default, we use score.WRatio() and both OBJ and QUERY should be strings
processor -- f(OBJ_A) --> OBJ_B, where the output is an input to scorer
for example, "processor = lambda x: x[0]" would return the first element
in a collection x (of, say, strings) this would then be used in the scoring collection
by default, we use utils.full_process()
"""
if choices is None or len(choices) == 0:
return []

Expand All @@ -65,33 +65,31 @@ def extract(query, choices, processor=None, scorer=None, limit=5):
sl.sort(key=lambda i: i[1], reverse=True)
return sl[:limit]

######################################################
# Find Best Matches Above A Score In List Of Choices #
######################################################

def extractBests(query, choices, processor=None, scorer=None, score_cutoff=0, limit=5):
    """Find best matches above a score in a list of choices.

    Convenience wrapper around extract(); see extract() for the meaning of
    query, choices, processor and scorer.

    Arguments:
    score_cutoff -- a choice whose score is less than or equal to this value
                    is not included in the result list
    limit -- forwarded to extract(); the cutoff is applied to the list
             extract() returns, so at most that many matches come back

    Returns a list of (match, score) tuples, or an empty list when nothing
    scores above score_cutoff.
    """
    candidates = extract(query, choices, processor, scorer, limit)
    if len(candidates) == 0:
        return []

    def beats_cutoff(match):
        # Each entry is a (choice, score) pair; the score sits at index 1.
        return match[1] > score_cutoff

    # Keep only the leading run of entries above the cutoff: scanning stops
    # at the first entry that fails, so later entries are never examined.
    return list(itertools.takewhile(beats_cutoff, candidates))

##########################
# Find Single Best Match #
##########################

def extractOne(query, choices, processor=None, scorer=None, score_cutoff=0):
"""Find the best match above a score in a list of choices, return a tuple containing the match and its score
if it's above the threshold or None.
Convenience method which returns the single best choice, see extract() for full arguments list
Optional parameter: score_cutoff.
If the best choice has a score of less than or equal to score_cutoff we will return None (intuition: not a good enough match)
# convenience method which returns the single best choice
# optional parameter: score_cutoff.
# If the best choice has a score of less than or equal to score_cutoff
# we will return none (intuition: not a good enough match)
"""

best_list = extract(query, choices, processor, scorer, limit=1)
if len(best_list) > 0:
Expand Down

0 comments on commit 1ab66a3

Please sign in to comment.