Updating documentation etc.
Fossj117 committed Jul 30, 2014
1 parent 548b8e2 commit 5a470d6
Showing 5 changed files with 16 additions and 73 deletions.
56 changes: 0 additions & 56 deletions TODO.md

This file was deleted.

8 changes: 4 additions & 4 deletions classes/business.py
@@ -145,10 +145,11 @@ def aspect_summary(self, aspect):
         map to a list of positive sentences (strings) and
         a list of negative sentences (strings) correspondingly.
-        Gets summary for a *particular* aspect.
+        Gets summary for a *particular* aspect. Summary includes primarily
+        the sorted positive/negative sentences mentioning this aspect.
         """
 
-        OPIN_THRESH = 0.7
+        OPIN_THRESH = 0.75
+        HARD_MIN_OPIN_THRESH = 0.6
 
         POS_THRESH = 0.85
@@ -198,7 +199,7 @@ def aspect_summary(self, aspect):
 
     def get_sents_by_aspect(self, aspect):
         """
-        INPUT:
+        INPUT: Business, string (aspect)
         OUTPUT: List of Sentence objects
         """
         return [sent for review in self for sent in review if sent.has_aspect(aspect)]
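
The one-liner above leans on an iteration protocol implied by this file: a Business iterates over its Review objects, and each Review iterates over its Sentence objects. Unrolled, the same logic reads (an equivalent sketch, not code from the commit):

    def get_sents_by_aspect(self, aspect):
        matches = []
        for review in self:        # Business yields Review objects
            for sent in review:    # each Review yields Sentence objects
                if sent.has_aspect(aspect):
                    matches.append(sent)
        return matches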
@@ -228,7 +229,6 @@ def filter_all_asps(self, asps):
         INPUT: Business
         OUTPUT: list of strings
         """
-        # TODO if needed
         # filter aspects that are too close to the restaurant's name?
         return asps
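
HARD_MIN_OPIN_THRESH is new in this commit, but the code that consumes it falls outside the visible hunk. One plausible reading, offered only as a sketch and not confirmed by this diff, is a two-tier filter: keep sentences whose opinion score clears OPIN_THRESH, and relax to the hard floor when the primary cutoff leaves too few sentences. The opinion_score attribute and min_count parameter below are assumptions:

    OPIN_THRESH = 0.75           # primary cutoff for opinionated sentences
    HARD_MIN_OPIN_THRESH = 0.6   # absolute floor if the primary cutoff is too strict

    def select_opinionated(sents, min_count=3):
        # Hypothetical helper: prefer sentences passing the primary threshold.
        strong = [s for s in sents if s.opinion_score >= OPIN_THRESH]
        if len(strong) >= min_count:
            return strong
        # Otherwise relax to the hard minimum rather than returning almost nothing.
        return [s for s in sents if s.opinion_score >= HARD_MIN_OPIN_THRESH]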

19 changes: 9 additions & 10 deletions classes/sentence.py
@@ -17,10 +17,6 @@ class Sentence(object):
     # Tokenizer for converting a raw string (sentence) to a list of strings (words)
     WORD_TOKENIZER = MyPottsTokenizer(preserve_case=False)
 
-    #STANFORD_POS_TAGGER = POSTagger(
-    #    '/Users/jeff/Zipfian/opinion-mining/references/resources/stanford-pos/stanford-postagger-2014-06-16/models/english-bidirectional-distsim.tagger',
-    #    '/Users/jeff/Zipfian/opinion-mining/references/resources/stanford-pos/stanford-postagger-2014-06-16/stanford-postagger.jar')
-
     # Lemmatizer
     LEMMATIZER = WordNetLemmatizer()

@@ -74,8 +70,6 @@ def pos_tag(self, tokenized_sent):
         the standard NLTK POS tagger.
         """
 
-        # Using Stanford tagger:
-        #return Sentence.STANFORD_POS_TAGGER.tag(tokenized_sent)
         return nltk.pos_tag(tokenized_sent)
 
     def lemmatize(self, pos_tagged_sent):
@@ -99,10 +93,11 @@ def lemmatize(self, pos_tagged_sent):
 
     def get_features(self, asarray = False):
         """
-        INPUT: Sentence
+        INPUT: Sentence, boolean
        OUTPUT: dict mapping string to ints/floats
 
-        Returns an (ordered) feature dict for this Sentence
+        Returns an (ordered) feature dict for this Sentence. If asarray is
+        True, returns an np feature array instead (unlabeled).
         """
 
         if not hasattr(self, 'features'):
@@ -118,13 +113,17 @@ def compute_aspects(self):
         """
         INPUT: Sentence
         OUTPUT: list of lists of strings (i.e. list of aspects)
+
+        Get the candidate aspects contained in this sentence.
         """
         return Sentence.ASP_EXTRACTOR.get_sent_aspects(self)
 
     def has_aspect(self, asp_string):
         """
         INPUT: Sentence, string (aspect)
         OUTPUT: boolean
+
+        Return true if this sentence contains the given aspect string.
         """
 
         # re-tokenize the aspect
@@ -138,8 +137,8 @@ def encode(self):
         INPUT: Sentence
         OUTPUT: dict of this sentence's data
 
-        Encodes this sentence and associated metadata
-        to insert into database.
+        Encodes this sentence and associated metadata, for
+        insertion into the database.
         """
         return {'text': self.raw,
                 'user': self.review.user_name
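
The body of has_aspect is truncated in the hunk above; only the "# re-tokenize the aspect" comment survives. A minimal sketch of what that comment suggests, where the tokenize call and the tokenized_sent attribute are assumptions rather than code from the commit:

    def has_aspect(self, asp_string):
        # Split the aspect with the same tokenizer applied to the sentence,
        # so multi-word aspects compare token-for-token.
        asp_toks = Sentence.WORD_TOKENIZER.tokenize(asp_string)
        # Treat the aspect as present if every one of its tokens appears.
        return all(tok in self.tokenized_sent for tok in asp_toks)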
4 changes: 2 additions & 2 deletions classes/transformers/asp_extractors.py
@@ -16,12 +16,12 @@ class SentenceAspectExtractor():
 
     CHUNKER = nltk.RegexpParser(GRAMMAR)
 
-    _my_stopword_additions = ["it's", "i'm", "star", "", "time", "night", "try", "friend", "sure", "times", "way", "friends"]
+    _my_stopword_additions = ["it's", "i'm", "star", "", "time", "night", "try", "sure", "times", "way", "friends"]
     STOPWORDS = set(stopwords.words('english') + _my_stopword_additions)
 
     PUNCT_RE = re.compile("^[\".:;!?')(/]$")
 
-    FORBIDDEN = {'great', 'good', 'time', 'friend'}
+    FORBIDDEN = {'great', 'good', 'time', 'friend', 'way', 'friends'}
 
     def __init__(self):
         pass
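
For context on how these constants fit together: CHUNKER applies a regular-expression grammar over POS tags to extract candidate noun phrases, which are then screened against STOPWORDS and FORBIDDEN. GRAMMAR itself is outside the hunk, so the pattern below is an assumed stand-in:

    import nltk

    GRAMMAR = "NP: {<JJ>*<NN.*>+}"   # assumed; the real GRAMMAR is not shown here
    CHUNKER = nltk.RegexpParser(GRAMMAR)

    tagged = [('the', 'DT'), ('garlic', 'NN'), ('noodles', 'NNS'),
              ('were', 'VBD'), ('amazing', 'JJ')]
    tree = CHUNKER.parse(tagged)

    # Collect the tokens under each NP chunk as a candidate aspect.
    candidates = [[tok for tok, pos in subtree.leaves()]
                  for subtree in tree.subtrees()
                  if subtree.label() == 'NP']
    print(candidates)   # [['garlic', 'noodles']]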
2 changes: 1 addition & 1 deletion main.py
@@ -30,7 +30,7 @@ def main():
 
     print "Loading data..."
     df = read_data()
-    bus_ids = df.business_id.unique()[-50:-48]
+    bus_ids = df.business_id.unique()[21:]
 
     for bus_id in bus_ids:

