Skip to content

Commit

Permalink
Merge pull request #1 from AJRenold/multiclass
Browse files Browse the repository at this point in the history
Multiclass
  • Loading branch information
AJRenold committed May 4, 2013
2 parents 4362b2b + 3bbcfdf commit ebd4e6d
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 25 deletions.
5 changes: 3 additions & 2 deletions bayes.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -81,7 +81,6 @@ def create_class_descriptions(self,class_labels):
classes[str(item)] += 1
classes['total'] += 1


prob = {}
for label in labels:
prob[label] = float(classes[label]) / classes['total']
Expand Down Expand Up @@ -152,7 +151,9 @@ def create_vocab(self, tokenized_records, class_labels):
vocab_count[class_labels[i]] += 1
vocab_count['total'] += 1

-vocab, vocab_count = self.modify_vocab(vocab, vocab_count)
+
+#vocab, vocab_count = self.modify_vocab(vocab, vocab_count)
+
return vocab, vocab_count

def word_var(self,word):
Expand Down
18 changes: 13 additions & 5 deletions test_bayes_yelp.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -35,7 +35,10 @@

for review in islice(reviews,None,200000):
if 'Restaurants' in business_dict[review['business_id']]['categories']:
-if review['votes']['useful'] >= 2:
+if review['votes']['useful'] >= 4:
+data.append(review['text'])
+labels.append('2')
+elif review['votes']['useful'] < 4 and review['votes']['useful'] >= 1:
data.append(review['text'])
labels.append('1')
elif review['votes']['useful'] == 0:
Expand All @@ -45,7 +48,10 @@
for review in islice(reviews,200000,None):
if 'Restaurants' in business_dict[review['business_id']]['categories']:

-if review['votes']['useful'] >= 1:
+if review['votes']['useful'] >= 4:
+test.append(review['text'])
+correct_labels.append('2')
+elif review['votes']['useful'] < 4 and review['votes']['useful'] >= 1:
test.append(review['text'])
correct_labels.append('1')
elif review['votes']['useful'] == 0:
Expand Down Expand Up @@ -81,6 +87,8 @@
matches['not-labeled'][correct_labels[i]] += 1
matches['total'][correct_labels[i]] += 1

-#print matches
-print 'TP',matches['labeled']['1'],'FN',matches['not-labeled']['1'], 'class 1 percent correct', (float(matches['labeled']['1']) / matches['total']['1'])
-print 'FP',matches['not-labeled']['0'], 'TN',matches['labeled']['0'], 'class 0 percent correct', (float(matches['labeled']['0']) / matches['total']['0'] )
+print matches
+print 'class 2 percent correct', (float(matches['labeled']['2']) / matches['total']['2'])
+print 'class 1 percent correct', (float(matches['labeled']['1']) / matches['total']['1'] )
+print 'class 0 percent correct', (float(matches['labeled']['0']) / matches['total']['0'] )

76 changes: 58 additions & 18 deletions yelp_reviews.ipynb

Large diffs are not rendered by default.

0 comments on commit ebd4e6d

Please sign in to comment.