Skip to content

Commit 37920fb

Browse files
wrichert authored and luispedro committed
py3-compliant; help message for missing SentiWordNet
1 parent 2f8ee9e commit 37920fb

File tree

1 file changed

+13
-7
lines changed

1 file changed

+13
-7
lines changed

ch06/utils.py

+13 -7
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
# It is made available under the MIT License
77

88
import os
9+
import sys
910
import collections
1011
import csv
1112
import json
@@ -57,7 +58,7 @@ def load_sanders_data(dirname=".", line_count=-1):
5758
try:
5859
tweet = json.load(open(tweet_fn, "r"))
5960
except IOError:
60-
print("Tweet '%s' not found. Skip."%tweet_fn)
61+
print(("Tweet '%s' not found. Skip." % tweet_fn))
6162
continue
6263

6364
if 'text' in tweet and tweet['user']['lang'] == "en":
@@ -84,14 +85,14 @@ def plot_pr(auc_score, name, phase, precision, recall, label=None):
8485
pylab.title('P/R curve (AUC=%0.2f) / %s' % (auc_score, label))
8586
filename = name.replace(" ", "_")
8687
pylab.savefig(os.path.join(CHART_DIR, "pr_%s_%s.png" %
87-
(filename, phase)), bbox_inches="tight")
88+
(filename, phase)), bbox_inches="tight")
8889

8990

9091
def show_most_informative_features(vectorizer, clf, n=20):
9192
c_f = sorted(zip(clf.coef_[0], vectorizer.get_feature_names()))
92-
top = zip(c_f[:n], c_f[:-(n + 1):-1])
93+
top = list(zip(c_f[:n], c_f[:-(n + 1):-1]))
9394
for (c1, f1), (c2, f2) in top:
94-
print "\t%.4f\t%-15s\t\t%.4f\t%-15s" % (c1, f1, c2, f2)
95+
print("\t%.4f\t%-15s\t\t%.4f\t%-15s" % (c1, f1, c2, f2))
9596

9697

9798
def plot_log():
@@ -119,7 +120,7 @@ def plot_feat_importance(feature_names, clf, name):
119120
inds = np.argsort(coef)
120121
f_imp = f_imp[inds]
121122
coef = coef[inds]
122-
xpos = np.array(range(len(coef)))
123+
xpos = np.array(list(range(len(coef))))
123124
pylab.bar(xpos, coef, width=1)
124125

125126
pylab.title('Feature importance for %s' % (name))
@@ -181,8 +182,13 @@ def plot_bias_variance(data_sizes, train_errors, test_errors, name):
181182
def load_sent_word_net():
182183

183184
sent_scores = collections.defaultdict(list)
185+
sentiwordnet_path = os.path.join(DATA_DIR, "SentiWordNet_3.0.0_20130122.txt")
184186

185-
with open(os.path.join(DATA_DIR, "SentiWordNet_3.0.0_20130122.txt"), "r") as csvfile:
187+
if not os.path.exists(sentiwordnet_path):
188+
print("Please download SentiWordNet_3.0.0 from http://sentiwordnet.isti.cnr.it/download.php, extract it and put it into the data directory")
189+
sys.exit(1)
190+
191+
with open(sentiwordnet_path, 'r') as csvfile:
186192
reader = csv.reader(csvfile, delimiter='\t', quotechar='"')
187193
for line in reader:
188194
if line[0].startswith("#"):
@@ -200,7 +206,7 @@ def load_sent_word_net():
200206
term = term.replace("-", " ").replace("_", " ")
201207
key = "%s/%s" % (POS, term.split("#")[0])
202208
sent_scores[key].append((float(PosScore), float(NegScore)))
203-
for key, value in sent_scores.iteritems():
209+
for key, value in sent_scores.items():
204210
sent_scores[key] = np.mean(value, axis=0)
205211

206212
return sent_scores

0 commit comments

Comments
 (0)