"""
CONFIG
======
Starts the server for upvote and sentence prediction. To run:
python server.py config.json
Keys in config.json should exactly match the args to the
init_* functions defined below.
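
A sketch of a config.json, for illustration only: the reducer and
learner names are placeholders that must match keys of the REDUCERS
and LEARNERS maps imported from main, and the remaining values are
examples (ngram size and dim mirror the defaults in this file; the
port matches the curl examples below), not recommendations:

    {
      "sentence": {"subreddit": "data/Liberal", "ngram": 6, "random": false},
      "upvote": {"subreddit": "data/Liberal", "feature": "ngram", "ngram": 2,
                 "lda": 20, "reducer": "<reducer-name>", "learner": "<learner-name>",
                 "dim": 1000},
      "server": {"port": 18888}
    }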

SENTENCE PREDICTION
===================
To get a sentence prediction, send a GET request to /sentence?word=WORD.
The response body is the next word alone, with no trailing newline, e.g.

    COMMAND:  curl --request GET "localhost:18888/sentence?word=office"
    RESPONSE: defeating

UPVOTE PREDICTION
=================
To get an upvote prediction, send a GET request to /up?comment=COMMENT.
The response is the predicted number of upvotes as a float. COMMENT must
be URL-encoded, with spaces as %20, e.g.

    COMMAND:  curl --request GET "localhost:18888/up?comment=obama%20is%20awesome"
    RESPONSE: 31.0
"""

import argparse
import json

import pandas as pd
import tornado.ioloop
import tornado.web

import features
import learners
import reduction
# These star imports provide load_subreddit, clean_comment, make_ngram_freq,
# find_next_word, and the REDUCERS and LEARNERS maps used below.
from sentence import *
from main import *

# Sentence-prediction state, populated by init_sentence
SENT_NGRAM = 6
SENT_DATAFILE = "data/Liberal"
SENT_RANDOM = False
SENT_NGRAM_FREQ = None

# Upvote-prediction state, populated by init_upvote
UP_DF = None
UP_FEATURE = None
UP_REDUCER = None
UP_LEARNER = None
UP_DIM = 1000


def init_sentence(subreddit, ngram, random):
    """
    Initializes the ngram model for sentence prediction.
    """
    global SENT_DATAFILE, SENT_NGRAM, SENT_RANDOM, SENT_NGRAM_FREQ
    SENT_DATAFILE = subreddit
    SENT_NGRAM = ngram
    SENT_RANDOM = random
    model = features.NGramModel(SENT_NGRAM)
    print("N used for ngram: %d" % SENT_NGRAM)
    df = load_subreddit(SENT_DATAFILE)
    print("loaded: %s" % SENT_DATAFILE)
    # Make the training set; only X is needed for the frequency table
    print("making training data...")
    X_train, _ = model.make_training_xy(df)
    # Frequency table used to pick the most likely next word for a root word
    SENT_NGRAM_FREQ = make_ngram_freq(X_train, model)
    print("sentence init done")


def train_upvotes(df, model, reducer, learner):
    """
    Generates a training set, reduces its dimensionality, and trains the
    learner on the reduced training set.
    """
    X_train, Y_train = model.make_training_xy(df)
    reducer.fit(X_train, Y_train)
    X_train_red = reducer.transform(X_train)
    learner.train(X_train_red, Y_train)


def predict_upvotes(comment, df, model, reducer, learner):
    """
    Returns the predicted number of upvotes for a single comment.
    """
    comment = [clean_comment(comment)]
    # One-row frame tagged with the training subreddit, so the feature
    # model sees the same schema it was trained on
    new_df = pd.DataFrame({'body': comment, 'subreddit': [df['subreddit'][0]]})
    X_test = model.data_to_x(new_df)
    X_test_red = reducer.transform(X_test)
    Y_test = learner.predict(X_test_red)
    # Map the normalized prediction back to an upvote count
    upvotes = model.y_to_label(df, Y_test)
    return upvotes[0]


def init_upvote(subreddit, feature, ngram, lda, reducer, learner, dim):
    """
    Initializes the models for predicting upvotes.
    """
    global UP_DF, UP_FEATURE, UP_REDUCER, UP_LEARNER, UP_DIM
    print("subreddit file:", subreddit)
    UP_DF = load_subreddit(subreddit)
    name = UP_DF["subreddit"][0]
    print("subreddit:", name)
    print("num rows:", len(UP_DF.index))
    print("max up:", features.denormalize_scores([1.], name))
    if feature == "ngram":
        UP_FEATURE = features.NGramModel(ngram)
    elif feature == "lda":
        UP_FEATURE = features.LdaFeatureModel(lda)
    else:
        raise ValueError("unknown feature model: %s" % feature)
    # REDUCERS and LEARNERS map config names to classes
    UP_REDUCER = REDUCERS[reducer](dim)
    UP_LEARNER = LEARNERS[learner]()
    UP_DIM = dim
    print("training models...")
    train_upvotes(UP_DF, UP_FEATURE, UP_REDUCER, UP_LEARNER)
    print("upvote init done")


class SentenceHandler(tornado.web.RequestHandler):
    """
    Handler that takes a word and returns the predicted next word.
    """
    def set_default_headers(self):
        # TODO: change this to your own domain
        self.set_header("Access-Control-Allow-Origin", "http://cs109.joeong.com")

    def get(self):
        word = self.get_argument("word", "")
        word = clean_comment(word)
        if word == "":
            next_word = ""
        else:
            next_word = find_next_word(word, SENT_NGRAM_FREQ, random=SENT_RANDOM)
        print("[X] SENTENCE REQUEST: %s, RESPONSE: %s" % (word, next_word))
        self.write(next_word)


class UpHandler(tornado.web.RequestHandler):
    """
    Handler that takes a comment and returns the predicted upvote count.
    """
    def set_default_headers(self):
        # TODO: change this to your own domain
        self.set_header("Access-Control-Allow-Origin", "http://cs109.joeong.com")

    def get(self):
        comment = self.get_argument("comment", "")
        comment = clean_comment(comment)
        if comment == "":
            upvote = 0.0
        else:
            upvote = predict_upvotes(comment, UP_DF, UP_FEATURE, UP_REDUCER, UP_LEARNER)
        print("[X] UP REQUEST: %s, UPVOTES: %s" % (comment, upvote))
        self.write(str(upvote))


application = tornado.web.Application([
    (r"/sentence", SentenceHandler),
    (r"/up", UpHandler),
])


def init_server(port):
    """
    Starts the Tornado server listening on the given port.
    """
    print("starting server...")
    application.listen(port)
    tornado.ioloop.IOLoop.instance().start()
if __name__ == "__main__":
parser = argparse.ArgumentParser("sentence predictor server")
parser.add_argument("config", help="path to config file", type=str)
args = vars(parser.parse_args())
configfile = args["config"]
print "using config file %s ..." % configfile
f = open(configfile, "rb")
config = json.loads(f.read())
f.close()
print "configuration:"
print json.dumps(config, indent = 2)
print ">>>>>"
init_sentence(**config["sentence"])
print ">>>>>"
init_upvote(**config["upvote"])
print ">>>>>"
init_server(**config["server"])