forked from kamens/gae_bingo
-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_tests.py
480 lines (382 loc) · 19.2 KB
/
run_tests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
import ast
import base64
import cookielib
import json
import os
import random
import time
import urllib
import urllib2
# TODO: convert this unit test file to the correct unit
# test pattern used by the rest of our codebase

# Base URL (scheme, host and port) that test_response() prepends to every
# request path.
TEST_GAE_HOST = "http://localhost:8111"

# Opener created by the most recent test_response() call that built a new
# cookie jar; reused when a caller passes use_last_cookies=True.
# None until the first request is made.
last_opener = None
def test_response(step="", data=None, use_last_cookies=False, bot=False, url=None):
    """Send one GET request to the test server and return its decoded JSON body.

    Args:
        step: Name of the test step to run. Only used when ``url`` is None, in
            which case it is sent as the ``step`` query parameter of
            ``/gae_bingo/tests/run_step``.
        data: Optional mapping of extra query parameters for the step request.
            It is copied before ``step`` is added, so the caller's object is
            never modified.
        use_last_cookies: When True and a previous opener exists, reuse it (and
            therefore its cookie jar). Otherwise a fresh opener with an empty
            cookie jar is built and stored in the module-level ``last_opener``.
        bot: When a fresh opener is built, give it a User-agent header that
            contains "Googlebot".
        url: Optional path (including any query string) to request instead of
            the run_step URL; ``step`` and ``data`` are ignored when it is set.

    Returns:
        The JSON-decoded response body, or None if the body is not valid JSON.
    """
    global last_opener

    if not use_last_cookies or last_opener is None:
        cj = cookielib.CookieJar()
        last_opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))

        if bot:
            last_opener.addheaders = [(
                'User-agent',
                'monkeysmonkeys Googlebot monkeysmonkeys')]

    if url is None:
        # Work on a private copy: the previous version used a mutable default
        # ({}) and wrote "step" straight into it, so both the shared default
        # and any dict passed by the caller were modified on every call.
        params = dict(data) if data else {}
        params["step"] = step
        url = "/gae_bingo/tests/run_step?%s" % urllib.urlencode(params)

    req = last_opener.open("%s%s" % (TEST_GAE_HOST, url))

    try:
        response = req.read()
    finally:
        # Always release the connection, even if reading the body fails.
        req.close()

    try:
        return json.loads(response)
    except ValueError:
        # Non-JSON bodies (e.g. HTML pages reached via ``url``) yield None.
        return None
def run_tests():
    """Run the gae_bingo end-to-end scenario against the server at TEST_GAE_HOST.

    Every step is an HTTP request issued through test_response(), and each
    expectation is a bare ``assert``, so the first failed expectation raises
    AssertionError. Steps rely on server-side state left behind by earlier
    steps, so their order matters.

    The run sleeps for 40 seconds in total (two 10-second waits plus one
    20-second wait) to give task queues time to run, and prints
    "Tests successful." once every assertion has passed.
    """

    # Delete all experiments (response should be count of experiments left)
    assert(test_response("delete_all") == 0)

    # Ensure the identity works correctly and consistently after login.
    for i in xrange(5):
        # Randomly generate an ID so we have a good chance of having a new one.
        # If that assumption is wrong, the test will fail--clear
        # the datastore to increase chances of working.
        user = base64.urlsafe_b64encode(os.urandom(30)) + "%40example.com"
        test_response(url="/") # Load / to get ID assigned
        firstID = test_response("get_identity", use_last_cookies=True) # get ID
        url = "/_ah/login?email=" + user + "&action=Login&continue=%2Fpostlogin"
        test_response(use_last_cookies=True, url=url)
        # Now make sure the ID is consistent
        assert(firstID == test_response("get_identity", use_last_cookies=True))

    assert(test_response("delete_all") == 0) # Clear out experiments this made

    # We're going to try to add a conversion to the experiment
    assert(test_response("participate_in_hippos") in [True, False])
    assert(test_response("convert_in",
                         {"conversion_name":
                          "hippos_binary"}, use_last_cookies=True))

    # Make sure participant counts are right
    assert(test_response("count_participants_in",
                         {"experiment_name": "hippos (hippos_binary)"},
                         use_last_cookies=True)
           == 1)
    assert(test_response("count_participants_in",
                         {"experiment_name": "hippos (hippos_counting)"},
                         use_last_cookies=True)
           == 1)

    # Make sure we have the right number of conversions
    dict_conversions_server = test_response(
        "count_conversions_in",
        {"experiment_name": "hippos (hippos_binary)"},
        use_last_cookies=True)
    assert(sum(dict_conversions_server.values()) == 1)
    dict_conversions_server = test_response(
        "count_conversions_in",
        {"experiment_name": "hippos (hippos_counting)"},
        use_last_cookies=True)
    assert(sum(dict_conversions_server.values()) == 0)

    assert(test_response("add_conversions", use_last_cookies=True)
           in [True, False])
    assert(test_response("count_experiments", use_last_cookies=True) == 3)

    # make sure that we have the /right/ experiments
    # (the decoded response is fed to ast.literal_eval and then .keys(), so it
    # is expected to be the string form of a dict literal)
    assert(set(ast.literal_eval(test_response("get_experiments",
                                              use_last_cookies=True)).keys()) ==
           set(["hippos (hippos_binary)",
                "hippos (hippos_counting)",
                "hippos (rhinos_counting)"]))

    assert(test_response("convert_in",
                         {"conversion_name": "rhinos_counting"},
                         use_last_cookies=True))

    dict_conversions_server = test_response(
        "count_conversions_in",
        {"experiment_name": "hippos (hippos_binary)"})
    assert(sum(dict_conversions_server.values()) == 1)
    dict_conversions_server = test_response(
        "count_conversions_in",
        {"experiment_name": "hippos (hippos_counting)"},
        use_last_cookies=True)
    assert(sum(dict_conversions_server.values()) == 0)
    dict_conversions_server = test_response(
        "count_conversions_in",
        {"experiment_name": "hippos (rhinos_counting)"},
        use_last_cookies=True)
    assert(sum(dict_conversions_server.values()) == 1)

    # get rid of this test's data so it doesn't affect other tests
    assert(test_response("delete_all") == 0)

    # Now try the same, but with switching users
    assert(test_response("participate_in_hippos") in [True, False])
    assert(test_response("convert_in",
                         {"conversion_name":
                          "hippos_binary"}, use_last_cookies=True))
    # use_last_cookies=False builds a fresh cookie jar, i.e. a second visitor
    assert(test_response("participate_in_hippos", use_last_cookies=False)
           in [True, False])
    assert(test_response("add_conversions", use_last_cookies=True) in
           [True, False])
    assert(test_response("convert_in",
                         {"conversion_name":
                          "rhinos_counting"}, use_last_cookies=True))
    assert(test_response("count_participants_in",
                         {"experiment_name": "hippos (hippos_binary)"}) == 2)
    assert(test_response("count_participants_in",
                         {"experiment_name": "hippos (rhinos_counting)"}) == 1)
    dict_conversions_server = test_response(
        "count_conversions_in",
        {"experiment_name": "hippos (hippos_binary)"})
    assert(sum(dict_conversions_server.values()) == 1)
    dict_conversions_server = test_response(
        "count_conversions_in",
        {"experiment_name": "hippos (rhinos_counting)"})
    assert(sum(dict_conversions_server.values()) == 1)

    assert(test_response("delete_all") == 0)

    # Test constructing a redirect URL that converts in monkey and chimps
    redirect_url_monkeys = test_response("create_monkeys_redirect_url")
    assert(redirect_url_monkeys ==
           "/gae_bingo/redirect?continue=/gae_bingo" +
           "&conversion_name=monkeys")
    redirect_url_chimps = test_response("create_chimps_redirect_url")
    assert(redirect_url_chimps ==
           "/gae_bingo/redirect?continue=/gae_bingo&" +
           "conversion_name=chimps_conversion_1&" +
           "conversion_name=chimps_conversion_2")

    # Test participating in monkeys and chimps once,
    # and use previously constructed redirect URLs to convert
    assert(test_response("participate_in_monkeys") in [True, False])
    test_response(use_last_cookies=True, url=redirect_url_monkeys)
    assert(test_response("participate_in_chimpanzees") in [True, False])
    test_response(use_last_cookies=True, url=redirect_url_chimps)

    # Make sure there's a single participant and conversion in monkeys
    assert(test_response("count_participants_in",
                         {"experiment_name": "monkeys"})
           == 1)
    dict_conversions_server = test_response("count_conversions_in",
                                            {"experiment_name": "monkeys"})
    assert(sum(dict_conversions_server.values()) == 1)

    # Make sure there's a single participant and two conversions in chimps
    assert(test_response(
        "count_participants_in",
        {"experiment_name": "chimpanzees (chimps_conversion_1)"}) == 1)
    dict_conversions_server = test_response(
        "count_conversions_in",
        {"experiment_name":
         "chimpanzees (chimps_conversion_1)"})
    assert(sum(dict_conversions_server.values()) == 1)
    dict_conversions_server = test_response(
        "count_conversions_in",
        {"experiment_name":
         "chimpanzees (chimps_conversion_2)"})
    assert(sum(dict_conversions_server.values()) == 1)

    # Delete all experiments for next round of tests
    # (response should be count of experiments left)
    assert(test_response("delete_all") == 0)

    # Refresh bot's identity record so it doesn't pollute tests
    assert(test_response("refresh_identity_record", bot=True))

    # Participate in experiment A, check for correct alternative
    # values being returned,
    # each time with a fresh cookie jar (20 distinct participants)
    for i in range(0, 20):
        assert(test_response("participate_in_monkeys") in [True, False])

    assert(test_response("count_participants_in",
                         {"experiment_name": "monkeys"})
           == 20)

    # Identify as a bot a couple times (response should stay the same)
    bot_value = None
    for i in range(0, 5):
        value = test_response("participate_in_monkeys", bot=True)
        assert(value in [True, False])

        # Remember the first bot response and compare every later one to it
        if bot_value is None:
            bot_value = value

        assert(value == bot_value)

    # Check total participants in A (1 extra for bots)
    assert(test_response("count_participants_in",
                         {"experiment_name": "monkeys"}) == 21)

    # Participate in experiment B (responses should be "a" "b" or "c")
    for i in range(0, 15):
        assert(test_response("participate_in_gorillas") in ["a", "b", "c"])

    # Participate in experiment A,
    # using cookies half of the time to maintain identity
    # (odd iterations reuse the cookie jar created on the preceding even one)
    for i in range(0, 20):
        assert(test_response("participate_in_monkeys",
                             use_last_cookies=(i % 2 == 1))
               in [True, False])

    # Check total participants in A
    # (should've only added 10 more in previous step)
    assert(test_response("count_participants_in",
                         {"experiment_name": "monkeys"}) == 31)

    # Participate in A once more with a lot of followup,
    # persisting to datastore and flushing memcache between followups
    # (a fresh cookie jar is created only on iterations 0 and 5)
    for i in range(0, 10):
        assert(test_response("participate_in_monkeys",
                             use_last_cookies=(i not in [0, 5]))
               in [True, False])

        if i in [1, 6]:

            assert(test_response("persist", use_last_cookies=True))

            # Wait 10 seconds for task queues to run
            time.sleep(10)

            assert(test_response("flush_all_memcache",
                                 use_last_cookies=True))

    # NOTE: It's possible for this to fail sometimes--maybe a race condition?
    # TODO(kamens,josh): figure out why this happens? (Or just wait to not use
    # AppEngine any more)
    # Check total participants in A
    # (should've only added 2 more in previous step)
    assert(test_response("count_participants_in",
                         {"experiment_name": "monkeys"}) == 33)

    # Participate and convert in experiment A,
    # using cookies to tie participation to conversions,
    # tracking conversions-per-alternative
    dict_conversions = {}
    for i in range(0, 35):
        # str() of the decoded alternative is used as the tally key; the same
        # keys are later looked up in the server's conversion counts
        alternative_key = str(test_response("participate_in_monkeys"))
        assert(test_response("convert_in",
                             {"conversion_name": "monkeys"},
                             use_last_cookies=True))

        if not alternative_key in dict_conversions:
            dict_conversions[alternative_key] = 0
        dict_conversions[alternative_key] += 1

    # Check total conversions-per-alternative in A
    assert(len(dict_conversions) == 2)
    assert(35 == sum(dict_conversions.values()))

    dict_conversions_server = test_response("count_conversions_in",
                                            {"experiment_name": "monkeys"})
    assert(len(dict_conversions) == len(dict_conversions_server))

    for key in dict_conversions:
        assert(dict_conversions[key] == dict_conversions_server[key])

    # Participate in experiment B, using cookies to maintain identity
    # and making sure alternatives for B are stable per identity
    last_response = None
    for i in range(0, 20):
        # The first request always starts a new identity; afterwards cookies
        # are reused whenever randint(0, 2) returns 1 or 2.
        use_last_cookies = (last_response is not None and
                            random.randint(0, 2) > 0)

        current_response = test_response("participate_in_gorillas",
                                         use_last_cookies=use_last_cookies)

        # A new identity resets the baseline that later reuses are compared to
        if not use_last_cookies:
            last_response = current_response

        assert(current_response in ["a", "b", "c"])
        assert(last_response == current_response)

    # Participate in experiment C, which is a multi-conversion experiment,
    # and occasionally convert in *one* of the conversions
    expected_conversions = 0
    for i in range(0, 20):
        assert(test_response("participate_in_chimpanzees") in [True, False])

        if random.randint(0, 2) > 0:
            assert(test_response("convert_in",
                                 {"conversion_name": "chimps_conversion_2"},
                                 use_last_cookies=True))
            expected_conversions += 1

    # It's statistically possible but incredibly unlikely
    # for this to fail based on random.randint()'s behavior
    assert(expected_conversions > 0)

    # Make sure conversions for the 2nd conversion type
    # of this experiment are correct
    dict_conversions_server = test_response(
        "count_conversions_in",
        {"experiment_name":
         "chimpanzees (chimps_conversion_2)"})
    assert(expected_conversions == sum(dict_conversions_server.values()))

    # Make sure conversions for the 1st conversion type
    # of this experiment are empty
    dict_conversions_server = test_response(
        "count_conversions_in",
        {"experiment_name":
         "chimpanzees (chimps_conversion_1)"})
    assert(0 == sum(dict_conversions_server.values()))

    # Test that calling bingo multiple times for a single
    # user creates only one conversion (for a BINARY conversion type)
    assert(test_response("participate_in_chimpanzees") in [True, False])
    assert(test_response("convert_in",
                         {"conversion_name": "chimps_conversion_1"},
                         use_last_cookies=True))
    assert(test_response("convert_in",
                         {"conversion_name": "chimps_conversion_1"},
                         use_last_cookies=True))
    dict_conversions_server = test_response(
        "count_conversions_in",
        {"experiment_name":
         "chimpanzees (chimps_conversion_1)"})
    assert(1 == sum(dict_conversions_server.values()))

    # End experiment C, choosing a short-circuit alternative
    test_response("end_and_choose",
                  {"canonical_name": "chimpanzees", "alternative_number": 1})

    # Make sure short-circuited alternatives for
    # C's experiments are set appropriately
    for i in range(0, 5):
        assert(test_response("participate_in_chimpanzees") == False)

    # Test an experiment with a Counting type conversion
    # by converting multiple times for a single user
    assert(test_response("participate_in_hippos") in [True, False])

    # Persist to the datastore before Counting stress test
    assert(test_response("persist", use_last_cookies=True))

    # Wait 20 seconds for task queues to run
    time.sleep(20)

    # Hit Counting conversions multiple times
    for i in range(0, 20):

        if i % 3 == 0:
            # Stress things out a bit by flushing the memcache .incr()
            # counts of each hippo alternative
            assert(test_response("persist", use_last_cookies=True))
            assert(test_response("flush_hippo_counts_memcache",
                                 use_last_cookies=True))

        elif i % 5 == 0:
            # Stress things out even more flushing the core bingo memcache
            assert(test_response("flush_bingo_memcache",
                                 use_last_cookies=True))

        # Both conversions are attempted on every iteration; the checks below
        # expect 1 binary conversion and 20 counting conversions in total.
        assert(test_response("convert_in",
                             {"conversion_name": "hippos_binary"},
                             use_last_cookies=True))

        assert(test_response("convert_in",
                             {"conversion_name": "hippos_counting"},
                             use_last_cookies=True))

    dict_conversions_server = test_response(
        "count_conversions_in",
        {"experiment_name": "hippos (hippos_binary)"})
    assert(1 == sum(dict_conversions_server.values()))
    dict_conversions_server = test_response(
        "count_conversions_in",
        {"experiment_name":
         "hippos (hippos_counting)"})
    assert(20 == sum(dict_conversions_server.values()))

    # Participate in experiment D (weight alternatives),
    # keeping track of alternative returned count.
    dict_alternatives = {}
    for i in range(0, 75):
        alternative = test_response("participate_in_crocodiles")
        assert(alternative in ["a", "b", "c"])

        if not alternative in dict_alternatives:
            dict_alternatives[alternative] = 0
        dict_alternatives[alternative] += 1

    # Make sure weighted alternatives work -> should be a < b < c,
    # but they should all exist.
    #
    # Again, it is statistically possible for
    # the following asserts to occasionally fail during
    # these tests, but it should be exceedingly rare
    # if weighted alternatives are working properly.
    for key in ["a", "b", "c"]:
        assert(dict_alternatives.get(key, 0) > 0)
    assert(dict_alternatives.get("a", 0) < dict_alternatives.get("b", 0))
    assert(dict_alternatives.get("b", 0) < dict_alternatives.get("c", 0))

    # Check experiments count
    assert(test_response("count_experiments") == 7)

    # Test persist and load from DS
    assert(test_response("persist"))
    assert(test_response("flush_all_memcache"))

    # Check experiments and conversion counts
    # remain after persist and memcache flush
    assert(test_response("count_experiments") == 7)

    # expected_conversions still holds the tally from the experiment C loop
    dict_conversions_server = test_response(
        "count_conversions_in",
        {"experiment_name":
         "chimpanzees (chimps_conversion_2)"})
    assert(expected_conversions == sum(dict_conversions_server.values()))

    # Test archiving
    assert(test_response("archive_monkeys"))

    # Test lack of presence in normal list of experiments after archive
    assert("monkeys" not in test_response("get_experiments"))

    # Test presence in list of archived experiments
    assert("monkeys" in test_response("get_archived_experiments"))

    # Test participating in monkeys once again after archiving
    # and make sure there's only one participant
    assert(test_response("participate_in_monkeys") in [True, False])
    assert(test_response("count_participants_in",
                         {"experiment_name": "monkeys"})
           == 1)

    print "Tests successful."
# Run the full scenario only when this file is executed as a script.
if __name__ == "__main__":
    run_tests()