forked from quantopian/zipline
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrebuild_example_data
executable file
·175 lines (141 loc) · 4.79 KB
/
rebuild_example_data
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
#!/usr/bin/env python
from code import InteractiveConsole
import readline # noqa
import shutil
import tarfile
import click
import numpy as np
import pandas as pd
from zipline import examples
from zipline.data.bundles import clean, ingest, register, yahoo_equities
from zipline.testing import test_resource_path, tmp_dir
from zipline.utils.cache import dataframe_cache
# First and last dates passed to ``yahoo_equities`` for the input bundle.
INPUT_DATA_START_DATE = pd.Timestamp('2004-01-02')
INPUT_DATA_END_DATE = pd.Timestamp('2014-12-31')
# Ticker symbols passed to ``yahoo_equities`` for the input bundle.
INPUT_DATA_SYMBOLS = (
    'AMD',
    'CERN',
    'COST',
    'DELL',
    'GPS',
    'INTC',
    'MMM',
    'AAPL',
    'MSFT',
)
# Name the bundle is registered under; ``rebuild_input_data`` passes the same
# name to ``ingest`` and ``clean``.
TEST_BUNDLE_NAME = 'test'
# Bundle definition built from the symbols and date range above.
input_bundle = yahoo_equities(
    INPUT_DATA_SYMBOLS,
    INPUT_DATA_START_DATE,
    INPUT_DATA_END_DATE,
)
# NOTE: this runs at import time, so importing this module registers the
# bundle as a side effect.
register(TEST_BUNDLE_NAME, input_bundle)
# Text handed to ``console.interact`` in ``main``. It names the objects that
# ``main`` places in the console namespace (``new``, ``old``,
# ``cols_to_check``, ``changed_results``, ``correct``, ``incorrect``).
banner = """
Please verify that the new performance is more correct than the old
performance.
To do this, please inspect `new` and `old` which are mappings from the name of
the example to the results.
The name `cols_to_check` has been bound to a list of perf columns that we
expect to be reliably deterministic (excluding, e.g. `orders`, which contains
UUIDs).
Calling `changed_results(new, old)` will compute a list of names of results
that produced a different value in one of the `cols_to_check` fields.
If you are sure that the new results are more correct, or that the difference
is acceptable, please call `correct()`. Otherwise, call `incorrect()`.
Note
----
Remember to run this with the other supported versions of pandas!
"""
def changed_results(new, old, cols_to_check=None):
    """
    Get the names of results that changed since the last invocation.

    Useful for verifying that only expected results changed.

    Parameters
    ----------
    new : mapping[str -> pd.DataFrame]
        The freshly computed results, keyed by example name.
    old : mapping[str -> pd.DataFrame]
        The previously stored results, keyed by example name.
    cols_to_check : iterable[str], optional
        The columns to compare between the two frames. Defaults to
        ``examples._cols_to_check``.

    Returns
    -------
    changed : list[str]
        The names in ``new`` that are either missing from ``old`` or whose
        checked columns are not equal to those in ``old``. Names that only
        appear in ``old`` are not reported.
    """
    if cols_to_check is None:
        cols_to_check = examples._cols_to_check
    # Index with a list so that a tuple is not treated as a single label.
    columns = list(cols_to_check)

    # ``pd.testing`` is the public location of ``assert_frame_equal``;
    # ``pd.util.testing`` was deprecated and later removed. Fall back to the
    # legacy location for pandas versions that predate ``pd.testing``.
    try:
        assert_frame_equal = pd.testing.assert_frame_equal
    except AttributeError:
        assert_frame_equal = pd.util.testing.assert_frame_equal

    changed = []
    for name in new:
        if name not in old:
            # A brand new result always counts as a change.
            changed.append(name)
            continue

        try:
            assert_frame_equal(new[name][columns], old[name][columns])
        except AssertionError:
            changed.append(name)

    return changed
def eof(*args, **kwargs):
    """Ignore every argument and unconditionally raise ``EOFError``.

    ``main`` installs this as the console's ``raw_input`` so that the next
    attempt to read a line fails with ``EOFError``.
    """
    raise EOFError
def rebuild_input_data(environ):
    """Re-ingest the test bundle, then clean it keeping only the last ingestion.

    Parameters
    ----------
    environ : mapping
        Environment mapping forwarded to both ``ingest`` and ``clean``.
    """
    bundle_name = TEST_BUNDLE_NAME
    # Ingest first so that the clean step leaves the fresh ingestion behind.
    ingest(bundle_name, environ=environ, show_progress=True)
    clean(bundle_name, keep_last=1, environ=environ)
@click.command()
@click.option(
    '--rebuild-input',
    is_flag=True,
    default=False,
    help="Should we rebuild the input data from Yahoo?",
)
@click.pass_context
def main(ctx, rebuild_input):
    """Rebuild the perf data for test_examples

    Runs every example, opens an interactive console to compare the new
    results with the stored ones, and rewrites the example data tarball
    only if `correct()` is called in that console.
    """
    # Overall flow:
    #   1. unpack the example data tarball into a temporary directory
    #   2. optionally re-ingest the input bundle
    #   3. run every example and store the results under ``new_perf``
    #   4. let the user compare ``new`` and ``old`` interactively
    #   5. on ``correct()``, replace the expected results and re-pack
    example_path = test_resource_path('example_data.tar.gz')

    # Results are stored per pandas version; dots are replaced so the version
    # is usable as a plain directory name.
    version_dir = pd.__version__.replace('.', '-')

    with tmp_dir() as d:
        with tarfile.open(example_path) as tar:
            tar.extractall(d.path)

        # The environ here should be the same (modulo the tempdir location)
        # as we use in test_examples.py.
        environ = {'ZIPLINE_ROOT': d.getpath('example_data/root')}

        if rebuild_input:
            rebuild_input_data(environ)

        new_perf_path = d.getpath('example_data/new_perf/%s' % version_dir)
        c = dataframe_cache(
            new_perf_path,
            serialization='pickle:2',
        )

        # NOTE(review): everything that reads ``c`` or ``new_perf_path`` is
        # kept inside this ``with`` block because the cache presumably cleans
        # up its directory on exit -- confirm against ``dataframe_cache``.
        with c:
            for name in examples.EXAMPLE_MODULES:
                c[name] = examples.run_example(name, environ=environ)

            # A one-element list so the nested function can flip the flag
            # without ``nonlocal``.
            correct_called = [False]

            # Bound before the closures are defined; rebound to the real
            # console below, before either closure can be called.
            console = None

            def _exit(*args, **kwargs):
                # Make the next line read raise ``EOFError``.
                console.raw_input = eof

            def correct():
                correct_called[0] = True
                _exit()

            expected_perf_path = d.getpath(
                'example_data/expected_perf/%s' % version_dir,
            )

            # allow users to run some analysis to make sure that the new
            # results check out
            console = InteractiveConsole({
                'correct': correct,
                'exit': _exit,
                'incorrect': _exit,
                'new': c,
                'np': np,
                'old': dataframe_cache(
                    expected_perf_path,
                    serialization='pickle',
                ),
                'pd': pd,
                'cols_to_check': examples._cols_to_check,
                'changed_results': changed_results,
            })
            console.interact(banner)

            if not correct_called[0]:
                # ``ctx.fail`` raises, so nothing below runs and the tarball
                # is left untouched.
                ctx.fail(
                    '`correct()` was not called! This means that the new'
                    ' results will not be written',
                )

            # move the new results to the expected path
            shutil.rmtree(expected_perf_path)
            shutil.copytree(new_perf_path, expected_perf_path)

        # ``new_perf`` is only scratch space for this run, but it lives inside
        # ``example_data``, which is archived wholesale below. Remove it so it
        # does not end up in the tarball. Best effort: a leftover directory
        # only bloats the archive.
        shutil.rmtree(d.getpath('example_data/new_perf'), ignore_errors=True)

        with tarfile.open(example_path, 'w|gz') as tar:
            tar.add(d.getpath('example_data'), 'example_data')
# Invoke the click command only when this file is executed as a script.
if __name__ == '__main__':
    main()