forked from pyodide/pyodide
-
Notifications
You must be signed in to change notification settings - Fork 0
/
benchmark.py
242 lines (194 loc) · 7.01 KB
/
benchmark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
import argparse
import json
import re
import subprocess
import sys
from pathlib import Path
from time import time
sys.path.insert(0, str(Path(__file__).resolve().parents[1]))
from pytest_pyodide import ( # noqa: E402
SeleniumChromeRunner,
SeleniumFirefoxRunner,
spawn_web_server,
)
# Benchmark names (script file stems) that get_benchmark_scripts leaves out.
SKIP = {"fft", "hyantes"}
def print_entry(name, res):
    """Print one result entry to stdout.

    The entry name goes on its own line; every ``label: seconds`` pair from
    ``res`` follows on a single indented line, with six decimal places.
    """
    print(" - ", name)
    print(" " * 4, end="")
    for label, seconds in res.items():
        print(f"{label}: {seconds:.6f}", end=" ")
    print()
def run_native(code):
    """Run ``code`` in a child process of the current interpreter.

    Returns NaN without running anything when the code contains the marker
    ``# non-native``. Otherwise the code is executed with ``-c`` from this
    file's directory, with an environment that holds only ``PYTHONPATH``,
    and the last whitespace-separated token of its stdout is returned as a
    float.
    """
    if "# non-native" in code:
        return float("NaN")

    here = Path(__file__).resolve()
    repo_root = here.parents[1]
    search_path = ":".join(
        [
            str(repo_root / "src/py/lib"),
            str(repo_root / "packages" / ".artifacts" / "lib" / "python"),
        ]
    )
    stdout = subprocess.check_output(
        [sys.executable, "-c", code],
        cwd=here.parent,
        env={"PYTHONPATH": search_path},
    )
    # The benchmark script prints its timing last.
    tokens = stdout.strip().split()
    return float(tokens[-1])
def run_wasm(code, selenium, interrupt_buffer):
    """Run ``code`` through ``selenium`` and return the reported runtime.

    When ``interrupt_buffer`` is truthy, an interrupt buffer is installed via
    ``selenium.run_js`` before the code runs. The runtime is the last line of
    ``selenium.logs`` parsed as a float; if that line is not a number the
    logs are printed and the ``ValueError`` propagates.
    """
    if interrupt_buffer:
        install_buffer_js = (
            "\n"
            "let interrupt_buffer = new Int32Array(1);\n"
            "pyodide.setInterruptBuffer(interrupt_buffer)\n"
        )
        selenium.run_js(install_buffer_js)

    selenium.run(code)

    try:
        log_lines = selenium.logs.split("\n")
        return float(log_lines[-1])
    except ValueError:
        # Show the whole log so the failing benchmark can be diagnosed.
        print(selenium.logs)
        raise
def run_all(selenium_backends, code):
    """Time ``code`` natively and in every browser backend.

    Each backend is measured twice: first without and then with an interrupt
    buffer. The second measurement is stored under the browser name suffixed
    with ``(w/ ib)``. Returns a dict mapping labels to runtimes.
    """
    timings = {"native": run_native(code)}
    for browser_name, selenium in selenium_backends.items():
        variants = (
            (False, browser_name),
            (True, browser_name + "(w/ ib)"),
        )
        for use_buffer, label in variants:
            timings[label] = run_wasm(code, selenium, use_buffer)
    return timings
def parse_benchmark(filename):
    """Return the text of a benchmark script with its directives rewritten.

    Comment lines of the form ``# setup: <text>`` or ``# run: <text>`` become
    assignments of ``<text>`` (as a string literal) to ``setup`` / ``run``.
    All other lines are kept unchanged.
    """
    directive = re.compile(r"^#\s*(setup|run): (.*)$")

    def rewrite(raw_line):
        found = directive.match(raw_line)
        if found is None:
            return raw_line
        key, payload = found.groups()
        return f"{key} = {payload!r}\n"

    with open(filename) as fp:
        return "".join(rewrite(raw_line) for raw_line in fp)
def get_benchmark_scripts(scripts_dir, repeat=5, number=5):
    """Yield ``(name, source)`` for each benchmark script in ``scripts_dir``.

    ``scripts_dir`` is resolved relative to this file's directory and its
    entries are visited in sorted order; names listed in ``SKIP`` are left
    out. Each yielded source is the parsed script followed by a footer that
    times ``run`` with ``timeit.Timer`` (``repeat`` x ``number``), drops the
    fastest and slowest repetition, and prints the mean of the rest.
    """
    scripts_root = Path(__file__).resolve().parent / scripts_dir
    for script in sorted(scripts_root.iterdir()):
        name = script.stem
        if name not in SKIP:
            body = parse_benchmark(script)
            footer = "".join(
                (
                    "import numpy as np\n",
                    "_ = np.empty(())\n",
                    f"setup = setup + '\\nfrom __main__ import {name}'\n",
                    "from timeit import Timer\n",
                    "t = Timer(run, setup)\n",
                    f"r = t.repeat({repeat}, {number})\n",
                    "r.remove(min(r))\n",
                    "r.remove(max(r))\n",
                    "print(np.mean(r))\n",
                )
            )
            yield name, body + footer
def get_pystone_benchmarks():
    """Return the pystone benchmark scripts (5 repeats of 1 run each)."""
    scripts_dir = "benchmarks/pystone_benchmarks"
    return get_benchmark_scripts(scripts_dir, repeat=5, number=1)
def get_numpy_benchmarks():
    """Return the numpy benchmark scripts with the default repeat settings."""
    scripts_dir = "benchmarks/numpy_benchmarks"
    return get_benchmark_scripts(scripts_dir)
def get_matplotlib_benchmarks():
    """Return the matplotlib benchmark scripts with the default repeat settings."""
    scripts_dir = "benchmarks/matplotlib_benchmarks"
    return get_benchmark_scripts(scripts_dir)
def get_pandas_benchmarks():
    """Return the pandas benchmark scripts with the default repeat settings."""
    scripts_dir = "benchmarks/pandas_benchmarks"
    return get_benchmark_scripts(scripts_dir)
def get_benchmarks(benchmarks, targets=("all",)):
    """Yield the items produced by the selected benchmark factories.

    ``benchmarks`` maps a name to a zero-argument callable returning an
    iterable. If ``"all"`` is among ``targets`` every factory is used, in
    mapping order; otherwise only the named ones, in the order given.
    """
    if "all" in targets:
        selected = iter(benchmarks.values())
    else:
        # Kept lazy so an unknown name only fails once iteration reaches it.
        selected = (benchmarks[key] for key in targets)

    for make_scripts in selected:
        yield from make_scripts()
def parse_args(benchmarks):
    """Parse the command-line arguments of the benchmark runner.

    Args:
        benchmarks: Names of the available benchmark suites. The list is
            left unmodified; ``"all"`` is offered as an additional choice.

    Returns:
        The ``argparse.Namespace`` with ``target`` (list of chosen names),
        ``output``, ``timeout`` (int, seconds) and ``dist_dir``.
    """
    # Build a new list rather than appending, so the caller's list is not
    # changed as a side effect.
    choices = [*benchmarks, "all"]

    # The first positional parameter of ArgumentParser is ``prog``; the text
    # is a description and must be passed as such to keep the usage line sane.
    parser = argparse.ArgumentParser(
        description="Run benchmarks on Pyodide's performance"
    )
    parser.add_argument(
        "target",
        choices=choices,
        nargs="+",
        help="Benchmarks to run ('all' to run all benchmarks)",
    )
    parser.add_argument(
        "-o",
        "--output",
        default="dist/benchmarks.json",
        help="path to the json file where benchmark results will be saved",
    )
    parser.add_argument(
        "--timeout",
        default=1200,
        type=int,
        help="Browser timeout(sec) for each benchmark (default: %(default)s)",
    )
    parser.add_argument(
        "--dist-dir",
        # resolve() first: a relative __file__ has too few parents and would
        # raise IndexError; it also matches how the rest of the file locates
        # the repository root.
        default=str(Path(__file__).resolve().parents[1] / "dist"),
        help="Pyodide dist directory (default: %(default)s)",
    )
    return parser.parse_args()
def main():
    """Run the selected benchmarks and write the timings to a JSON file.

    Three groups of measurements are collected while a web server serves the
    dist directory: browser start-up time ("selenium init"), package loading
    time for numpy, pandas and matplotlib, and the runtime of every selected
    benchmark script (native and per browser). Each entry is printed as it
    completes; all entries are written to the ``--output`` path at the end.
    """
    BENCHMARKS = {
        "pystone": get_pystone_benchmarks,
        "numpy": get_numpy_benchmarks,
        # TODO: matplotlib benchmark occasionally fails after https://github.com/pyodide/pyodide/pull/3130
        #       but it is not clear why.
        # "matplotlib": get_matplotlib_benchmarks,
        "pandas": get_pandas_benchmarks,
    }

    args = parse_args(list(BENCHMARKS.keys()))
    targets = [t.lower() for t in args.target]
    output = Path(args.output).resolve()
    timeout = args.timeout

    results = {}
    browser_cls = [
        ("firefox", SeleniumFirefoxRunner),
        ("chrome", SeleniumChromeRunner),
    ]

    with spawn_web_server(args.dist_dir) as (_hostname, port, _log_path):
        # selenium initialization time
        result = {"native": float("NaN")}
        for browser_name, cls in browser_cls:
            # Construction is part of the timed region, so it has to live
            # inside the try; start from None so that a failed construction
            # does not make the cleanup touch an unbound or stale runner.
            selenium = None
            try:
                t0 = time()
                selenium = cls(port)
                selenium.set_script_timeout(timeout)
                result[browser_name] = time() - t0
            finally:
                if selenium is not None:
                    selenium.driver.quit()

        results["selenium init"] = result
        print_entry("selenium init", result)

        # package loading time
        for package_name in ["numpy", "pandas", "matplotlib"]:
            result = {"native": float("NaN")}
            for browser_name, cls in browser_cls:
                selenium = cls(port)
                try:
                    # Inside the try so the browser is closed even if
                    # configuring it fails.
                    selenium.set_script_timeout(timeout)
                    t0 = time()
                    selenium.load_package(package_name)
                    result[browser_name] = time() - t0
                finally:
                    selenium.driver.quit()

            results[f"load {package_name}"] = result
            print_entry(f"load {package_name}", result)

        # run benchmarks
        for benchmark_name, content in get_benchmarks(BENCHMARKS, targets):
            # A fresh mapping per benchmark: if a browser fails to start, the
            # cleanup below must only see runners created in this iteration,
            # not ones that were already quit by a previous one.
            selenium_backends = {}
            try:
                # instantiate browsers for each benchmark to prevent side effects
                for browser_name, cls in browser_cls:
                    selenium_backends[browser_name] = cls(port)
                    selenium_backends[browser_name].set_script_timeout(timeout)
                    # pre-load numpy, matplotlib and pandas for the selenium instance used in benchmarks
                    selenium_backends[browser_name].load_package(
                        ["numpy", "matplotlib", "pandas"]
                    )

                results[benchmark_name] = run_all(selenium_backends, content)
                print_entry(benchmark_name, results[benchmark_name])
            finally:
                for selenium in selenium_backends.values():
                    selenium.driver.quit()

    output.parent.mkdir(exist_ok=True, parents=True)
    output.write_text(json.dumps(results))
# Run the benchmarks only when this file is executed as a script.
if __name__ == "__main__":
    main()