forked from benhoyt/scandir
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbenchmark.py
255 lines (220 loc) · 8.7 KB
/
benchmark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
"""Simple benchmark to compare the speed of scandir.walk() with os.walk()."""
import optparse
import os
import stat
import sys
import timeit
import scandir
# Shape of the tree built by create_tree(): DEPTH is the default number of
# directory levels, NUM_FILES is the number of files written into every
# directory, and NUM_DIRS is the number of subdirectories created in every
# directory above the deepest level.
DEPTH = 4
NUM_DIRS = 5
NUM_FILES = 50
# The os.walk()-style callable that benchmark() times against scandir; it is
# assigned in the __main__ section before benchmark() is called.
os_walk_func = None
# ctypes versions of os.listdir() so benchmark can compare apples with apples
if sys.platform == 'win32':
    import ctypes
    from ctypes import wintypes

    def listdir_python(path):
        """Return the entry names in path via scandir's FindFirstFile wrappers.

        The '.' and '..' entries are omitted. An empty list is returned when
        FindFirstFile fails with ERROR_FILE_NOT_FOUND; any other failure is
        raised as scandir.win_error(error, path).
        """
        find_data = wintypes.WIN32_FIND_DATAW()
        find_data_ref = ctypes.byref(find_data)
        pattern = os.path.join(path, '*.*')
        handle = scandir.FindFirstFile(pattern, find_data_ref)
        if handle == scandir.INVALID_HANDLE_VALUE:
            # Read the error code straight after the failing call.
            error = ctypes.GetLastError()
            if error == scandir.ERROR_FILE_NOT_FOUND:
                return []
            raise scandir.win_error(error, path)

        names = []
        try:
            while True:
                # find_data already holds an entry here: the first one from
                # FindFirstFile, later ones from FindNextFile.
                current = find_data.cFileName
                if current not in ('.', '..'):
                    names.append(current)
                if scandir.FindNextFile(handle, find_data_ref):
                    continue
                error = ctypes.GetLastError()
                if error == scandir.ERROR_NO_MORE_FILES:
                    break
                raise scandir.win_error(error, path)
        finally:
            # NOTE(review): raising here replaces any exception that is
            # already propagating out of the loop above.
            if not scandir.FindClose(handle):
                raise scandir.win_error(ctypes.GetLastError(), path)
        return names

elif sys.platform.startswith(('linux', 'darwin')) or 'bsd' in sys.platform:
    def listdir_python(path):
        """Return the entry names in path via scandir's opendir/readdir_r.

        The '.' and '..' entries are omitted. Names are decoded with
        scandir.file_system_encoding. Failures of opendir, readdir_r or
        closedir are raised as scandir.posix_error(path).
        """
        encoding = scandir.file_system_encoding
        dir_p = scandir.opendir(path.encode(encoding))
        if not dir_p:
            raise scandir.posix_error(path)

        names = []
        try:
            entry = scandir.Dirent()
            result = scandir.Dirent_p()
            while True:
                # A true return from readdir_r is treated as an error.
                if scandir.readdir_r(dir_p, entry, result):
                    raise scandir.posix_error(path)
                # A null result pointer ends the listing.
                if not result:
                    break
                current = entry.d_name.decode(encoding)
                if current not in ('.', '..'):
                    names.append(current)
        finally:
            # NOTE(review): raising here replaces any exception that is
            # already propagating out of the loop above.
            if scandir.closedir(dir_p):
                raise scandir.posix_error(path)
        return names

else:
    # No ctypes implementation for this platform; use the builtin.
    listdir_python = os.listdir
def os_walk_python(top, topdown=True, onerror=None, followlinks=False):
    """Walk a tree like os.walk(), but list directories with listdir_python().

    Using the ctypes-based listdir keeps the comparison against the
    ctypes-based scandir() fair.

    Yields (top, dirs, nondirs) tuples. If listing top raises OSError, the
    error is passed to onerror (when given) and nothing is yielded for top.
    Subdirectories that are symlinks are only descended into when followlinks
    is true.
    """
    try:
        entries = listdir_python(top)
    except OSError as exc:
        if onerror is not None:
            onerror(exc)
        return

    dirs = []
    nondirs = []
    for entry in entries:
        is_directory = os.path.isdir(os.path.join(top, entry))
        bucket = dirs if is_directory else nondirs
        bucket.append(entry)

    if topdown:
        yield top, dirs, nondirs

    for subdir in dirs:
        child = os.path.join(top, subdir)
        # Skip symlinked directories unless the caller asked to follow them.
        if not followlinks and os.path.islink(child):
            continue
        for item in os_walk_python(child, topdown, onerror, followlinks):
            yield item

    if not topdown:
        yield top, dirs, nondirs
def create_tree(path, depth=DEPTH):
    """Build a benchmark directory tree rooted at path.

    path is created with os.mkdir() and filled with NUM_FILES three-byte
    files. While depth is greater than 1, NUM_DIRS subdirectories are created
    recursively with depth reduced by one.
    """
    os.mkdir(path)

    for index in range(NUM_FILES):
        file_path = os.path.join(path, 'file{0:03}.txt'.format(index))
        with open(file_path, 'wb') as handle:
            handle.write(b'foo')

    if depth > 1:
        for index in range(NUM_DIRS):
            subdir = os.path.join(path, 'dir{0:03}'.format(index))
            create_tree(subdir, depth - 1)
def get_tree_size(path):
    """Return the summed size of the entries in the directory tree at path.

    Directories (not following symlinks) are recursed into; every other entry
    contributes the st_size of its stat() taken without following symlinks.
    """
    total = 0
    try:
        for entry in scandir.scandir(path):
            if not entry.is_dir(follow_symlinks=False):
                total += entry.stat(follow_symlinks=False).st_size
                continue
            total += get_tree_size(os.path.join(path, entry.name))
    except OSError:
        # Best effort: an OSError stops the scan of this directory and the
        # total accumulated so far is returned.
        pass
    return total
def benchmark(path, get_size=False):
    """Time os_walk_func against scandir on the tree at path and print results.

    The module-level os_walk_func must be set to an os.walk()-compatible
    callable before this is called.

    When get_size is false, both sides simply iterate the whole tree
    (os_walk_func(path) versus scandir.walk(path)). When get_size is true,
    both sides also total the size of the tree (os_walk_func plus os.lstat()
    versus get_tree_size()), and the two totals are printed and compared.

    Each side is timed three times and the best time is reported.
    """
    sizes = {}

    if get_size:
        def do_os_walk():
            size = 0
            for root, dirs, files in os_walk_func(path):
                # Use lstat() rather than os.path.getsize(): get_tree_size()
                # does not follow symlinks, and getsize() raises OSError on a
                # dangling symlink.
                for filename in files:
                    size += os.lstat(os.path.join(root, filename)).st_size
                # A symlink to a directory is listed under dirs here, whereas
                # get_tree_size() counts the link itself as a non-directory
                # entry. Add its size so that both totals agree.
                for dirname in dirs:
                    fullname = os.path.join(root, dirname)
                    if os.path.islink(fullname):
                        size += os.lstat(fullname).st_size
            sizes['os_walk'] = size

        def do_scandir_walk():
            sizes['scandir_walk'] = get_tree_size(path)
    else:
        def do_os_walk():
            for root, dirs, files in os_walk_func(path):
                pass

        def do_scandir_walk():
            for root, dirs, files in scandir.walk(path):
                pass

    # Run this once first to cache things, so we're not benchmarking I/O
    print("Priming the system's cache...")
    do_scandir_walk()

    # Use the best of 3 time for each of them to eliminate high outliers
    os_walk_time = float('inf')
    scandir_walk_time = float('inf')
    N = 3
    for i in range(N):
        print('Benchmarking walks on {0}, repeat {1}/{2}...'.format(
            path, i + 1, N))
        os_walk_time = min(os_walk_time, timeit.timeit(do_os_walk, number=1))
        scandir_walk_time = min(scandir_walk_time,
                                timeit.timeit(do_scandir_walk, number=1))

    if get_size:
        if sizes['os_walk'] == sizes['scandir_walk']:
            equality = 'equal'
        else:
            equality = 'NOT EQUAL!'
        print('os.walk size {0}, scandir.walk size {1} -- {2}'.format(
            sizes['os_walk'], sizes['scandir_walk'], equality))

    # Guard against a zero reading from a coarse timer on a tiny tree.
    if scandir_walk_time:
        speedup = os_walk_time / scandir_walk_time
    else:
        speedup = float('inf')
    print('os.walk took {0:.3f}s, scandir.walk took {1:.3f}s -- {2:.1f}x as fast'.format(
        os_walk_time, scandir_walk_time, speedup))
if __name__ == '__main__':
    usage = """Usage: benchmark.py [-h] [tree_dir]
Create a large directory tree named "benchtree" (relative to this script) and
benchmark os.walk() versus scandir.walk(). If tree_dir is specified, benchmark
using it instead of creating a tree."""

    parser = optparse.OptionParser(usage=usage)
    parser.add_option(
        '-s', '--size', action='store_true',
        help='get size of directory tree while walking')
    parser.add_option(
        '-c', '--scandir', type='choice',
        choices=['best', 'generic', 'c', 'python', 'os'], default='best',
        help='version of scandir() to use, default "%default"')
    parser.add_option(
        '-o', '--os-walk', type='choice',
        choices=['match', 'builtin', 'python'], default='builtin',
        help='version of os.walk() to use, default "%default"')
    options, args = parser.parse_args()

    # Only the default tree is ever created; a tree_dir given on the command
    # line is used as-is.
    if args:
        tree_dir = args[0]
    else:
        tree_dir = os.path.join(os.path.dirname(__file__), 'benchtree')
        if not os.path.exists(tree_dir):
            print('Creating tree at {0}: depth={1}, num_dirs={2}, num_files={3}'.format(
                tree_dir, DEPTH, NUM_DIRS, NUM_FILES))
            create_tree(tree_dir)

    # Implementations that may be missing: where to look for them, and the
    # message to print if they are absent.
    optional_impls = {
        'c': (scandir, 'scandir_c',
              "ERROR: Compiled C version of scandir not found!"),
        'python': (scandir, 'scandir_python',
                   "ERROR: Python version of scandir not found!"),
        'os': (os, 'scandir',
               "ERROR: Python 3.5's os.scandir() not found!"),
    }
    requested = options.scandir
    if requested == 'generic':
        scandir.scandir = scandir.scandir_generic
    elif requested in optional_impls:
        owner, attr_name, missing_message = optional_impls[requested]
        if not hasattr(owner, attr_name):
            print(missing_message)
            sys.exit(1)
        scandir.scandir = getattr(owner, attr_name)
    elif hasattr(os, 'scandir'):
        # 'best': prefer the builtin os.scandir() when it exists; otherwise
        # leave scandir.scandir as the module set it.
        scandir.scandir = os.scandir

    # Report the implementation in use and choose the os.walk() that matches
    # it; the --os-walk option may override this choice below.
    active = scandir.scandir
    if active == getattr(os, 'scandir', None):
        print("Using Python 3.5's builtin os.scandir()")
        os_walk_func = os.walk
    elif active == getattr(scandir, 'scandir_c', None):
        print('Using fast C version of scandir')
        os_walk_func = os.walk
    elif active == getattr(scandir, 'scandir_python', None):
        print('Using slower ctypes version of scandir')
        os_walk_func = os_walk_python
    elif active == scandir.scandir_generic:
        print('Using very slow generic version of scandir')
        os_walk_func = os_walk_python
    else:
        print('ERROR: Unsure which version of scandir we are using!')
        sys.exit(1)

    # 'match' keeps the choice made above.
    if options.os_walk == 'builtin':
        os_walk_func = os.walk
    elif options.os_walk == 'python':
        os_walk_func = os_walk_python

    if os_walk_func == os.walk:
        print('Comparing against builtin version of os.walk()')
    else:
        print('Comparing against ctypes emulation of os.walk()')

    benchmark(tree_dir, get_size=options.size)