BENCH: speed up benchmark suite import time; bump bench_ufunc timeout upward

The input data generation in benchmarks/common.py takes ~1 s, and it is not used by most benchmarks. Generate it lazily instead, making sure the generation is done in the setup() routines.
tacaswell · Dec 14, 2015 · ff92db2 · ff92db2
1 parent 33d7724
commit ff92db2
Show file tree

Hide file tree

Showing 6 changed files with 107 additions and 51 deletions.
diff --git a/benchmarks/benchmarks/bench_indexing.py b/benchmarks/benchmarks/bench_indexing.py
@@ -1,6 +1,6 @@
 from __future__ import absolute_import, division, print_function
 
-from .common import Benchmark, squares_, indexes_, indexes_rand_
+from .common import Benchmark, get_squares_, get_indexes_, get_indexes_rand_
 
 import sys
 import six
@@ -17,10 +17,10 @@ class Indexing(Benchmark):
     def setup(self, indexes, sel, op):
         sel = sel.replace('I', indexes)
 
-        ns = {'squares_': squares_,
+        ns = {'squares_': get_squares_(),
               'np': np,
-              'indexes_': indexes_,
-              'indexes_rand_': indexes_rand_}
+              'indexes_': get_indexes_(),
+              'indexes_rand_': get_indexes_rand_()}
 
         if sys.version_info[0] >= 3:
             code = "def run():\n    for a in squares_.values(): a[%s]%s"

diff --git a/benchmarks/benchmarks/bench_io.py b/benchmarks/benchmarks/bench_io.py
@@ -1,6 +1,6 @@
 from __future__ import absolute_import, division, print_function
 
-from .common import Benchmark, squares
+from .common import Benchmark, get_squares
 
 import numpy as np
 
@@ -57,5 +57,8 @@ def time_copyto_8_dense(self):
 
 
 class Savez(Benchmark):
+    def setup(self):
+        self.squares = get_squares()
+
     def time_vb_savez_squares(self):
-        np.savez('tmp.npz', squares)
+        np.savez('tmp.npz', self.squares)
diff --git a/benchmarks/benchmarks/bench_linalg.py b/benchmarks/benchmarks/bench_linalg.py
@@ -1,6 +1,6 @@
 from __future__ import absolute_import, division, print_function
 
-from .common import Benchmark, squares_, indexes_rand
+from .common import Benchmark, get_squares_, get_indexes_rand, TYPES1
 
 import numpy as np
 
@@ -36,7 +36,7 @@ def time_tensordot_a_b_axes_1_0_0_1(self):
 
 class Linalg(Benchmark):
     params = [['svd', 'pinv', 'det', 'norm'],
-              list(squares_.keys())]
+              TYPES1]
     param_names = ['op', 'type']
 
     def setup(self, op, typename):
@@ -46,10 +46,10 @@ def setup(self, op, typename):
 
         if op == 'cholesky':
             # we need a positive definite
-            self.a = np.dot(squares_[typename],
-                            squares_[typename].T)
+            self.a = np.dot(get_squares_()[typename],
+                            get_squares_()[typename].T)
         else:
-            self.a = squares_[typename]
+            self.a = get_squares_()[typename]
 
         # check that dtype is supported at all
         try:
@@ -63,8 +63,8 @@ def time_op(self, op, typename):
 
 class Lstsq(Benchmark):
     def setup(self):
-        self.a = squares_['float64']
-        self.b = indexes_rand[:100].astype(np.float64)
+        self.a = get_squares_()['float64']
+        self.b = get_indexes_rand()[:100].astype(np.float64)
 
     def time_numpy_linalg_lstsq_a__b_float64(self):
         np.linalg.lstsq(self.a, self.b)
diff --git a/benchmarks/benchmarks/bench_reduce.py b/benchmarks/benchmarks/bench_reduce.py
@@ -1,24 +1,27 @@
 from __future__ import absolute_import, division, print_function
 
-from .common import Benchmark, TYPES1, squares
+from .common import Benchmark, TYPES1, get_squares
 
 import numpy as np
 
 
 class AddReduce(Benchmark):
+    def setup(self):
+        self.squares = get_squares().values()
+
     def time_axis_0(self):
-        [np.add.reduce(a, axis=0) for a in squares.values()]
+        [np.add.reduce(a, axis=0) for a in self.squares]
 
     def time_axis_1(self):
-        [np.add.reduce(a, axis=1) for a in squares.values()]
+        [np.add.reduce(a, axis=1) for a in self.squares]
 
 
 class AddReduceSeparate(Benchmark):
     params = [[0, 1], TYPES1]
     param_names = ['axis', 'type']
 
     def setup(self, axis, typename):
-        self.a = squares[typename]
+        self.a = get_squares()[typename]
 
     def time_reduce(self, axis, typename):
         np.add.reduce(self.a, axis=axis)

diff --git a/benchmarks/benchmarks/bench_ufunc.py b/benchmarks/benchmarks/bench_ufunc.py
@@ -1,6 +1,6 @@
 from __future__ import absolute_import, division, print_function
 
-from .common import Benchmark, squares_
+from .common import Benchmark, get_squares_
 
 import numpy as np
 
@@ -39,7 +39,7 @@ def time_broadcast(self):
 class UFunc(Benchmark):
     params = [ufuncs]
     param_names = ['ufunc']
-    timeout = 2
+    timeout = 10
 
     def setup(self, ufuncname):
         np.seterr(all='ignore')
@@ -48,7 +48,7 @@ def setup(self, ufuncname):
         except AttributeError:
             raise NotImplementedError()
         self.args = []
-        for t, a in squares_.items():
+        for t, a in get_squares_().items():
             arg = (a,) * self.f.nin
             try:
                 self.f(*arg)

diff --git a/benchmarks/benchmarks/common.py b/benchmarks/benchmarks/common.py
@@ -25,40 +25,90 @@
     'complex256',
 ]
 
+
+def memoize(func):
+    result = []
+    def wrapper():
+        if not result:
+            result.append(func())
+        return result[0]
+    return wrapper
+
+
 # values which will be used to construct our sample data matrices
 # replicate 10 times to speed up initial imports of this helper
 # and generate some redundancy
-values = [random.uniform(0, 100) for x in range(nx*ny//10)]*10
-
-squares = {t: numpy.array(values,
-                          dtype=getattr(numpy, t)).reshape((nx, ny))
-           for t in TYPES1}
-
-# adjust complex ones to have non-degenerated imagery part -- use
-# original data transposed for that
-for t, v in squares.items():
-    if t.startswith('complex'):
-        v += v.T*1j
-
-# smaller squares
-squares_ = {t: s[:nxs, :nys] for t, s in squares.items()}
-# vectors
-vectors = {t: s[0] for t, s in squares.items()}
-
-indexes = list(range(nx))
-# so we do not have all items
-indexes.pop(5)
-indexes.pop(95)
-
-indexes_rand = indexes[:]       # copy
-random.shuffle(indexes_rand)         # in-place shuffle
-
-# only now make them arrays
-indexes = numpy.array(indexes)
-indexes_rand = numpy.array(indexes_rand)
-# smaller versions
-indexes_ = indexes[indexes < nxs]
-indexes_rand_ = indexes_rand[indexes_rand < nxs]
+
+@memoize
+def get_values():
+    rnd = numpy.random.RandomState(1)
+    values = numpy.tile(rnd.uniform(0, 100, size=nx*ny//10), 10)
+    return values
+
+
+@memoize
+def get_squares():
+    values = get_values()
+    squares = {t: numpy.array(values,
+                              dtype=getattr(numpy, t)).reshape((nx, ny))
+               for t in TYPES1}
+
+    # adjust complex ones to have non-degenerated imagery part -- use
+    # original data transposed for that
+    for t, v in squares.items():
+        if t.startswith('complex'):
+            v += v.T*1j
+    return squares
+
+
+@memoize
+def get_squares_():
+    # smaller squares
+    squares_ = {t: s[:nxs, :nys] for t, s in get_squares().items()}
+    return squares_
+
+
+@memoize
+def get_vectors():
+    # vectors
+    vectors = {t: s[0] for t, s in get_squares().items()}
+    return vectors
+
+
+@memoize
+def get_indexes():
+    indexes = list(range(nx))
+    # so we do not have all items
+    indexes.pop(5)
+    indexes.pop(95)
+
+    indexes = numpy.array(indexes)
+    return indexes
+
+
+@memoize
+def get_indexes_rand():
+    rnd = random.Random(1)
+
+    indexes_rand = get_indexes().tolist()       # copy
+    rnd.shuffle(indexes_rand)         # in-place shuffle
+    indexes_rand = numpy.array(indexes_rand)
+    return indexes_rand
+
+
+@memoize
+def get_indexes_():
+    # smaller versions
+    indexes = get_indexes()
+    indexes_ = indexes[indexes < nxs]
+    return indexes_
+
+
+@memoize
+def get_indexes_rand_():
+    indexes_rand = get_indexes_rand()
+    indexes_rand_ = indexes_rand[indexes_rand < nxs]
+    return indexes_rand_
 
 
 class Benchmark(object):