Skip to content

Commit

Permalink
ENH lots of benchmarks fixes
Browse files Browse the repository at this point in the history
* add a title to all benchmark figures/windows
* replace "bench"/"benching" with "benchmark"/"benchmarking" in printed
  messages and comments ("bench" is non-standard slang for benchmarking)
* remove calls to pyplot.clf because the previous call
  to pyplot.figure sets the current figure
* standardize on "Time (s)" as time axis label
* move some benchmark plot legends to the upper left corner of the plot,
  since most benchmark curves start near (0,0) and grow up and to the right
* simplified plot titles and moved constant info from legend into title
* fixed typos
* made some labels more viewer-friendly

Fixes scikit-learn#2023.
  • Loading branch information
kgeis authored and larsmans committed Jun 30, 2013
1 parent 7419d2c commit ca2142e
Show file tree
Hide file tree
Showing 15 changed files with 83 additions and 79 deletions.
7 changes: 4 additions & 3 deletions benchmarks/bench_glm.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,13 @@
lasso.fit(X, Y)
time_lasso[i] = total_seconds(datetime.now() - start)

pl.xlabel('Dimesions')
pl.ylabel('Time (in seconds)')
pl.figure('scikit-learn GLM benchmark results')
pl.xlabel('Dimensions')
pl.ylabel('Time (s)')
pl.plot(dimensions, time_ridge, color='r')
pl.plot(dimensions, time_ols, color='g')
pl.plot(dimensions, time_lasso, color='b')

pl.legend(['Ridge', 'OLS', 'LassoLars'])
pl.legend(['Ridge', 'OLS', 'LassoLars'], loc='upper left')
pl.axis('tight')
pl.show()
16 changes: 8 additions & 8 deletions benchmarks/bench_glmnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,9 @@ def bench(factory, X, Y, X_test, Y_test, ref_coef):
X = X[:(i * step)]
Y = Y[:(i * step)]

print("benching scikit-learn: ")
print("benchmarking scikit-learn: ")
scikit_results.append(bench(ScikitLasso, X, Y, X_test, Y_test, coef_))
print("benching glmnet: ")
print("benchmarking glmnet: ")
glmnet_results.append(bench(GlmnetLasso, X, Y, X_test, Y_test, coef_))

pl.clf()
Expand All @@ -83,10 +83,10 @@ def bench(factory, X, Y, X_test, Y_test, ref_coef):
pl.plot(xx, glmnet_results, 'r-', label='glmnet')
pl.legend()
pl.xlabel('number of samples to classify')
pl.ylabel('time (in seconds)')
pl.ylabel('Time (s)')
pl.show()

# now do a bench where the number of points is fixed
# now do a benchmark where the number of points is fixed
# and the variable is the number of features

scikit_results = []
Expand All @@ -111,18 +111,18 @@ def bench(factory, X, Y, X_test, Y_test, ref_coef):
X = X[:n_samples]
Y = Y[:n_samples]

print("benching scikit-learn: ")
print("benchmarking scikit-learn: ")
scikit_results.append(bench(ScikitLasso, X, Y, X_test, Y_test, coef_))
print("benching glmnet: ")
print("benchmarking glmnet: ")
glmnet_results.append(bench(GlmnetLasso, X, Y, X_test, Y_test, coef_))

xx = np.arange(100, 100 + n * step, step)
pl.figure()
pl.figure('scikit-learn vs. glmnet benchmark results')
pl.title('Regression in high dimensional spaces (%d samples)' % n_samples)
pl.plot(xx, scikit_results, 'b-', label='scikit-learn')
pl.plot(xx, glmnet_results, 'r-', label='glmnet')
pl.legend()
pl.xlabel('number of features')
pl.ylabel('time (in seconds)')
pl.ylabel('Time (s)')
pl.axis('tight')
pl.show()
18 changes: 9 additions & 9 deletions benchmarks/bench_lasso.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,15 @@ def compute_bench(alpha, n_samples, n_features, precompute):
X /= np.sqrt(np.sum(X ** 2, axis=0)) # Normalize data

gc.collect()
print("- benching Lasso")
print("- benchmarking Lasso")
clf = Lasso(alpha=alpha, fit_intercept=False,
precompute=precompute)
tstart = time()
clf.fit(X, Y)
lasso_results.append(time() - tstart)

gc.collect()
print("- benching LassoLars")
print("- benchmarking LassoLars")
clf = LassoLars(alpha=alpha, fit_intercept=False,
normalize=False, precompute=precompute)
tstart = time()
Expand All @@ -68,16 +68,16 @@ def compute_bench(alpha, n_samples, n_features, precompute):
lasso_results, lars_lasso_results = compute_bench(alpha, list_n_samples,
[n_features], precompute=True)

pl.clf()
pl.figure('scikit-learn LASSO benchmark results')
pl.subplot(211)
pl.plot(list_n_samples, lasso_results, 'b-',
label='Lasso (with precomputed Gram matrix)')
label='Lasso')
pl.plot(list_n_samples, lars_lasso_results, 'r-',
label='LassoLars (with precomputed Gram matrix)')
pl.title('Lasso benchmark (%d features - alpha=%s)' % (n_features, alpha))
label='LassoLars')
pl.title('precomputed Gram matrix, %d features, alpha=%s' % (n_features, alpha))
pl.legend(loc='upper left')
pl.xlabel('number of samples')
pl.ylabel('time (in seconds)')
pl.ylabel('Time (s)')
pl.axis('tight')

n_samples = 2000
Expand All @@ -87,9 +87,9 @@ def compute_bench(alpha, n_samples, n_features, precompute):
pl.subplot(212)
pl.plot(list_n_features, lasso_results, 'b-', label='Lasso')
pl.plot(list_n_features, lars_lasso_results, 'r-', label='LassoLars')
pl.title('Lasso benchmark (%d samples - alpha=%s)' % (n_samples, alpha))
pl.title('%d samples, alpha=%s' % (n_samples, alpha))
pl.legend(loc='upper left')
pl.xlabel('number of features')
pl.ylabel('time (in seconds)')
pl.ylabel('Time (s)')
pl.axis('tight')
pl.show()
12 changes: 6 additions & 6 deletions benchmarks/bench_plot_fastkmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,8 @@ def compute_bench(samples_range, features_range):
print()
print()

results['minibatchkmeans_speed'].append(delta)
results['minibatchkmeans_quality'].append(mbkmeans.inertia_)
results['MiniBatchKMeans Speed'].append(delta)
results['MiniBatchKMeans Quality'].append(mbkmeans.inertia_)

return results

Expand Down Expand Up @@ -85,8 +85,8 @@ def compute_bench_2(chunks):
print("Inertia: %0.3fs" % mbkmeans.inertia_)
print()

results['minibatchkmeans_speed'].append(delta)
results['minibatchkmeans_quality'].append(mbkmeans.inertia_)
results['MiniBatchKMeans Speed'].append(delta)
results['MiniBatchKMeans Quality'].append(mbkmeans.inertia_)

return results

Expand All @@ -108,7 +108,7 @@ def compute_bench_2(chunks):
t for (label, t) in results.iteritems()
if "speed" not in label]])

fig = plt.figure()
fig = plt.figure('scikit-learn K-Means benchmark results')
for c, (label, timings) in zip('brcy',
sorted(results.iteritems())):
if 'speed' in label:
Expand All @@ -132,7 +132,7 @@ def compute_bench_2(chunks):
ax = fig.add_subplot(2, 2, i + 2)
y = np.asarray(timings)
ax.plot(chunks, y, color=c, alpha=0.8)
ax.set_xlabel('chunks')
ax.set_xlabel('Chunks')
ax.set_ylabel(label)

plt.show()
12 changes: 6 additions & 6 deletions benchmarks/bench_plot_lasso_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def compute_bench(samples_range, features_range):
X, y = make_regression(**dataset_kwargs)

gc.collect()
print("benching lars_path (with Gram):", end='')
print("benchmarking lars_path (with Gram):", end='')
sys.stdout.flush()
tstart = time()
G = np.dot(X.T, X) # precomputed Gram matrix
Expand All @@ -53,7 +53,7 @@ def compute_bench(samples_range, features_range):
results['lars_path (with Gram)'].append(delta)

gc.collect()
print("benching lars_path (without Gram):", end='')
print("benchmarking lars_path (without Gram):", end='')
sys.stdout.flush()
tstart = time()
lars_path(X, y, method='lasso')
Expand All @@ -62,7 +62,7 @@ def compute_bench(samples_range, features_range):
results['lars_path (without Gram)'].append(delta)

gc.collect()
print("benching lasso_path (with Gram):", end='')
print("benchmarking lasso_path (with Gram):", end='')
sys.stdout.flush()
tstart = time()
lasso_path(X, y, precompute=True)
Expand All @@ -71,7 +71,7 @@ def compute_bench(samples_range, features_range):
results['lasso_path (with Gram)'].append(delta)

gc.collect()
print("benching lasso_path (without Gram):", end='')
print("benchmarking lasso_path (without Gram):", end='')
sys.stdout.flush()
tstart = time()
lasso_path(X, y, precompute=False)
Expand All @@ -92,7 +92,7 @@ def compute_bench(samples_range, features_range):

max_time = max(max(t) for t in results.values())

fig = plt.figure()
fig = plt.figure('scikit-learn Lasso path benchmark results')
i = 1
for c, (label, timings) in zip('bcry', sorted(results.items())):
ax = fig.add_subplot(2, 2, i, projection='3d')
Expand All @@ -109,7 +109,7 @@ def compute_bench(samples_range, features_range):

ax.set_xlabel('n_samples')
ax.set_ylabel('n_features')
ax.set_zlabel('time (s)')
ax.set_zlabel('Time (s)')
ax.set_zlim3d(0.0, max_time * 1.1)
ax.set_title(label)
#ax.legend()
Expand Down
19 changes: 10 additions & 9 deletions benchmarks/bench_plot_neighbors.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@ def barplot_neighbors(Nrange=2 ** np.arange(1, 11),
k_results_build[algorithm][i] = (t1 - t0)
k_results_query[algorithm][i] = (t2 - t1)

pl.figure(figsize=(8, 11))
pl.figure('scikit-learn nearest neighbors benchmark results',
figsize=(8, 11))

for (sbplt, vals, quantity,
build_time, query_time) in [(311, Nrange, 'N',
Expand All @@ -131,10 +132,10 @@ def barplot_neighbors(Nrange=2 ** np.arange(1, 11),
xvals = 0.1 + i * (1 + len(vals)) + np.arange(len(vals))
width = 0.8

pl.bar(xvals, build_time[alg] - bottom,
width, bottom, color='r')
pl.bar(xvals, query_time[alg],
width, build_time[alg], color='b')
c_bar = pl.bar(xvals, build_time[alg] - bottom,
width, bottom, color='r')
q_bar = pl.bar(xvals, query_time[alg],
width, build_time[alg], color='b')

tick_vals += list(xvals + 0.5 * width)
tick_labels += ['%i' % val for val in vals]
Expand All @@ -145,7 +146,7 @@ def barplot_neighbors(Nrange=2 ** np.arange(1, 11),
va='top',
bbox=dict(facecolor='w', edgecolor='w', alpha=0.5))

pl.ylabel('time (seconds)')
pl.ylabel('Time (s)')

ax.xaxis.set_major_locator(ticker.FixedLocator(tick_vals))
ax.xaxis.set_major_formatter(ticker.FixedFormatter(tick_labels))
Expand Down Expand Up @@ -174,10 +175,10 @@ def barplot_neighbors(Nrange=2 ** np.arange(1, 11),
transform=ax.transAxes, rotation=-90,
ha='right', va='center')

pl.gcf().suptitle("%s data\nred = construction; blue = N-point query"
% (dataset[0].upper() + dataset[1:]),
fontsize=16)
pl.gcf().suptitle("%s data set" % dataset.capitalize(), fontsize=16)

pl.figlegend((c_bar, q_bar), ('construction', 'N-point query'),
'upper right')

if __name__ == '__main__':
barplot_neighbors(dataset='digits')
Expand Down
14 changes: 7 additions & 7 deletions benchmarks/bench_plot_nmf.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def compute_bench(samples_range, features_range, rank=50, tolerance=1e-7):
effective_rank=rank, tail_strength=0.2))

gc.collect()
print("benching nndsvd-nmf: ")
print("benchmarking nndsvd-nmf: ")
tstart = time()
m = NMF(n_components=30, tol=tolerance, init='nndsvd').fit(X)
tend = time() - tstart
Expand All @@ -94,7 +94,7 @@ def compute_bench(samples_range, features_range, rank=50, tolerance=1e-7):
print(m.reconstruction_err_, tend)

gc.collect()
print("benching nndsvda-nmf: ")
print("benchmarking nndsvda-nmf: ")
tstart = time()
m = NMF(n_components=30, init='nndsvda',
tol=tolerance).fit(X)
Expand All @@ -104,7 +104,7 @@ def compute_bench(samples_range, features_range, rank=50, tolerance=1e-7):
print(m.reconstruction_err_, tend)

gc.collect()
print("benching nndsvdar-nmf: ")
print("benchmarking nndsvdar-nmf: ")
tstart = time()
m = NMF(n_components=30, init='nndsvdar',
tol=tolerance).fit(X)
Expand All @@ -114,7 +114,7 @@ def compute_bench(samples_range, features_range, rank=50, tolerance=1e-7):
print(m.reconstruction_err_, tend)

gc.collect()
print("benching random-nmf")
print("benchmarking random-nmf")
tstart = time()
m = NMF(n_components=30, init=None, max_iter=1000,
tol=tolerance).fit(X)
Expand All @@ -124,7 +124,7 @@ def compute_bench(samples_range, features_range, rank=50, tolerance=1e-7):
print(m.reconstruction_err_, tend)

gc.collect()
print("benching alt-random-nmf")
print("benchmarking alt-random-nmf")
tstart = time()
W, H = alt_nnmf(X, r=30, R=None, tol=tolerance)
tend = time() - tstart
Expand All @@ -145,7 +145,7 @@ def compute_bench(samples_range, features_range, rank=50, tolerance=1e-7):
timeset, err = compute_bench(samples_range, features_range)

for i, results in enumerate((timeset, err)):
fig = plt.figure()
fig = plt.figure('scikit-learn Non-Negative Matrix Factorization benchmark results')
ax = fig.gca(projection='3d')
for c, (label, timings) in zip('rbgcm', sorted(results.iteritems())):
X, Y = np.meshgrid(samples_range, features_range)
Expand All @@ -160,7 +160,7 @@ def compute_bench(samples_range, features_range, rank=50, tolerance=1e-7):

ax.set_xlabel('n_samples')
ax.set_ylabel('n_features')
zlabel = 'time (s)' if i == 0 else 'reconstruction error'
zlabel = 'Time (s)' if i == 0 else 'reconstruction error'
ax.set_zlabel(zlabel)
ax.legend()
plt.show()
10 changes: 5 additions & 5 deletions benchmarks/bench_plot_omp_lars.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def compute_bench(samples_range, features_range):
X = np.asfortranarray(X)

gc.collect()
print("benching lars_path (with Gram):", end='')
print("benchmarking lars_path (with Gram):", end='')
sys.stdout.flush()
tstart = time()
G = np.dot(X.T, X) # precomputed Gram matrix
Expand All @@ -66,7 +66,7 @@ def compute_bench(samples_range, features_range):
lars_gram[i_f, i_s] = delta

gc.collect()
print("benching lars_path (without Gram):", end='')
print("benchmarking lars_path (without Gram):", end='')
sys.stdout.flush()
tstart = time()
lars_path(X, y, Gram=None, max_iter=n_informative)
Expand All @@ -75,7 +75,7 @@ def compute_bench(samples_range, features_range):
lars[i_f, i_s] = delta

gc.collect()
print("benching orthogonal_mp (with Gram):", end='')
print("benchmarking orthogonal_mp (with Gram):", end='')
sys.stdout.flush()
tstart = time()
orthogonal_mp(X, y, precompute_gram=True,
Expand All @@ -85,7 +85,7 @@ def compute_bench(samples_range, features_range):
omp_gram[i_f, i_s] = delta

gc.collect()
print("benching orthogonal_mp (without Gram):", end='')
print("benchmarking orthogonal_mp (without Gram):", end='')
sys.stdout.flush()
tstart = time()
orthogonal_mp(X, y, precompute_gram=False,
Expand All @@ -106,7 +106,7 @@ def compute_bench(samples_range, features_range):
max_time = max(np.max(t) for t in results.values())

import pylab as pl
fig = pl.figure()
fig = pl.figure('scikit-learn OMP vs. LARS benchmark results')
for i, (label, timings) in enumerate(sorted(results.iteritems())):
ax = fig.add_subplot(1, 2, i)
vmax = max(1 - timings.min(), -1 + timings.max())
Expand Down
4 changes: 2 additions & 2 deletions benchmarks/bench_plot_parallel_pairwise.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ def plot(func):
func(X, n_jobs=-1)
multi_core.append(time.time() - start)

pl.figure()
pl.figure('scikit-learn parallel %s benchmark results' % func.__name__)
pl.plot(sample_sizes, one_core, label="one core")
pl.plot(sample_sizes, multi_core, label="multi core")
pl.xlabel('n_samples')
pl.ylabel('time')
pl.ylabel('Time (s)')
pl.title('Parallel %s' % func.__name__)
pl.legend()

Expand Down
Loading

0 comments on commit ca2142e

Please sign in to comment.