Commit 454cb2c: Adding dropout
mnielsen committed Feb 2, 2015 (1 parent: 2ae7c75)
Showing 2 changed files with 284 additions and 58 deletions.
src/conv.py: 204 changes (192 additions, 12 deletions)

@@ -9,30 +9,47 @@
"""

from collections import Counter

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np
import theano
import theano.tensor as T

import network3
from network3 import sigmoid, tanh, ReLU, Network
from network3 import ConvPoolLayer, FullyConnectedLayer, SoftmaxLayer

training_data, validation_data, test_data = network3.load_data_shared()
mini_batch_size = 10

def shallow():
for j in range(3):
def shallow(n=3, epochs=60):
nets = []
for j in range(n):
print "A shallow net with 100 hidden neurons"
net = Network([
FullyConnectedLayer(n_in=784, n_out=100),
SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
net.SGD(training_data, 60, mini_batch_size, 0.1, validation_data, test_data)
net.SGD(
training_data, epochs, mini_batch_size, 0.1,
validation_data, test_data)
nets.append(net)
return nets

def basic_conv():
for j in range(3):
def basic_conv(n=3, epochs=60):
for j in range(n):
print "Conv + FC architecture"
net = Network([
ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
filter_shape=(20, 1, 5, 5),
poolsize=(2, 2)),
FullyConnectedLayer(n_in=20*12*12, n_out=100),
SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
net.SGD(training_data, 60, mini_batch_size, 0.1, validation_data, test_data)
net.SGD(
training_data, epochs, mini_batch_size, 0.1, validation_data, test_data)
return net
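The refactored shallow() and basic_conv() now take their run count and epoch count as parameters and return the trained net(s). A usage sketch of the new signatures (the values shown are the defaults wired in above; it assumes conv.py and its data files are in place):

    import conv

    # Three runs of the 100-hidden-neuron baseline, 60 epochs each, eta = 0.1.
    nets = conv.shallow(n=3, epochs=60)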

 def omit_FC():
     for j in range(3):
@@ -43,6 +60,7 @@ def omit_FC():
                           poolsize=(2, 2)),
             SoftmaxLayer(n_in=20*12*12, n_out=10)], mini_batch_size)
         net.SGD(training_data, 60, mini_batch_size, 0.1, validation_data, test_data)
+    return net

 def dbl_conv(activation_fn=sigmoid):
     for j in range(3):
@@ -59,8 +77,14 @@ def dbl_conv(activation_fn=sigmoid):
             FullyConnectedLayer(
                 n_in=40*4*4, n_out=100, activation_fn=activation_fn),
             SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
-        net.SGD(training_data, 60, mini_batch_size, 0.1, validation_data, test_data)
+        net.SGD(training_data, 60, mini_batch_size, 0.1, validation_data, test_data)
+    return net
+
+# The following experiment was eventually omitted from the chapter,
+# but I've left it in here, since it's an important negative result:
+# basic l2 regularization didn't help much. The reason (I believe) is
+# that using convolutional-pooling layers is already a pretty strong
+# regularizer.
 def regularized_dbl_conv():
     for lmbda in [0.00001, 0.0001, 0.001, 0.01, 0.1, 1.0, 10.0, 100.0]:
         for j in range(3):
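For reference, the lmbda swept by regularized_dbl_conv is the usual L2 coefficient: network3.py adds a penalty proportional to the summed squared weights to the cost. A minimal numpy sketch of that penalty, following the book's convention C = C0 + (lmbda/2n) sum w^2 (the exact placement of the 1/2 and 1/n factors inside network3.py may differ):

    import numpy as np

    def l2_penalty(weights, lmbda, n_train):
        # L2 cost term added to the unregularized cost C0.
        return (lmbda / (2.0 * n_train)) * sum(np.sum(w ** 2) for w in weights)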
@@ -96,11 +120,15 @@ def dbl_conv_relu():

 #### Some subsequent functions may make use of the expanded MNIST
 #### data. That can be generated by running expand_mnist.py.

-def expanded_data():
+def expanded_data(n=100):
+    """n is the number of neurons in the fully-connected layer. We'll try
+    n=100, 300, and 1000.
+    """
     expanded_training_data, _, _ = network3.load_data_shared(
         "../data/mnist_expanded.pkl.gz")
     for j in range(3):
-        print "Training with expanded data, run num %s" % j
+        print "Training with expanded data, %s neurons in the FC layer, run num %s" % (n, j)
         net = Network([
             ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
                           filter_shape=(20, 1, 5, 5),
@@ -110,8 +138,160 @@ def expanded_data():
                           filter_shape=(40, 20, 5, 5),
                           poolsize=(2, 2),
                           activation_fn=ReLU),
-            FullyConnectedLayer(n_in=40*4*4, n_out=100, activation_fn=ReLU),
-            SoftmaxLayer(n_in=100, n_out=10)], mini_batch_size)
-        net.SGD(expanded_training_data, 20, mini_batch_size, 0.03,
+            FullyConnectedLayer(n_in=40*4*4, n_out=n, activation_fn=ReLU),
+            SoftmaxLayer(n_in=n, n_out=10)], mini_batch_size)
+        net.SGD(expanded_training_data, 60, mini_batch_size, 0.03,
                 validation_data, test_data, lmbda=0.1)
+    return net
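The expanded training set loaded here comes from expand_mnist.py, which (per the book) grows the 50,000 MNIST training images to 250,000 by displacing each image a single pixel up, down, left, and right. A minimal numpy sketch of one such displacement, not the script's actual code:

    import numpy as np

    def shift_image(pixels, dx, dy):
        # Displace a flattened 28x28 image by (dx, dy), zero-filling the
        # vacated edge rather than wrapping around.
        img = np.roll(np.roll(pixels.reshape(28, 28), dy, axis=0), dx, axis=1)
        if dy > 0: img[:dy, :] = 0.0
        elif dy < 0: img[dy:, :] = 0.0
        if dx > 0: img[:, :dx] = 0.0
        elif dx < 0: img[:, dx:] = 0.0
        return img.reshape(784)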

+def expanded_data_double_fc(n=100):
+    """n is the number of neurons in both fully-connected layers. We'll
+    try n=100, 300, and 1000.
+    """
+    expanded_training_data, _, _ = network3.load_data_shared(
+        "../data/mnist_expanded.pkl.gz")
+    for j in range(3):
+        print "Training with expanded data, %s neurons in two FC layers, run num %s" % (n, j)
+        net = Network([
+            ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
+                          filter_shape=(20, 1, 5, 5),
+                          poolsize=(2, 2),
+                          activation_fn=ReLU),
+            ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12),
+                          filter_shape=(40, 20, 5, 5),
+                          poolsize=(2, 2),
+                          activation_fn=ReLU),
+            FullyConnectedLayer(n_in=40*4*4, n_out=n, activation_fn=ReLU),
+            FullyConnectedLayer(n_in=n, n_out=n, activation_fn=ReLU),
+            SoftmaxLayer(n_in=n, n_out=10)], mini_batch_size)
+        net.SGD(expanded_training_data, 60, mini_batch_size, 0.03,
+                validation_data, test_data, lmbda=0.1)

+def double_fc_dropout(p0, p1, p2, repetitions):
+    expanded_training_data, _, _ = network3.load_data_shared(
+        "../data/mnist_expanded.pkl.gz")
+    nets = []
+    for j in range(repetitions):
+        print "\n\nTraining using a dropout network with parameters", p0, p1, p2
+        print "Training with expanded data, run num %s" % j
+        net = Network([
+            ConvPoolLayer(image_shape=(mini_batch_size, 1, 28, 28),
+                          filter_shape=(20, 1, 5, 5),
+                          poolsize=(2, 2),
+                          activation_fn=ReLU),
+            ConvPoolLayer(image_shape=(mini_batch_size, 20, 12, 12),
+                          filter_shape=(40, 20, 5, 5),
+                          poolsize=(2, 2),
+                          activation_fn=ReLU),
+            FullyConnectedLayer(
+                n_in=40*4*4, n_out=1000, activation_fn=ReLU, p_dropout=p0),
+            FullyConnectedLayer(
+                n_in=1000, n_out=1000, activation_fn=ReLU, p_dropout=p1),
+            SoftmaxLayer(n_in=1000, n_out=10, p_dropout=p2)], mini_batch_size)
+        net.SGD(expanded_training_data, 40, mini_batch_size, 0.03,
+                validation_data, test_data)
+        nets.append(net)
+    return nets
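Here p0, p1, and p2 are the dropout probabilities for the two fully-connected layers and the softmax layer. network3.py applies dropout by sampling a binary mask over activations during training and rescaling by (1 - p_dropout) when the full network is used; a hedged numpy sketch of that standard scheme, not the actual Theano code:

    import numpy as np

    def dropout_forward(activations, p_dropout, training, rng=np.random):
        if training:
            # Zero each unit independently with probability p_dropout.
            mask = rng.binomial(1, 1.0 - p_dropout, size=activations.shape)
            return activations * mask
        # At test time keep every unit, scaled so expectations match training.
        return activations * (1.0 - p_dropout)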

+def ensemble(nets):
+    """Takes as input a list of nets, and then computes the accuracy on
+    the test data when classifications are computed by taking a vote
+    amongst the nets. Returns a tuple containing a list of indices
+    for test data which is erroneously classified, and a list of the
+    corresponding erroneous predictions.
+
+    Note that this is a quick-and-dirty kluge: it'd be more reusable
+    (and faster) to define a Theano function taking the vote. But
+    this works.
+    """
+    test_x, test_y = test_data
+    for net in nets:
+        i = T.lscalar()  # mini-batch index
+        net.test_mb_predictions = theano.function(
+            [i], net.layers[-1].y_out,
+            givens={
+                net.x:
+                test_x[i*net.mini_batch_size: (i+1)*net.mini_batch_size]
+            })
+        net.test_predictions = list(np.concatenate(
+            [net.test_mb_predictions(i) for i in xrange(1000)]))
+    all_test_predictions = zip(*[net.test_predictions for net in nets])
+    def plurality(p): return Counter(p).most_common(1)[0][0]
+    plurality_test_predictions = [plurality(p)
+                                  for p in all_test_predictions]
+    test_y_eval = test_y.eval()
+    error_locations = [j for j in xrange(10000)
+                       if plurality_test_predictions[j] != test_y_eval[j]]
+    erroneous_predictions = [plurality(all_test_predictions[j])
+                             for j in error_locations]
+    print "Accuracy is {:.2%}".format((1 - len(error_locations)/10000.0))
+    return error_locations, erroneous_predictions
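The vote taken in ensemble is a straight plurality over the per-image predictions of each net, via collections.Counter. A standalone toy example of the same voting rule:

    from collections import Counter

    predictions_per_net = [[7, 2, 1], [7, 3, 1], [9, 3, 1]]  # 3 nets x 3 images
    votes_per_image = zip(*predictions_per_net)  # [(7, 7, 9), (2, 3, 3), (1, 1, 1)]
    print([Counter(v).most_common(1)[0][0] for v in votes_per_image])  # [7, 3, 1]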

+def plot_errors(error_locations, erroneous_predictions=None):
+    test_x, test_y = test_data[0].eval(), test_data[1].eval()
+    fig = plt.figure()
+    error_images = [np.array(test_x[i]).reshape(28, -1) for i in error_locations]
+    n = min(40, len(error_locations))
+    for j in range(n):
+        ax = plt.subplot2grid((5, 8), (j/8, j % 8))
+        ax.matshow(error_images[j], cmap=matplotlib.cm.binary)
+        ax.text(24, 5, test_y[error_locations[j]])
+        if erroneous_predictions:
+            ax.text(24, 24, erroneous_predictions[j])
+        plt.xticks(np.array([]))
+        plt.yticks(np.array([]))
+    plt.tight_layout()
+    return plt

+def plot_filters(net, layer, x, y):
+    """Plot the filters for net after the (convolutional) layer number
+    layer. They are plotted in x by y format. So, for example, if we
+    have 20 filters after layer 0, then we can call plot_filters(net, 0, 5, 4)
+    to get a 5 by 4 plot of all filters."""
+    filters = net.layers[layer].w.eval()
+    fig = plt.figure()
+    for j in range(len(filters)):
+        ax = fig.add_subplot(y, x, j+1)  # subplot numbering is 1-based
+        ax.matshow(filters[j][0], cmap=matplotlib.cm.binary)
+        plt.xticks(np.array([]))
+        plt.yticks(np.array([]))
+    plt.tight_layout()
+    return plt
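One thing worth noting when reading plot_filters: a ConvPoolLayer's weight tensor has shape (filters, input_channels, 5, 5), and filters[j][0] draws only the first input channel of each filter. A quick shape check, assuming the nets returned by double_fc_dropout above:

    w0 = nets[0].layers[0].w.eval()  # first conv layer
    print(w0.shape)                  # (20, 1, 5, 5): 20 filters, 1 input channel
    w1 = nets[0].layers[1].w.eval()  # second conv layer
    print(w1.shape)                  # (40, 20, 5, 5): only channel 0 is drawn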


+#### Helper method to run all experiments in the book
+
+def run_experiments():
+    """Run the experiments described in the book. Note that the later
+    experiments require access to the expanded training data, which
+    can be generated by running expand_mnist.py.
+    """
+    shallow()
+    basic_conv()
+    omit_FC()
+    dbl_conv(activation_fn=sigmoid)
+    # omitted, but still interesting: regularized_dbl_conv()
+    dbl_conv_relu()
+    expanded_data(n=100)
+    expanded_data(n=300)
+    expanded_data(n=1000)
+    expanded_data_double_fc(n=100)
+    expanded_data_double_fc(n=300)
+    expanded_data_double_fc(n=1000)
+    nets = double_fc_dropout(0.5, 0.5, 0.5, 5)
+    # plot the erroneous digits in the ensemble of nets just trained
+    error_locations, erroneous_predictions = ensemble(nets)
+    plt = plot_errors(error_locations, erroneous_predictions)
+    plt.savefig("ensemble_errors.png")
+    # plot the filters learned by the first of the nets just trained
+    plt = plot_filters(nets[0], 0, 5, 4)
+    plt.savefig("net_full_layer_0.png")
+    plt = plot_filters(nets[0], 1, 8, 5)
+    plt.savefig("net_full_layer_1.png")
