PyConvNet with mnist and cifar demo
Eniac-Xie committed Feb 26, 2016
1 parent ed2b4e1 commit 72b126a
Showing 17 changed files with 844 additions and 0 deletions.
12 changes: 12 additions & 0 deletions README.md
@@ -0,0 +1,12 @@
# PyConvNet: CNN for Python
**PyConvNet** is a Python toolbox implementing convolutional neural networks.

To train LeNet on the MNIST dataset, do as follows (you may need some Python packages such as numpy and matplotlib):

1. cd code
2. python mnist_demo.py

To train cifar-net on the CIFAR dataset, do as follows:

1. cd code
2. python cifar-demo.py
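
For a quick look at the underlying API, here is a minimal sketch of how a network can be assembled with the `ConvNet` class from `code/cnn/conv_net.py` (run from the `code` directory with Python 2, matching the rest of the toolbox; the layer sizes and dummy data below are illustrative only, not the shipped LeNet or cifar-net definitions):

```python
# Illustrative sketch only -- layer sizes and dummy data are made up,
# but the parameter names match add_layer() in code/cnn/conv_net.py.
import numpy as np
from cnn.conv_net import ConvNet

# data layout is (H, W, D, N); labels are (1, N)
train_data = np.random.randn(28, 28, 1, 256)
train_labels = np.random.randint(0, 10, (1, 256))

cnn = ConvNet()
cnn.add_layer('conv', {'HF': 5, 'WF': 5, 'DF': 1, 'NF': 8,
                       'var': 0.01, 'stride': 1, 'pad': 2})
cnn.add_layer('relu', {})
cnn.add_layer('max_pooling', {'HF': 2, 'WF': 2, 'stride': 2, 'pad': 0})
cnn.add_layer('conv', {'HF': 14, 'WF': 14, 'DF': 8, 'NF': 10,
                       'var': 0.01, 'stride': 1, 'pad': 0})
cnn.add_layer('softmax-loss', {})

lr = np.ones(1) * 0.01                        # one learning rate per epoch
cnn.train(train_data, train_labels, lr, epoch=1, batch_size=32)
pred = cnn.predict(train_data[:, :, :, :10])  # (1, 10) predicted class indices
```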
32 changes: 32 additions & 0 deletions code/cifar-demo.py
@@ -0,0 +1,32 @@
# coding=utf-8

import os
import scipy.io as sio
import numpy as np

from load_cifar import load_cifar
from model.initial_cifarNet import initial_cifar

train_data = []
valid_data = []
train_labels = []
valid_labels = []
if os.path.isfile('../data/cifar/cifar.mat'):
print 'read mat file: %s' % ('../data/cifar/cifar.mat')
data = sio.loadmat('../data/cifar/cifar.mat')
train_data = data['train_data']
valid_data = data['valid_data']
train_labels = data['train_labels']
valid_labels = data['valid_labels']
else:
train_data, valid_data, train_labels, valid_labels = load_cifar()

cnn = initial_cifar()
lr = np.ones(20) * 0.01
cnn.train(train_data, train_labels, lr, epoch=20, batch_size=32)

res = cnn.predict(valid_data)
res = res.reshape(valid_labels.shape)

print 'Accuracy is: %f' % (np.sum(res == valid_labels) / float(np.max(valid_labels.shape)))
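
The branch above reuses a cached `../data/cifar/cifar.mat` when it exists, but this commit only shows the `load_cifar()` fallback. A hedged sketch of how such a cache could be written with `scipy.io.savemat`, assuming the same key names the demo reads back:

```python
# Sketch: write the cache that cifar-demo.py looks for (key names match
# the sio.loadmat() call above; the path is the one the demo checks).
import scipy.io as sio
from load_cifar import load_cifar

train_data, valid_data, train_labels, valid_labels = load_cifar()
sio.savemat('../data/cifar/cifar.mat', {'train_data': train_data,
                                        'valid_data': valid_data,
                                        'train_labels': train_labels,
                                        'valid_labels': valid_labels})
```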
1 change: 1 addition & 0 deletions code/cnn/__init__.py
@@ -0,0 +1 @@
# empty file
166 changes: 166 additions & 0 deletions code/cnn/conv_net.py
@@ -0,0 +1,166 @@
# coding=utf-8

import numpy as np
from layers import *
import time

class ConvNet:
def __init__(self):
self.layers = []

def add_layer(self, layer_type, layer_params):
if layer_type == 'conv':
HF = layer_params['HF']
WF = layer_params['WF']
DF = layer_params['DF']
NF = layer_params['NF']
l_weights = np.random.normal(0, layer_params['var'], (HF, WF, DF, NF))
l_bias = np.zeros((1, 1, 1, NF))
layer_params = {'type': 'conv',
'weights': l_weights,
'bias': l_bias,
'stride': layer_params['stride'],
'pad': layer_params['pad'],
'input': None,
'output': None,
'grad': None}
self.layers.append(layer_params)
elif layer_type == 'max_pooling':
layer_params = {'type': 'max_pooling',
'stride': layer_params['stride'],
'HF': layer_params['HF'],
'WF': layer_params['WF'],
'pad': layer_params['pad'],
'input': None,
'output': None,
'grad': None}
self.layers.append(layer_params)
elif layer_type == 'relu':
layer_params = {'type': 'relu',
'input': None,
'output': None,
'grad': None}
self.layers.append(layer_params)
elif layer_type == 'softmax-loss':
layer_params = {'type': 'softmax-loss',
'input': None,
'output': None,
'grad': None}
self.layers.append(layer_params)
else:
print 'unknown layer type!\n'
exit(1)

def forward(self, data, label=[]):
for idx, each_layer in enumerate(self.layers):
if idx == 0:
each_layer['input'] = data
else:
each_layer['input'] = self.layers[idx - 1]['output']
if each_layer['type'] == 'conv':
params = {'stride': each_layer['stride'],
'pad': each_layer['pad']}
each_layer['output'] = conv_forward(each_layer['input'],
each_layer['weights'],
each_layer['bias'], params)
elif each_layer['type'] == 'max_pooling':
params = {'stride': each_layer['stride'],
'HF': each_layer['HF'],
'WF': each_layer['WF'],
'pad': each_layer['pad']
}
each_layer['output'] = max_pooling_forward(each_layer['input'],
params)
elif each_layer['type'] == 'relu':
each_layer['output'] = relu_forward(each_layer['input'])
elif each_layer['type'] == 'softmax-loss':
if len(label) == 0:
each_layer['output'] = softmax(each_layer['input'])
else:
each_layer['output'] = softmax_loss_forward(each_layer['input'], label)
return each_layer['output']

def backward(self, data, label, lr=0.01):
for idx in reversed(np.arange(len(self.layers))):
current_layer = self.layers[idx]
if current_layer['type'] == 'softmax-loss':
if idx != len(self.layers) - 1:
print 'wrong architecture'
exit(-1)
self.layers[idx]['grad'] = softmax_loss_backward(self.layers[idx - 1]['output'],
label)

elif current_layer['type'] == 'conv':
conv_param = {'stride': self.layers[idx]['stride'],
'pad': self.layers[idx]['pad']}
self.layers[idx]['grad'] = conv_backward(self.layers[idx]['input'],
self.layers[idx]['weights'],
self.layers[idx]['bias'],
conv_param,
self.layers[idx + 1]['grad'][0])
self.layers[idx]['weights'] = self.layers[idx]['weights'] \
- lr * self.layers[idx]['grad'][1]
self.layers[idx]['bias'] = self.layers[idx]['bias'] \
- lr * self.layers[idx]['grad'][2]
elif current_layer['type'] == 'max_pooling':
pooling_params = {'stride': self.layers[idx]['stride'],
'HF': self.layers[idx]['HF'],
'WF': self.layers[idx]['WF'],
'pad': self.layers[idx]['pad']}
self.layers[idx]['grad'] = max_pooling_backward(self.layers[idx]['input'],
self.layers[idx + 1]['grad'][0],
pooling_params)
elif current_layer['type'] == 'relu':
self.layers[idx]['grad'] = relu_backward(self.layers[idx]['input'], self.layers[idx + 1]['grad'][0])

def predict(self, test_data, batch_size=50):
_, _, _, N = test_data.shape
prediction = np.zeros((1, N))
batch_num = int(np.ceil(float(N) / float(batch_size)))
for batch_idx in np.arange(batch_num):
sub_test_data = None
sub_test_label= None
if batch_idx == batch_num - 1:
sub_test_data = test_data[:, :, :, batch_idx * batch_size:]
self.forward(sub_test_data)
prediction[0, batch_idx * batch_size:] = self.layers[-1]['output']
else:
sub_test_data = test_data[:, :, :, batch_idx * batch_size:(batch_idx + 1) * batch_size]
self.forward(sub_test_data)
prediction[0, batch_idx * batch_size:(batch_idx + 1) * batch_size] = self.layers[-1]['output']
return prediction

def train(self, train_data, train_label, lr, epoch=20, batch_size=100):
H, W, D, N = train_data.shape
_, N_l = train_label.shape
assert N == N_l, 'Wrong data input!'
# shuffle train_data
shuffle_idx = np.arange(N)
np.random.shuffle(shuffle_idx)
train_data = train_data[:, :, :, shuffle_idx]
train_label = train_label[:, shuffle_idx]
error_list = []
for epoch_idx in np.arange(epoch):
batch_num = int(np.ceil(float(N) / float(batch_size)))
for batch_idx in np.arange(batch_num):
# start timing
start_t = time.clock()
sub_train_data = None
sub_train_label= None
if batch_idx == batch_num - 1:
sub_train_data = train_data[:, :, :, batch_idx * batch_size:]
sub_train_label = train_label[:, batch_idx * batch_size:]
else:
sub_train_data = train_data[:, :, :, batch_idx * batch_size:(batch_idx + 1) * batch_size]
sub_train_label = train_label[:, batch_idx * batch_size:(batch_idx + 1) * batch_size]

loss = self.forward(sub_train_data, sub_train_label)
error_list.append(loss)

self.backward(sub_train_data, sub_train_label, lr[epoch_idx])
# end timing
end_t = time.clock()
print 'epoch: %d, batch: %d, time: %f sec, obj: %f' % (epoch_idx,
batch_idx,
end_t - start_t,
np.mean(np.array(error_list)))
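
Note that `train` reads one learning rate per epoch (`lr[epoch_idx]`), so the schedule is just a numpy array supplied by the caller. A small sketch of a stepped schedule (the decay point and values are hypothetical, not something the demos use):

```python
import numpy as np

epochs = 20
lr = np.ones(epochs) * 0.01   # constant 0.01, as the demos pass in
lr[10:] = 0.001               # hypothetical step decay after epoch 10
# cnn.train(train_data, train_labels, lr, epoch=epochs, batch_size=100)
```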
65 changes: 65 additions & 0 deletions code/cnn/im2col.py
@@ -0,0 +1,65 @@
# coding=utf-8

import numpy as np

def im2col_index(x_shape, HF, WF, pad, stride):
# get input size
H, W, D, N = x_shape
# get output size
out_h = 0
out_w = 0
if type(pad) is int:
out_h = (H + 2 * pad - HF) / stride + 1
out_w = (W + 2 * pad - WF) / stride + 1
else:
out_h = (H + pad[0] + pad[1] - HF) / stride + 1
out_w = (W + pad[2] + pad[3] - WF) / stride + 1
# for the row index, first compute the row offsets within one HF * WF block
r0 = np.repeat(np.arange(HF), WF)
r0 = np.tile(r0, D)
# then compute the row offset of each output location
r_bias = stride * np.repeat(np.arange(out_h), out_w)
# the full row index is r0 + r_bias
r = r0.reshape(-1, 1) + r_bias.reshape(1, -1)

# likewise for the column index
c0 = np.tile(np.arange(WF), HF * D)
c_bias = stride * np.tile(np.arange(out_w), out_h)
c = c0.reshape(-1, 1) + c_bias.reshape(1, -1)

# then the dimension index
d = np.repeat(np.arange(D), HF * WF).reshape(-1, 1)

return (r, c, d)

def im2col(x, HF, WF, pad, stride):
# padding
x_padded = None
if type(pad) is int:
x_padded = np.pad(x, ((pad, pad), (pad, pad), (0, 0), (0, 0)), mode='constant')
else:
x_padded = np.pad(x, ((pad[0], pad[1]), (pad[2], pad[3]), (0, 0), (0, 0)), mode='constant')
r, c, d = im2col_index(x.shape, HF, WF, pad, stride)
cols = x_padded[r, c, d, :]
cols = cols.reshape(HF * WF * x.shape[2], -1)
return cols

def col2im(cols, x_shape, HF, WF, pad, stride):
# get input size
H, W, D, N = x_shape
H_padded = 0
W_padded = 0
if type(pad) is int:
H_padded, W_padded = H + 2 * pad, W + 2 * pad
else:
H_padded, W_padded = H + pad[0] + pad[1], W + pad[2] + pad[3]
x_padded = np.zeros((H_padded, W_padded, D, N), dtype=cols.dtype)
r, c, d = im2col_index(x_shape, HF, WF, pad, stride)
cols_reshaped = cols.reshape((HF * WF * D, -1, N))
np.add.at(x_padded, (r, c, d, slice(None)), cols_reshaped)
if pad == 0:
return x_padded
elif type(pad) is int:
return x_padded[pad:-pad, pad:-pad, :, :]
else:
return x_padded[pad[0]:-pad[1], pad[2]:-pad[3], :, :]
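
As a sanity check of the index bookkeeping above, the following sketch (Python 2, run from `code/cnn`) pushes a tiny 4x4 single-channel image through `im2col` and back; with non-overlapping 2x2 patches the reconstruction should be exact:

```python
# Sketch: shape and round-trip check for im2col / col2im.
import numpy as np
from im2col import im2col, col2im

x = np.arange(16, dtype=np.float64).reshape(4, 4, 1, 1)    # (H, W, D, N)
cols = im2col(x, 2, 2, 0, 2)         # HF=2, WF=2, pad=0, stride=2
print cols.shape                     # (HF*WF*D, out_h*out_w*N) = (4, 4)
print np.allclose(col2im(cols, x.shape, 2, 2, 0, 2), x)    # True: patches do not overlap
```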
116 changes: 116 additions & 0 deletions code/cnn/layers.py
@@ -0,0 +1,116 @@
# coding=utf-8

import numpy as np
from im2col import im2col
from im2col import col2im

def conv_forward(x, w, b, params):
# get convolution parameters
stride = params['stride']
pad = params['pad']
# get input size
H, W, D, N = x.shape
HF, WF, DF, NF = w.shape
_, _, DB, NB = b.shape
# check input size
assert D == DF, 'input depth does not match filter depth'
assert NF == NB, 'bias size does not match the number of filters'
# check params
assert (H + 2 * pad - HF) % stride == 0, 'pad and stride do not work'
assert (W + 2 * pad - WF) % stride == 0, 'pad and stride do not work'
# get output size
HO = (H + 2 * pad - HF) / stride + 1
WO = (W + 2 * pad - WF) / stride + 1
x_col = im2col(x, HF, WF, pad, stride)
w_col = w.transpose(3, 0, 1, 2).reshape((NF, -1))
output_col = w_col.dot(x_col) + b.reshape(-1, 1)
output_col = output_col.reshape((NF, HO, WO, N))
output_col = output_col.transpose(1, 2, 0, 3)
return output_col

def conv_backward(x, w, b, conv_param, dout):
HF, WF, DF, NF = w.shape
x_col = im2col(x, HF, WF, conv_param['pad'], conv_param['stride'])
w_col = w.transpose(3, 0, 1, 2).reshape((NF, -1))
db = np.sum(dout, axis=(0, 1, 3))
dout = dout.transpose(2, 0, 1, 3)
dout = dout.reshape((w_col.shape[0], x_col.shape[-1]))
dx_col = w_col.T.dot(dout)
dw_col = dout.dot(x_col.T)

dx = col2im(dx_col, x.shape, HF, WF, conv_param['pad'], conv_param['stride'])
dw = dw_col.reshape((dw_col.shape[0], HF, WF, DF))
dw = dw.transpose(1, 2, 3, 0)

return [dx, dw, db]

def max_pooling_forward(x, pool_params):
# get max-pooling parameters
stride = pool_params['stride']
HF = pool_params['HF']
WF = pool_params['WF']
pad = pool_params['pad']
# get input size
H, W, D, N = x.shape
x_reshaped = x.reshape(H, W, 1, -1)
# get output size
HO = 0
WO = 0
if type(pad) is int:
HO = (H + 2 * pad - HF) / stride + 1
WO = (W + 2 * pad - WF) / stride + 1
else:
HO = (H + pad[0] + pad[1] - HF) / stride + 1
WO = (W + pad[2] + pad[3] - WF) / stride + 1
x_col = im2col(x_reshaped, HF, WF, pad, stride)
x_col_argmax = np.argmax(x_col, axis=0)
x_col_max = x_col[x_col_argmax, np.arange(x_col.shape[1])]
out = x_col_max.reshape((HO, WO, D, N))
return out

def max_pooling_backward(x, dout, pool_params):
H, W, D, N = x.shape
x_reshaped = x.reshape(H, W, 1, -1)
x_col = im2col(x_reshaped, pool_params['HF'],
pool_params['WF'], pool_params['pad'], pool_params['stride'])
x_col_argmax = np.argmax(x_col, axis=0)
dx_col = np.zeros_like(x_col)
dx_col[x_col_argmax, np.arange(x_col.shape[1])] = dout.ravel()
dx_shaped = col2im(dx_col, x_reshaped.shape, pool_params['HF'], pool_params['WF'],
pool_params['pad'], stride=pool_params['stride'])
dx = dx_shaped.reshape(x.shape)
return [dx]

def relu_forward(x):
out = np.where(x > 0, x, 0)
return out

def relu_backward(x, dout):
dx = np.where(x > 0, dout, 0)
return [dx]

def softmax_loss_forward(x, y):
# x holds the class scores (reshaped to C * N), y holds the labels (1 * N)
x_reshaped = x.reshape((x.shape[2], x.shape[3]))
probs = np.exp(x_reshaped - np.max(x_reshaped, axis=0, keepdims=True))
probs /= np.sum(probs, axis=0, keepdims=True)
N = x_reshaped.shape[1]
loss = -np.sum(np.log(probs[y, np.arange(N)])) / N
return loss

def softmax_loss_backward(x, y):
x_reshaped = x.reshape((x.shape[2], x.shape[3]))
probs = np.exp(x_reshaped - np.max(x_reshaped, axis=0, keepdims=True))
probs /= np.sum(probs, axis=0, keepdims=True)
dx = probs.copy()
N = x_reshaped.shape[1]
dx[y, np.arange(N)] -= 1
dx /= N
dx = dx.reshape((1, 1, dx.shape[0], dx.shape[1]))
return [dx]

def softmax(x):
x_reshaped = x.reshape((x.shape[2], x.shape[3]))
probs = np.exp(x_reshaped - np.max(x_reshaped, axis=0, keepdims=True))
probs /= np.sum(probs, axis=0, keepdims=True)
return np.argmax(probs, axis=0)
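
To check that the softmax loss and its gradient above agree, a small numerical gradient check can be run (a sketch, Python 2, run from `code/cnn`; the shapes follow the (1, 1, C, N) score layout and (1, N) label layout used throughout):

```python
# Sketch: finite-difference check of softmax_loss_backward against
# softmax_loss_forward.
import numpy as np
from layers import softmax_loss_forward, softmax_loss_backward

np.random.seed(0)
C, N = 3, 5
x = np.random.randn(1, 1, C, N)             # class scores
y = np.random.randint(0, C, (1, N))         # integer labels
dx = softmax_loss_backward(x, y)[0]         # analytic gradient, (1, 1, C, N)

eps = 1e-5
num = np.zeros_like(x)
for i in range(C):
    for j in range(N):
        xp = x.copy(); xp[0, 0, i, j] += eps
        xm = x.copy(); xm[0, 0, i, j] -= eps
        num[0, 0, i, j] = (softmax_loss_forward(xp, y) -
                           softmax_loss_forward(xm, y)) / (2 * eps)

print np.max(np.abs(num - dx))              # should be tiny (~1e-8 or smaller)
```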