forked from Eniac-Xie/PyConvNet
Showing 17 changed files with 844 additions and 0 deletions.
@@ -0,0 +1,12 @@
# PyConvNet: CNN for Python
**PyConvNet** is a Python toolbox implementing convolutional neural networks.

To train LeNet on the MNIST dataset, do as follows (you may need some Python packages such as numpy and matplotlib):

1. cd code
2. python mnist_demo.py

To train cifar-net on the CIFAR dataset, do as follows:

1. cd code
2. python cifar_demo.py
@@ -0,0 +1,32 @@
# coding=utf-8

import os
import scipy.io as sio
import numpy as np

from load_cifar import load_cifar
from model.initial_cifarNet import initial_cifar

# load CIFAR data, from a cached .mat file if one exists
train_data = []
valid_data = []
train_labels = []
valid_labels = []
if os.path.isfile('../data/cifar/cifar.mat'):
    print 'read mat file: %s' % ('../data/cifar/cifar.mat')
    data = sio.loadmat('../data/cifar/cifar.mat')
    train_data = data['train_data']
    valid_data = data['valid_data']
    train_labels = data['train_labels']
    valid_labels = data['valid_labels']
else:
    train_data, valid_data, train_labels, valid_labels = load_cifar()

cnn = initial_cifar()
lr = np.ones(20) * 0.01
cnn.train(train_data, train_labels, lr, epoch=20, batch_size=32)

res = cnn.predict(valid_data)
res = res.reshape(valid_labels.shape)

print 'Accuracy is: %f' % (np.sum(res == valid_labels) / float(np.max(valid_labels.shape)))
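initial_cifar is imported from model.initial_cifarNet, a file not shown in this commit. As a hedged sketch only, a constructor in its style could be assembled from the ConvNet.add_layer interface defined later in this commit (assuming the class file is importable as convnet); the filter sizes and counts here are illustrative assumptions, not the real cifar-net architecture:

# hypothetical sketch, not the actual initial_cifar: builds a small
# conv -> relu -> pool -> conv -> softmax net with the add_layer API
from convnet import ConvNet

def initial_cifar_sketch():
    cnn = ConvNet()
    # 5x5 conv over 3 input channels, 32 filters: 32x32x3 -> 28x28x32
    cnn.add_layer('conv', {'HF': 5, 'WF': 5, 'DF': 3, 'NF': 32,
                           'var': 0.01, 'stride': 1, 'pad': 0})
    cnn.add_layer('relu', {})
    # 2x2 max pooling with stride 2: 28x28x32 -> 14x14x32
    cnn.add_layer('max_pooling', {'HF': 2, 'WF': 2, 'stride': 2, 'pad': 0})
    # a 14x14 conv acts as a fully connected layer: 14x14x32 -> 1x1x10
    cnn.add_layer('conv', {'HF': 14, 'WF': 14, 'DF': 32, 'NF': 10,
                           'var': 0.01, 'stride': 1, 'pad': 0})
    cnn.add_layer('softmax-loss', {})
    return cnn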
@@ -0,0 +1 @@
# empty file
@@ -0,0 +1,166 @@
# coding=utf-8

import numpy as np
from layers import *
import time

class ConvNet:
    def __init__(self):
        self.layers = []

    def add_layer(self, layer_type, layer_params):
        if layer_type == 'conv':
            HF = layer_params['HF']
            WF = layer_params['WF']
            DF = layer_params['DF']
            NF = layer_params['NF']
            l_weights = np.random.normal(0, layer_params['var'], (HF, WF, DF, NF))
            l_bias = np.zeros((1, 1, 1, NF))
            layer_params = {'type': 'conv',
                            'weights': l_weights,
                            'bias': l_bias,
                            'stride': layer_params['stride'],
                            'pad': layer_params['pad'],
                            'input': None,
                            'output': None,
                            'grad': None}
            self.layers.append(layer_params)
        elif layer_type == 'max_pooling':
            layer_params = {'type': 'max_pooling',
                            'stride': layer_params['stride'],
                            'HF': layer_params['HF'],
                            'WF': layer_params['WF'],
                            'pad': layer_params['pad'],
                            'input': None,
                            'output': None,
                            'grad': None}
            self.layers.append(layer_params)
        elif layer_type == 'relu':
            layer_params = {'type': 'relu',
                            'input': None,
                            'output': None,
                            'grad': None}
            self.layers.append(layer_params)
        elif layer_type == 'softmax-loss':
            layer_params = {'type': 'softmax-loss',
                            'input': None,
                            'output': None,
                            'grad': None}
            self.layers.append(layer_params)
        else:
            print 'unknown layer type!\n'
            exit(1)

    def forward(self, data, label=[]):
        for idx, each_layer in enumerate(self.layers):
            if idx == 0:
                each_layer['input'] = data
            else:
                each_layer['input'] = self.layers[idx - 1]['output']
            if each_layer['type'] == 'conv':
                params = {'stride': each_layer['stride'],
                          'pad': each_layer['pad']}
                each_layer['output'] = conv_forward(each_layer['input'],
                                                    each_layer['weights'],
                                                    each_layer['bias'], params)
            elif each_layer['type'] == 'max_pooling':
                params = {'stride': each_layer['stride'],
                          'HF': each_layer['HF'],
                          'WF': each_layer['WF'],
                          'pad': each_layer['pad']}
                each_layer['output'] = max_pooling_forward(each_layer['input'],
                                                           params)
            elif each_layer['type'] == 'relu':
                each_layer['output'] = relu_forward(each_layer['input'])
            elif each_layer['type'] == 'softmax-loss':
                # with no label given, produce class predictions instead of a loss
                if len(label) == 0:
                    each_layer['output'] = softmax(each_layer['input'])
                else:
                    each_layer['output'] = softmax_loss_forward(each_layer['input'], label)
        return each_layer['output']

    def backward(self, data, label, lr=0.01):
        for idx in reversed(np.arange(len(self.layers))):
            current_layer = self.layers[idx]
            if current_layer['type'] == 'softmax-loss':
                if idx != len(self.layers) - 1:
                    print 'wrong architecture: softmax-loss must be the last layer'
                    exit(-1)
                self.layers[idx]['grad'] = softmax_loss_backward(self.layers[idx - 1]['output'],
                                                                 label)
            elif current_layer['type'] == 'conv':
                conv_param = {'stride': self.layers[idx]['stride'],
                              'pad': self.layers[idx]['pad']}
                self.layers[idx]['grad'] = conv_backward(self.layers[idx]['input'],
                                                         self.layers[idx]['weights'],
                                                         self.layers[idx]['bias'],
                                                         conv_param,
                                                         self.layers[idx + 1]['grad'][0])
                # vanilla SGD update on the conv weights and bias
                self.layers[idx]['weights'] = self.layers[idx]['weights'] \
                    - lr * self.layers[idx]['grad'][1]
                self.layers[idx]['bias'] = self.layers[idx]['bias'] \
                    - lr * self.layers[idx]['grad'][2]
            elif current_layer['type'] == 'max_pooling':
                pooling_params = {'stride': self.layers[idx]['stride'],
                                  'HF': self.layers[idx]['HF'],
                                  'WF': self.layers[idx]['WF'],
                                  'pad': self.layers[idx]['pad']}
                self.layers[idx]['grad'] = max_pooling_backward(self.layers[idx]['input'],
                                                                self.layers[idx + 1]['grad'][0],
                                                                pooling_params)
            elif current_layer['type'] == 'relu':
                self.layers[idx]['grad'] = relu_backward(self.layers[idx]['input'],
                                                         self.layers[idx + 1]['grad'][0])

    def predict(self, test_data, batch_size=50):
        _, _, _, N = test_data.shape
        prediction = np.zeros((1, N))
        batch_num = int(np.ceil(float(N) / float(batch_size)))
        for batch_idx in np.arange(batch_num):
            sub_test_data = None
            if batch_idx == batch_num - 1:
                sub_test_data = test_data[:, :, :, batch_idx * batch_size:]
                self.forward(sub_test_data)
                prediction[0, batch_idx * batch_size:] = self.layers[-1]['output']
            else:
                sub_test_data = test_data[:, :, :, batch_idx * batch_size:(batch_idx + 1) * batch_size]
                self.forward(sub_test_data)
                prediction[0, batch_idx * batch_size:(batch_idx + 1) * batch_size] = self.layers[-1]['output']
        return prediction

    def train(self, train_data, train_label, lr, epoch=20, batch_size=100):
        H, W, D, N = train_data.shape
        _, N_l = train_label.shape
        assert N == N_l, 'Wrong data input!'
        # shuffle train_data
        shuffle_idx = np.arange(N)
        np.random.shuffle(shuffle_idx)
        train_data = train_data[:, :, :, shuffle_idx]
        train_label = train_label[:, shuffle_idx]
        error_list = []
        for epoch_idx in np.arange(epoch):
            batch_num = int(np.ceil(float(N) / float(batch_size)))
            for batch_idx in np.arange(batch_num):
                # start timing
                start_t = time.clock()
                sub_train_data = None
                sub_train_label = None
                if batch_idx == batch_num - 1:
                    sub_train_data = train_data[:, :, :, batch_idx * batch_size:]
                    sub_train_label = train_label[:, batch_idx * batch_size:]
                else:
                    sub_train_data = train_data[:, :, :, batch_idx * batch_size:(batch_idx + 1) * batch_size]
                    sub_train_label = train_label[:, batch_idx * batch_size:(batch_idx + 1) * batch_size]

                loss = self.forward(sub_train_data, sub_train_label)
                error_list.append(loss)

                self.backward(sub_train_data, sub_train_label, lr[epoch_idx])
                # end timing
                end_t = time.clock()
                print 'epoch: %d, batch: %d, time: %f sec, obj: %f' % (epoch_idx,
                                                                       batch_idx,
                                                                       end_t - start_t,
                                                                       np.mean(np.array(error_list)))
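As a quick smoke test of the class above, the following sketch trains a one-conv-layer net on random data (again assuming the class is importable as convnet). Shapes follow the (H, W, D, N) layout used throughout this commit; the printed accuracy is meaningless beyond checking that forward, backward, and predict run:

# smoke-test sketch: random 8x8 single-channel inputs, 2 classes
import numpy as np
from convnet import ConvNet

net = ConvNet()
# an 8x8 filter over the full image reduces each input to 1x1x2 class scores
net.add_layer('conv', {'HF': 8, 'WF': 8, 'DF': 1, 'NF': 2,
                       'var': 0.01, 'stride': 1, 'pad': 0})
net.add_layer('softmax-loss', {})

data = np.random.randn(8, 8, 1, 100)
labels = np.random.randint(0, 2, (1, 100))
net.train(data, labels, lr=np.ones(2) * 0.01, epoch=2, batch_size=20)
pred = net.predict(data, batch_size=20)
print 'accuracy on the training data: %f' % np.mean(pred == labels)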
@@ -0,0 +1,65 @@
# coding=utf-8

import numpy as np

def im2col_index(x_shape, HF, WF, pad, stride):
    # get input size
    H, W, D, N = x_shape
    # get output size; pad may be an int or a (top, bottom, left, right) tuple
    out_h = 0
    out_w = 0
    if type(pad) is int:
        out_h = (H + 2 * pad - HF) / stride + 1
        out_w = (W + 2 * pad - WF) / stride + 1
    else:
        out_h = (H + pad[0] + pad[1] - HF) / stride + 1
        out_w = (W + pad[2] + pad[3] - WF) / stride + 1
    # for the row index, compute the indices of the first HF * WF block
    r0 = np.repeat(np.arange(HF), WF)
    r0 = np.tile(r0, D)
    # then compute the offset of each block
    r_bias = stride * np.repeat(np.arange(out_h), out_w)
    # the row index is r0 + r_bias
    r = r0.reshape(-1, 1) + r_bias.reshape(1, -1)

    # the same for the col index
    c0 = np.tile(np.arange(WF), HF * D)
    c_bias = stride * np.tile(np.arange(out_w), out_h)
    c = c0.reshape(-1, 1) + c_bias.reshape(1, -1)

    # then the depth index
    d = np.repeat(np.arange(D), HF * WF).reshape(-1, 1)

    return (r, c, d)

def im2col(x, HF, WF, pad, stride):
    # zero-pad the spatial dimensions
    x_padded = None
    if type(pad) is int:
        x_padded = np.pad(x, ((pad, pad), (pad, pad), (0, 0), (0, 0)), mode='constant')
    else:
        x_padded = np.pad(x, ((pad[0], pad[1]), (pad[2], pad[3]), (0, 0), (0, 0)), mode='constant')
    r, c, d = im2col_index(x.shape, HF, WF, pad, stride)
    cols = x_padded[r, c, d, :]
    cols = cols.reshape(HF * WF * x.shape[2], -1)
    return cols

def col2im(cols, x_shape, HF, WF, pad, stride):
    # get input size
    H, W, D, N = x_shape
    H_padded = 0
    W_padded = 0
    if type(pad) is int:
        H_padded, W_padded = H + 2 * pad, W + 2 * pad
    else:
        H_padded, W_padded = H + pad[0] + pad[1], W + pad[2] + pad[3]
    x_padded = np.zeros((H_padded, W_padded, D, N), dtype=cols.dtype)
    r, c, d = im2col_index(x_shape, HF, WF, pad, stride)
    cols_reshaped = cols.reshape((HF * WF * D, -1, N))
    # scatter-add overlapping patches back into the padded image
    np.add.at(x_padded, (r, c, d, slice(None)), cols_reshaped)
    # strip the padding; end indices are computed explicitly so that a zero
    # entry in a pad tuple does not produce an empty -0 slice
    if type(pad) is int:
        if pad == 0:
            return x_padded
        return x_padded[pad:-pad, pad:-pad, :, :]
    else:
        return x_padded[pad[0]:H_padded - pad[1], pad[2]:W_padded - pad[3], :, :]
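A small self-check of im2col on assumed toy shapes: each column of the result should be one HF x WF x D patch of the input, flattened depth-major to match the row ordering built by im2col_index.

# sanity-check sketch for im2col on a tiny input
import numpy as np
from im2col import im2col

H, W, D, N = 4, 4, 2, 1
HF, WF, stride, pad = 2, 2, 2, 0
x = np.arange(H * W * D * N, dtype=float).reshape(H, W, D, N)
cols = im2col(x, HF, WF, pad, stride)  # shape (HF*WF*D, out_h*out_w*N) = (8, 4)

# rebuild the first patch by hand: rows 0:2, cols 0:2, all depths
patch = x[0:HF, 0:WF, :, 0]                # (HF, WF, D)
manual = patch.transpose(2, 0, 1).ravel()  # depth-major, as im2col_index orders rows
print np.allclose(cols[:, 0], manual)      # expect True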
@@ -0,0 +1,116 @@
# coding=utf-8

import numpy as np
from im2col import im2col
from im2col import col2im

def conv_forward(x, w, b, params):
    # get convolution parameters
    stride = params['stride']
    pad = params['pad']
    # get input size
    H, W, D, N = x.shape
    HF, WF, DF, NF = w.shape
    _, _, DB, NB = b.shape
    # check input size
    assert D == DF, 'dimension does not work'
    assert NF == NB, 'batch size does not work'
    # check params
    assert (H + 2 * pad - HF) % stride == 0, 'pad and stride do not work'
    assert (W + 2 * pad - WF) % stride == 0, 'pad and stride do not work'
    # get output size
    HO = (H + 2 * pad - HF) / stride + 1
    WO = (W + 2 * pad - WF) / stride + 1
    # convolution as a single matrix product on the im2col matrix
    x_col = im2col(x, HF, WF, pad, stride)
    w_col = w.transpose(3, 0, 1, 2).reshape((NF, -1))
    output_col = w_col.dot(x_col) + b.reshape(-1, 1)
    output_col = output_col.reshape((NF, HO, WO, N))
    output_col = output_col.transpose(1, 2, 0, 3)
    return output_col

def conv_backward(x, w, b, conv_param, dout):
    HF, WF, DF, NF = w.shape
    x_col = im2col(x, HF, WF, conv_param['pad'], conv_param['stride'])
    w_col = w.transpose(3, 0, 1, 2).reshape((NF, -1))
    db = np.sum(dout, axis=(0, 1, 3))
    dout = dout.transpose(2, 0, 1, 3)
    dout = dout.reshape((w_col.shape[0], x_col.shape[-1]))
    dx_col = w_col.T.dot(dout)
    dw_col = dout.dot(x_col.T)

    dx = col2im(dx_col, x.shape, HF, WF, conv_param['pad'], conv_param['stride'])
    dw = dw_col.reshape((dw_col.shape[0], HF, WF, DF))
    dw = dw.transpose(1, 2, 3, 0)

    return [dx, dw, db]

def max_pooling_forward(x, pool_params):
    # get max-pooling parameters
    stride = pool_params['stride']
    HF = pool_params['HF']
    WF = pool_params['WF']
    pad = pool_params['pad']
    # get input size
    H, W, D, N = x.shape
    # fold depth into the batch axis so each channel is pooled independently
    x_reshaped = x.reshape(H, W, 1, -1)
    # get output size
    HO = 0
    WO = 0
    if type(pad) is int:
        HO = (H + 2 * pad - HF) / stride + 1
        WO = (W + 2 * pad - WF) / stride + 1
    else:
        HO = (H + pad[0] + pad[1] - HF) / stride + 1
        WO = (W + pad[2] + pad[3] - WF) / stride + 1
    x_col = im2col(x_reshaped, HF, WF, pad, stride)
    x_col_argmax = np.argmax(x_col, axis=0)
    x_col_max = x_col[x_col_argmax, np.arange(x_col.shape[1])]
    out = x_col_max.reshape((HO, WO, D, N))
    return out

def max_pooling_backward(x, dout, pool_params):
    H, W, D, N = x.shape
    x_reshaped = x.reshape(H, W, 1, -1)
    x_col = im2col(x_reshaped, pool_params['HF'],
                   pool_params['WF'], pool_params['pad'], pool_params['stride'])
    x_col_argmax = np.argmax(x_col, axis=0)
    # route the gradient only through the max element of each window
    dx_col = np.zeros_like(x_col)
    dx_col[x_col_argmax, np.arange(x_col.shape[1])] = dout.ravel()
    dx_shaped = col2im(dx_col, x_reshaped.shape, pool_params['HF'], pool_params['WF'],
                       pool_params['pad'], stride=pool_params['stride'])
    dx = dx_shaped.reshape(x.shape)
    return [dx]

def relu_forward(x):
    out = np.where(x > 0, x, 0)
    return out

def relu_backward(x, dout):
    dx = np.where(x > 0, dout, 0)
    return [dx]

def softmax_loss_forward(x, y):
    # x holds the class scores with shape (1, 1, C, N); y is the label (1 * N)
    x_reshaped = x.reshape((x.shape[2], x.shape[3]))
    # subtract the per-column max for numerical stability
    probs = np.exp(x_reshaped - np.max(x_reshaped, axis=0, keepdims=True))
    probs /= np.sum(probs, axis=0, keepdims=True)
    N = x_reshaped.shape[1]
    loss = -np.sum(np.log(probs[y, np.arange(N)])) / N
    return loss

def softmax_loss_backward(x, y):
    x_reshaped = x.reshape((x.shape[2], x.shape[3]))
    probs = np.exp(x_reshaped - np.max(x_reshaped, axis=0, keepdims=True))
    probs /= np.sum(probs, axis=0, keepdims=True)
    dx = probs.copy()
    N = x_reshaped.shape[1]
    dx[y, np.arange(N)] -= 1
    dx /= N
    dx = dx.reshape((1, 1, dx.shape[0], dx.shape[1]))
    return [dx]

def softmax(x):
    x_reshaped = x.reshape((x.shape[2], x.shape[3]))
    probs = np.exp(x_reshaped - np.max(x_reshaped, axis=0, keepdims=True))
    probs /= np.sum(probs, axis=0, keepdims=True)
    return np.argmax(probs, axis=0)
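The conv gradients above can be checked numerically. This sketch perturbs a single weight and compares a centered finite difference of sum(conv_forward(...) * dout) against the analytic dw entry; the small shapes are arbitrary assumptions chosen to keep it fast.

# numerical gradient check sketch for the conv layer
import numpy as np
from layers import conv_forward, conv_backward

np.random.seed(0)
x = np.random.randn(5, 5, 2, 3)
w = np.random.randn(3, 3, 2, 4)
b = np.random.randn(1, 1, 1, 4)
params = {'stride': 1, 'pad': 1}

out = conv_forward(x, w, b, params)
dout = np.random.randn(*out.shape)
dx, dw, db = conv_backward(x, w, b, params, dout)

# centered finite difference on one weight entry
eps = 1e-5
w[0, 0, 0, 0] += eps
f_plus = np.sum(conv_forward(x, w, b, params) * dout)
w[0, 0, 0, 0] -= 2 * eps
f_minus = np.sum(conv_forward(x, w, b, params) * dout)
w[0, 0, 0, 0] += eps  # restore the original weight
num_dw = (f_plus - f_minus) / (2 * eps)
print 'analytic: %f, numerical: %f' % (dw[0, 0, 0, 0], num_dw)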