forked from Eniac-Xie/PyConvNet
Showing 17 changed files with 844 additions and 0 deletions.
@@ -0,0 +1,12 @@
# PyConvNet: CNN for Python
**PyConvNet** is a Python toolbox implementing convolutional neural networks.

To train LeNet on the MNIST dataset, do as follows (you may need some Python packages such as numpy and matplotlib):

1. cd code
2. python mnist_demo.py

To train cifar-net on the CIFAR dataset, do as follows:

1. cd code
2. python cifar_demo.py
@@ -0,0 +1,32 @@
# coding=utf-8

import os
import scipy.io as sio
import numpy as np

from load_cifar import load_cifar
from model.initial_cifarNet import initial_cifar

# load CIFAR data, from a cached .mat file if one exists
train_data = []
valid_data = []
train_labels = []
valid_labels = []
if os.path.isfile('../data/cifar/cifar.mat'):
    print 'read mat file: %s' % ('../data/cifar/cifar.mat')
    data = sio.loadmat('../data/cifar/cifar.mat')
    train_data = data['train_data']
    valid_data = data['valid_data']
    train_labels = data['train_labels']
    valid_labels = data['valid_labels']
else:
    train_data, valid_data, train_labels, valid_labels = load_cifar()

cnn = initial_cifar()
lr = np.ones(20) * 0.01
cnn.train(train_data, train_labels, lr, epoch=20, batch_size=32)

res = cnn.predict(valid_data)
res = res.reshape(valid_labels.shape)

print 'Accuracy is: %f' % (np.sum(res == valid_labels) / float(np.max(valid_labels.shape)))
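initial_cifar is imported from model.initial_cifarNet, a file not shown in this commit. As a hedged sketch only, a constructor in its style could be assembled from the ConvNet.add_layer interface defined later in this commit (assuming the class file is importable as convnet); the filter sizes and counts here are illustrative assumptions, not the real cifar-net architecture:

# hypothetical sketch, not the actual initial_cifar: builds a small
# conv -> relu -> pool -> conv -> softmax net with the add_layer API
from convnet import ConvNet

def initial_cifar_sketch():
    cnn = ConvNet()
    # 5x5 conv over 3 input channels, 32 filters: 32x32x3 -> 28x28x32
    cnn.add_layer('conv', {'HF': 5, 'WF': 5, 'DF': 3, 'NF': 32,
                           'var': 0.01, 'stride': 1, 'pad': 0})
    cnn.add_layer('relu', {})
    # 2x2 max pooling with stride 2: 28x28x32 -> 14x14x32
    cnn.add_layer('max_pooling', {'HF': 2, 'WF': 2, 'stride': 2, 'pad': 0})
    # a 14x14 conv acts as a fully connected layer: 14x14x32 -> 1x1x10
    cnn.add_layer('conv', {'HF': 14, 'WF': 14, 'DF': 32, 'NF': 10,
                           'var': 0.01, 'stride': 1, 'pad': 0})
    cnn.add_layer('softmax-loss', {})
    return cnn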
@@ -0,0 +1 @@
# empty file
@@ -0,0 +1,166 @@
# coding=utf-8

import numpy as np
from layers import *
import time

class ConvNet:
    def __init__(self):
        self.layers = []

    def add_layer(self, layer_type, layer_params):
        if layer_type == 'conv':
            HF = layer_params['HF']
            WF = layer_params['WF']
            DF = layer_params['DF']
            NF = layer_params['NF']
            l_weights = np.random.normal(0, layer_params['var'], (HF, WF, DF, NF))
            l_bias = np.zeros((1, 1, 1, NF))
            layer_params = {'type': 'conv',
                            'weights': l_weights,
                            'bias': l_bias,
                            'stride': layer_params['stride'],
                            'pad': layer_params['pad'],
                            'input': None,
                            'output': None,
                            'grad': None}
            self.layers.append(layer_params)
        elif layer_type == 'max_pooling':
            layer_params = {'type': 'max_pooling',
                            'stride': layer_params['stride'],
                            'HF': layer_params['HF'],
                            'WF': layer_params['WF'],
                            'pad': layer_params['pad'],
                            'input': None,
                            'output': None,
                            'grad': None}
            self.layers.append(layer_params)
        elif layer_type == 'relu':
            layer_params = {'type': 'relu',
                            'input': None,
                            'output': None,
                            'grad': None}
            self.layers.append(layer_params)
        elif layer_type == 'softmax-loss':
            layer_params = {'type': 'softmax-loss',
                            'input': None,
                            'output': None,
                            'grad': None}
            self.layers.append(layer_params)
        else:
            print 'unknown layer type!\n'
            exit(1)

    def forward(self, data, label=[]):
        for idx, each_layer in enumerate(self.layers):
            if idx == 0:
                each_layer['input'] = data
            else:
                each_layer['input'] = self.layers[idx - 1]['output']
            if each_layer['type'] == 'conv':
                params = {'stride': each_layer['stride'],
                          'pad': each_layer['pad']}
                each_layer['output'] = conv_forward(each_layer['input'],
                                                    each_layer['weights'],
                                                    each_layer['bias'], params)
            elif each_layer['type'] == 'max_pooling':
                params = {'stride': each_layer['stride'],
                          'HF': each_layer['HF'],
                          'WF': each_layer['WF'],
                          'pad': each_layer['pad']}
                each_layer['output'] = max_pooling_forward(each_layer['input'],
                                                           params)
            elif each_layer['type'] == 'relu':
                each_layer['output'] = relu_forward(each_layer['input'])
            elif each_layer['type'] == 'softmax-loss':
                # with no label given, produce class predictions instead of a loss
                if len(label) == 0:
                    each_layer['output'] = softmax(each_layer['input'])
                else:
                    each_layer['output'] = softmax_loss_forward(each_layer['input'], label)
        return each_layer['output']

    def backward(self, data, label, lr=0.01):
        for idx in reversed(np.arange(len(self.layers))):
            current_layer = self.layers[idx]
            if current_layer['type'] == 'softmax-loss':
                if idx != len(self.layers) - 1:
                    print 'wrong architecture: softmax-loss must be the last layer'
                    exit(-1)
                self.layers[idx]['grad'] = softmax_loss_backward(self.layers[idx - 1]['output'],
                                                                 label)
            elif current_layer['type'] == 'conv':
                conv_param = {'stride': self.layers[idx]['stride'],
                              'pad': self.layers[idx]['pad']}
                self.layers[idx]['grad'] = conv_backward(self.layers[idx]['input'],
                                                         self.layers[idx]['weights'],
                                                         self.layers[idx]['bias'],
                                                         conv_param,
                                                         self.layers[idx + 1]['grad'][0])
                # vanilla SGD update on the conv weights and bias
                self.layers[idx]['weights'] = self.layers[idx]['weights'] \
                    - lr * self.layers[idx]['grad'][1]
                self.layers[idx]['bias'] = self.layers[idx]['bias'] \
                    - lr * self.layers[idx]['grad'][2]
            elif current_layer['type'] == 'max_pooling':
                pooling_params = {'stride': self.layers[idx]['stride'],
                                  'HF': self.layers[idx]['HF'],
                                  'WF': self.layers[idx]['WF'],
                                  'pad': self.layers[idx]['pad']}
                self.layers[idx]['grad'] = max_pooling_backward(self.layers[idx]['input'],
                                                                self.layers[idx + 1]['grad'][0],
                                                                pooling_params)
            elif current_layer['type'] == 'relu':
                self.layers[idx]['grad'] = relu_backward(self.layers[idx]['input'],
                                                         self.layers[idx + 1]['grad'][0])

    def predict(self, test_data, batch_size=50):
        _, _, _, N = test_data.shape
        prediction = np.zeros((1, N))
        batch_num = int(np.ceil(float(N) / float(batch_size)))
        for batch_idx in np.arange(batch_num):
            sub_test_data = None
            if batch_idx == batch_num - 1:
                sub_test_data = test_data[:, :, :, batch_idx * batch_size:]
                self.forward(sub_test_data)
                prediction[0, batch_idx * batch_size:] = self.layers[-1]['output']
            else:
                sub_test_data = test_data[:, :, :, batch_idx * batch_size:(batch_idx + 1) * batch_size]
                self.forward(sub_test_data)
                prediction[0, batch_idx * batch_size:(batch_idx + 1) * batch_size] = self.layers[-1]['output']
        return prediction

    def train(self, train_data, train_label, lr, epoch=20, batch_size=100):
        H, W, D, N = train_data.shape
        _, N_l = train_label.shape
        assert N == N_l, 'Wrong data input!'
        # shuffle train_data
        shuffle_idx = np.arange(N)
        np.random.shuffle(shuffle_idx)
        train_data = train_data[:, :, :, shuffle_idx]
        train_label = train_label[:, shuffle_idx]
        error_list = []
        for epoch_idx in np.arange(epoch):
            batch_num = int(np.ceil(float(N) / float(batch_size)))
            for batch_idx in np.arange(batch_num):
                # start timing
                start_t = time.clock()
                sub_train_data = None
                sub_train_label = None
                if batch_idx == batch_num - 1:
                    sub_train_data = train_data[:, :, :, batch_idx * batch_size:]
                    sub_train_label = train_label[:, batch_idx * batch_size:]
                else:
                    sub_train_data = train_data[:, :, :, batch_idx * batch_size:(batch_idx + 1) * batch_size]
                    sub_train_label = train_label[:, batch_idx * batch_size:(batch_idx + 1) * batch_size]

                loss = self.forward(sub_train_data, sub_train_label)
                error_list.append(loss)

                self.backward(sub_train_data, sub_train_label, lr[epoch_idx])
                # end timing
                end_t = time.clock()
                print 'epoch: %d, batch: %d, time: %f sec, obj: %f' % (epoch_idx,
                                                                       batch_idx,
                                                                       end_t - start_t,
                                                                       np.mean(np.array(error_list)))
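As a quick smoke test of the class above, the following sketch trains a one-conv-layer net on random data (again assuming the class is importable as convnet). Shapes follow the (H, W, D, N) layout used throughout this commit; the printed accuracy is meaningless beyond checking that forward, backward, and predict run:

# smoke-test sketch: random 8x8 single-channel inputs, 2 classes
import numpy as np
from convnet import ConvNet

net = ConvNet()
# an 8x8 filter over the full image reduces each input to 1x1x2 class scores
net.add_layer('conv', {'HF': 8, 'WF': 8, 'DF': 1, 'NF': 2,
                       'var': 0.01, 'stride': 1, 'pad': 0})
net.add_layer('softmax-loss', {})

data = np.random.randn(8, 8, 1, 100)
labels = np.random.randint(0, 2, (1, 100))
net.train(data, labels, lr=np.ones(2) * 0.01, epoch=2, batch_size=20)
pred = net.predict(data, batch_size=20)
print 'accuracy on the training data: %f' % np.mean(pred == labels)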
@@ -0,0 +1,65 @@
# coding=utf-8

import numpy as np

def im2col_index(x_shape, HF, WF, pad, stride):
    # get input size
    H, W, D, N = x_shape
    # get output size; pad may be an int or a (top, bottom, left, right) tuple
    out_h = 0
    out_w = 0
    if type(pad) is int:
        out_h = (H + 2 * pad - HF) / stride + 1
        out_w = (W + 2 * pad - WF) / stride + 1
    else:
        out_h = (H + pad[0] + pad[1] - HF) / stride + 1
        out_w = (W + pad[2] + pad[3] - WF) / stride + 1
    # for the row index, compute the indices of the first HF * WF block
    r0 = np.repeat(np.arange(HF), WF)
    r0 = np.tile(r0, D)
    # then compute the offset of each block
    r_bias = stride * np.repeat(np.arange(out_h), out_w)
    # the row index is r0 + r_bias
    r = r0.reshape(-1, 1) + r_bias.reshape(1, -1)

    # the same for the col index
    c0 = np.tile(np.arange(WF), HF * D)
    c_bias = stride * np.tile(np.arange(out_w), out_h)
    c = c0.reshape(-1, 1) + c_bias.reshape(1, -1)

    # then the depth index
    d = np.repeat(np.arange(D), HF * WF).reshape(-1, 1)

    return (r, c, d)

def im2col(x, HF, WF, pad, stride):
    # zero-pad the spatial dimensions
    x_padded = None
    if type(pad) is int:
        x_padded = np.pad(x, ((pad, pad), (pad, pad), (0, 0), (0, 0)), mode='constant')
    else:
        x_padded = np.pad(x, ((pad[0], pad[1]), (pad[2], pad[3]), (0, 0), (0, 0)), mode='constant')
    r, c, d = im2col_index(x.shape, HF, WF, pad, stride)
    cols = x_padded[r, c, d, :]
    cols = cols.reshape(HF * WF * x.shape[2], -1)
    return cols

def col2im(cols, x_shape, HF, WF, pad, stride):
    # get input size
    H, W, D, N = x_shape
    H_padded = 0
    W_padded = 0
    if type(pad) is int:
        H_padded, W_padded = H + 2 * pad, W + 2 * pad
    else:
        H_padded, W_padded = H + pad[0] + pad[1], W + pad[2] + pad[3]
    x_padded = np.zeros((H_padded, W_padded, D, N), dtype=cols.dtype)
    r, c, d = im2col_index(x_shape, HF, WF, pad, stride)
    cols_reshaped = cols.reshape((HF * WF * D, -1, N))
    # scatter-add overlapping patches back into the padded image
    np.add.at(x_padded, (r, c, d, slice(None)), cols_reshaped)
    # strip the padding; end indices are computed explicitly so that a zero
    # entry in a pad tuple does not produce an empty -0 slice
    if type(pad) is int:
        if pad == 0:
            return x_padded
        return x_padded[pad:-pad, pad:-pad, :, :]
    else:
        return x_padded[pad[0]:H_padded - pad[1], pad[2]:W_padded - pad[3], :, :]
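A small self-check of im2col on assumed toy shapes: each column of the result should be one HF x WF x D patch of the input, flattened depth-major to match the row ordering built by im2col_index.

# sanity-check sketch for im2col on a tiny input
import numpy as np
from im2col import im2col

H, W, D, N = 4, 4, 2, 1
HF, WF, stride, pad = 2, 2, 2, 0
x = np.arange(H * W * D * N, dtype=float).reshape(H, W, D, N)
cols = im2col(x, HF, WF, pad, stride)  # shape (HF*WF*D, out_h*out_w*N) = (8, 4)

# rebuild the first patch by hand: rows 0:2, cols 0:2, all depths
patch = x[0:HF, 0:WF, :, 0]                # (HF, WF, D)
manual = patch.transpose(2, 0, 1).ravel()  # depth-major, as im2col_index orders rows
print np.allclose(cols[:, 0], manual)      # expect True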
@@ -0,0 +1,116 @@
# coding=utf-8

import numpy as np
from im2col import im2col
from im2col import col2im

def conv_forward(x, w, b, params):
    # get convolution parameters
    stride = params['stride']
    pad = params['pad']
    # get input size
    H, W, D, N = x.shape
    HF, WF, DF, NF = w.shape
    _, _, DB, NB = b.shape
    # check input size
    assert D == DF, 'dimension does not work'
    assert NF == NB, 'batch size does not work'
    # check params
    assert (H + 2 * pad - HF) % stride == 0, 'pad and stride do not work'
    assert (W + 2 * pad - WF) % stride == 0, 'pad and stride do not work'
    # get output size
    HO = (H + 2 * pad - HF) / stride + 1
    WO = (W + 2 * pad - WF) / stride + 1
    # convolution as a single matrix product on the im2col matrix
    x_col = im2col(x, HF, WF, pad, stride)
    w_col = w.transpose(3, 0, 1, 2).reshape((NF, -1))
    output_col = w_col.dot(x_col) + b.reshape(-1, 1)
    output_col = output_col.reshape((NF, HO, WO, N))
    output_col = output_col.transpose(1, 2, 0, 3)
    return output_col

def conv_backward(x, w, b, conv_param, dout):
    HF, WF, DF, NF = w.shape
    x_col = im2col(x, HF, WF, conv_param['pad'], conv_param['stride'])
    w_col = w.transpose(3, 0, 1, 2).reshape((NF, -1))
    db = np.sum(dout, axis=(0, 1, 3))
    dout = dout.transpose(2, 0, 1, 3)
    dout = dout.reshape((w_col.shape[0], x_col.shape[-1]))
    dx_col = w_col.T.dot(dout)
    dw_col = dout.dot(x_col.T)

    dx = col2im(dx_col, x.shape, HF, WF, conv_param['pad'], conv_param['stride'])
    dw = dw_col.reshape((dw_col.shape[0], HF, WF, DF))
    dw = dw.transpose(1, 2, 3, 0)

    return [dx, dw, db]

def max_pooling_forward(x, pool_params):
    # get max-pooling parameters
    stride = pool_params['stride']
    HF = pool_params['HF']
    WF = pool_params['WF']
    pad = pool_params['pad']
    # get input size
    H, W, D, N = x.shape
    # fold depth into the batch axis so each channel is pooled independently
    x_reshaped = x.reshape(H, W, 1, -1)
    # get output size
    HO = 0
    WO = 0
    if type(pad) is int:
        HO = (H + 2 * pad - HF) / stride + 1
        WO = (W + 2 * pad - WF) / stride + 1
    else:
        HO = (H + pad[0] + pad[1] - HF) / stride + 1
        WO = (W + pad[2] + pad[3] - WF) / stride + 1
    x_col = im2col(x_reshaped, HF, WF, pad, stride)
    x_col_argmax = np.argmax(x_col, axis=0)
    x_col_max = x_col[x_col_argmax, np.arange(x_col.shape[1])]
    out = x_col_max.reshape((HO, WO, D, N))
    return out

def max_pooling_backward(x, dout, pool_params):
    H, W, D, N = x.shape
    x_reshaped = x.reshape(H, W, 1, -1)
    x_col = im2col(x_reshaped, pool_params['HF'],
                   pool_params['WF'], pool_params['pad'], pool_params['stride'])
    x_col_argmax = np.argmax(x_col, axis=0)
    # route the gradient only through the max element of each window
    dx_col = np.zeros_like(x_col)
    dx_col[x_col_argmax, np.arange(x_col.shape[1])] = dout.ravel()
    dx_shaped = col2im(dx_col, x_reshaped.shape, pool_params['HF'], pool_params['WF'],
                       pool_params['pad'], stride=pool_params['stride'])
    dx = dx_shaped.reshape(x.shape)
    return [dx]

def relu_forward(x):
    out = np.where(x > 0, x, 0)
    return out

def relu_backward(x, dout):
    dx = np.where(x > 0, dout, 0)
    return [dx]

def softmax_loss_forward(x, y):
    # x holds the class scores with shape (1, 1, C, N); y is the label (1 * N)
    x_reshaped = x.reshape((x.shape[2], x.shape[3]))
    # subtract the per-column max for numerical stability
    probs = np.exp(x_reshaped - np.max(x_reshaped, axis=0, keepdims=True))
    probs /= np.sum(probs, axis=0, keepdims=True)
    N = x_reshaped.shape[1]
    loss = -np.sum(np.log(probs[y, np.arange(N)])) / N
    return loss

def softmax_loss_backward(x, y):
    x_reshaped = x.reshape((x.shape[2], x.shape[3]))
    probs = np.exp(x_reshaped - np.max(x_reshaped, axis=0, keepdims=True))
    probs /= np.sum(probs, axis=0, keepdims=True)
    dx = probs.copy()
    N = x_reshaped.shape[1]
    dx[y, np.arange(N)] -= 1
    dx /= N
    dx = dx.reshape((1, 1, dx.shape[0], dx.shape[1]))
    return [dx]

def softmax(x):
    x_reshaped = x.reshape((x.shape[2], x.shape[3]))
    probs = np.exp(x_reshaped - np.max(x_reshaped, axis=0, keepdims=True))
    probs /= np.sum(probs, axis=0, keepdims=True)
    return np.argmax(probs, axis=0)
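The conv gradients above can be checked numerically. This sketch perturbs a single weight and compares a centered finite difference of sum(conv_forward(...) * dout) against the analytic dw entry; the small shapes are arbitrary assumptions chosen to keep it fast.

# numerical gradient check sketch for the conv layer
import numpy as np
from layers import conv_forward, conv_backward

np.random.seed(0)
x = np.random.randn(5, 5, 2, 3)
w = np.random.randn(3, 3, 2, 4)
b = np.random.randn(1, 1, 1, 4)
params = {'stride': 1, 'pad': 1}

out = conv_forward(x, w, b, params)
dout = np.random.randn(*out.shape)
dx, dw, db = conv_backward(x, w, b, params, dout)

# centered finite difference on one weight entry
eps = 1e-5
w[0, 0, 0, 0] += eps
f_plus = np.sum(conv_forward(x, w, b, params) * dout)
w[0, 0, 0, 0] -= 2 * eps
f_minus = np.sum(conv_forward(x, w, b, params) * dout)
w[0, 0, 0, 0] += eps  # restore the original weight
num_dw = (f_plus - f_minus) / (2 * eps)
print 'analytic: %f, numerical: %f' % (dw[0, 0, 0, 0], num_dw)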