Commit: improve load data speed, 100 times faster
xingjian-f committed Jul 20, 2016
1 parent bffa162 commit ad6d0a3
Showing 6 changed files with 148 additions and 62 deletions.
28 changes: 2 additions & 26 deletions architecture/CNN_LSTM.py
@@ -10,43 +10,21 @@ def build_CNN_LSTM(channels, width, height, lstm_output_size, nb_classes):
    model = Sequential()
    # 1 conv
    model.add(Convolution2D(64, 3, 3, border_mode='same', activation='relu',
-        input_shape=(channels, width, height)))
+        input_shape=(channels, height, width)))
    model.add(BatchNormalization(mode=0, axis=1))
    # model.add(Dropout(0.5))
    # 2 conv
    model.add(Convolution2D(64, 3, 3, border_mode='same', activation='relu'))
    model.add(BatchNormalization(mode=0, axis=1))
    # model.add(Dropout(0.5))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2)))
    # 3 conv
    model.add(Convolution2D(128, 3, 3, border_mode='same', activation='relu'))
    model.add(BatchNormalization(mode=0, axis=1))
    # model.add(Dropout(0.5))
    # 4 conv
    model.add(Convolution2D(128, 3, 3, border_mode='same', activation='relu'))
    model.add(BatchNormalization(mode=0, axis=1))
    # model.add(Dropout(0.5))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2)))
-    # # 5 conv
-    # model.add(Convolution2D(256, 3, 3, border_mode='same', activation='relu'))
-    # model.add(BatchNormalization(mode=0, axis=1))
-    # model.add(Dropout(0.5))
-    # # 6 conv
-    # model.add(Convolution2D(256, 3, 3, border_mode='same', activation='relu'))
-    # model.add(BatchNormalization(mode=0, axis=1))
-    # model.add(Dropout(0.5))
-    # model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2)))
-    # # 7 conv
-    # model.add(Convolution2D(512, 3, 3, border_mode='same', activation='relu'))
-    # model.add(BatchNormalization(mode=0, axis=1))
-    # model.add(Dropout(0.5))
-    # # 8 conv
-    # model.add(Convolution2D(512, 3, 3, border_mode='same', activation='relu'))
-    # model.add(BatchNormalization(mode=0, axis=1))
-    # model.add(Dropout(0.5))
    # flatten
    model.add(Flatten())

    # 1 dense
    model.add(Dense(512, activation='relu'))
    model.add(BatchNormalization())
@@ -59,12 +37,10 @@ def build_CNN_LSTM(channels, width, height, lstm_output_size, nb_classes):
    model.add(RepeatVector(lstm_output_size))
    model.add(LSTM(512, return_sequences=True))
    model.add(TimeDistributed(Dropout(0.5)))
-    # model.add(TimeDistributed(BatchNormalization()))
    model.add(TimeDistributed(Dense(nb_classes, activation='softmax')))
-    # model.summary()
+    model.summary()
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
-                 # metrics=['accuracy']
                  metrics=[categorical_accuracy_per_sequence]
                  )

62 changes: 62 additions & 0 deletions architecture/cv_cnn_lstm.py
@@ -0,0 +1,62 @@
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Flatten, RepeatVector
from keras.layers import LSTM
from keras.layers import Convolution2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.layers.wrappers import TimeDistributed
from util import categorical_accuracy_per_sequence

def build_cv_cnn_lstm(channels, width, height, lstm_output_size, nb_classes):
    model = Sequential()
    # 1 conv
    model.add(Convolution2D(32, 3, 3, border_mode='same', activation='relu',
        input_shape=(channels, height, width)))
    model.add(BatchNormalization(mode=0, axis=1))
    # 2 conv
    model.add(Convolution2D(32, 3, 3, border_mode='same', activation='relu'))
    model.add(BatchNormalization(mode=0, axis=1))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2)))
    # 3 conv
    model.add(Convolution2D(64, 3, 3, border_mode='same', activation='relu'))
    model.add(BatchNormalization(mode=0, axis=1))
    # 4 conv
    model.add(Convolution2D(64, 3, 3, border_mode='same', activation='relu'))
    model.add(BatchNormalization(mode=0, axis=1))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2)))
    # 5 conv
    model.add(Convolution2D(128, 3, 3, border_mode='same', activation='relu'))
    model.add(BatchNormalization(mode=0, axis=1))
    # 6 conv
    model.add(Convolution2D(128, 3, 3, border_mode='same', activation='relu'))
    model.add(BatchNormalization(mode=0, axis=1))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2)))
    # 7 conv
    model.add(Convolution2D(256, 3, 3, border_mode='same', activation='relu'))
    model.add(BatchNormalization(mode=0, axis=1))
    # 8 conv
    model.add(Convolution2D(256, 3, 3, border_mode='same', activation='relu'))
    model.add(BatchNormalization(mode=0, axis=1))
    model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2)))
    # flatten
    model.add(Flatten())
    # 1 dense
    model.add(Dense(256, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    # 2 dense
    model.add(Dense(512, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    # lstm
    model.add(RepeatVector(lstm_output_size))
    model.add(LSTM(512, return_sequences=True))
    model.add(TimeDistributed(Dropout(0.5)))
    model.add(TimeDistributed(Dense(nb_classes, activation='softmax')))
    # model.summary()
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=[categorical_accuracy_per_sequence],
                  sample_weight_mode='temporal'
                  )

    return model
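The new model keeps the encode/repeat/decode pattern of CNN_LSTM: the convolutional stack is flattened into one feature vector, RepeatVector copies that vector once per output character slot, and the LSTM plus TimeDistributed softmax score each slot against the character set. A minimal NumPy sketch of the shape flow (all sizes here are illustrative, not taken from the commit):

import numpy as np

batch, feat, slots, nb_classes = 2, 512, 8, 100   # illustrative sizes

encoded = np.random.rand(batch, feat)                       # after Flatten + Dense
repeated = np.repeat(encoded[:, None, :], slots, axis=1)    # RepeatVector
assert repeated.shape == (batch, slots, feat)

# LSTM(512, return_sequences=True) keeps the slot axis, and
# TimeDistributed(Dense(nb_classes, activation='softmax')) maps each
# slot's hidden state to a distribution over the character set:
scores = np.random.rand(batch, slots, nb_classes)
probs = scores / scores.sum(axis=-1, keepdims=True)
assert probs.shape == (batch, slots, nb_classes)

The sample_weight_mode='temporal' compile option is what lets fit() accept one weight per slot; the weights come from the new get_sample_weight in util.py below.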
16 changes: 14 additions & 2 deletions online/captcha.py
@@ -16,7 +16,20 @@ def __init__(self):
        self.img_channels = 3
        self.train_data_dir = '/home/feixingjian/DeepLearning-OCR/train_data/guangdong/'
        self.weights_file_path = '/home/feixingjian/DeepLearning-OCR/save_model/2016-07-15/weights.499-0.07.hdf5'
-        self.char_set = list(get_char_set(self.train_data_dir))
+        self.char_set = get_char_set(self.train_data_dir)  # note: get_char_set now returns (char_set, char2idx), see util.py
        self.nb_classes = len(self.char_set)
        self.max_nb_char = get_maxnb_char(self.train_data_dir)
        self.model = build_CNN_LSTM(self.img_channels, self.img_width, self.img_height, self.max_nb_char, self.nb_classes)  # build the CNN architecture
        self.model.load_weights(self.weights_file_path)  # load the trained weights

+class jiangsu():
+    def __init__(self):
+        self.img_width = 150
+        self.img_height = 60
+        self.img_channels = 3
+        self.train_data_dir = '/home/feixingjian/DeepLearning-OCR/train_data/jiangsu/'
+        self.weights_file_path = '/home/feixingjian/DeepLearning-OCR/save_model/2016-07-18/weights.55-0.06.hdf5'
+        self.char_set = get_char_set(self.train_data_dir)  # same tuple caveat as above
+        self.nb_classes = len(self.char_set)
+        self.max_nb_char = get_maxnb_char(self.train_data_dir)
+        self.model = build_CNN_LSTM(self.img_channels, self.img_width, self.img_height, self.max_nb_char, self.nb_classes)  # build the CNN architecture
@@ -28,7 +41,6 @@ def load_data(img_vals, width, height, channels):
    for img_val in img_vals:
        x.append(load_img(img_val, width, height, channels))
    x = np.array(x)
-    x = x.astype('float32')  # the GPU only takes 32-bit floats
    x /= 255  # normalized
    return x

5 changes: 4 additions & 1 deletion online/upload.py
@@ -1,8 +1,9 @@
from flask import Flask, request, render_template, make_response
-from captcha import guangdong, predict
+from captcha import guangdong, predict, jiangsu

app = Flask(__name__)
guangdong_model = guangdong()
+jiangsu_model = jiangsu()
@app.route('/', methods=['GET', 'POST'])
def index():
    global guangdong_model
@@ -12,6 +13,8 @@ def index():
        province = request.form['province']
        if province == 'guangdong':
            res = predict(guangdong_model, imgs)
+        elif province == 'jiangsu':
+            res = predict(jiangsu_model, imgs)
        elif province == 'nacao':
            res = 'pass'
        else:
45 changes: 25 additions & 20 deletions train.py
@@ -5,7 +5,9 @@
from keras.optimizers import SGD
from keras.callbacks import ModelCheckpoint
from util import one_hot_decoder, plot_loss_figure, load_data, get_char_set, get_maxnb_char
-from architecture.CNN_LSTM import build_CNN_LSTM
+from util import get_sample_weight
+# from architecture.CNN_LSTM import build_CNN_LSTM
+from architecture.cv_cnn_lstm import build_cv_cnn_lstm


def pred(model, X, char_set):
@@ -20,12 +22,14 @@ def test(model, test_data, char_set):
    pred_res = pred(model, test_X, char_set)
    nb_correct = sum(pred_res[i]==test_y[i] for i in range(len(pred_res)))
    for i in range(len(pred_res)):
-        print test_y[i], pred_res[i]
+        print 'test:', test_y[i]
+        print 'pred:', pred_res[i]
    print 'Accuracy: ', float(nb_correct) / len(test_y)


-def train(model, batch_size, nb_epoch, save_dir, train_data, val_data):
+def train(model, batch_size, nb_epoch, save_dir, train_data, val_data, char_set):
    X_train, y_train = train_data[0], train_data[1]
+    sample_weight = get_sample_weight(y_train, char_set)
    print 'X_train shape:', X_train.shape
    print X_train.shape[0], 'train samples'
    if os.path.exists(save_dir) == False:
@@ -34,46 +38,47 @@ def train(model, batch_size, nb_epoch, save_dir, train_data, val_data):
    start_time = time.time()
    save_path = save_dir + 'weights.{epoch:02d}-{val_loss:.2f}.hdf5'
    check_pointer = ModelCheckpoint(save_path,
-        save_best_only=False)
+        save_best_only=True)
    history = model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch,
-        validation_data=val_data,
-        validation_split=0.1,
-        callbacks=[check_pointer])
+        validation_split=0.3,
+        callbacks=[check_pointer],
+        sample_weight=sample_weight)

    plot_loss_figure(history, save_dir + str(datetime.now()).split('.')[0].split()[1]+'.jpg')
    print 'Training time(h):', (time.time()-start_time) / 3600


if __name__ == '__main__':
-    img_width, img_height = 223, 50
+    img_width, img_height = 2048, 32
    img_channels = 3
-    batch_size = 32
-    nb_epoch = 500
+    batch_size = 64
+    nb_epoch = 1

    save_dir = 'save_model/' + str(datetime.now()).split('.')[0].split()[0] + '/'  # models are saved in a directory named for the current date
-    train_data_dir = 'train_data/guangdong/'
+    train_data_dir = 'train_data/cv1/'
    val_data_dir = 'test_data/nacao_5/'
    test_data_dir = 'test_data/nacao_5/'
-    weights_file_path = 'save_model/2016-07-15/weights.499-0.07.hdf5'
-    char_set = list(get_char_set(train_data_dir))
+    weights_file_path = 'save_model/2016-07-19/weights.14-1.29.hdf5'
+    char_set, char2idx = get_char_set(train_data_dir)
    nb_classes = len(char_set)
    max_nb_char = get_maxnb_char(train_data_dir)
    # print 'char_set:', char_set
    print 'nb_classes:', nb_classes
    print 'max_nb_char:', max_nb_char
-    model = build_CNN_LSTM(img_channels, img_width, img_height, max_nb_char, nb_classes)  # build the CNN architecture
-    model.load_weights(weights_file_path)  # load the trained weights
+    model = build_cv_cnn_lstm(img_channels, img_width, img_height, max_nb_char, nb_classes)  # build the CNN architecture
+    # model.load_weights(weights_file_path)  # load the trained weights

    # load the whole dataset first, then train
-    # val_data = load_data(val_data_dir, max_nb_char, img_width, img_height, img_channels, char_set)
+    # val_data = load_data(val_data_dir, max_nb_char, img_width, img_height, img_channels, char_set, char2idx)
    val_data = None
-    # train_data = load_data(train_data_dir, max_nb_char, img_width, img_height, img_channels, char_set)
-    # train(model, batch_size, nb_epoch, save_dir, train_data, val_data)
+    train_data = load_data(train_data_dir, max_nb_char, img_width, img_height, img_channels, char_set, char2idx)
+    train(model, batch_size, nb_epoch, save_dir, train_data, val_data, char_set)

    # test
-    train_data = load_data(train_data_dir, max_nb_char, img_width, img_height, img_channels, char_set)
+    # train_data = load_data(train_data_dir, max_nb_char, img_width, img_height, img_channels, char_set, char2idx)
    test(model, train_data, char_set)
-    # val_data = load_data(val_data_dir, max_nb_char, img_width, img_height, img_channels, char_set)
+    # val_data = load_data(val_data_dir, max_nb_char, img_width, img_height, img_channels, char_set, char2idx)
    # test(model, val_data, char_set)
-    # test_data = load_data(test_data_dir, max_nb_char, img_width, img_height, img_channels, char_set)
+    # test_data = load_data(test_data_dir, max_nb_char, img_width, img_height, img_channels, char_set, char2idx)
    # test(model, test_data, char_set)
54 changes: 41 additions & 13 deletions util.py
@@ -6,14 +6,18 @@
import matplotlib.pyplot as plt
import keras.backend as K

-def one_hot_encoder(data, whole_set):
+
+@profile  # injected by a profiler such as kernprof; a plain run raises NameError
+def one_hot_encoder(data, whole_set, char2idx):
    """
    Encode the whole list at once, not one record at a time
    """
    ret = []
    for i in data:
-        idx = whole_set.index(i)
-        ret.append([1 if j==idx else 0 for j in range(len(whole_set))])
+        idx = char2idx[i]
+        tmp = np.zeros(len(whole_set))
+        tmp[idx] = 1
+        ret.append(tmp)
    return ret
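This hunk carries most of the commit's "100 times faster" claim: the old encoder called whole_set.index(i), a linear scan of the character set, then built a pure-Python one-hot list for every character, while the new one is a single dict lookup plus a vectorized np.zeros fill. A rough micro-benchmark of just the lookup change (the 2000-symbol vocabulary and worst-case probe are assumptions for illustration; exact ratios vary by machine):

import timeit

vocab = ['ch%d' % i for i in range(2000)]         # hypothetical charset size
char2idx = dict(zip(vocab, range(len(vocab))))
probe = vocab[-1]                                 # worst case for list.index

t_list = timeit.timeit(lambda: vocab.index(probe), number=10000)
t_dict = timeit.timeit(lambda: char2idx[probe], number=10000)
print('list.index: %.4fs  dict: %.6fs  ratio: %.0fx'
      % (t_list, t_dict, t_list / t_dict))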


@@ -23,6 +27,8 @@ def one_hot_decoder(data, whole_set):
        idx = np.argmax(probs)
        if whole_set[idx] != 'empty':
            ret.append(whole_set[idx])
+        else:
+            break
    ret = ''.join(ret)
    return ret
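With the added else: break, decoding now stops at the first 'empty' slot instead of skipping it, so anything the model emits after the end-of-sequence padding no longer leaks into the result. A toy sketch of the behavior change (assuming 'empty' is the padding symbol, as elsewhere in this file):

def decode(slot_chars):
    # slot_chars: the argmax'd symbol for each output slot
    ret = []
    for cha in slot_chars:
        if cha != 'empty':
            ret.append(cha)
        else:
            break   # the old loop had no else branch and kept scanning
    return ''.join(ret)

print(decode(['a', 'empty', 'b']))   # now 'a'; the old code returned 'ab'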

@@ -37,7 +43,8 @@ def plot_loss_figure(history, save_path):
    plt.savefig(save_path)


-def load_data(input_dir, max_nb_cha, width, height, channels, cha_set):
+@profile
+def load_data(input_dir, max_nb_cha, width, height, channels, char_set, char2idx):
"""
    Directory layout:
    all image files are named id.jpg, with id counting up from 1
@@ -51,11 +58,14 @@ def load_data(input_dir, max_nb_cha, width, height, channels, cha_set):

    for dirpath, dirnames, filenames in os.walk(input_dir):
        nb_pic = len(filenames)-1
+        if nb_pic <= 0:
+            continue
        for i in range(1, nb_pic+1):
            filename = str(i) + '.jpg'
            filepath = dirpath + os.sep + filename
            pixels = load_img(filepath, width, height, channels)
            x.append(pixels)
+            # print sys.getsizeof(x), i

        label_path = dirpath + os.sep + 'label.txt'
        with open(label_path) as f:
@@ -72,9 +82,8 @@ def load_data(input_dir, max_nb_cha, width, height, channels, cha_set):

    # convert to the array layout Keras expects, and one-hot encode
    x = np.array(x)
-    x = x.astype('float32')  # the GPU only takes 32-bit floats
    x /= 255  # normalized
-    y = [one_hot_encoder(i, cha_set) for i in y]
+    y = [one_hot_encoder(i, char_set, char2idx) for i in y]
    y = np.array(y)

    print 'Data loaded, spend time(m) :', (time.time()-tag)/60
@@ -83,13 +92,13 @@ def load_data(input_dir, max_nb_cha, width, height, channels, cha_set):

def load_img(path, width, height, channels):
    img = Image.open(path)
-    im = img.resize((width, height))  # resize is necessary if not using FCN
-    pixels = list(im.getdata())
+    img = img.resize((width, height))  # resize is necessary if not using FCN
+    img = np.asarray(img, dtype='float32')
    if channels > 1:
-        x = [[[pixels[k*width+i][j] for k in range(height)] for i in range(width)] for j in range(channels)]  # to (channels, width, height)
+        img = np.rollaxis(img, 2, 0)  # HWC -> CHW, i.e. (channels, height, width)
    else:
-        x = [[[pixels[k*width+i] for k in range(height)] for i in range(width)]]
-    return x
+        img = [[[img[k*width+i] for k in range(height)] for i in range(width)]]  # TODO: still indexes the 2-D array like the old flat pixel list; the grayscale path looks unfinished
+    return img
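This is the other large speedup: list(im.getdata()) plus a triple-nested comprehension touched every pixel as a Python object, while np.asarray converts the PIL buffer in C and np.rollaxis just reorders axes. Note the layout change as well: np.rollaxis(img, 2, 0) yields (channels, height, width) rather than the old (channels, width, height), which is presumably why input_shape in architecture/CNN_LSTM.py is flipped in this same commit. A sketch comparing the two paths (image size is illustrative):

import timeit
import numpy as np

h, w, c = 50, 223, 3                                # illustrative size
img = np.random.randint(0, 256, size=(h, w, c)).astype('float32')
pixels = [tuple(px) for px in img.reshape(-1, c)]   # what getdata() yields

def old_way():   # pure-Python shuffling into (channels, width, height)
    return [[[pixels[k * w + i][j] for k in range(h)]
             for i in range(w)] for j in range(c)]

def new_way():   # C-level axis roll into (channels, height, width)
    return np.rollaxis(img, 2, 0)

# Same pixel values, transposed layouts:
assert np.array_equal(np.array(old_way()).transpose(0, 2, 1), new_way())
print('old: %.3fs' % timeit.timeit(old_way, number=10))
print('new: %.5fs' % timeit.timeit(new_way, number=10))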


def get_char_set(file_dir):
@@ -100,7 +109,9 @@ def get_char_set(file_dir):
        raw = raw.decode('utf-8').strip('\r\n')
        for i in raw:
            ret.add(i)
-    return ret
+    char_set = list(ret)
+    char2idx = dict(zip(char_set, range(len(char_set))))
+    return char_set, char2idx


def get_maxnb_char(file_dir):
@@ -115,4 +126,21 @@ def get_maxnb_char(file_dir):

def categorical_accuracy_per_sequence(y_true, y_pred):
    return K.mean(K.min(K.equal(K.argmax(y_true, axis=-1),
                                K.argmax(y_pred, axis=-1)), axis=-1))
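categorical_accuracy_per_sequence grades whole captchas, not single characters: K.equal compares the argmax at every slot, and K.min along the slot axis collapses that to 1 only when every slot matches, so the mean is the fraction of fully correct sequences. The same logic in NumPy (a sketch; layout is (batch, slots, nb_classes) as above):

import numpy as np

def accuracy_per_sequence(y_true, y_pred):
    match = np.argmax(y_true, -1) == np.argmax(y_pred, -1)   # (batch, slots)
    return np.min(match, axis=-1).mean()   # one wrong slot fails the sequence

y_true = np.eye(3)[[[0, 1], [2, 2]]]      # two sequences of two slots
y_bad = y_true.copy()
y_bad[1, 1] = np.eye(3)[0]                # one wrong character
print(accuracy_per_sequence(y_true, y_true))   # 1.0
print(accuracy_per_sequence(y_true, y_bad))    # 0.5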


def get_sample_weight(label, whole_set):
    ret = []
    for i in label:
        ret.append([])
        tag = False
        for j in i:
            cha = whole_set[np.argmax(j)]
            weight = 0
            if cha == 'empty' and tag == False:
                weight = 1
                tag = True
            if cha != 'empty':
                weight = 1
            ret[-1].append(weight)
    return np.array(ret)
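get_sample_weight pairs with the sample_weight_mode='temporal' compile option and the sample_weight argument now passed to fit() in train.py: the loss receives one weight per character slot. Real characters get weight 1, the first 'empty' also gets weight 1 (so the model still learns where the sequence ends), and every padding slot after it gets 0, which keeps long runs of trailing padding from dominating the loss on variable-length labels. A sketch of the weights it produces (assuming 'empty' is the padding symbol and labels are one-hot):

import numpy as np

whole_set = ['a', 'b', 'empty']
label = np.eye(3)[[0, 1, 2, 2]]   # 'ab' padded to four slots

weights, seen_empty = [], False
for row in label:
    cha = whole_set[np.argmax(row)]
    if cha != 'empty':
        weights.append(1)         # real character
    elif not seen_empty:
        weights.append(1)         # first 'empty' marks the end, keep it
        seen_empty = True
    else:
        weights.append(0)         # trailing padding is ignored by the loss
print(weights)                    # [1, 1, 1, 0]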
