@@ -0,0 +1,2 @@
*.sh eol=lf
* text=auto
@@ -0,0 +1,15 @@
.idea/workspace.xml
.idea/dictionaries/
ETL8G/
target/
tmp/
temp/
__pycache__/
Thumbs.db
*.aux
*.dvi
*.log
*.gz
*.dat
.~lock.*
@@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager" inherit-compiler-output="true">
    <exclude-output />
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>
@@ -0,0 +1,46 @@
# Papers
I implemented Charlie Tsai's "Recognizing Handwritten Japanese Characters Using Deep Convolutional Neural Networks".
http://cs231n.stanford.edu/reports2016/262_Report.pdf

This is just a VGG-like convnet.
https://arxiv.org/abs/1409.1556

# Libraries
You need the following libraries (a quick version check is sketched after this list).
- Anaconda (Python 3.5)
- Theano 0.8.2
- Keras 1.1.0
- scikit-learn 0.17.1 (included in Anaconda)
- CUDA 7.5
- cuDNN 5.0

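If you want to confirm that Python actually picks up the pinned versions, a minimal check (assuming the libraries above are already installed) is:

```python
# Print the installed versions; they should match the list above.
import theano
import keras
import sklearn

print("Theano:", theano.__version__)         # expected: 0.8.2
print("Keras:", keras.__version__)           # expected: 1.1.0
print("scikit-learn:", sklearn.__version__)  # expected: 0.17.1
```
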
# Config files
~/.theanorc
```
[global]
floatX = float32
device = gpu
[lib]
cnmem = 1
[nvcc]
flags=-D_FORCE_INLINES
```

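To check that Theano actually picked up this file, one small sketch (assuming Theano 0.8.2 is installed and ~/.theanorc is in place) is:

```python
# Theano reads ~/.theanorc on import; these values should mirror the file above.
import theano

print(theano.config.device)  # expected: gpu
print(theano.config.floatX)  # expected: float32
```
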
~/.keras/keras.json
```
{
    "epsilon": 1e-07,
    "image_dim_ordering": "tf",
    "floatx": "float32",
    "backend": "theano"
}
```

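Likewise, a quick sanity check that Keras sees this configuration (assuming Keras 1.1.0 is installed; importing keras should also print "Using Theano backend.") is:

```python
# The dim ordering reported by the backend should match keras.json above.
from keras import backend as K

print(K.image_dim_ordering())  # expected: "tf"
```
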
# Dataset
Please download the dataset from http://etlcdb.db.aist.go.jp/?page_id=651 and extract it into the ETL8G folder.
The dataset contains hiragana characters handwritten by 160 people.

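After extraction, the scripts in this commit expect the 32 raw files ETL8G_01 through ETL8G_32 inside the ETL8G folder. A minimal presence check (paths assumed relative to the repository root) is:

```python
# Verify that all 32 ETL8G binary files are where the reading code expects them.
import os

for j in range(1, 33):
    filename = 'ETL8G/ETL8G_{:02d}'.format(j)
    print(filename, 'found' if os.path.exists(filename) else 'MISSING')
```
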
# How to run
Just run ```python learn.py```.
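Note that learn.py loads hiragana.npz from its working directory; that archive is produced by the ETL8G conversion script added in this commit (the one that defines read_hiragana()), so run that script once before training. As an optional sanity check on the converted data (array layout taken from the conversion code):

```python
# hiragana.npz stores a uint8 array of shape (72, 160, 127, 128):
# 72 character classes x 160 writers, each a 127x128 image with 4-bit gray levels.
import numpy as np

ary = np.load("hiragana.npz")["arr_0"]
print(ary.shape)  # expected: (72, 160, 127, 128)
print(ary.max())  # pixel values are 4-bit, so this should be at most 15
```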
@@ -0,0 +1,89 @@
# This code is based on
# https://github.com/fchollet/keras/blob/master/examples/mnist_cnn.py
# https://github.com/fchollet/keras/blob/master/examples/cifar10_cnn.py

import numpy as np
import scipy.misc
from sklearn.cross_validation import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
from keras import initializations
from keras import backend as K

nb_classes = 72
# input image dimensions
img_rows, img_cols = 64, 64
# img_rows, img_cols = 127, 128

# Load the converted ETL8G hiragana images (4-bit gray levels, hence / 15.0)
# and resize each 127x128 image to img_rows x img_cols.
ary = np.load("hiragana.npz")['arr_0'].reshape([-1, 127, 128]).astype(np.float32) / 15.0
X_train = np.zeros([nb_classes * 160, img_rows, img_cols], dtype=np.float32)
for i in range(nb_classes * 160):
    X_train[i] = scipy.misc.imresize(ary[i], (img_rows, img_cols), mode='F')
    # X_train[i] = ary[i]
Y_train = np.repeat(np.arange(nb_classes), 160)

X_train, X_test, Y_train, Y_test = train_test_split(X_train, Y_train, test_size=0.2)

# Add the channel axis in the position expected by the configured dim ordering.
if K.image_dim_ordering() == 'th':
    X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
    X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    X_train = X_train.reshape(X_train.shape[0], img_rows, img_cols, 1)
    X_test = X_test.reshape(X_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(Y_train, nb_classes)
Y_test = np_utils.to_categorical(Y_test, nb_classes)

model = Sequential()


def my_init(shape, name=None):
    # Custom Gaussian weight initialization with scale 0.1.
    return initializations.normal(shape, scale=0.1, name=name)


# Best val_acc: 0.9679 (tried only once)
# 25 minutes on an Amazon EC2 g2.2xlarge
def m6_1():
    model.add(Convolution2D(32, 3, 3, init=my_init, input_shape=input_shape))
    model.add(Activation('relu'))
    model.add(Convolution2D(32, 3, 3, init=my_init))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.5))

    model.add(Convolution2D(64, 3, 3, init=my_init))
    model.add(Activation('relu'))
    model.add(Convolution2D(64, 3, 3, init=my_init))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.5))

    model.add(Flatten())
    model.add(Dense(256, init=my_init))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))


# Best val_acc: 0.8016 (tried only once)
def classic_neural():
    model.add(Flatten(input_shape=input_shape))
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))


m6_1()
# classic_neural()

model.compile(loss='categorical_crossentropy', optimizer='adagrad', metrics=['accuracy'])
model.fit(X_train, Y_train, batch_size=16, nb_epoch=40, validation_data=(X_test, Y_test))
@@ -0,0 +1,33 @@
import struct
import numpy as np
from PIL import Image

# Every ETL8G record is 8199 bytes: header fields followed by a
# packed 4-bit, 128x127 grayscale character image (8128 bytes) and padding.
sz_record = 8199


def read_record_ETL8G(f):
    s = f.read(sz_record)
    r = struct.unpack('>2H8sI4B4H2B30x8128s11x', s)
    # r[1] is the JIS character code, r[2] the ASCII reading field
    # (hiragana records contain '.HIRA'), r[14] the packed image data.
    iF = Image.frombytes('F', (128, 127), r[14], 'bit', 4)
    iL = iF.convert('L')
    return r + (iL,)


def read_hiragana():
    # Character type = 72, person = 160, y = 127, x = 128
    ary = np.zeros([72, 160, 127, 128], dtype=np.uint8)

    # 32 files x 5 datasets per file = 160 writers; each dataset holds
    # 956 records, of which the ones whose reading contains '.HIRA'
    # are the hiragana samples collected here.
    for j in range(1, 33):
        filename = '../ETL8G/ETL8G_{:02d}'.format(j)
        with open(filename, 'rb') as f:
            for id_dataset in range(5):
                moji = 0
                for i in range(956):
                    r = read_record_ETL8G(f)
                    if b'.HIRA' in r[2]:
                        ary[moji, (j - 1) * 5 + id_dataset] = np.array(r[-1])
                        moji += 1
    np.savez_compressed("hiragana.npz", ary)


read_hiragana()
@@ -0,0 +1,16 @@
import numpy as np
# import scipy
import matplotlib.pyplot as plt

nb_classes = 72
img_rows, img_cols = 64, 64

ary = np.load("../src/hiragana.npz")['arr_0'].reshape([-1, 127, 128]).astype(np.float32) / 15.0
X_train = np.zeros([nb_classes * 160, img_rows, img_cols], dtype=np.float32)
# for i in range(nb_classes * 160):
#     X_train[i] = scipy.misc.imresize(ary[i], (img_rows, img_cols), mode='F')
y_train = np.repeat(np.arange(nb_classes), 160)

plt.imshow(ary[71 * 160 + 1])
print(y_train[71 * 160 + 1])
plt.show()
@@ -0,0 +1,64 @@
# This code is from http://etlcdb.db.aist.go.jp/?page_id=2461

import struct
from PIL import Image

sz_record = 8199


def read_record_ETL8G(f):
    s = f.read(8199)
    r = struct.unpack('>2H8sI4B4H2B30x8128s11x', s)
    iF = Image.frombytes('F', (128, 127), r[14], 'bit', 4)
    iL = iF.convert('L')
    return r + (iL,)


def test1():
    # Read a single record and save it as a PNG.
    filename = '../ETL8G/ETL8G_01'
    id_record = 0

    with open(filename, 'rb') as f:
        f.seek(id_record * sz_record)
        r = read_record_ETL8G(f)

    print(r[0:-2], hex(r[1]))
    iE = Image.eval(r[-1], lambda x: 255 - x * 16)
    fn = '../tmp/ETL8G_{:d}_{:s}.png'.format((r[0] - 1) % 20 + 1, hex(r[1])[-4:])
    iE.save(fn, 'PNG')


def test2():
    # Render one whole dataset (956 records) of ETL8G_01 as a single sheet.
    filename = '../ETL8G/ETL8G_01'
    id_dataset = 0
    new_img = Image.new('L', (128 * 32, 128 * 30))

    with open(filename, 'rb') as f:
        f.seek(id_dataset * 956 * sz_record)
        for i in range(956):
            r = read_record_ETL8G(f)
            new_img.paste(r[-1], (128 * (i % 32), 128 * (i // 32)))
    iE = Image.eval(new_img, lambda x: 255 - x * 16)
    fn = '../tmp/ETL8G_ds{:03d}.png'.format(id_dataset)
    iE.save(fn, 'PNG')


def dump_all():
    # Render every dataset of every ETL8G file as one large PNG sheet.
    for j in range(1, 33):
        for id_dataset in range(5):
            new_img = Image.new('L', (128 * 32, 128 * 30))

            filename = '../ETL8G/ETL8G_{:02d}'.format(j)
            with open(filename, 'rb') as f:
                f.seek(id_dataset * 956 * sz_record)
                for i in range(956):
                    r = read_record_ETL8G(f)
                    new_img.paste(r[-1], (128 * (i % 32), 128 * (i // 32)))
            iE = Image.eval(new_img, lambda x: 255 - x * 16)
            fn = '../tmp/ETL8G_ds{:02d}_{:01d}.png'.format(j, id_dataset)
            iE.save(fn, 'PNG')


test1()
test2()
# dump_all()