重构卷积神经网络识别验证码部分。

这里用3层卷积神经网络加上一层全连接神经网络识别四位数字及数字加字母验证码，与用多层感知机的方法比较效果。
ChengYinghao · Nov 16, 2018 · 25d9d44 · 25d9d44
1 parent 0aed02c
commit 25d9d44
Show file tree

Hide file tree

Showing 10 changed files with 3,271 additions and 0 deletions.
diff --git a/CNN_method/Data_NumbersAndAlphabet.py b/CNN_method/Data_NumbersAndAlphabet.py
@@ -0,0 +1,86 @@
+from captcha.image import ImageCaptcha
+import numpy as np
+from PIL import Image
+
+# Character sets a captcha can be drawn from.
+# Digits only:
+number = list('0123456789')
+
+# Digits, then upper-case letters, then lower-case letters (62 symbols).
+# char2pos inside text2vec maps characters to indices in this same order.
+number_and_alphabet = list(
+    '0123456789'
+    'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+    'abcdefghijklmnopqrstuvwxyz'
+)
+
+
+def random_captcha_text(captcha_content, captcha_size):
+    """Draw ``captcha_size`` random characters from ``captcha_content``.
+
+    Each character comes from its own ``np.random.choice`` call, so the same
+    character may appear more than once. Returns a list with one entry per
+    drawn character.
+    """
+    return [np.random.choice(captcha_content) for _ in range(captcha_size)]
+
+
+def gen_captcha_text_image(captcha_content=number_and_alphabet, captcha_size=4):
+    """Generate a random captcha and return its text and rendered image.
+
+    Args:
+        captcha_content: Characters the captcha text is drawn from.
+        captcha_size: Number of characters in the captcha.
+
+    Returns:
+        Tuple ``(chars, pixels)``: the list of drawn characters and the
+        rendered image as a numpy array.
+    """
+    chars = random_captcha_text(captcha_content, captcha_size)
+    # generate() output is opened with PIL and then converted to an array.
+    rendered = ImageCaptcha().generate(''.join(chars))
+    pixels = np.array(Image.open(rendered))
+    return chars, pixels
+
+
+def text2vec(text, char_set_len=len(number_and_alphabet), max_captcha=4):
+    """Encode captcha text as a flat one-hot vector.
+
+    The result has ``max_captcha * char_set_len`` entries. Character ``i`` of
+    ``text`` sets entry ``i * char_set_len + index`` to 1, where ``index`` is
+    the character's class index. Text shorter than ``max_captcha`` leaves the
+    trailing slots all zero.
+
+    Args:
+        text: Sequence of single characters (a string or a list of characters).
+        char_set_len: Number of classes per character position.
+        max_captcha: Maximum number of characters in a captcha.
+
+    Returns:
+        1-D numpy array of zeros and ones.
+
+    Raises:
+        ValueError: If ``text`` is longer than ``max_captcha``, or contains a
+            character with no class index below ``char_set_len``.
+    """
+    if len(text) > max_captcha:
+        # Report the real limit rather than a hard-coded 4.
+        raise ValueError('验证码最长{}个字符'.format(max_captcha))
+
+    def char2pos(char):
+        # Index layout: '0'-'9' -> 0-9, 'A'-'Z' -> 10-35, 'a'-'z' -> 36-61,
+        # '_' -> 62. Explicit ranges keep punctuation that sits between
+        # these ASCII blocks from being mapped onto a valid class.
+        code = ord(char)
+        if char == '_':
+            pos = 62
+        elif 48 <= code <= 57:
+            pos = code - 48
+        elif 65 <= code <= 90:
+            pos = code - 55
+        elif 97 <= code <= 122:
+            pos = code - 61
+        else:
+            raise ValueError('No Map')
+        # An index outside this slot would set a bit in the next
+        # character's slot (or run off the end of the vector).
+        if pos >= char_set_len:
+            raise ValueError('No Map')
+        return pos
+
+    vector = np.zeros(max_captcha * char_set_len)
+    for i, c in enumerate(text):
+        vector[i * char_set_len + char2pos(c)] = 1
+    return vector
+
+
+def convert2gray(img):
+    """Return a grayscale version of ``img``.
+
+    Arrays with at most two dimensions are returned unchanged. Otherwise the
+    first three channels along the last axis are combined as
+    ``0.2989 * R + 0.5870 * G + 0.1140 * B``.
+    """
+    if len(img.shape) <= 2:
+        return img
+    red = img[:, :, 0]
+    green = img[:, :, 1]
+    blue = img[:, :, 2]
+    return 0.2989 * red + 0.5870 * green + 0.1140 * blue
+
+
+def get_next_batch(batch_size, image_height, image_width, char_set_len, max_captcha):
+    """Generate one batch of flattened grayscale captchas and their labels.
+
+    Args:
+        batch_size: Number of samples in the batch.
+        image_height: Image height in pixels.
+        image_width: Image width in pixels.
+        char_set_len: Number of classes per character position.
+        max_captcha: Maximum number of characters in a captcha.
+
+    Returns:
+        Tuple ``(batch_x, batch_y)``. ``batch_x`` has shape
+        ``(batch_size, image_height * image_width)`` with pixel values divided
+        by 255. ``batch_y`` has shape
+        ``(batch_size, max_captcha * char_set_len)`` and holds the one-hot
+        labels produced by ``text2vec``.
+    """
+    batch_x = np.zeros([batch_size, image_height * image_width])
+    batch_y = np.zeros([batch_size, max_captcha * char_set_len])
+
+    def wrap_gen_captcha_text_and_image():
+        # Regenerate until the image has the (60, 160, 3) shape this check
+        # hard-codes.
+        while True:
+            text, image = gen_captcha_text_image()
+            if image.shape == (60, 160, 3):
+                return text, image
+
+    for i in range(batch_size):
+        text, image = wrap_gen_captcha_text_and_image()
+        batch_x[i, :] = convert2gray(image).flatten() / 255
+        # Encode with the caller's dimensions so the label width always
+        # matches batch_y instead of relying on text2vec's defaults.
+        batch_y[i, :] = text2vec(text, char_set_len, max_captcha)
+
+    return batch_x, batch_y
diff --git a/CNN_method/Data_onlyNumbers.py b/CNN_method/Data_onlyNumbers.py
@@ -0,0 +1,86 @@
+from captcha.image import ImageCaptcha
+import numpy as np
+from PIL import Image
+
+# Character set a captcha is drawn from in this module: digits only.
+number = list('0123456789')
+
+# The combined digit/letter set is left disabled in this digits-only module:
+# number_and_alphabet = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
+#                        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
+#                        'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
+#                        'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd',
+#                        'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
+#                        'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
+#                        'y', 'z']
+
+
+def random_captcha_text(captcha_content, captcha_size):
+    """Pick ``captcha_size`` random characters out of ``captcha_content``.
+
+    One ``np.random.choice`` call is made per character, so repeats are
+    possible. The drawn characters are returned as a list.
+    """
+    return [np.random.choice(captcha_content) for _ in range(captcha_size)]
+
+
+def gen_captcha_text_image(captcha_content=number, captcha_size=4):
+    """Generate a random digit captcha and return its text and rendered image.
+
+    Args:
+        captcha_content: Characters the captcha text is drawn from.
+        captcha_size: Number of characters in the captcha.
+
+    Returns:
+        Tuple ``(chars, pixels)``: the list of drawn characters and the
+        rendered image as a numpy array.
+    """
+    chars = random_captcha_text(captcha_content, captcha_size)
+    # generate() output is opened with PIL and then converted to an array.
+    rendered = ImageCaptcha().generate(''.join(chars))
+    pixels = np.array(Image.open(rendered))
+    return chars, pixels
+
+
+def text2vec(text, char_set_len=len(number), max_captcha=4):
+    """Encode captcha text as a flat one-hot vector.
+
+    The result has ``max_captcha * char_set_len`` entries. Character ``i`` of
+    ``text`` sets entry ``i * char_set_len + index`` to 1, where ``index`` is
+    the character's class index. Text shorter than ``max_captcha`` leaves the
+    trailing slots all zero.
+
+    Args:
+        text: Sequence of single characters (a string or a list of characters).
+        char_set_len: Number of classes per character position (10 by
+            default, i.e. digits only).
+        max_captcha: Maximum number of characters in a captcha.
+
+    Returns:
+        1-D numpy array of zeros and ones.
+
+    Raises:
+        ValueError: If ``text`` is longer than ``max_captcha``, or contains a
+            character with no class index below ``char_set_len``.
+    """
+    if len(text) > max_captcha:
+        # Report the real limit rather than a hard-coded 4.
+        raise ValueError('验证码最长{}个字符'.format(max_captcha))
+
+    def char2pos(char):
+        # Index layout: '0'-'9' -> 0-9, 'A'-'Z' -> 10-35, 'a'-'z' -> 36-61,
+        # '_' -> 62. Explicit ranges keep punctuation that sits between
+        # these ASCII blocks from being mapped onto a valid class.
+        code = ord(char)
+        if char == '_':
+            pos = 62
+        elif 48 <= code <= 57:
+            pos = code - 48
+        elif 65 <= code <= 90:
+            pos = code - 55
+        elif 97 <= code <= 122:
+            pos = code - 61
+        else:
+            raise ValueError('No Map')
+        # With the digits-only default, letters must be rejected here;
+        # otherwise they would set a bit in another character's slot.
+        if pos >= char_set_len:
+            raise ValueError('No Map')
+        return pos
+
+    vector = np.zeros(max_captcha * char_set_len)
+    for i, c in enumerate(text):
+        vector[i * char_set_len + char2pos(c)] = 1
+    return vector
+
+
+def convert2gray(img):
+    """Collapse a colour image array to grayscale.
+
+    Input with at most two dimensions is handed back untouched. Otherwise the
+    result is ``0.2989 * R + 0.5870 * G + 0.1140 * B`` computed from the first
+    three channels of the last axis.
+    """
+    if len(img.shape) <= 2:
+        return img
+    red = img[:, :, 0]
+    green = img[:, :, 1]
+    blue = img[:, :, 2]
+    return 0.2989 * red + 0.5870 * green + 0.1140 * blue
+
+
+def get_next_batch(batch_size, image_height, image_width, char_set_len, max_captcha):
+    """Generate one batch of flattened grayscale captchas and their labels.
+
+    Args:
+        batch_size: Number of samples in the batch.
+        image_height: Image height in pixels.
+        image_width: Image width in pixels.
+        char_set_len: Number of classes per character position.
+        max_captcha: Maximum number of characters in a captcha.
+
+    Returns:
+        Tuple ``(batch_x, batch_y)``. ``batch_x`` has shape
+        ``(batch_size, image_height * image_width)`` with pixel values divided
+        by 255. ``batch_y`` has shape
+        ``(batch_size, max_captcha * char_set_len)`` and holds the one-hot
+        labels produced by ``text2vec``.
+    """
+    batch_x = np.zeros([batch_size, image_height * image_width])
+    batch_y = np.zeros([batch_size, max_captcha * char_set_len])
+
+    def wrap_gen_captcha_text_and_image():
+        # Regenerate until the image has the (60, 160, 3) shape this check
+        # hard-codes.
+        while True:
+            text, image = gen_captcha_text_image()
+            if image.shape == (60, 160, 3):
+                return text, image
+
+    for i in range(batch_size):
+        text, image = wrap_gen_captcha_text_and_image()
+        batch_x[i, :] = convert2gray(image).flatten() / 255
+        # Encode with the caller's dimensions so the label width always
+        # matches batch_y instead of relying on text2vec's defaults.
+        batch_y[i, :] = text2vec(text, char_set_len, max_captcha)
+
+    return batch_x, batch_y
diff --git a/CNN_method/Network_NumbersAndAlphabet.py b/CNN_method/Network_NumbersAndAlphabet.py
@@ -0,0 +1,133 @@
+import tensorflow as tf
+import matplotlib.pyplot as plt
+from core.CNN_method.Data_NumbersAndAlphabet import gen_captcha_text_image, number_and_alphabet, get_next_batch, convert2gray
+
+
+def cnn_structure(w_alpha=0.01, b_alpha=0.1, debug=False):
+    """Build the captcha CNN graph and return its output logits tensor.
+
+    The network has three 3x3 convolution stages (32, 64 and 128 output
+    channels), each followed by ReLU, 2x2 max-pooling with stride 2 and
+    dropout. Then comes a 1024-unit fully connected layer with ReLU and
+    dropout, and a final linear layer with ``max_captcha * char_set_len``
+    outputs.
+
+    Reads the module-level names ``X``, ``keep_prob``, ``image_height``,
+    ``image_width``, ``max_captcha`` and ``char_set_len``, which the
+    ``__main__`` block defines before calling this function.
+
+    Args:
+        w_alpha: Scale for the random-normal initial weights of the first two
+            convolution layers (the other weights use Xavier initialisation).
+        b_alpha: Scale for the random-normal initial biases.
+        debug: When true, print the shape of each intermediate tensor.
+
+    Returns:
+        Tensor of un-normalised scores (no activation is applied to it here).
+    """
+    # Restore the flat input rows to single-channel images.
+    x = tf.reshape(X, shape=[-1, image_height, image_width, 1])
+
+    # Stage 1: 1 -> 32 channels.
+    wc1 = tf.Variable(w_alpha * tf.random_normal([3, 3, 1, 32]))
+    bc1 = tf.Variable(b_alpha * tf.random_normal([32]))
+    conv1 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(x, wc1, strides=[1, 1, 1, 1], padding='SAME'), bc1))
+    pool1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
+    out1 = tf.nn.dropout(pool1, keep_prob)
+
+    # Stage 2: 32 -> 64 channels.
+    wc2 = tf.Variable(w_alpha * tf.random_normal([3, 3, 32, 64]))
+    bc2 = tf.Variable(b_alpha * tf.random_normal([64]))
+    conv2 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(out1, wc2, strides=[1, 1, 1, 1], padding='SAME'), bc2))
+    pool2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
+    out2 = tf.nn.dropout(pool2, keep_prob)
+
+    # Stage 3: 64 -> 128 channels, Xavier-initialised weights.
+    wc3 = tf.get_variable(name='wc3', shape=[3, 3, 64, 128], dtype=tf.float32,
+                          initializer=tf.contrib.layers.xavier_initializer())
+    # wc3 = tf.Variable(w_alpha * tf.random_normal([3, 3, 64, 128]))
+    bc3 = tf.Variable(b_alpha * tf.random_normal([128]))
+    conv3 = tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(out2, wc3, strides=[1, 1, 1, 1], padding='SAME'), bc3))
+    pool3 = tf.nn.max_pool(conv3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
+    out3 = tf.nn.dropout(pool3, keep_prob)
+
+    # Fully connected layer. The 8 * 20 * 128 input size is hard-coded; it
+    # presumably corresponds to a 60x160 input after three halving pools
+    # (60 -> 30 -> 15 -> 8, 160 -> 80 -> 40 -> 20) -- TODO confirm if the
+    # image size ever changes.
+    wd1 = tf.get_variable(name='wd1', shape=[8 * 20 * 128, 1024], dtype=tf.float32,
+                          initializer=tf.contrib.layers.xavier_initializer())
+    # wd1 = tf.Variable(w_alpha * tf.random_normal([7*20*128,1024]))
+    bd1 = tf.Variable(b_alpha * tf.random_normal([1024]))
+    dense = tf.reshape(out3, [-1, wd1.get_shape().as_list()[0]])
+    dense = tf.nn.relu(tf.add(tf.matmul(dense, wd1), bd1))
+    dense = tf.nn.dropout(dense, keep_prob)
+
+    # Output layer.
+    # NOTE(review): this variable is literally named 'name' (probably meant
+    # to be 'wout'); renaming it would change the name stored in existing
+    # checkpoints, so confirm before changing.
+    wout = tf.get_variable('name', shape=[1024, max_captcha * char_set_len], dtype=tf.float32,
+                           initializer=tf.contrib.layers.xavier_initializer())
+    # wout = tf.Variable(w_alpha * tf.random_normal([1024, max_captcha * char_set_len]))
+    bout = tf.Variable(b_alpha * tf.random_normal([max_captcha * char_set_len]))
+    out = tf.add(tf.matmul(dense, wout), bout)
+    if (debug):
+        print("x size=", x.shape)
+        print("conv1 size =", conv1.shape)
+        print("pool1 size =", pool1.shape)
+        print("out1 size =", out1.shape)
+        print("conv2 size =", conv2.shape)
+        print("pool2 size =", pool2.shape)
+        print("out2 size =", out2.shape)
+        print("conv3 size =", conv3.shape)
+        print("pool3 size =", pool3.shape)
+        print("out3 size =", out3.shape)
+        print("dense size =", dense.shape)
+    return out
+
+
+def train_cnn(debug=False):
+    """Train the CNN on freshly generated captchas until accuracy exceeds 0.99.
+
+    Builds the graph with ``cnn_structure``, minimises sigmoid cross-entropy
+    with Adam, and loops without a step limit. Every 10 steps the accuracy is
+    measured on a newly generated batch; once it is above 0.99 a checkpoint
+    is saved and the loop ends.
+
+    Reads the module-level names ``X``, ``Y``, ``keep_prob``,
+    ``image_height``, ``image_width``, ``max_captcha`` and ``char_set_len``.
+
+    Args:
+        debug: Forwarded to ``cnn_structure`` to print tensor shapes.
+    """
+    output = cnn_structure(debug=debug)
+    cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=output, labels=Y))
+    optimizer = tf.train.AdamOptimizer(learning_rate=0.000001).minimize(cost)
+    # Accuracy is per character: the arg-max class of each position is
+    # compared with the label's arg-max, then averaged over all positions.
+    predict = tf.reshape(output, [-1, max_captcha, char_set_len])
+    max_idx_p = tf.argmax(predict, 2)
+    max_idx_l = tf.argmax(tf.reshape(Y, [-1, max_captcha, char_set_len]), 2)
+    correct_pred = tf.equal(max_idx_p, max_idx_l)
+    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
+
+    saver = tf.train.Saver()
+
+    with tf.Session() as sess:
+        init = tf.global_variables_initializer()
+        sess.run(init)
+        step = 0
+        while True:
+            # Each training step uses a new batch of 100 generated captchas.
+            batch_x, batch_y = get_next_batch(100, image_height, image_width, char_set_len, max_captcha)
+            _, cost_ = sess.run([optimizer, cost], feed_dict={X: batch_x, Y: batch_y, keep_prob: 0.75})
+            print(step, cost_)
+            if step % 10 == 0:
+                # Evaluate on another generated batch with dropout disabled.
+                batch_x_test, batch_y_test = get_next_batch(100, image_height, image_width, char_set_len, max_captcha)
+                acc = sess.run(accuracy, feed_dict={X: batch_x_test, Y: batch_y_test, keep_prob: 1.})
+                print(step, acc)
+                if acc > 0.99:
+                    # The only exit: save a checkpoint tagged with the step.
+                    saver.save(sess, "./model_number_and_alphabets/crack_captcha.model", global_step=step)
+                    break
+            step += 1
+
+
+def crack_captcha(captcha_image):
+    """Predict the characters of one captcha image with a saved model.
+
+    Builds the graph with ``cnn_structure`` and restores the weights from a
+    fixed checkpoint path. Reads the module-level names ``X``, ``keep_prob``,
+    ``max_captcha`` and ``char_set_len``.
+
+    Args:
+        captcha_image: A single input sample; it is wrapped in a list and fed
+            to ``X`` as a batch of one.
+
+    Returns:
+        List with the predicted class index for each character position
+        (indices, not characters).
+    """
+    output = cnn_structure()
+
+    saver = tf.train.Saver()
+    with tf.Session() as sess:
+        # NOTE(review): the checkpoint step (2510) is hard-coded, so this only
+        # works if a checkpoint saved at that exact step exists.
+        saver.restore(sess, "./model_number_and_alphabets/crack_captcha.model-2510")
+
+        predict = tf.argmax(tf.reshape(output, [-1, max_captcha, char_set_len]), 2)
+        text_list = sess.run(predict, feed_dict={X: [captcha_image], keep_prob: 1.})
+        text = text_list[0].tolist()
+        return text
+
+
+# Script entry point. `train` selects the mode: 0 trains a model, 1 runs a
+# single prediction with a saved model. Both branches define the module-level
+# names (image size, character set, placeholders) that the functions above
+# read as globals.
+if __name__ == '__main__':
+    train = 0
+    if train == 0:
+        # Training mode: generate one sample to report the image size and
+        # to take the captcha length from.
+        text, image = gen_captcha_text_image()
+        print("验证码大小：", image.shape)  # (60,160,3)
+
+        image_height = 60
+        image_width = 160
+        max_captcha = len(text)
+        print("验证码文本最长字符数", max_captcha)
+        char_set = number_and_alphabet
+        char_set_len = len(char_set)
+        print(char_set_len)
+        # Placeholders for flattened images, one-hot labels and the dropout
+        # keep probability.
+        X = tf.placeholder(tf.float32, [None, image_height * image_width])
+        Y = tf.placeholder(tf.float32, [None, max_captcha * char_set_len])
+        keep_prob = tf.placeholder(tf.float32)
+        train_cnn()
+
+    if train == 1:
+        # Prediction mode: no label placeholder is created here.
+        image_height = 60
+        image_width = 160
+        char_set = number_and_alphabet
+        char_set_len = len(char_set)
+        X = tf.placeholder(tf.float32, [None, image_height * image_width])
+        keep_prob = tf.placeholder(tf.float32)
+        correct_text, image = gen_captcha_text_image(number_and_alphabet, 4)
+        max_captcha = len(correct_text)
+        # Same preprocessing as get_next_batch: grayscale, flatten, scale by 255.
+        image_array = convert2gray(image)
+        image_array = image_array.flatten() / 255
+        predict_text = crack_captcha(image_array)
+        # correct_text is a list of characters; predict_text is the list of
+        # class indices returned by crack_captcha.
+        print("正确文本为：", correct_text)
+        print("预测文本为：", predict_text)
+        plt.imshow(image)
+        plt.show()