forked from xingjian-f/DeepLearning-OCR
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcaptcha_new.py
96 lines (90 loc) · 2.8 KB
/
captcha_new.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# coding:utf-8
import sys
sys.path.append('/home/feixingjian/DeepLearning-OCR/')
import json
import numpy as np
from util import load_img
# Ordered (source, replacement) pairs used to turn a recognised Chinese
# arithmetic question into an ASCII expression.  Order matters: two-character
# operator phrases (e.g. u'加上') must be replaced before their one-character
# prefixes (u'加'), and u'等于几' / u'等于?' before the bare u'等于'.
_EXPR_REPLACEMENTS = (
    (u'零', '0'),
    (u'一', '1'),
    (u'二', '2'),
    (u'三', '3'),
    (u'四', '4'),
    (u'五', '5'),
    (u'六', '6'),
    (u'七', '7'),
    (u'八', '8'),
    (u'九', '9'),
    (u'壹', '1'),
    (u'贰', '2'),
    (u'叁', '3'),
    (u'肆', '4'),
    (u'伍', '5'),
    (u'陆', '6'),
    (u'柒', '7'),
    (u'捌', '8'),
    (u'玖', '9'),
    (u'加上', '+'),
    (u'减去', '-'),
    (u'乘以', '*'),
    (u'除以', '/'),
    (u'加', '+'),
    (u'乘', '*'),
    (u'×', '*'),
    (u'x', '*'),
    (u'减', '-'),
    (u'等于几', ''),
    (u'等于?', ''),
    (u'等于', ''),
)

# The only characters allowed to reach eval(): digits, the four basic
# operators, parentheses, the decimal point and spaces.
_ARITHMETIC_CHARS = frozenset(u'0123456789+-*/(). ')


def _eval_arithmetic(expr):
    """Evaluate a plain arithmetic string; return False if it is not one."""
    if not all(ch in _ARITHMETIC_CHARS for ch in expr):
        return False
    # '**' can only appear through a misrecognition (two adjacent multiply
    # signs) and would allow arbitrarily expensive exponentiation.
    if u'**' in expr:
        return False
    try:
        # NOTE(security): eval() is applied to text recognised from submitted
        # images.  It is kept for its arithmetic semantics, but the input is
        # restricted to the whitelist above and builtins are removed, so no
        # names or calls can be evaluated.
        return eval(expr, {'__builtins__': {}}, {})
    except Exception:
        # Malformed expression (syntax error, division by zero, ...).
        return False


def parse_expr(expr, pinyin):
    """Turn a recognised captcha string into its answer.

    Three cases are handled, selected by the suffix of ``expr``:

    * Ends with u'等于', u'等于?' or u'等于几': the Chinese numerals and
      operator words are replaced by ASCII digits/operators and the resulting
      arithmetic expression is evaluated.
    * Ends with u'的拼音首字母' (ZheJiang's captcha): every character before
      that suffix is looked up in ``pinyin`` and the first element of each
      entry is concatenated.
    * Anything else: ``expr`` is returned unchanged.

    Args:
        expr: The recognised text.
        pinyin: Mapping from a character to its pinyin entry; only used by
            the second case.  May be None.

    Returns:
        The numeric result, the joined initials, or ``expr`` itself.  The
        value ``False`` signals a failure (unparsable arithmetic, a character
        missing from ``pinyin``, or no ``pinyin`` mapping).  Because a valid
        arithmetic answer can be 0, callers must test with ``is False``.
    """
    if expr[-2:] == u'等于' or expr[-3:] in [u'等于?', u'等于几']:
        for source, replacement in _EXPR_REPLACEMENTS:
            expr = expr.replace(source, replacement)
        return _eval_arithmetic(expr)

    if expr[-6:] == u'的拼音首字母':  # ZheJiang's captcha
        if pinyin is None:
            # No lookup table supplied: report failure instead of raising
            # TypeError on the membership test below.
            return False
        initials = []
        for char in expr[:-6]:
            if char not in pinyin:
                return False
            initials.append(pinyin[char][0])
        return ''.join(initials)

    return expr
def load_data(img_vals, width, height, channels):
    """Load a batch of images and divide every value by 255.

    Args:
        img_vals: Iterable of image values; each one is passed to
            ``load_img`` together with ``width``, ``height`` and ``channels``.
        width: Forwarded to ``load_img``.
        height: Forwarded to ``load_img``.
        channels: Forwarded to ``load_img``.

    Returns:
        A numpy array stacking the ``load_img`` results, scaled by 1/255.
    """
    batch = np.asarray(
        [load_img(img_val, width, height, channels) for img_val in img_vals]
    )
    # In-place true division needs a floating array: on an integer array it
    # raises a casting error (or floor-divides on Python 2).  Floating input
    # is left untouched so its dtype is preserved.
    # NOTE(review): float32 is assumed to be what the predictor expects when
    # load_img yields integer pixels -- confirm against util.load_img.
    if not np.issubdtype(batch.dtype, np.floating):
        batch = batch.astype(np.float32)
    batch /= 255  # normalized
    return batch
def predict(predictor, post_vals, pinyin=None):
    """Run the predictor on posted images and return the results as JSON.

    Args:
        predictor: Object exposing ``img_width``, ``img_height``,
            ``img_channels``, a ``pred(X)`` method and, when
            ``pinyin == 'prob'``, a ``pred_probs`` attribute.
        post_vals: Mapping from a request key to an image value.
        pinyin: Passed through to ``parse_expr``.  The special value
            ``'prob'`` replaces each prediction with the comma-joined entries
            of ``predictor.pred_probs`` (marked TODO in the original).

    Returns:
        A JSON string (non-ASCII characters kept as-is) mapping every key of
        ``post_vals`` to ``{'valid', 'answer', 'expr'}``.  If the only key is
        ``'file'``, that single entry is returned without the outer mapping.
    """
    width = predictor.img_width
    height = predictor.img_height
    channels = predictor.img_channels

    # Materialise keys and values: dict views cannot be indexed on Python 3,
    # and both lists come from the same unmodified dict so they stay aligned.
    keys = list(post_vals.keys())
    img_vals = list(post_vals.values())

    # predict result
    X = load_data(img_vals, width, height, channels)
    predictions = predictor.pred(X)
    if pinyin == 'prob':  # TODO
        predictions = [
            ','.join(map(str, probs)) for probs in predictor.pred_probs
        ]

    # format reply
    res = {}
    for i, expr in enumerate(predictions):
        ans = parse_expr(expr, pinyin)
        # parse_expr signals failure with the singleton False; a legitimate
        # answer of 0 must still count as valid, hence the identity test.
        res[keys[i]] = {'valid': ans is not False, 'answer': ans, 'expr': expr}

    if len(res) == 1 and 'file' in res:  # in case input data is not in batch form
        res = res['file']
    return json.dumps(res, ensure_ascii=False)  # utf-8 output