forked from chineseocr/chineseocr
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
807f882
commit 9a2924b
Showing
38 changed files
with
359 additions
and
66 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
""" | ||
身份证 | ||
""" | ||
from apphelper.image import union_rbox | ||
import re | ||
class idcard:
    """
    Structured field extraction for a Chinese resident ID card.

    The constructor merges the raw OCR boxes with ``union_rbox`` and then
    runs every extractor; the recognised fields are collected in
    ``self.res`` (a dict keyed by the Chinese field names).
    """

    def __init__(self, result):
        """
        Merge the OCR boxes and extract all supported fields.

        :param result: raw OCR result, passed unchanged to ``union_rbox``.
            Each merged entry is expected to expose a ``'text'`` key.
        """
        self.result = union_rbox(result, 0.2)
        self.N = len(self.result)
        self.res = {}
        self.full_name()
        self.sex()
        self.birthday()
        self.birthNo()
        self.address()

    def _texts(self):
        """
        Yield the text of every OCR line with all whitespace removed.

        NOTE(review): the original code called ``.replace(' ', '')`` twice;
        the second call presumably targeted a non-ASCII space. Dropping every
        Unicode whitespace character covers both cases.
        """
        for line in self.result:
            yield "".join(line['text'].split())

    def full_name(self):
        """
        Extract the holder's name (1-4 Chinese characters after ``姓名``).

        Stores the value under ``'姓名'``; the first matching line wins.
        """
        for txt in self._texts():
            found = re.search(r"姓名([\u4e00-\u9fa5]{1,4})", txt)
            if found:
                self.res['姓名'] = found.group(1)
                break

    def sex(self):
        """
        Extract gender and ethnicity, e.g. from ``性别女民族汉``.

        A gender seen on an earlier line is carried forward, but nothing is
        stored until a line with ``民族`` is found; both keys (``'性别'`` and
        ``'民族'``) are then written together.
        """
        info = {}
        for txt in self._texts():
            # Prefer the explicit label so that a stray 男/女 elsewhere on the
            # line cannot override the labelled value.
            labelled = re.search(r"性别([男女])", txt)
            if labelled:
                info["性别"] = labelled.group(1)
            elif '男' in txt:
                info["性别"] = '男'
            elif '女' in txt:
                info["性别"] = '女'

            # Greedy prefix: the text after the last usable 民族 is captured.
            ethnic = re.search(r".*民族([\u4e00-\u9fa5]+)", txt)
            if ethnic:
                info["民族"] = ethnic.group(1)
                self.res.update(info)
                break

    def birthday(self):
        """
        Extract the date of birth as ``year-month-day``.

        Stores the value under ``'出生年月'``. Every component must contain
        at least one digit, so a bare ``年月日`` label is ignored.
        """
        for txt in self._texts():
            found = re.search(r"(\d+)年(\d+)月(\d+)日", txt)
            if found:
                self.res['出生年月'] = '-'.join(found.groups())
                break

    def birthNo(self):
        """
        Extract the citizen ID number.

        Stores the value under ``'身份证号码'``. A number directly following
        ``号码`` is preferred; otherwise a bare run of 16-18 characters
        (digits, optionally ending in ``X``/``x``) is accepted.
        """
        for txt in self._texts():
            # Require at least one digit so the bare label never produces ''.
            found = re.search(r"号码(\d+[Xx]?)", txt)
            if found:
                number = found.group(1)
            else:
                found = re.search(r"\d{15,17}[\dXx]", txt)
                number = found.group(0) if found else None
            if number:
                self.res['身份证号码'] = number
                break

    def address(self):
        """
        Extract the address.

        Every line containing ``住址`` or one of the administrative-division
        characters is concatenated in reading order and stored under
        ``'身份证地址'``. The matching is heuristic and still needs refinement.
        """
        keywords = ('住址', '省', '市', '县', '街', '村', '镇', '区', '城')
        parts = [
            txt.replace('住址', '')
            for txt in self._texts()
            if any(word in txt for word in keywords)
        ]
        if parts:
            self.res['身份证地址'] = ''.join(parts)
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
""" | ||
火车票 | ||
""" | ||
from apphelper.image import union_rbox | ||
import re | ||
class trainTicket:
    """
    Structured field extraction for a Chinese train ticket.

    The constructor merges the raw OCR boxes with ``union_rbox`` and then
    runs every extractor; the recognised fields are collected in
    ``self.res`` (a dict keyed by the Chinese field names).
    """

    def __init__(self, result):
        """
        Merge the OCR boxes and extract all supported fields.

        :param result: raw OCR result, passed unchanged to ``union_rbox``.
            Each merged entry is expected to expose a ``'text'`` key.
        """
        self.result = union_rbox(result, 0.2)
        self.N = len(self.result)
        self.res = {}
        self.station()
        self.time()
        self.price()
        self.full_name()

    def _texts(self):
        """
        Yield the text of every OCR line with all whitespace removed.

        NOTE(review): the original code called ``.replace(' ', '')`` twice;
        the second call presumably targeted a non-ASCII space. Dropping every
        Unicode whitespace character covers both cases.
        """
        for line in self.result:
            yield "".join(line['text'].split())

    def station(self):
        """
        Extract departure station, arrival station and train number.

        Example line: ``安顺站K492贵阳站`` gives ``出发='安顺'``,
        ``到达='贵阳'`` and ``车次='K492'``. A line is only used when it
        names at least two stations; a line with a single station is skipped
        instead of raising ``IndexError``.
        """
        for txt in self._texts():
            stops = re.findall(r'[一-龥]+站', txt)
            if len(stops) < 2:
                continue
            info = {
                '出发': stops[0].replace('站', ''),
                '到达': stops[1].replace('站', ''),
            }
            # Whatever sits between the two station names is the train number.
            between = re.findall(r'[一-龥]+站(.+?)[][一-龥]+站', txt)
            if between:
                info['车次'] = between[0]
            self.res.update(info)
            break

    def time(self):
        """
        Extract the travel date (``'日期'``, as ``Y-M-D``) and the departure
        time (``'时间'``, as ``HH:MM``).

        The two values are collected independently, each from the first line
        that contains it, so a date is kept even when the time is printed on
        another line or is missing.
        """
        found = {}
        for txt in self._texts():
            if '日期' not in found:
                dates = re.findall(r'[0-9]{1,4}年[0-9]{1,2}月[0-9]{1,2}日', txt)
                if dates:
                    found['日期'] = (
                        dates[0].replace('年', '-').replace('月', '-').replace('日', '')
                    )
            if '时间' not in found:
                times = re.findall(r'[0-9]{1,2}:[0-9]{1,2}', txt)
                if times:
                    found['时间'] = times[0]
            if len(found) == 2:
                break
        self.res.update(found)

    def price(self):
        """
        Extract the ticket price and store it under ``'车票价格'``.

        The patterns are tried from most to least specific; the currency
        sign and the ``元`` suffix are stripped from the stored value.
        """
        # The decimal point is escaped: an unescaped '.' would let any
        # character (e.g. '车' in '05车28元') count as part of the price.
        patterns = (
            r'¥[0-9]{1,4}\.[0-9]{1,2}元',
            r'[0-9]{1,4}\.[0-9]{1,2}元',
            r'[0-9]{1,6}元',
            r'¥[0-9]{1,4}\.[0-9]{1,2}',
        )
        for txt in self._texts():
            hits = [hit for pattern in patterns for hit in re.findall(pattern, txt)]
            if hits:
                self.res['车票价格'] = hits[0].replace('¥', '').replace('元', '')
                break

    def full_name(self):
        """
        Extract the passenger name and store it under ``'姓名'``.

        The name is taken as the 1-4 Chinese characters that follow the
        masked ID number (digits around a ``*``). All lines are scanned, so
        the last matching line wins.
        """
        for txt in self._texts():
            names = re.findall(r"\d*\*\d*([一-龥]{1,4})", txt)
            if names:
                self.res['姓名'] = names[0]
|
||
|
||
|
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.