Skip to content

Commit

Permalink
新增ocr dense
Browse files Browse the repository at this point in the history
  • Loading branch information
wenlihaoyu committed Oct 9, 2018
1 parent ee1e90d commit 1e1eb76
Show file tree
Hide file tree
Showing 12 changed files with 170 additions and 33 deletions.
32 changes: 25 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,23 +1,39 @@
## 本项目基于 [yolo3](https://github.com/pjreddie/darknet.git) 与 [crnn](https://github.com/meijieru/crnn.pytorch.git) 实现中文自然场景文字检测及识别

## 环境部署
python=3.6 pytorch=0.2.0
python=3.6 pytorch==0.4.1
``` Bash
git clone https://github.com/chineseocr/chineseocr.git
cd chineseocr
sh setup.sh #(cpu: sh setup-cpu.sh)
```

下载编译darknet(如果直接运用opencv dnn 可忽略darknet的编译)
```
git clone https://github.com/pjreddie/darknet.git
mv darknet chineseocr/
##编译对GPU、cudnn的支持 修改 Makefile
#GPU=1
#CUDNN=1
#OPENCV=0
#OPENMP=0
make
```
修改 darknet/python/darknet.py line 48
root = '/root/'##chineseocr所在目录
lib = CDLL(root+"chineseocr/darknet/libdarknet.so", RTLD_GLOBAL)


## 下载模型文件
模型文件地址:
* [baidu pan](https://pan.baidu.com/s/1dEopm8VkFLuDDfVkpuzHqQ)
* [google drive](https://drive.google.com/drive/folders/1vlA6FjvicTt5GKvAfmycP5AlYxm4i9ze?usp=sharing)
* [baidu pan](https://pan.baidu.com/s/1gTW9gwJR6hlwTuyB6nCkzQ)
* [google drive](https://drive.google.com/drive/folders/1vlA6FjvicTt5GKvAfmycP5AlYxm4i9ze?usp=sharing)(暂时无更新)

复制文件夹中的 ocr.pth, text.cfg, text.names, text.weights 到models目录
复制文件夹中的所有文件到models目录

或者可将yolo3模型转换为keras版本,详细参考https://github.com/qqwweee/keras-yolo3.git
也可将yolo3模型转换为keras版本,详细参考https://github.com/qqwweee/keras-yolo3.git

或者直接运用opencv>=3.4 dnn直接调用darknet模型(参考 opencv_dnn_detect.py),cpu文本检测小于1秒
或者直接运用opencv>=3.4 dnn模块调用darknet模型(参考 opencv_dnn_detect.py)

## web服务启动
``` Bash
Expand All @@ -29,6 +45,7 @@ ipython app.py 8080 ##8080端口号,可以设置任意端口

<img width="500" height="300" src="https://github.com/chineseocr/chineseocr/blob/master/test/img1.png"/>
<img width="500" height="300" src="https://github.com/chineseocr/chineseocr/blob/master/test/4.png"/>
<img width="500" height="300" src="https://github.com/chineseocr/chineseocr/blob/master/test/card1.png"/>

## Play with Docker Container
``` Bash
Expand All @@ -46,5 +63,6 @@ http://127.0.0.1:8080/ocr
1. yolo3 https://github.com/pjreddie/darknet.git
2. crnn https://github.com/meijieru/crnn.pytorch.git
3. ctpn https://github.com/eragonruan/text-detection-ctpn
4. CTPN https://github.com/tianzhi0549/CTPN
4. CTPN https://github.com/tianzhi0549/CTPN
5. keras-yolo3 https://github.com/qqwweee/keras-yolo3.git

19 changes: 18 additions & 1 deletion config.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,24 @@
import os

# Detection backend switch: True -> OpenCV dnn, False -> darknet.
opencvFlag = True
# Directory of the darknet (YOLO) installation, relative to the current dir.
darknetRoot = os.path.join(os.path.curdir,"darknet")
# All model files are resolved against the process working directory.
pwd = os.getcwd()

# YOLO text-detection model files.
yoloCfg = os.path.join(pwd,"models","text.cfg")
yoloWeights = os.path.join(pwd,"models","text.weights")
yoloData = os.path.join(pwd,"models","text.data")

# Text-orientation detection model files.
AngleModelPb = os.path.join(pwd,"models","Angle-model.pb")
AngleModelPbtxt = os.path.join(pwd,"models","Angle-model.pbtxt")

# YOLOv3 input image size.
IMGSIZE = (1024,1024)
# Whether to run text-orientation detection.
DETECTANGLE = True
# Whether the OCR (CRNN) model uses its LSTM layers.
LSTMFLAG = True
# Whether OCR runs on the GPU.
GPU = True
# Model selection: True -> Chinese+English model, False -> English-only model.
chinsesModel = True

# Pick the OCR weight file that matches the flags above.
if not chinsesModel:
    # The English model is always used with the LSTM layers enabled.
    LSTMFLAG = True
    ocrModel = os.path.join(pwd,"models","ocr-english.pth")
elif LSTMFLAG:
    ocrModel = os.path.join(pwd,"models","ocr-lstm.pth")
else:
    ocrModel = os.path.join(pwd,"models","ocr-dense.pth")
3 changes: 3 additions & 0 deletions darknet_detect.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
sys.path.append('python')
import darknet as dn


def array_to_image(arr):
arr = arr.transpose(2,0,1)
c = arr.shape[0]
Expand Down Expand Up @@ -56,6 +57,8 @@ def to_box(r):
meta = dn.load_meta(yoloData.encode('utf-8'))
os.chdir(pwd)
def text_detect(img):
    """
    Detect text regions in ``img`` with the darknet YOLO network.

    Calls ``detect_np`` with the ``net`` and ``meta`` objects of this module
    (thresh=0.1, hier_thresh=0.5, nms=0.8) and converts the raw detections
    with ``to_box``.

    @@param:img, image array handed straight to ``detect_np``
    @@return: whatever ``to_box`` produces for the detections
              (NOTE(review): callers unpack ``boxes, scores`` -- confirm
              that ``to_box`` returns that pair)
    """
    # The cv2.dnn blob that used to be built here was never consumed by the
    # darknet path, so it has been removed: it cost a full image resize and
    # tied this backend to cv2 for no effect.
    r = detect_np(net, meta, img,thresh=0.1, hier_thresh=0.5, nms=0.8)
    return to_box(r)
95 changes: 82 additions & 13 deletions model.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,14 @@
# -*- coding: utf-8 -*-
from detector.detectors import TextDetector
from detector.other import get_boxes
import darknet_detect
import opencv_dnn_detect ##opencv dnn model for darknet
from config import opencvFlag
from config import IMGSIZE
from opencv_dnn_detect import angle_detect##文字方向检测
if opencvFlag:
import opencv_dnn_detect as detect ##opencv dnn model for darknet
else:
import darknet_detect as detect

import numpy as np
from PIL import Image
import numpy as np
Expand All @@ -23,12 +29,9 @@ def text_detect(img,
LINE_MIN_SCORE=0.8,
TEXT_PROPOSALS_WIDTH=5,
MIN_NUM_PROPOSALS=1,
textmodel = 'darknet_detect'

):
if textmodel == 'darknet_detect':
boxes, scores = darknet_detect.text_detect(np.array(img))
else:
boxes, scores = opencv_dnn_detect.text_detect(np.array(img))
boxes, scores = detect.text_detect(np.array(img))


boxes = np.array(boxes,dtype=np.float32)
Expand Down Expand Up @@ -191,25 +194,91 @@ def letterbox_image(image, size):
'''
image_w, image_h = image.size
w, h = size
new_w = int(image_w * min(w*1.0/image_w, h*1.0/image_h))
new_h = int(image_h * min(w*1.0/image_w, h*1.0/image_h))
resized_image = image.resize((new_w,new_h), Image.BICUBIC)

if max(image_w, image_h)<min(size):
resized_image = image
new_w = w
new_h = h
else:
new_w = int(image_w * min(w*1.0/image_w, h*1.0/image_h))
new_h = int(image_h * min(w*1.0/image_w, h*1.0/image_h))
resized_image = image.resize((new_w,new_h), Image.BICUBIC)

boxed_image = Image.new('RGB', size, (128,128,128))
boxed_image.paste(resized_image, ((w-new_w)//2,(h-new_h)//2))
return boxed_image

from scipy.ndimage import filters,interpolation,morphology,measurements,minimum
#from pylab import amin, amax
from numpy import amin, amax
def estimate_skew_angle(raw):
    """
    Estimate the skew angle of the text in a grayscale image.

    The image is resized with ``resize_im``, normalised to [0, 1], and a
    smoothed background (two 80th-percentile filters applied at half
    resolution) is subtracted. The central region (10% border removed on each
    side) is then rotated by every integer angle in ``range(-15, 15)``; the
    angle whose per-row mean profile has the largest variance wins.

    @@param:raw, 2-D grayscale array (the filters use 2-D window sizes)
    @@return: integer angle taken from ``range(-15, 15)``, or 0 when the
              image is constant and therefore carries no skew information
    """
    raw = resize_im(raw, scale=600, max_scale=900)
    image = raw-amin(raw)
    peak = amax(image)
    if peak == 0:
        # A constant image would be divided by zero below; the resulting NaN
        # scores make max() fall back to the first candidate (-15 degrees).
        # Report "no skew" instead.
        return 0
    image = image/peak

    # Background estimate: percentile-filter a half-size copy, then scale it
    # back up to (approximately) the original size.
    m = interpolation.zoom(image,0.5)
    m = filters.percentile_filter(m,80,size=(20,2))
    m = filters.percentile_filter(m,80,size=(2,20))
    m = interpolation.zoom(m,1.0/0.5)

    # The zoom round trip can change the shape by a pixel, so work on the
    # region common to both arrays.
    w,h = min(image.shape[1],m.shape[1]),min(image.shape[0],m.shape[0])
    flat = np.clip(image[:h,:w]-m[:h,:w]+1,0,1)
    d0,d1 = flat.shape
    o0,o1 = int(0.1*d0),int(0.1*d1)
    # Invert and shift the flattened image so its minimum is 0.
    flat = amax(flat)-flat
    flat -= amin(flat)
    est = flat[o0:d0-o0,o1:d1-o1]

    estimates = []
    for a in range(-15,15):
        roest = interpolation.rotate(est,a,order=0,mode='constant')
        # Score a candidate angle by the variance of its per-row means.
        v = np.var(np.mean(roest,axis=1))
        estimates.append((v,a))

    # Tuples compare by score first, then by angle, so ties go to the
    # larger angle.
    _,a = max(estimates)
    return a


def eval_angle(im,detectAngle=False,ifadjustDegree=True):
    """
    Estimate the orientation and skew of an image and return a corrected copy.

    @@param:im, image object; ``transpose``, ``convert`` and ``rotate`` are
               called on it (presumably a PIL image -- confirm with callers)
    @@param:detectAngle, whether to run ``angle_detect`` and undo a detected
               90/180/270 degree orientation
    @@param:ifadjustDegree, whether to estimate the residual skew with
               ``estimate_skew_angle``
    @@return: (angle, degree, image) -- the detected orientation (0 when
              detection is off), the estimated skew (0.0 when estimation is
              off) and ``im`` after the orientation fix and
              ``rotate(degree)``
    """
    angle = 0
    degree = 0.0
    if detectAngle:
        # np.array() already yields a fresh array, so the detector may crop
        # or modify it freely without touching ``im``.
        angle = angle_detect(img=np.array(im))
        rotations = {
            90: Image.ROTATE_90,
            180: Image.ROTATE_180,
            270: Image.ROTATE_270,
        }
        if angle in rotations:
            im = im.transpose(rotations[angle])

    if ifadjustDegree:
        # Skew is estimated on the grayscale version of the (possibly
        # re-oriented) image.
        degree = estimate_skew_angle(np.array(im.convert('L')))
    return angle,degree,im.rotate(degree)


def model(img,detectAngle=False,config={},ifIm=True,leftAdjust=False,rightAdjust=False,alph=0.1):
def model(img,detectAngle=False,config={},ifIm=True,leftAdjust=False,rightAdjust=False,alph=0.2,ifadjustDegree=False):
"""
@@param:img,
@@param:adjust 调整文字识别结果
@@param:detectAngle,是否检测文字朝向
"""
angle = 0
img =letterbox_image(img, (608,608))
angle,degree,img = eval_angle(img,detectAngle=detectAngle,ifadjustDegree=ifadjustDegree)

img =letterbox_image(img, IMGSIZE)

config['img'] = img
text_recs,tmp = text_detect(**config)

Expand Down
1 change: 1 addition & 0 deletions models/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
## 下载对应的权重文件放在此目录
36 changes: 32 additions & 4 deletions opencv_dnn_detect.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
from config import yoloCfg,yoloWeights
from config import AngleModelPb,AngleModelPbtxt
from config import IMGSIZE
from PIL import Image
import numpy as np
import cv2
net = cv2.dnn.readNetFromDarknet(yoloCfg,yoloWeights)
textNet = cv2.dnn.readNetFromDarknet(yoloCfg,yoloWeights)
angleNet = cv2.dnn.readNetFromTensorflow(AngleModelPb,AngleModelPbtxt)##文字方向检测
def text_detect(img):
thresh=0.1
h,w = img.shape[:2]
inputBlob = cv2.dnn.blobFromImage(img, scalefactor=0.00390625, size=(608, 608),swapRB=True ,crop=False);
net.setInput(inputBlob)
pred = net.forward()
inputBlob = cv2.dnn.blobFromImage(img, scalefactor=0.00390625, size=IMGSIZE,swapRB=True ,crop=False);
textNet.setInput(inputBlob)
pred = textNet.forward()
cx = pred[:,0]*w
cy = pred[:,1]*h
xmin = cx - pred[:,2]*w/2
Expand All @@ -20,3 +23,28 @@ def text_detect(img):
scores = scores[indx]
boxes = np.array(list(zip(xmin[indx],ymin[indx],xmax[indx],ymax[indx])))
return boxes,scores


def angle_detect(img,adjust=True):
    """
    Detect the text orientation of an image with ``angleNet``.

    @@param:img, image array; its first two dimensions are read as
               (height, width)
    @@param:adjust, when True a 5% border is cut from every side before
               detection to remove edge noise
    @@return: one of 0, 90, 180, 270, selected by the arg-max of the network
              output for the first item in the batch
    """
    height,width = img.shape[:2]
    if adjust:
        border = 0.05
        dx,dy = int(border*width),int(border*height)
        # Crop the image border to get rid of noise near the edges.
        img = img[dy:height-dy,dx:width-dx]

    blob = cv2.dnn.blobFromImage(img,
                                 scalefactor=1.0,
                                 size=(224, 224),
                                 swapRB=True,
                                 mean=[103.939,116.779,123.68],
                                 crop=False)
    angleNet.setInput(blob)
    scores = angleNet.forward()
    best = np.argmax(scores,axis=1)[0]
    return [0,90,180,270][best]


9 changes: 5 additions & 4 deletions setup-cpu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@ conda create -n chineseocr python=3.6 pip scipy numpy jupyter ipython ##运用co
source activate chineseocr
git submodule init && git submodule update
cd darknet/ && make && cd ..
pip install easydict opencv-contrib-python Cython -i https://pypi.tuna.tsinghua.edu.cn/simple/ ##选择国内源,速度更快
pip install easydict opencv-contrib-python Cython h5py lmdb mahotas pandas requests -i https://pypi.tuna.tsinghua.edu.cn/simple/
pip install -U pillow -i https://pypi.tuna.tsinghua.edu.cn/simple/
pip install h5py lmdb mahotas pandas -i https://pypi.tuna.tsinghua.edu.cn/simple/
pip install requests
pip install web.py==0.40.dev0
conda install pytorch=0.2.0 torchvision -c soumith
## mac
conda install pytorch torchvision -c pytorch
## linux
## conda install pytorch-cpu torchvision-cpu -c pytorch
pushd detector/utils && sh make-for-cpu.sh && popd

8 changes: 4 additions & 4 deletions setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
conda create -n chineseocr python=3.6 pip scipy numpy jupyter ipython ##运用conda 创建python环境
source activate chineseocr
git submodule init && git submodule update
pip install easydict opencv-contrib-python Cython -i https://pypi.tuna.tsinghua.edu.cn/simple/ ##选择国内源,速度更快
pip install easydict opencv-contrib-python Cython h5py lmdb mahotas pandas requests -i https://pypi.tuna.tsinghua.edu.cn/simple/
pip install -U pillow -i https://pypi.tuna.tsinghua.edu.cn/simple/
pip install h5py lmdb mahotas pandas,requests -i https://pypi.tuna.tsinghua.edu.cn/simple/
pip install web.py==0.40.dev0
conda install pytorch=0.2.0 cuda80 torchvision -c soumith
pushd detector/utils && sh make-for-cpu.sh && popd
conda install pytorch torchvision -c pytorch
## pip install torch torchvision
pushd detector/utils && sh make.sh && popd


Binary file modified test/[email protected]
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified test/card.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added test/card1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified test/o-6.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 1e1eb76

Please sign in to comment.