新增ocr dense

oukaja · Oct 9, 2018 · 1e1eb76 · 1e1eb76
1 parent ee1e90d
commit 1e1eb76
Show file tree

Hide file tree

Showing 12 changed files with 170 additions and 33 deletions.
diff --git a/README.md b/README.md
@@ -1,23 +1,39 @@
 ## 本项目基于[yolo3](https://github.com/pjreddie/darknet.git) 与[crnn](https://github.com/meijieru/crnn.pytorch.git)  实现中文自然场景文字检测及识别
 
 ## 环境部署
-python=3.6 pytorch=0.2.0
+python=3.6 pytorch==0.4.1
 ``` Bash
 git clone https://github.com/chineseocr/chineseocr.git
 cd chineseocr
 sh setup.sh #(cpu sh setup-cpu.sh)
 ```
 
+下载编译darknet(如果直接运用opencv dnn 可忽略darknet的编译)
+```
+git clone https://github.com/pjreddie/darknet.git 
+mv darknet chineseocr/
+##编译对GPU、cudnn的支持 修改 Makefile
+#GPU=1
+#CUDNN=1
+#OPENCV=0
+#OPENMP=0
+make 
+```
+修改 darknet/python/darknet.py line 48
+root = '/root/'##chineseocr所在目录
+lib = CDLL(root+"chineseocr/darknet/libdarknet.so", RTLD_GLOBAL)
+
+
 ## 下载模型文件   
 模型文件地址:
-* [baidu pan](https://pan.baidu.com/s/1dEopm8VkFLuDDfVkpuzHqQ)
-* [google drive](https://drive.google.com/drive/folders/1vlA6FjvicTt5GKvAfmycP5AlYxm4i9ze?usp=sharing)
+* [baidu pan](https://pan.baidu.com/s/1gTW9gwJR6hlwTuyB6nCkzQ)
+* [google drive](https://drive.google.com/drive/folders/1vlA6FjvicTt5GKvAfmycP5AlYxm4i9ze?usp=sharing)（暂时无更新）
 
-复制文件夹中的 ocr.pth, text.cfg, text.names, text.weights 到models目录
+复制文件夹中的所有文件到models目录
 
-或者可将yolo3模型转换为keras版本，详细参考https://github.com/qqwweee/keras-yolo3.git    
+也可将yolo3模型转换为keras版本，详细参考https://github.com/qqwweee/keras-yolo3.git    
 
-或者直接运用opencv>=3.4  dnn直接调用darknet模型（参考 opencv_dnn_detect.py），cpu文本检测小于1秒。   
+或者直接运用opencv>=3.4  dnn模块调用darknet模型(参考 opencv_dnn_detect.py)。   
 
 ## web服务启动
 ``` Bash
@@ -29,6 +45,7 @@ ipython app.py 8080 ##8080端口号，可以设置任意端口
 
 <img width="500" height="300" src="https://github.com/chineseocr/chineseocr/blob/master/test/img1.png"/>
 <img width="500" height="300" src="https://github.com/chineseocr/chineseocr/blob/master/test/4.png"/>
+<img width="500" height="300" src="https://github.com/chineseocr/chineseocr/blob/master/test/card1.png"/>
 
 ## Play with Docker Container
 ``` Bash
@@ -46,5 +63,6 @@ http://127.0.0.1:8080/ocr
 1. yolo3 https://github.com/pjreddie/darknet.git   
 2. crnn  https://github.com/meijieru/crnn.pytorch.git              
 3. ctpn  https://github.com/eragonruan/text-detection-ctpn    
-4. CTPN  https://github.com/tianzhi0549/CTPN     
+4. CTPN  https://github.com/tianzhi0549/CTPN   
+5. keras-yolo3  https://github.com/qqwweee/keras-yolo3.git 
 
diff --git a/config.py b/config.py
@@ -1,7 +1,24 @@
 import os
+opencvFlag = True##opencvFlag==True 启用opencv dnn 反之 darknet 
 darknetRoot = os.path.join(os.path.curdir,"darknet")## yolo 安装目录
 pwd = os.getcwd()
 yoloCfg = os.path.join(pwd,"models","text.cfg")
 yoloWeights = os.path.join(pwd,"models","text.weights")
 yoloData = os.path.join(pwd,"models","text.data")
-ocrModel = os.path.join(pwd,"models","ocr.pth")
+##文字方向检测
+AngleModelPb = os.path.join(pwd,"models","Angle-model.pb")
+AngleModelPbtxt = os.path.join(pwd,"models","Angle-model.pbtxt")
+IMGSIZE = (1024,1024)## yolo3 输入图像尺寸
+##是否启用LSTM crnn模型
+DETECTANGLE=True##是否进行文字方向检测
+LSTMFLAG = True##OCR模型是否调用LSTM层
+GPU = True##OCR 是否启用GPU
+chinsesModel = True##模型选择 True:中英文模型 False:英文模型
+if chinsesModel:
+    if LSTMFLAG:
+        ocrModel  = os.path.join(pwd,"models","ocr-lstm.pth")
+    else:
+        ocrModel = os.path.join(pwd,"models","ocr-dense.pth")
+else:
+        LSTMFLAG=True
+        ocrModel = os.path.join(pwd,"models","ocr-english.pth")
diff --git a/darknet_detect.py b/darknet_detect.py
@@ -8,6 +8,7 @@
 sys.path.append('python')
 import darknet as dn
 
+
 def array_to_image(arr):
     arr = arr.transpose(2,0,1)
     c = arr.shape[0]
@@ -56,6 +57,8 @@ def to_box(r):
 meta = dn.load_meta(yoloData.encode('utf-8'))
 os.chdir(pwd)
 def text_detect(img):
+    inputBlob = cv2.dnn.blobFromImage(img, scalefactor=0.00390625, size=(608, 608),swapRB=True ,crop=False);
+
     r = detect_np(net, meta, img,thresh=0.1, hier_thresh=0.5, nms=0.8)
     bboxes = to_box(r)
     return bboxes
diff --git a/model.py b/model.py
@@ -2,8 +2,14 @@
 # -*- coding: utf-8 -*-
 from detector.detectors import TextDetector
 from detector.other import get_boxes
-import darknet_detect
-import opencv_dnn_detect ##opencv dnn model for darknet
+from config import opencvFlag
+from config import IMGSIZE
+from opencv_dnn_detect import angle_detect##文字方向检测
+if opencvFlag:
+    import opencv_dnn_detect as detect ##opencv dnn model for darknet
+else:
+    import darknet_detect as detect
+
 import numpy as np
 from PIL import Image
 import numpy as np
@@ -23,12 +29,9 @@ def text_detect(img,
                 LINE_MIN_SCORE=0.8,
                 TEXT_PROPOSALS_WIDTH=5,
                 MIN_NUM_PROPOSALS=1,
-                textmodel = 'darknet_detect'
+
                 ):
-    if textmodel == 'darknet_detect':
-         boxes, scores = darknet_detect.text_detect(np.array(img))
-    else:
-        boxes, scores = opencv_dnn_detect.text_detect(np.array(img))
+    boxes, scores = detect.text_detect(np.array(img))
 
 
     boxes = np.array(boxes,dtype=np.float32)
@@ -191,25 +194,91 @@ def letterbox_image(image, size):
         '''
     image_w, image_h = image.size
     w, h = size
-    new_w = int(image_w * min(w*1.0/image_w, h*1.0/image_h))
-    new_h = int(image_h * min(w*1.0/image_w, h*1.0/image_h))
-    resized_image = image.resize((new_w,new_h), Image.BICUBIC)
+
+    if max(image_w, image_h)<min(size):
+        resized_image = image
+        new_w = w
+        new_h = h
+    else:
+        new_w = int(image_w * min(w*1.0/image_w, h*1.0/image_h))
+        new_h = int(image_h * min(w*1.0/image_w, h*1.0/image_h))
+        resized_image = image.resize((new_w,new_h), Image.BICUBIC)
 
     boxed_image = Image.new('RGB', size, (128,128,128))
     boxed_image.paste(resized_image, ((w-new_w)//2,(h-new_h)//2))
     return boxed_image
 
+from scipy.ndimage import filters,interpolation,morphology,measurements,minimum
+#from pylab import amin, amax
+from numpy import amin, amax
+def estimate_skew_angle(raw):
+    """
+    估计图像文字角度
+    """
+    raw = resize_im(raw, scale=600, max_scale=900)
+    image = raw-amin(raw)
+    image = image/amax(image)
+    m = interpolation.zoom(image,0.5)
+    m = filters.percentile_filter(m,80,size=(20,2))
+    m = filters.percentile_filter(m,80,size=(2,20))
+    m = interpolation.zoom(m,1.0/0.5)
+    #w,h = image.shape[1],image.shape[0]
+    w,h = min(image.shape[1],m.shape[1]),min(image.shape[0],m.shape[0])
+    flat = np.clip(image[:h,:w]-m[:h,:w]+1,0,1)
+    d0,d1 = flat.shape
+    o0,o1 = int(0.1*d0),int(0.1*d1)
+    flat = amax(flat)-flat
+    flat -= amin(flat)
+    est = flat[o0:d0-o0,o1:d1-o1]
+    angles = range(-15,15)
+    estimates = []
+    for a in angles:
+
+        roest =interpolation.rotate(est,a,order=0,mode='constant')
+        v = np.mean(roest,axis=1)
+        v = np.var(v)
+        estimates.append((v,a))
+
+    _,a = max(estimates)
+    return a
+
 
+def eval_angle(im,detectAngle=False,ifadjustDegree=True):
+    """
+    估计图片偏移角度
+    @@param:im, 输入图像(PIL Image)
+    @@param:detectAngle 是否检测文字朝向
+    @@param:ifadjustDegree 是否估计文字倾斜角度并旋转校正
+    @@return: (angle, degree, 校正后的图像)
+    """
+    angle = 0
+    degree=0.0
+    img = np.array(im)
+    if detectAngle:
+        angle = angle_detect(img=np.copy(img))##文字朝向检测
+        if angle==90:
+            im = im.transpose(Image.ROTATE_90)
+        elif angle==180:
+            im = im.transpose(Image.ROTATE_180)
+        elif angle==270:
+            im = im.transpose(Image.ROTATE_270)
+        img = np.array(im)
+
+    if ifadjustDegree:
+       degree = estimate_skew_angle(np.array(im.convert('L')))
+    return  angle,degree,im.rotate(degree)
 
 
-def model(img,detectAngle=False,config={},ifIm=True,leftAdjust=False,rightAdjust=False,alph=0.1):
+def model(img,detectAngle=False,config={},ifIm=True,leftAdjust=False,rightAdjust=False,alph=0.2,ifadjustDegree=False):
     """
     @@param:img,
     @@param:adjust 调整文字识别结果
     @@param:detectAngle,是否检测文字朝向
     """
-    angle = 0
-    img =letterbox_image(img, (608,608))
+    angle,degree,img = eval_angle(img,detectAngle=detectAngle,ifadjustDegree=ifadjustDegree)
+
+    img =letterbox_image(img, IMGSIZE)
+
     config['img'] = img
     text_recs,tmp = text_detect(**config)
 

diff --git a/models/README.md b/models/README.md
@@ -0,0 +1 @@
+## 下载对应的权重文件放在此目录
diff --git a/opencv_dnn_detect.py b/opencv_dnn_detect.py
@@ -1,14 +1,17 @@
 from config import yoloCfg,yoloWeights
+from config import AngleModelPb,AngleModelPbtxt
+from config import IMGSIZE
 from PIL import Image
 import numpy as np
 import cv2
-net = cv2.dnn.readNetFromDarknet(yoloCfg,yoloWeights)
+textNet = cv2.dnn.readNetFromDarknet(yoloCfg,yoloWeights)
+angleNet = cv2.dnn.readNetFromTensorflow(AngleModelPb,AngleModelPbtxt)##文字方向检测
 def text_detect(img):
     thresh=0.1
     h,w = img.shape[:2]
-    inputBlob = cv2.dnn.blobFromImage(img, scalefactor=0.00390625, size=(608, 608),swapRB=True ,crop=False);
-    net.setInput(inputBlob)
-    pred = net.forward()
+    inputBlob = cv2.dnn.blobFromImage(img, scalefactor=0.00390625, size=IMGSIZE,swapRB=True ,crop=False);
+    textNet.setInput(inputBlob)
+    pred = textNet.forward()
     cx = pred[:,0]*w
     cy = pred[:,1]*h
     xmin = cx - pred[:,2]*w/2
@@ -20,3 +23,28 @@ def text_detect(img):
     scores = scores[indx]
     boxes = np.array(list(zip(xmin[indx],ymin[indx],xmax[indx],ymax[indx])))
     return boxes,scores
+
+
+def angle_detect(img,adjust=True):
+    """
+    文字方向检测
+    """
+    h,w = img.shape[:2]
+    ROTATE = [0,90,180,270]
+    if adjust:
+       thesh = 0.05
+       xmin,ymin,xmax,ymax = int(thesh*w),int(thesh*h),w-int(thesh*w),h-int(thesh*h)
+       img = img[ymin:ymax,xmin:xmax]##剪切图片边缘，清除边缘噪声
+
+
+    inputBlob = cv2.dnn.blobFromImage(img, 
+                                      scalefactor=1.0, 
+                                      size=(224, 224),
+                                      swapRB=True ,
+                                      mean=[103.939,116.779,123.68],crop=False);
+    angleNet.setInput(inputBlob)
+    pred = angleNet.forward()
+    index = np.argmax(pred,axis=1)[0]
+    return ROTATE[index]
+
+
diff --git a/setup-cpu.sh b/setup-cpu.sh
@@ -3,11 +3,12 @@ conda create -n chineseocr python=3.6 pip scipy numpy jupyter ipython ##运用co
 source activate chineseocr
 git submodule init && git submodule update
 cd darknet/ && make && cd ..
-pip install easydict opencv-contrib-python Cython  -i https://pypi.tuna.tsinghua.edu.cn/simple/ ##选择国内源，速度更快
+pip install easydict opencv-contrib-python Cython h5py lmdb mahotas pandas requests  -i https://pypi.tuna.tsinghua.edu.cn/simple/
 pip install -U pillow -i https://pypi.tuna.tsinghua.edu.cn/simple/
-pip install  h5py lmdb mahotas pandas -i https://pypi.tuna.tsinghua.edu.cn/simple/
-pip install requests
 pip install web.py==0.40.dev0
-conda install pytorch=0.2.0  torchvision -c soumith
+## mac
+conda install pytorch torchvision -c pytorch
+## linux
+## conda install pytorch-cpu torchvision-cpu -c pytorch
 pushd detector/utils && sh make-for-cpu.sh && popd
 
diff --git a/setup.sh b/setup.sh
@@ -2,11 +2,11 @@
 conda create -n chineseocr python=3.6 pip scipy numpy jupyter ipython ##运用conda 创建python环境
 source activate chineseocr
 git submodule init && git submodule update
-pip install easydict opencv-contrib-python Cython  -i https://pypi.tuna.tsinghua.edu.cn/simple/ ##选择国内源，速度更快
+pip install easydict opencv-contrib-python Cython h5py lmdb mahotas pandas requests  -i https://pypi.tuna.tsinghua.edu.cn/simple/
 pip install -U pillow -i https://pypi.tuna.tsinghua.edu.cn/simple/
-pip install  h5py lmdb mahotas pandas,requests -i https://pypi.tuna.tsinghua.edu.cn/simple/
 pip install web.py==0.40.dev0
-conda install pytorch=0.2.0 cuda80  torchvision -c soumith
-pushd detector/utils && sh make-for-cpu.sh && popd
+conda install pytorch torchvision -c pytorch
+## pip install torch torchvision
+pushd detector/utils && sh make.sh && popd
 
 
diff --git a/test/[email protected] b/test/[email protected]
diff --git a/test/card.png b/test/card.png
diff --git a/test/card1.png b/test/card1.png
diff --git a/test/o-6.jpg b/test/o-6.jpg