forked from spmallick/learnopencv
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathocr_simple.py
40 lines (30 loc) · 1.05 KB
/
ocr_simple.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import cv2
import sys
import pytesseract
if __name__ == '__main__':
if len(sys.argv) < 2:
print('Usage: python ocr_simple.py image.jpg')
sys.exit(1)
# Read image path from command line
imPath = sys.argv[1]
# Uncomment the line below to provide path to tesseract manually
# pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'
# Define config parameters.
# '-l eng' for using the English language
# '--oem 1' sets the OCR Engine Mode to LSTM only.
#
# There are four OCR Engine Mode (oem) available
# 0 Legacy engine only.
# 1 Neural nets LSTM engine only.
# 2 Legacy + LSTM engines.
# 3 Default, based on what is available.
#
# '--psm 3' sets the Page Segmentation Mode (psm) to auto.
# Other important psm modes will be discussed in a future post.
config = ('-l eng --oem 1 --psm 3')
# Read image from disk
im = cv2.imread(imPath, cv2.IMREAD_COLOR)
# Run tesseract OCR on image
text = pytesseract.image_to_string(im, config=config)
# Print recognized text
print(text)