forked from lindsey98/Phishpedia
-
Notifications
You must be signed in to change notification settings - Fork 0
/
text_recog.py
32 lines (22 loc) · 928 Bytes
/
text_recog.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import re
def pred_text_in_image(ocr_model, shot_path):
    """Run OCR on a screenshot and return the recognised text as one string.

    Args:
        ocr_model: Object exposing ``ocr(path, cls=True)``. Its result is
            indexed as ``result[0]`` to get the per-line entries, and each
            entry as ``entry[1][0]`` to get the text of that line.
            NOTE(review): this matches the PaddleOCR result layout --
            confirm against the caller that builds the model.
        shot_path: Image location, passed through unchanged to
            ``ocr_model.ocr``.

    Returns:
        The recognised text fragments joined by single spaces, or ``''``
        when the model returns ``None``, an empty result, or a ``None``
        first entry.
    """
    result = ocr_model.ocr(shot_path, cls=True)
    # The emptiness check must come before ``result[0]``: indexing an empty
    # result list would raise IndexError instead of reporting "no text".
    if result is None or len(result) == 0 or result[0] is None:
        return ''

    # Each entry is indexed as entry[1][0]; only that text field is kept.
    return ' '.join(line[1][0] for line in result[0])
def check_email_credential_taking(ocr_model, shot_path):
    """Decide whether a screenshot looks like an email-login prompt.

    The screenshot is OCR'd with ``pred_text_in_image`` and, if any text
    was recognised, the text is handed to ``rule_matching``.

    Args:
        ocr_model: OCR model forwarded to ``pred_text_in_image``.
        shot_path: Screenshot location forwarded to ``pred_text_in_image``.

    Returns:
        The ``(matched, value)`` tuple produced by ``rule_matching``, or
        ``(False, None)`` when no text was recognised.
    """
    text = pred_text_in_image(ocr_model, shot_path)
    # Nothing recognised: skip the rule check entirely.
    if not text:
        return False, None
    return rule_matching(text)
def rule_matching(detected_text):
    """Apply the email-login rule to OCR text.

    The rule fires when the text contains one of the Chinese keywords
    邮箱 / 邮件 followed later (without an intervening newline) by
    登录 / 登陆, AND the text also contains an ``@domain.tld``-shaped
    token.

    Args:
        detected_text: Text to inspect.

    Returns:
        ``(True, token)`` where ``token`` is the first ``@...`` match --
        it starts at the ``@`` sign, so the part before ``@`` is not
        included -- or ``(False, None)`` when either condition fails.
    """
    login_hint = re.search(r'邮箱.*登录|邮箱.*登陆|邮件.*登录|邮件.*登陆', detected_text)
    if login_hint is None:
        return False, None

    # Only the first "@domain" occurrence in the text is reported.
    domain_hit = re.search(r'@[\w.-]+\.\w+', detected_text)
    if domain_hit is None:
        return False, None
    return True, domain_hit.group(0)