Skip to content

Commit

Permalink
Merge pull request #8 from AFEDQK/feat-add-extract-telephone
Browse files Browse the repository at this point in the history
feat:增加识别固定电话
  • Loading branch information
AFEDQK authored Sep 8, 2022
2 parents eab11c4 + 051e5d5 commit d7e513c
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 31 deletions.
17 changes: 6 additions & 11 deletions app/utils/get_parse_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,12 @@ def format_return_result(res):
working_place = to_list(res["期望工作地点"])
# 处理单独省、市、区的情况,以及不正确的地址
formated_res["期望工作地点"] = postprocess_working_place(working_place)
formated_res["招工单位"] = to_list(res["招工单位"])
# TODO:拍脑袋定的长度,后期修改
recruit_company = to_list(res["招工单位"])
if len(recruit_company) > 20:
formated_res["招工单位"] = []
else:
formated_res["招工单位"] = recruit_company
recruit_infos = res["招工信息"]
formated_res["联系人"] = to_str(res["联系人"])
formated_res["联系电话"] = to_list(res["联系电话"])
Expand Down Expand Up @@ -121,10 +126,6 @@ def seg_punc(msg):
if len(result[i]) != 0:
qa_sentence += result[i]
qa_sentence += ";"
# print(qa_sentence)
# if i == len(result) - 1:
# qa_sentence += "。"
# print(qa_sentence)
res['项目内容'] = qa_sentence
formated_res = format_return_result(res)
return formated_res
Expand All @@ -136,20 +137,14 @@ def find_job(msg):
return {}
result = re.split(pattern, msg)
res, person, city, types, contact = handle_search(msg)
# types = "你好"
# print(len(types))
print(result)
for i in range(len(result)):
# print("测试", i, ":", result[i])
for j in range(len(person)):
# print(types[j])
p1 = re.compile(person[j])
m = p1.findall(result[i])
# print("工种匹配:", m)
if len(m) != 0:
result[i] = ""
# print("工种匹配成功,以用空格替换")
# print("工种为:", types[j])
for j in range(len(city)):
p1 = re.compile(city[j])
m = p1.findall(result[i])
Expand Down
63 changes: 43 additions & 20 deletions app/utils/process_recruit_detail_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,17 +66,16 @@ def getLOCName(str_content):
return res


def getMNumber(str_content):
    """Return the 11-digit numeric tokens found in the tagging of *str_content*.

    The text is passed to ``lac.run``; the first two elements of its result
    are zipped into a token -> tag mapping (presumably LAC words and their
    part-of-speech tags -- confirm against the ``lac`` object defined
    elsewhere in this file). A token is kept when its tag is ``'m'`` or
    ``'n'`` and, after stripping newlines and spaces, it is exactly 11
    characters long and consists only of digits.
    """
    tagged = lac.run(str_content)
    # Building a dict collapses repeated tokens; the last tag seen wins.
    tag_by_token = dict(zip(tagged[0], tagged[1]))
    numbers = []
    for token, tag in tag_by_token.items():
        if str(tag) not in ('m', 'n'):
            continue
        cleaned = str(token).replace('\n', '').replace(' ', '')
        if len(cleaned) == 11 and cleaned.isdigit():
            numbers.append(cleaned)
    return numbers
def getMNumber(content):
    """Return the distinct phone numbers found in *content*.

    Two patterns are searched, in this order:

    * ``1[3-9]`` followed by nine digits (11 digits in total);
    * ``0`` plus 2-3 digits, a hyphen, then a non-zero digit followed by
      6-8 more digits.

    Args:
        content: Text to scan.

    Returns:
        A list of unique matches. All matches of the first pattern come
        before those of the second, and within each pattern the order of
        first appearance in *content* is kept, so the result is the same on
        every run (a plain ``set`` would make the order depend on string
        hash randomization).
    """
    tel_rules = (r"1[3-9]\d{9}", r"0\d{2,3}-[1-9]\d{6,8}")
    # dict keys keep insertion order, giving an order-preserving dedup.
    phones = {}
    for rule in tel_rules:
        for phone in re.findall(rule, content):
            phones.setdefault(phone, None)
    return list(phones)


def getAllMsg(content, data_original):
Expand Down Expand Up @@ -132,7 +131,7 @@ def getAllMsg(content, data_original):
money = ''
acquire = ''
contact = getMNumber(content)
if len(contact) == 0:
if not contact:
contact = getMNumber(data_original.replace(' ', ','))
# print(contact)
work_time = ''
Expand Down Expand Up @@ -215,6 +214,7 @@ def process_return(types, money, acquire, number, contact, city, work_time, plac
else:
one_contact = contact
one_contact = str(one_contact).replace(']', '').replace('[', '').replace(',', '').replace('\'', '')
print(f"place:{place}")
if len(place) >= len(types) and len(types) > 1 and type(types) == list:
one_place = place[i]
else:
Expand Down Expand Up @@ -292,17 +292,40 @@ def get_special_content(forward_text, content):
return re.findall(regex_concat, content)


def extract_telephone(data):
    """Find phone numbers in *data* and pair each with a normalized form.

    Four patterns are searched, in this order: an 11-digit number starting
    with ``1[3-9]``; a ``0``-prefixed number written in three
    space-separated groups; a ``0``-prefixed number with a hyphen after the
    first group; and an 11-digit number starting with ``1`` written as
    ``3 4 4`` space-separated groups.

    Args:
        data: Text to scan.

    Returns:
        A list of ``(original, formatted)`` tuples, one per distinct matched
        string, ordered by pattern and then by first appearance.
        ``formatted`` is padded with one space on each side. A spaced
        ``0``-prefixed number becomes ``<first group>-<remaining digits>``;
        any other spaced number has its spaces removed; numbers without
        spaces are kept as they are.
    """
    tel_rules = (
        r"1[3-9]\d{9}",
        r"0\d{2,3} [1-9]\d{3,4} [0-9]\d{3,4}",
        r"0\d{2,3}-[1-9]\d{6,8}",
        r"1[0-9]{2} [0-9]{4} [0-9]{4}",
    )
    # Deduplicate while keeping first-seen order: a repeated number must
    # yield a single pair, otherwise a caller that substitutes each pair
    # with str.replace would pad the same number with spaces repeatedly.
    matched = {}
    for each_rule in tel_rules:
        for phone in re.findall(each_rule, data):
            matched.setdefault(phone, None)

    # Normalize every match to one format.
    res = []
    for ori_phone in matched:
        if " " not in ori_phone:
            normalized = ori_phone
        elif ori_phone[0] == "0":
            first_group, *other_groups = ori_phone.split(" ")
            normalized = "%s-%s" % (first_group, "".join(other_groups))
        else:
            normalized = ori_phone.replace(" ", "")
        res.append((ori_phone, " %s " % normalized))
    return res


def extract_info(data, wxid, raw, time):
# globals().update(regex_config)
data_original = data
# 处理 电话号码人
phones = re.findall('[0-9]{11}', data)
for p in phones:
data = data.replace(p, ' ' + p + ' ')
# 处理 189 0074 8140
phones = re.findall('[0-9]{3} [0-9]{4} [0-9]{4}', data)
for p in phones:
data = data.replace(p, ' ' + p.replace(' ', '') + ' ')

# 提取文本中的电话号码 or 座机号码
phones = extract_telephone(data)
for ori_phone, format_phone in phones:
data = data.replace(ori_phone, format_phone)

# 十人 30到50人 四个 五六个 8九个人 shi ming shiwu ming 一个工管住不管吃 一个礼拜 一个班
# 算0.5个工 不要暑期工 暑假工不要 物流仓库 28个班 热水空调 汽车玻璃 服务费20 威特电梯 8个通层
Expand Down

0 comments on commit d7e513c

Please sign in to comment.