diff --git a/README.md b/README.md index f4115c6..7939b3d 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ - +

@@ -95,6 +95,7 @@ $ pip install . $ pip install jionlp ``` + ## 使用 Features - 导入工具包,查看工具包的主要功能与函数注释 @@ -230,7 +231,7 @@ $ jio_help - 如感兴趣合作完善本工具包,请参考 **TODO.txt** 文件进行功能添加。 ### 做 NLP不易,欢迎加入自然语言处理 Wechat 交流群 -### 请先添加vx:dongrixinyu89 +### 如以下码失效,请先添加vx:dongrixinyu89 ![image](../../blob/master/image/qr_code_for_collection.png) diff --git a/image/qr_code_for_collection.png b/image/qr_code_for_collection.png index bc041f6..bb3c094 100644 Binary files a/image/qr_code_for_collection.png and b/image/qr_code_for_collection.png differ diff --git a/jionlp/__init__.py b/jionlp/__init__.py index b0aff5e..6a1da1b 100644 --- a/jionlp/__init__.py +++ b/jionlp/__init__.py @@ -8,7 +8,7 @@ # description: Preprocessing tool for Chinese NLP """ -__version__ = '1.3.27' +__version__ = '1.3.28' import os diff --git a/jionlp/gadget/time_parser.py b/jionlp/gadget/time_parser.py index d905ff1..0744482 100644 --- a/jionlp/gadget/time_parser.py +++ b/jionlp/gadget/time_parser.py @@ -4473,14 +4473,14 @@ def string2handler(datetime_obj): 29, leap_month) # 当农历月无30 天时,按 29天计算 return string2handler(first_solar_time_handler),\ - string2handler(second_solar_time_handler) + string2handler(second_solar_time_handler) else: solar_time_handler = self.lunar2solar( lunar_time_handler[0], lunar_time_handler[1], lunar_time_handler[2], leap_month) return string2handler(solar_time_handler),\ - string2handler(solar_time_handler) + string2handler(solar_time_handler) def _parse_solar_terms(self, year, solar_term): """解析24节气 diff --git a/jionlp/rule/rule_pattern.py b/jionlp/rule/rule_pattern.py index 457c329..323376c 100644 --- a/jionlp/rule/rule_pattern.py +++ b/jionlp/rule/rule_pattern.py @@ -59,7 +59,7 @@ # 以及相应的常用符号,单字节符号、标点符号等。而日文、俄文、拉丁、希腊、数学公式、 # 物理单位等符号 绝大多数不常用的都被丢弃。其中 㐀-䶵 指的是另一个汉字字符集 # 仅保留了常用符号,数字标识,如 ① 等 -UNICODE_EXCEPTION_PATTERN = '[^‐-”•…‰※℃℉Ⅰ-ⅹ①-⒛\u3000-】〔-〞㈠-㈩一-龥﹐-﹫!-~¢£¥]' +UNICODE_EXCEPTION_PATTERN = '[^‐-”•·・…‰※℃℉Ⅰ-ⅹ①-⒛\u3000-】〔-〞㈠-㈩一-龥﹐-﹫!-~¢£¥]' EXCEPTION_PATTERN = ASCII_EXCEPTION_PATTERN[:-1] + UNICODE_EXCEPTION_PATTERN[2:] # --------------------------------------------------------------------- @@ -155,7 +155,7 @@ # --------------------------------------------------------------------- # 冗余字符处理 # 文本中有连续的 “哈哈哈哈哈” 等字符串,需要删除冗余字符串,返回为 “哈” -REDUNDANT_PATTERN = ' -\t\n啊哈呀~\u3000\xa0' +REDUNDANT_PATTERN = ' -\t\n啊哈呀~\u3000\xa0•·・' # --------------------------------------------------------------------- # 纯数字格式,用于过滤停用词时,过滤掉纯数字(包括汉字数字) @@ -305,7 +305,7 @@ # 固定公历节日 FIXED_SOLAR_FESTIVAL = r'((元旦|十一)|(三八|五一|六一|七一|八一|国庆|圣诞)(节)?|'\ r'((三八)?妇女|女神|植树|(五一)?劳动|(五四)?青年|(六一)?儿童|(七一)?建党|(八一)?建军|教师|情人|愚人|万圣|护士)节|'\ - r'地球日|三[\.·]?一五|双(十一|11)|(.{1,4})?消费者权益日)' + r'地球日|三[\.•·・]?一五|双(十一|11)|(.{1,4})?消费者权益日)' # 固定农历节日 FIXED_LUNAR_FESTIVAL = r'((春|填仓|上巳|寒食|清明|浴佛|姑姑|财神|下元|寒衣)节|'\ r'(龙抬头|除夕)|'\ diff --git a/setup.py b/setup.py index cd71952..8706227 100644 --- a/setup.py +++ b/setup.py @@ -50,7 +50,7 @@ __maintainer__ = "dongrixinyu" __email__ = "dongrixinyu.89@163.com" __url__ = 'https://github.com/dongrixinyu/JioNLP' -__description__ = 'Preprocessing tool for Chinese NLP' +__description__ = 'Chinese NLPreprocessing & Parsing' with open(os.path.join(DIR_PATH, 'requirements.txt'),