Skip to content

Commit

Permalink
Merge branch 'improve-tone-style' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
mozillazg committed Jan 23, 2022
2 parents a421a83 + 2b70c85 commit 7e02fbe
Show file tree
Hide file tree
Showing 23 changed files with 836 additions and 549 deletions.
11 changes: 10 additions & 1 deletion pypinyin/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,19 +34,28 @@
# Compiled pattern that matches a string consisting entirely of one or more
# characters drawn from the code-point ranges listed below (anchored with
# ^ ... $, so a single character outside the class makes the match fail).
# The bracketed range in a trailing comment names the nominal Unicode block;
# the bounds actually used by the pattern are sometimes narrower (e.g. the
# Extension C entry starts at 2A703, not 2A700).
RE_HANS = re.compile(
r'^(?:['
r'\u3007' # 〇 (U+3007)
r'\ue815-\ue864'
r'\ufa18' # NOTE(review): also covered by the F900-FAFF range below
r'\u3400-\u4dbf' # CJK Extension A: [3400-4DBF]
r'\u4e00-\u9fff' # CJK basic (unified ideographs): [4E00-9FFF]
r'\uf900-\ufaff' # CJK compatibility: [F900-FAFF]
r'\U00020000-\U0002A6DF' # CJK Extension B: [20000-2A6DF]
r'\U0002A703-\U0002B73F' # CJK Extension C: [2A700-2B73F]
r'\U0002B740-\U0002B81D' # CJK Extension D: [2B740-2B81D]
r'\U0002B825-\U0002BF6E'
r'\U0002C029-\U0002CE93'
r'\U0002D016'
r'\U0002F80A-\U0002FA1F' # CJK compatibility supplement: [2F800-2FA1F]
r'\U00030EDD'
r'\U00030EDE'
r'])+$'
)
else:
RE_HANS = re.compile( # pragma: no cover
RE_HANS = re.compile(
r'^(?:['
r'\u3007' # 〇
r'\ue815-\ue864'
r'\ufa18'
r'\u3400-\u4dbf' # CJK扩展A:[3400-4DBF]
r'\u4e00-\u9fff' # CJK基本:[4E00-9FFF]
r'\uf900-\ufaff' # CJK兼容:[F900-FAFF]
Expand Down
44 changes: 2 additions & 42 deletions pypinyin/contrib/_tone_rule.py
Original file line number Diff line number Diff line change
@@ -1,45 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import unicode_literals

"""
标调位置
有 ɑ 不放过,
  没 ɑ 找 o、e;
  ɑ、o、e、i、u、ü
  标调就按这顺序;
  i、u 若是连在一起,
  谁在后面就标谁。
http://www.hwjyw.com/resource/content/2010/06/04/8183.shtml
https://www.zhihu.com/question/23655297
https://github.com/mozillazg/python-pinyin/issues/160
http://www.pinyin.info/rules/where.html
"""


def right_mark_index(pinyin_no_tone):
    """Return the index of the letter that should carry the tone mark.

    The rules are tried in priority order and the first one that matches
    decides the result:

    1. ``a`` if present, otherwise ``o``, otherwise ``e``.
    2. For the pairs ``iu`` / ``ui``, the second letter of the pair.
    3. The first ``i``, ``u``, ``v`` or ``ü`` (``v`` is presumably the
       ASCII stand-in for ``ü``).
    4. The first ``n``, ``m`` or ``ê``.

    :param pinyin_no_tone: a pinyin syllable without any tone mark,
                           e.g. ``'hao'``.
    :return: zero-based index into ``pinyin_no_tone`` of the letter to
             mark, or ``None`` when none of the rules match (for
             example, an empty string).
    """
    # (candidates, offset) pairs in priority order.  ``offset`` is added to
    # the position of the matched candidate: 0 marks the candidate itself,
    # 1 marks the second letter of a two-letter candidate.
    rules = (
        (('a', 'o', 'e'), 0),
        (('iu', 'ui'), 1),
        (('i', 'u', 'v', 'ü'), 0),
        (('n', 'm', 'ê'), 0),
    )
    for candidates, offset in rules:
        for candidate in candidates:
            if candidate in pinyin_no_tone:
                return pinyin_no_tone.index(candidate) + offset

    # Nothing in the syllable can carry a tone mark.
    return None
# 向后兼容
from pypinyin.style._tone_rule import right_mark_index # noqa
Loading

0 comments on commit 7e02fbe

Please sign in to comment.