Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
修复 ``pypinyin.contrib.tone_convert`` 中 ``to_`` 开头的转换函数
没有正确处理输入的拼音包含 ``5`` 的场景(当包含 ``5`` 时会导致返回的格式不符合函数预期的返回格式)
以及修复没有正确处理部分拼音中的 ``ü`` 或 ``v`` 的问题
(当 ``v_to_u=False`` 时返回结果需要将 ``ü`` 替换为 ``v``)

>>> to_tone('lve')
'lve'
>>> to_tone2('lün5')
'lv5n'
>>> to_tone3('lün5')
'lv5n'
>>> to_tone2('lvn5')
'lv5n'
>>> to_tone3('lvn5')
'lv5n'

>>> to_tone('lve')
'lüe'
>>> to_tone2('lün5')
'lvn'
>>> to_tone3('lün5')
'lvn'
>>> to_tone2('lvn5')
'lvn'
>>> to_tone3('lvn5')
'lvn'
>>> to_tone2('lvn5', v_to_u=True)
'lün'
>>> to_tone3('lvn5', v_to_u=True)
'lün'
>>> to_tone2('lvn', neutral_tone_with_five=True)
'lv5n'
>>> to_tone3('lvn', neutral_tone_with_five=True)
'lvn5'
  • Loading branch information
mozillazg committed Jan 14, 2023
1 parent a8878ec commit 05eacb3
Show file tree
Hide file tree
Showing 5 changed files with 200 additions and 34 deletions.
49 changes: 48 additions & 1 deletion CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,54 @@ Changelog
---------


`0.48.0`_ (2023-mm-dd)
+++++++++++++++++++++++++

* **[Bugfixed]** 修复 ``pypinyin.contrib.tone_convert`` 中 ``to_`` 开头的转换函数
没有正确处理输入的拼音包含 ``5`` 的场景(当包含 ``5`` 时会导致返回的格式不符合函数预期的返回格式)
以及修复没有正确处理部分拼音中的 ``ü`` 或 ``v`` 的问题
(当 ``v_to_u=False`` 时返回结果需要将 ``ü`` 替换为 ``v``) Fixed `#290`_ :

.. code-block:: python

    # 修复前
    >>> to_tone('lve')
    'lve'
    >>> to_tone2('lün5')
    'lv5n'
    >>> to_tone3('lün5')
    'lv5n'
    >>> to_tone2('lvn5')
    'lv5n'
    >>> to_tone3('lvn5')
    'lv5n'

    # 修复后
    >>> to_tone('lve')
    'lüe'
    >>> to_tone2('lün5')
    'lvn'
    >>> to_tone3('lün5')
    'lvn'
    >>> to_tone2('lvn5')
    'lvn'
    >>> to_tone3('lvn5')
    'lvn'
    >>> to_tone2('lvn5', v_to_u=True)
    'lün'
    >>> to_tone3('lvn5', v_to_u=True)
    'lün'
    >>> to_tone2('lvn', neutral_tone_with_five=True)
    'lv5n'
    >>> to_tone3('lvn', neutral_tone_with_five=True)
    'lvn5'

`0.47.1`_ (2022-08-21)
+++++++++++++++++++++++++

* **[Bugfixed]** 修复无声母和韵母的场景下指定 neutral_tone_with_five=True 会返回 ``5`` 作为拼音的问题:
* **[Bugfixed]** 修复无声母和韵母的场景下指定 neutral_tone_with_five=True 会返回
``5`` 作为拼音的问题(`#284`_):

.. code-block:: python
Expand Down Expand Up @@ -915,6 +959,8 @@ __ https://github.com/mozillazg/python-pinyin/issues/8
.. _#251: https://github.com/mozillazg/python-pinyin/issues/251
.. _#266: https://github.com/mozillazg/python-pinyin/issues/266
.. _#80: https://github.com/mozillazg/python-pinyin/issues/80
.. _#284: https://github.com/mozillazg/python-pinyin/issues/284
.. _#290: https://github.com/mozillazg/python-pinyin/issues/290
.. _#164: https://github.com/mozillazg/python-pinyin/pull/164
.. _#176: https://github.com/mozillazg/python-pinyin/pull/176
.. _#279: https://github.com/mozillazg/python-pinyin/pull/279
Expand Down Expand Up @@ -1009,3 +1055,4 @@ __ https://github.com/mozillazg/python-pinyin/issues/8
.. _0.46.0: https://github.com/mozillazg/python-pinyin/compare/v0.45.0...v0.46.0
.. _0.47.0: https://github.com/mozillazg/python-pinyin/compare/v0.46.0...v0.47.0
.. _0.47.1: https://github.com/mozillazg/python-pinyin/compare/v0.47.0...v0.47.1
.. _0.48.0: https://github.com/mozillazg/python-pinyin/compare/v0.47.1...v0.48.0
30 changes: 0 additions & 30 deletions docs/contrib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,33 +17,3 @@ contrib
.. autofunction:: pypinyin.contrib.tone_convert.to_finals_tone
.. autofunction:: pypinyin.contrib.tone_convert.to_finals_tone2
.. autofunction:: pypinyin.contrib.tone_convert.to_finals_tone3

.. autofunction:: pypinyin.contrib.tone_convert.tone_to_normal
.. autofunction:: pypinyin.contrib.tone_convert.tone_to_tone2
.. autofunction:: pypinyin.contrib.tone_convert.tone_to_tone3

.. autofunction:: pypinyin.contrib.tone_convert.tone2_to_normal
.. autofunction:: pypinyin.contrib.tone_convert.tone2_to_tone
.. autofunction:: pypinyin.contrib.tone_convert.tone2_to_tone3

.. autofunction:: pypinyin.contrib.tone_convert.tone3_to_normal
.. autofunction:: pypinyin.contrib.tone_convert.tone3_to_tone
.. autofunction:: pypinyin.contrib.tone_convert.tone3_to_tone2


.. V2UMixin
.. ---------
.. .. autoclass:: pypinyin.contrib.uv.V2UMixin
.. NeutralToneWith5Mixin
.. -----------------------
.. .. autoclass:: pypinyin.contrib.neutral_tone.NeutralToneWith5Mixin
.. ToneSandhiMixin
.. ----------------
.. .. autoclass:: pypinyin.contrib.tone_sandhi.ToneSandhiMixin
4 changes: 2 additions & 2 deletions pypinyin/style/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,9 @@
)

# Regular expression matching a character whose tone is marked by a digit:
# one character from the listed set followed by a tone digit 1-5 at the end
# of the string.
# NOTE(review): RE_TONE2 is assigned twice in this view; this looks like a
# rendered diff (old line, then new line). The second assignment, which also
# accepts 'ü', is the one that takes effect -- confirm against the real file.
RE_TONE2 = re.compile(r'([aeoiuvnmê])([1-5])$')
RE_TONE2 = re.compile(r'([aeoiuvnmêü])([1-5])$')
# Regular expression matching the tone digit of the final in TONE2 style:
# leading letters, a tone digit 1-5, then optional trailing letters, anchored
# to the whole string.
# NOTE(review): the first RE_TONE3 pattern below uses the class ``[a-]``,
# which looks truncated; it is overridden by the second assignment -- confirm
# against the real file.
RE_TONE3 = re.compile(r'^([a-]+)([1-5])([a-]*)$')
RE_TONE3 = re.compile(r'^([a-zêü]+)([1-5])([a-zêü]*)$')

# Matches a single digit.
RE_NUMBER = re.compile(r'\d')
5 changes: 5 additions & 0 deletions pypinyin/style/_tone_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ def to_tone(pinyin):
>>> to_tone('zhong1')
'zhōng'
"""
pinyin = pinyin.replace('v', 'ü')
if not _re_number.search(pinyin):
return pinyin

Expand Down Expand Up @@ -104,6 +105,7 @@ def to_tone2(pinyin, v_to_u=False, neutral_tone_with_five=False, **kwargs):
"""
if kwargs.get('neutral_tone_with_5', None) is not None:
neutral_tone_with_five = kwargs['neutral_tone_with_5']
pinyin = pinyin.replace('5', '')
s = tone_to_tone3(
pinyin, v_to_u=True, neutral_tone_with_five=neutral_tone_with_five)
s = tone3_to_tone2(s)
Expand Down Expand Up @@ -143,6 +145,7 @@ def to_tone3(pinyin, v_to_u=False, neutral_tone_with_five=False, **kwargs):
"""
if kwargs.get('neutral_tone_with_5', None) is not None:
neutral_tone_with_five = kwargs['neutral_tone_with_5']
pinyin = pinyin.replace('5', '')
s = tone_to_tone2(
pinyin, v_to_u=True, neutral_tone_with_five=neutral_tone_with_five)
s = tone2_to_tone3(s)
Expand Down Expand Up @@ -255,6 +258,7 @@ def to_finals_tone2(pinyin, strict=True, v_to_u=False,
'o1ng'
"""
pinyin = pinyin.replace('5', '')
finals = to_finals_tone3(pinyin, strict=strict, v_to_u=v_to_u,
neutral_tone_with_five=neutral_tone_with_five)

Expand Down Expand Up @@ -287,6 +291,7 @@ def to_finals_tone3(pinyin, strict=True, v_to_u=False,
'ong1'
"""
pinyin = pinyin.replace('5', '')
finals = to_finals(pinyin, strict=strict, v_to_u=v_to_u)
if not finals:
return finals
Expand Down
146 changes: 145 additions & 1 deletion tests/contrib/test_tone_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,6 @@ def test_tone_tone3_with_v_to_u(pinyin, v_to_u, result):
['a1', 'a1'],
['a', 'a'],
['shang', 'shang'],
['sha5ng', 'shang5'],
])
def test_tone2_tone3(pinyin, result):
assert tone2_to_tone3(pinyin) == result
Expand Down Expand Up @@ -368,3 +367,148 @@ def test_tone_to_tone2_tone3_to_tone():
tone3_2 = tone3_to_tone2(tone3)
assert tone2_to_tone(tone3_2) == py
assert to_tone(tone3_2) == py


@mark.parametrize('pinyin', [
    'lün',
    'lvn',
    'lü5n',
    'lün5',
    'lv5n',
    'lvn5',
])
def test_issue_290_1(pinyin):
    """Regression test for issue #290: neutral-tone ``lün`` spellings.

    Every parametrized spelling (``ü`` or ``v``, with no ``5``, an inline
    ``5`` or a trailing ``5``) must produce the same output from each
    ``to_*`` converter.  ``ü`` is only kept in the result when
    ``v_to_u=True`` (``to_tone`` / ``to_finals_tone`` always return ``ü``),
    and ``5`` only appears when ``neutral_tone_with_five=True``.

    The parameter is named ``pinyin`` (matching the other tests in this
    module) so that it does not shadow the builtin ``input``.
    """
    # Tone-less output.
    assert to_normal(pinyin) == 'lvn'
    assert to_normal(pinyin, v_to_u=True) == 'lün'

    # Tone-mark style: neutral tone carries no mark.
    assert to_tone(pinyin) == 'lün'

    # TONE2 style: the digit, when requested, follows the vowel.
    assert to_tone2(pinyin) == 'lvn'
    assert to_tone2(pinyin, neutral_tone_with_five=True) == 'lv5n'
    assert to_tone2(pinyin, v_to_u=True) == 'lün'
    assert to_tone2(pinyin, v_to_u=True,
                    neutral_tone_with_five=True) == 'lü5n'

    # TONE3 style: the digit, when requested, goes at the end.
    assert to_tone3(pinyin) == 'lvn'
    assert to_tone3(pinyin, neutral_tone_with_five=True) == 'lvn5'
    assert to_tone3(pinyin, v_to_u=True) == 'lün'
    assert to_tone3(pinyin, v_to_u=True,
                    neutral_tone_with_five=True) == 'lün5'

    # Finals only (the initial ``l`` is dropped).
    assert to_finals(pinyin) == 'vn'
    assert to_finals(pinyin, v_to_u=True) == 'ün'

    assert to_finals_tone(pinyin) == 'ün'

    assert to_finals_tone2(pinyin) == 'vn'
    assert to_finals_tone2(pinyin, neutral_tone_with_five=True) == 'v5n'
    assert to_finals_tone2(pinyin, v_to_u=True) == 'ün'
    assert to_finals_tone2(pinyin, v_to_u=True,
                           neutral_tone_with_five=True) == 'ü5n'

    assert to_finals_tone3(pinyin) == 'vn'
    assert to_finals_tone3(pinyin, neutral_tone_with_five=True) == 'vn5'
    assert to_finals_tone3(pinyin, v_to_u=True) == 'ün'
    assert to_finals_tone3(pinyin, v_to_u=True,
                           neutral_tone_with_five=True) == 'ün5'


@mark.parametrize('pinyin', [
    'lǘ',
    'lü2',
    'lv2',
])
def test_issue_290_2(pinyin):
    """Regression test for issue #290: second-tone ``lü`` spellings.

    The tone-mark, ``ü`` + digit and ``v`` + digit spellings must all
    convert to the same results.  ``ü`` is only kept in the result when
    ``v_to_u=True`` (``to_tone`` / ``to_finals_tone`` always return the
    marked ``ǘ``).

    The parameter is named ``pinyin`` (matching the other tests in this
    module) so that it does not shadow the builtin ``input``.
    """
    # Tone-less output.
    assert to_normal(pinyin) == 'lv'
    assert to_normal(pinyin, v_to_u=True) == 'lü'

    # Tone-mark style.
    assert to_tone(pinyin) == 'lǘ'

    # With a single-vowel final, TONE2 and TONE3 give the same string.
    assert to_tone2(pinyin) == 'lv2'
    assert to_tone2(pinyin, v_to_u=True) == 'lü2'

    assert to_tone3(pinyin) == 'lv2'
    assert to_tone3(pinyin, v_to_u=True) == 'lü2'

    # Finals only (the initial ``l`` is dropped).
    assert to_finals(pinyin) == 'v'
    assert to_finals(pinyin, v_to_u=True) == 'ü'

    assert to_finals_tone(pinyin) == 'ǘ'

    assert to_finals_tone2(pinyin) == 'v2'
    assert to_finals_tone2(pinyin, v_to_u=True) == 'ü2'

    assert to_finals_tone3(pinyin) == 'v2'
    assert to_finals_tone3(pinyin, v_to_u=True) == 'ü2'


@mark.parametrize('pinyin', [
    'lǘn',
    'lü2n',
    'lün2',
    'lv2n',
    'lvn2',
])
def test_issue_290_3(pinyin):
    """Regression test for issue #290: second-tone ``lün`` spellings.

    Tone-mark, TONE2 (digit after the vowel) and TONE3 (digit at the end)
    spellings, written with either ``ü`` or ``v``, must all convert to the
    same results.  ``ü`` is only kept in the result when ``v_to_u=True``
    (``to_tone`` / ``to_finals_tone`` always return the marked ``ǘ``).

    The parameter is named ``pinyin`` (matching the other tests in this
    module) so that it does not shadow the builtin ``input``.
    """
    # Tone-less output.
    assert to_normal(pinyin) == 'lvn'
    assert to_normal(pinyin, v_to_u=True) == 'lün'

    # Tone-mark style.
    assert to_tone(pinyin) == 'lǘn'

    # TONE2 style: digit directly after the vowel.
    assert to_tone2(pinyin) == 'lv2n'
    assert to_tone2(pinyin, v_to_u=True) == 'lü2n'

    # TONE3 style: digit at the end.
    assert to_tone3(pinyin) == 'lvn2'
    assert to_tone3(pinyin, v_to_u=True) == 'lün2'

    # Finals only (the initial ``l`` is dropped).
    assert to_finals(pinyin) == 'vn'
    assert to_finals(pinyin, v_to_u=True) == 'ün'

    assert to_finals_tone(pinyin) == 'ǘn'

    assert to_finals_tone2(pinyin) == 'v2n'
    assert to_finals_tone2(pinyin, v_to_u=True) == 'ü2n'

    assert to_finals_tone3(pinyin) == 'vn2'
    assert to_finals_tone3(pinyin, v_to_u=True) == 'ün2'


@mark.parametrize('pinyin', [
    'shang',
    'sha5ng',
    'shang5',
])
def test_issue_290_4(pinyin):
    """Regression test for issue #290: neutral-tone ``shang`` spellings.

    An input with no digit, an inline ``5`` or a trailing ``5`` must all
    convert to the same results.  ``5`` only appears in the output when
    ``neutral_tone_with_five=True``, and ``v_to_u`` makes no difference
    because the syllable contains neither ``ü`` nor ``v``.

    The parameter is named ``pinyin`` (matching the other tests in this
    module) so that it does not shadow the builtin ``input``.
    """
    # Tone-less output.
    assert to_normal(pinyin) == 'shang'
    assert to_normal(pinyin, v_to_u=True) == 'shang'

    # Tone-mark style: neutral tone carries no mark.
    assert to_tone(pinyin) == 'shang'

    # TONE2 style: the digit, when requested, follows the vowel.
    assert to_tone2(pinyin) == 'shang'
    assert to_tone2(pinyin, neutral_tone_with_five=True) == 'sha5ng'
    assert to_tone2(pinyin, v_to_u=True) == 'shang'
    assert to_tone2(pinyin, v_to_u=True,
                    neutral_tone_with_five=True) == 'sha5ng'

    # TONE3 style: the digit, when requested, goes at the end.
    assert to_tone3(pinyin) == 'shang'
    assert to_tone3(pinyin, neutral_tone_with_five=True) == 'shang5'
    assert to_tone3(pinyin, v_to_u=True) == 'shang'
    assert to_tone3(pinyin, v_to_u=True,
                    neutral_tone_with_five=True) == 'shang5'

    # Finals only (the initial ``sh`` is dropped).
    assert to_finals(pinyin) == 'ang'
    assert to_finals(pinyin, v_to_u=True) == 'ang'

    assert to_finals_tone(pinyin) == 'ang'

    assert to_finals_tone2(pinyin) == 'ang'
    assert to_finals_tone2(pinyin, neutral_tone_with_five=True) == 'a5ng'
    assert to_finals_tone2(pinyin, v_to_u=True) == 'ang'
    assert to_finals_tone2(pinyin, v_to_u=True,
                           neutral_tone_with_five=True) == 'a5ng'

    assert to_finals_tone3(pinyin) == 'ang'
    assert to_finals_tone3(pinyin, neutral_tone_with_five=True) == 'ang5'
    assert to_finals_tone3(pinyin, v_to_u=True) == 'ang'
    assert to_finals_tone3(pinyin, v_to_u=True,
                           neutral_tone_with_five=True) == 'ang5'

0 comments on commit 05eacb3

Please sign in to comment.