Skip to content

Commit

Permalink
fix
Browse files (browse the repository at this point in the history)
  • Loading branch information
CjangCjengh committed Oct 11, 2022
1 parent 1d8096e commit 3cbd522
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 7 deletions.
10 changes: 5 additions & 5 deletions text/cleaners.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,20 +129,20 @@ def chinese_dialect_cleaners(text):
from text.cantonese import cantonese_to_ipa
from text.english import english_to_lazy_ipa2
from text.ngu_dialect import ngu_dialect_to_ipa
- text = re.sub(r'\[MD\](.*?)\[MD\]',
- lambda x: chinese_to_ipa2(x.group(1))+' ', text)
+ text = re.sub(r'\[(MD|ZH)\](.*?)\[\1\]',
+ lambda x: chinese_to_ipa2(x.group(2))+' ', text)
text = re.sub(r'\[TW\](.*?)\[TW\]',
lambda x: chinese_to_ipa2(x.group(1), True)+' ', text)
text = re.sub(r'\[JA\](.*?)\[JA\]',
- lambda x: japanese_to_ipa3(x.group(1)).replace('Q','ʔ')+' ', text)
+ lambda x: japanese_to_ipa3(x.group(1)).replace('Q', 'ʔ')+' ', text)
text = re.sub(r'\[SH\](.*?)\[SH\]', lambda x: shanghainese_to_ipa(x.group(1)).replace('1', '˥˧').replace('5',
'˧˧˦').replace('6', '˩˩˧').replace('7', '˥').replace('8', '˩˨').replace('ᴀ', 'ɐ').replace('ᴇ', 'e')+' ', text)
text = re.sub(r'\[GD\](.*?)\[GD\]',
lambda x: cantonese_to_ipa(x.group(1))+' ', text)
text = re.sub(r'\[EN\](.*?)\[EN\]',
lambda x: english_to_lazy_ipa2(x.group(1))+' ', text)
- text = re.sub(r'\[([A-Z]{2})\](.*?)\[\1\]',
- lambda x: ngu_dialect_to_ipa(x.group(2), x.group(1))+' ', text)
+ text = re.sub(r'\[([A-Z]{2})\](.*?)\[\1\]', lambda x: ngu_dialect_to_ipa(x.group(2), x.group(
+ 1)).replace('ʣ', 'dz').replace('ʥ', 'dʑ').replace('ʦ', 'ts').replace('ʨ', 'tɕ')+' ', text)
text = re.sub(r'\s+$', '', text)
text = re.sub(r'([^\.,!\?\-…~])$', r'\1.', text)
return text
2 changes: 1 addition & 1 deletion text/mandarin.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,6 @@ def chinese_to_ipa2(text, taiwanese=False):
text = bopomofo_to_ipa2(text)
text = re.sub(r'i([aoe])', r'j\1', text)
text = re.sub(r'u([aoəe])', r'w\1', text)
- text = re.sub(r'([ʂɹ]`ʰ?)([˩˨˧˦˥ ]+|$)', r'\1ʅ\2', text)
+ text = re.sub(r'([ʂɹ]ʰ?)([˩˨˧˦˥ ]+|$)', r'\1ʅ\2', text)
text = re.sub(r'(sʰ?)([˩˨˧˦˥ ]+|$)', r'\1ɿ\2', text)
return text
5 changes: 4 additions & 1 deletion text/ngu_dialect.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,10 @@
converters = {}

for dialect in dialects.values():
- converters[dialect] = opencc.OpenCC(dialect)
+ try:
+ converters[dialect] = opencc.OpenCC(dialect)
+ except:
+ pass


def ngu_dialect_to_ipa(text, dialect):
Expand Down

0 comments on commit 3cbd522

Please sign in to comment.