Skip to content

Commit

Permalink
edit
Browse files Browse the repository at this point in the history
  • Loading branch information
vivo committed Apr 15, 2019
1 parent c4841ab commit 56881b1
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 84 deletions.
25 changes: 11 additions & 14 deletions OALD4/OALD4_symbol_fixer.py
Original file line number Diff line number Diff line change
@@ -1,37 +1,34 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# OALD4原始文档音标使用了"Kingsoft Phonetic Plain"字体,导致不安装该字体的电脑会出现乱码,在此批量替换修正。
# OALD4原始文档音标使用了Kingsoft Phonetic Plain字体,导致不安装该字体的电脑会出现乱码,在此批量替换修正。
# 金山词霸音标字体编码表可参见 http://www.fmddlmyy.cn/text66.html

import re


# Hard-coded, machine-specific paths: the script reads the OALD4 text from
# file_src and writes the corrected text to file_dst.
file_src = '/users/vivo/desktop/OALD4_INIT.txt'
file_dst = '/users/vivo/desktop/OALD4_edited.txt'


def converter(match):
    """Return the matched phonetic span with font codes replaced by IPA symbols.

    Written as a replacement callback for ``re.sub``: the whole matched text
    is taken from ``match.group()`` and every code listed below is replaced,
    in order, by its Unicode counterpart.

    Args:
        match: A regex match object covering one phonetic span.

    Returns:
        The converted text of the span.
    """
    # Ordered (code, symbol) pairs; the order is the one the substitutions
    # have always been applied in, so the result is unchanged.
    substitutions = (
        ('5', 'ˈ'), ('7', 'ˌ'), ('9', 'ˌ'),
        ('A', 'æ'), ('B', 'ɑ'), ('C', 'ɔ'), ('E', 'ə'), ('F', 'ʃ'),
        ('I', 'ɪ'), ('J', 'ʊ'), ('N', 'ŋ'), ('Q', 'ʌ'),
        ('R', 'ɔ'), ('T', 'ð'), ('U', 'u'), ('V', 'ʒ'),
        ('W', 'θ'), ('Z', 'ɛ'),
        # NOTE(review): this raw string is four literal backslashes, so a
        # single backslash in the text is left alone - confirm that matches
        # how the source file encodes this symbol.
        (r'\\\\', 'ɜ'),
        ('^', 'ɡ'),
        (':', 'ː'), ('[', 'ɜːr'), ('L', 'ər'),
        # Last on purpose: the 'U' it produces must not be lower-cased by
        # the earlier ('U', 'u') rule.
        ('?@', 'US'),
    )

    correct_symbol = match.group()
    for code, symbol in substitutions:
        correct_symbol = correct_symbol.replace(code, symbol)
    return correct_symbol



# Read the whole source text. The encoding is stated explicitly so the result
# does not depend on the platform's default encoding.
with open(file_src, 'r', encoding='utf-8') as f:
    text = f.read()

# A phonetic span starts with '/', contains '; ' and ends at the next '/'.
# Both parts are non-greedy and '.' does not cross line breaks, so a span
# never extends past the line it starts on.
p = re.compile('/.*?; .*?/')
result = p.sub(converter, text)

with open(file_dst, 'w', encoding='utf-8') as fo:
    fo.write(result)
4 changes: 2 additions & 2 deletions ecdict/convert_dict.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#! /usr/bin/env python
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Convert the CSV data into SQLite database format. ecdict.db itself is not
# stored directly because it would grow very large.
# The data holds 770611 records and comes from https://github.com/skywind3000/ECDICT

import stardict

# NOTE(review): presumably the first argument is the database to create and
# the second the CSV to read (per the comment above) - confirm against
# stardict.convert_dict.
stardict.convert_dict("./ecdict.db", "./ecdict.csv")
stardict.convert_dict('./ecdict.db', './ecdict.csv')
115 changes: 57 additions & 58 deletions oxford_dict_english_2e/oxford_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,75 +2,74 @@
# -*- coding: UTF-8 -*-


# Hard-coded, machine-specific paths: the script reads the dictionary text
# from file_src and writes the cleaned entries to file_dst.
file_src = '/users/vivo/desktop/oxford_dict_english.txt'
file_dst = '/users/vivo/desktop/oxford_dict_result.txt'


def parser(text):
    """Clean one raw line of a dictionary entry, or return '' to drop it.

    The line is classified by its prefix:

    * ``'★☆☆'``: every ``'▶'`` becomes a newline, the first six characters
      are cut off and the remainder is stripped.
    * ``'1.'`` to ``'18.'``: the result is that label followed by whatever
      comes after the first ``'•'`` (nothing if there is no ``'•'``). The
      result is not stripped.
    * one of the marker prefixes listed in ``kept_prefixes``: the line is
      returned stripped.
    * anything else: ``''``.

    Args:
        text: A single line of the source text.

    Returns:
        The cleaned line, or an empty string when the line is not kept.
    """
    if text.startswith('★☆☆'):
        # replace() does nothing when there is no '▶', so no separate check
        # for its presence is needed.
        return text.replace('▶', '\n')[6:].strip()

    # Numbered senses. The labels cannot shadow each other: '1.' is not a
    # prefix of '10.' because the second character differs.
    for number in range(1, 19):
        label = '%d.' % number
        if text.startswith(label):
            return label + text.partition('•')[2]

    # Lines that are kept as they are, apart from surrounding whitespace.
    kept_prefixes = (
        ' /', '▶', '【IDIOMS】', ' --›', '◘', '【派生】', '♦', '【PHR V】', '•',
    )
    if text.startswith(kept_prefixes):
        return text.strip()

    return ''

Expand All @@ -79,24 +78,24 @@ def parser(text):

# Read the whole source text. The encoding is stated explicitly so the result
# does not depend on the platform's default encoding.
with open(file_src, 'r', encoding='utf-8') as f:
    full_text = f.read()

# Start a new line before every '▶' that directly follows a '/', so that
# parser() sees it as a line of its own.
full_text = full_text.replace('/▶', '/\n▶')

# Entries are separated by a run of dashes. Each entry is cleaned line by
# line; lines that parser() maps to '' are dropped.
# all_entry is expected to be a list created earlier in the script.
for entry in full_text.split('————————————'):
    if not entry:
        continue
    kept_lines = [cleaned for cleaned in map(parser, entry.splitlines())
                  if cleaned != '']
    all_entry.append('\n'.join(kept_lines).strip())

with open(file_dst, 'w', encoding='utf-8') as f_out:
    for each in all_entry:
        # To put an entry on a single line, write
        # ' '.join(each.splitlines()) instead of each.
        f_out.write('\n\n' + each)
20 changes: 10 additions & 10 deletions 英汉大词典(第二版)/en_ch_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
# -*- coding: utf-8 -*-


# Hard-coded, machine-specific paths: the script reads the dictionary text
# from file_src and writes the cleaned entries to file_dst.
file_src = '/users/vivo/desktop/英汉大词典_INIT.txt'
file_dst = '/users/vivo/desktop/英汉大词典_edited.txt'


def parser(text):
    """Return *text* with the layout marker characters removed.

    Args:
        text: A single line of the source text.

    Returns:
        The line with every '■', 'ⓐ', '⏎', '▶' and '➜' deleted; all other
        characters are left untouched.
    """
    # TODO: the "derivative" marker 【卍】 and the "idiom" marker 【★】 are
    # not handled yet.
    outtext = text
    for marker in ('■', 'ⓐ', '⏎', '▶', '➜'):
        outtext = outtext.replace(marker, '')
    return outtext

Expand All @@ -19,22 +19,22 @@ def parser(text):
# Read the whole source text. The encoding is stated explicitly so the result
# does not depend on the platform's default encoding.
with open(file_src, 'r', encoding='utf-8') as f:
    full_text = f.read()

# Entries are separated by a run of dashes. Each line of an entry is passed
# through parser(); empty lines are not filtered out here.
# all_entry is expected to be a list created earlier in the script.
for entry in full_text.split('————————————'):
    if not entry:
        continue
    cleaned_lines = [parser(line) for line in entry.splitlines()]
    # Join with ' ' instead of '\n' to put an entry on a single line.
    all_entry.append('\n'.join(cleaned_lines).strip())

with open(file_dst, 'w', encoding='utf-8') as f_out:
    for each in all_entry:
        # To put an entry on a single line, write
        # ' '.join(each.splitlines()) instead of each.
        f_out.write('\n\n————————————\n' + each)

0 comments on commit 56881b1

Please sign in to comment.