diff --git a/OALD4/OALD4_symbol_fixer.py b/OALD4/OALD4_symbol_fixer.py index 81c25d6..02930e3 100644 --- a/OALD4/OALD4_symbol_fixer.py +++ b/OALD4/OALD4_symbol_fixer.py @@ -1,37 +1,34 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# OALD4原始文档音标使用了"Kingsoft Phonetic Plain"字体,导致不安装该字体的电脑会出现乱码,在此批量替换修正。 +# OALD4原始文档音标使用了“Kingsoft Phonetic Plain”字体,导致不安装该字体的电脑会出现乱码,在此批量替换修正。 # 金山词霸音标字体编码表可参见 http://www.fmddlmyy.cn/text66.html import re -file_src = "/users/vivo/desktop/OALD4_INIT.txt" - -file_dst = "/users/vivo/desktop/OALD4_edited.txt" +file_src = '/users/vivo/desktop/OALD4_INIT.txt' +file_dst = '/users/vivo/desktop/OALD4_edited.txt' def converter(match): phonetic_string = match.group() - correct_symbol = phonetic_string.replace("5", "ˈ").replace("7", "ˌ").replace("9", "ˌ") \ - .replace("A", "æ").replace("B", "ɑ").replace("C", "ɔ").replace("E", "ə").replace("F", "ʃ") \ - .replace("I", "ɪ").replace("J", "ʊ").replace("N", "ŋ").replace("Q", "ʌ") \ - .replace("R", "ɔ").replace("T", "ð").replace("U", "u").replace("V", "ʒ") \ - .replace("W", "θ").replace("Z", "ɛ").replace(r"\\\\", "ɜ").replace("^", "ɡ") \ - .replace(":", "ː").replace("[", "ɜːr").replace("L", "ər").replace("?@", "US") + correct_symbol = phonetic_string.replace('5', 'ˈ').replace('7', 'ˌ').replace('9', 'ˌ') \ + .replace('A', 'æ').replace('B', 'ɑ').replace('C', 'ɔ').replace('E', 'ə').replace('F', 'ʃ') \ + .replace('I', 'ɪ').replace('J', 'ʊ').replace('N', 'ŋ').replace('Q', 'ʌ') \ + .replace('R', 'ɔ').replace('T', 'ð').replace('U', 'u').replace('V', 'ʒ') \ + .replace('W', 'θ').replace('Z', 'ɛ').replace(r'\\\\', 'ɜ').replace('^', 'ɡ') \ + .replace(':', 'ː').replace('[', 'ɜːr').replace('L', 'ər').replace('?@', 'US') return correct_symbol - with open(file_src, 'r') as f: text = f.read() - p = re.compile("/.*?; .*?/") - + p = re.compile('/.*?; .*?/') result = re.sub(p, converter, text) with open(file_dst, 'w') as fo: - fo.write(result) \ No newline at end of file + fo.write(result) diff --git a/ecdict/convert_dict.py b/ecdict/convert_dict.py index d64c855..5442089 100644 --- a/ecdict/convert_dict.py +++ b/ecdict/convert_dict.py @@ -1,4 +1,4 @@ -#! /usr/bin/env python +#!/usr/bin/env python # -*- coding: utf-8 -*- # csv数据转换成SQLite数据库格式,不直接放ecdict.db,是因为会膨胀很大。 @@ -6,4 +6,4 @@ import stardict -stardict.convert_dict("./ecdict.db", "./ecdict.csv") \ No newline at end of file +stardict.convert_dict('./ecdict.db', './ecdict.csv') diff --git a/oxford_dict_english_2e/oxford_dict.py b/oxford_dict_english_2e/oxford_dict.py index 02ff91e..e652df6 100644 --- a/oxford_dict_english_2e/oxford_dict.py +++ b/oxford_dict_english_2e/oxford_dict.py @@ -2,75 +2,74 @@ # -*- coding: UTF-8 -*- -file_src = "/users/vivo/desktop/oxford_dict_english.txt" - -file_dst = "/users/vivo/desktop/oxford_dict_result.txt" +file_src = '/users/vivo/desktop/oxford_dict_english.txt' +file_dst = '/users/vivo/desktop/oxford_dict_result.txt' def parser(text): - if text.startswith("★☆☆"): - if "▶" in text: - text = text.replace("▶", "\n▶") + if text.startswith('★☆☆'): + if '▶' in text: + text = text.replace('▶', '\n▶') outtext = text[6:].strip() else: outtext = text[6:].strip() - elif text.startswith(" /"): + elif text.startswith(' /'): outtext = text.strip() - elif text.startswith("1."): - outtext = "1." + text.partition("•")[2] - elif text.startswith("2."): - outtext = "2." + text.partition("•")[2] - elif text.startswith("3."): - outtext = "3." + text.partition("•")[2] - elif text.startswith("4."): - outtext = "4." + text.partition("•")[2] - elif text.startswith("5."): - outtext = "5." + text.partition("•")[2] - elif text.startswith("6."): - outtext = "6." + text.partition("•")[2] - elif text.startswith("7."): - outtext = "7." + text.partition("•")[2] - elif text.startswith("8."): - outtext = "8." + text.partition("•")[2] - elif text.startswith("9."): - outtext = "9." + text.partition("•")[2] - elif text.startswith("10."): - outtext = "10." + text.partition("•")[2] - elif text.startswith("11."): - outtext = "11." + text.partition("•")[2] - elif text.startswith("12."): - outtext = "12." + text.partition("•")[2] - elif text.startswith("13."): - outtext = "13." + text.partition("•")[2] - elif text.startswith("14."): - outtext = "14." + text.partition("•")[2] - elif text.startswith("15."): - outtext = "15." + text.partition("•")[2] - elif text.startswith("16."): - outtext = "16." + text.partition("•")[2] - elif text.startswith("17."): - outtext = "17." + text.partition("•")[2] - elif text.startswith("18."): - outtext = "18." + text.partition("•")[2] - elif text.startswith("▶"): + elif text.startswith('1.'): + outtext = '1.' + text.partition('•')[2] + elif text.startswith('2.'): + outtext = '2.' + text.partition('•')[2] + elif text.startswith('3.'): + outtext = '3.' + text.partition('•')[2] + elif text.startswith('4.'): + outtext = '4.' + text.partition('•')[2] + elif text.startswith('5.'): + outtext = '5.' + text.partition('•')[2] + elif text.startswith('6.'): + outtext = '6.' + text.partition('•')[2] + elif text.startswith('7.'): + outtext = '7.' + text.partition('•')[2] + elif text.startswith('8.'): + outtext = '8.' + text.partition('•')[2] + elif text.startswith('9.'): + outtext = '9.' + text.partition('•')[2] + elif text.startswith('10.'): + outtext = '10.' + text.partition('•')[2] + elif text.startswith('11.'): + outtext = '11.' + text.partition('•')[2] + elif text.startswith('12.'): + outtext = '12.' + text.partition('•')[2] + elif text.startswith('13.'): + outtext = '13.' + text.partition('•')[2] + elif text.startswith('14.'): + outtext = '14.' + text.partition('•')[2] + elif text.startswith('15.'): + outtext = '15.' + text.partition('•')[2] + elif text.startswith('16.'): + outtext = '16.' + text.partition('•')[2] + elif text.startswith('17.'): + outtext = '17.' + text.partition('•')[2] + elif text.startswith('18.'): + outtext = '18.' + text.partition('•')[2] + elif text.startswith('▶'): outtext = text.strip() - elif text.startswith("【IDIOMS】"): + elif text.startswith('【IDIOMS】'): outtext = text.strip() - elif text.startswith(" --›"): + elif text.startswith(' --›'): outtext = text.strip() - elif text.startswith("◘"): + elif text.startswith('◘'): outtext = text.strip() - elif text.startswith("【派生】"): + elif text.startswith('【派生】'): outtext = text.strip() - elif text.startswith("♦"): + elif text.startswith('♦'): outtext = text.strip() - elif text.startswith("【PHR V】"): + elif text.startswith('【PHR V】'): outtext = text.strip() - elif text.startswith("•"): + elif text.startswith('•'): outtext = text.strip() else: - outtext = "" + outtext = '' return outtext @@ -79,10 +78,9 @@ def parser(text): with open(file_src, 'r') as f: full_text = f.read() - full_text = full_text.replace("/▶", "/\n▶") - + full_text = full_text.replace('/▶', '/\n▶') - entry_list = full_text.split("————————————") + entry_list = full_text.split('————————————') for entry in entry_list: if entry: row_list = [] @@ -90,13 +88,14 @@ def parser(text): for line in line_list: line_out = parser(line) row_list.append(line_out) - row_list = [x for x in row_list if x != ""] + row_list = [x for x in row_list if x != ''] new_entry = '\n'.join(row_list) new_entry = new_entry.strip() all_entry.append(new_entry) + with open(file_dst, 'w') as f_out: for each in all_entry: - # each = " ".join(each.splitlines()) - f_out.write('\n\n' + each) \ No newline at end of file + # each = ' '.join(each.splitlines()) + f_out.write('\n\n' + each) diff --git "a/\350\213\261\346\261\211\345\244\247\350\257\215\345\205\270\357\274\210\347\254\254\344\272\214\347\211\210\357\274\211/en_ch_dict.py" "b/\350\213\261\346\261\211\345\244\247\350\257\215\345\205\270\357\274\210\347\254\254\344\272\214\347\211\210\357\274\211/en_ch_dict.py" index ce8b05f..289adac 100644 --- "a/\350\213\261\346\261\211\345\244\247\350\257\215\345\205\270\357\274\210\347\254\254\344\272\214\347\211\210\357\274\211/en_ch_dict.py" +++ "b/\350\213\261\346\261\211\345\244\247\350\257\215\345\205\270\357\274\210\347\254\254\344\272\214\347\211\210\357\274\211/en_ch_dict.py" @@ -2,14 +2,14 @@ # -*- coding: utf-8 -*- -file_src = "/users/vivo/desktop/英汉大词典_INIT.txt" -file_dst = "/users/vivo/desktop/英汉大词典_edited.txt" +file_src = '/users/vivo/desktop/英汉大词典_INIT.txt' +file_dst = '/users/vivo/desktop/英汉大词典_edited.txt' def parser(text): - # "派生"标记【卍】 和 "习语"标记【★】尚未处理 - outtext = text.replace("■", "").replace("ⓐ", "").\ - replace("⏎", "").replace("▶", "").replace("➜", "") + # ”派生“标记【卍】 和 ”习语”标记【★】尚未处理 + outtext = text.replace('■', '').replace('ⓐ', '').\ + replace('⏎', '').replace('▶', '').replace('➜', '') return outtext @@ -19,8 +19,7 @@ def parser(text): with open(file_src, 'r') as f: full_text = f.read() - - entry_list = full_text.split("————————————") + entry_list = full_text.split('————————————') for entry in entry_list: if entry: row_list = [] @@ -28,13 +27,14 @@ def parser(text): for line in line_list: line_out = parser(line) row_list.append(line_out) - # row_list = [x for x in row_list if x != ""] - new_entry = '\n'.join(row_list) #不分行显示则用空格代替"\n" + # row_list = [x for x in row_list if x != ''] + new_entry = '\n'.join(row_list) #不分行显示则用空格代替'\n' new_entry = new_entry.strip() all_entry.append(new_entry) + with open(file_dst, 'w') as f_out: for each in all_entry: - # each = " ".join(each.splitlines()) + # each = ' '.join(each.splitlines()) f_out.write('\n\n————————————\n' + each)