# yaml_to_lark_utils.py -- forked from hedyorg/hedy
import collections
import copy
import os
import yaml
def _keyword_to_lark(value, lang):
    """Turn one keyword translation into the text substituted into the grammar template.

    Args:
        value: The keyword translation; alternatives are separated by "|".
        lang: Language code of the YAML file being converted.

    Returns:
        One or more double-quoted literals, joined the way the grammar template expects.
    """
    # we use | if we have multiple options, such as repete and repète;
    # every option becomes its own quoted literal.
    # NOTE(review): this takes precedence over the Arabic handling below, so
    # Arabic keywords with alternatives get no tatweel support - confirm intended.
    if "|" in value:
        return ' | '.join('"' + option + '"' for option in value.split("|"))

    if lang == "ar":
        # Arabic keywords are split into single letters, with an optional run of
        # tatweel characters allowed before, between and after the letters.
        tatweel = ' "ـ"* '
        return ''.join(tatweel + '"' + letter + '"' for letter in value) + tatweel

    # other languages need their translations surrounded by "'s
    return '"' + value + '"'


def extract_Lark_grammar_from_yaml():
    """Create a keywords-<lang>.lark file for every keyword YAML file.

    The YAML files are read from the ``keywords`` directory next to this file.
    Each one is merged over the English keywords (``en.yaml``), so a keyword
    that is not yet translated falls back to its English form. The merged
    keywords are substituted into ``../grammars/keywords-template.lark`` and
    the result is written to ``../grammars/keywords-<lang>.lark``.

    Raises:
        OSError: If the keywords directory, the template or ``en.yaml`` cannot
            be read, or a grammar file cannot be written.
        KeyError: If the template uses a placeholder that neither the language
            file nor the English file defines.
    """
    dirname = os.path.dirname(__file__)
    input_path = os.path.join(dirname, 'keywords')
    current_grammar_path = os.path.join(dirname, '../grammars')

    # Strip only the trailing extension; str.replace would also remove a
    # '.yaml' occurring elsewhere in the file name.
    suffix = '.yaml'
    yaml_languages = [
        f[:-len(suffix)]
        for f in os.listdir(input_path)
        if f.endswith(suffix) and os.path.isfile(os.path.join(input_path, f))
    ]

    template_lark = os.path.join(current_grammar_path, 'keywords-template.lark')
    with open(template_lark, 'r', encoding='utf-8') as f:
        template = f.read()

    if not yaml_languages:
        return

    # The English keywords are the fallback for every language, so load them
    # once instead of once per language. An empty file parses to None.
    default_yaml_with_path = os.path.join(input_path, 'en' + '.yaml')
    with open(default_yaml_with_path, 'r', encoding='utf-8') as stream:
        en_command_combinations = yaml.safe_load(stream) or {}

    for yaml_lang in yaml_languages:
        yaml_filesname_with_path = os.path.join(input_path, yaml_lang + '.yaml')
        with open(yaml_filesname_with_path, 'r', encoding='utf-8') as stream:
            command_combinations = yaml.safe_load(stream) or {}

        # Start from the English keywords and overwrite all translated keywords.
        merged = {**en_command_combinations, **command_combinations}
        translations = {
            keyword: _keyword_to_lark(value, yaml_lang)
            for keyword, value in merged.items()
        }

        # A placeholder without a matching keyword raises KeyError here.
        translated_template = template.format(**translations)

        lark_filesname_with_path = os.path.join(current_grammar_path, 'keywords-' + yaml_lang + '.lark')
        with open(lark_filesname_with_path, 'w', encoding='utf-8') as f:
            f.write(translated_template)
# Regenerate the keywords-<lang>.lark grammar files. This call sits at module
# level, so it runs when the file is executed and also whenever it is imported.
# NOTE(review): confirm importers rely on this before adding a __main__ guard.
extract_Lark_grammar_from_yaml()