forked from 2d6/talon_german
-
Notifications
You must be signed in to change notification settings - Fork 5
/
german_implementation.py
98 lines (77 loc) · 2.71 KB
/
german_implementation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import os
from talon import Context, actions
# Talon context under which the captures in this file are registered.  The
# match block names the `user.german` mode and the `de_DE` language.
ctx = Context()
ctx.matches = """
mode: user.german
language: de_DE
"""
# Dictionary for capitalization: maps a lowercased word to the spelling that
# should be inserted for it.
path = os.path.dirname(os.path.abspath(__file__))
_dictionary_file = os.path.join(path, "dictionary", "german.dic")
try:
    # Decode explicitly as UTF-8 so that umlauts and "ß" do not depend on the
    # platform's locale encoding.
    with open(_dictionary_file, encoding="utf-8") as f:
        list_of_words = f.read().split("\n")
except UnicodeDecodeError:
    # The file is not valid UTF-8: fall back to the locale default encoding,
    # which is how the file was read before.
    with open(_dictionary_file) as f:
        list_of_words = f.read().split("\n")

dict_of_words = {}
for word in list_of_words:
    if not word:
        # Blank lines (such as the one after the final newline) are not words.
        continue
    lowered = word.lower()
    # When several entries share the same lowercased key the capitalization
    # is ambiguous, so the lower-case spelling is used.
    dict_of_words[lowered] = lowered if lowered in dict_of_words else word
# TODO: Read in ../knausj_talon/settings/capitalized_words_de_custom.txt to be
# capitalized
# Update: Is this really needed or can we just use words_to_replace_de.csv for
# that?
# A sentence part whose first character is one of these gets a trailing space
# appended (see `satzglied`).
_space_after = ".,!?:;)]}–“‘$£€"

# A sentence part whose first character is one of these is attached directly
# to the preceding part: a space in front of it is dropped (see `satz`).
# "␣" is a placeholder that `satz` converts into a real space at the end.
_no_space_before = ".,-!?:;)]}␣“‘’$£€"

# Typographic characters and their plain ASCII substitutes.
_ascii_replace = {
    '–': '-',
    '„': '"',
    '“': '"',
    "‚": "'",
    "‘": "'",
    "’": "'",
}

# Sentence-ending punctuation marks.
_capitalize_after = ".!?"
@ctx.capture("user.wort", rule='({user.number_key}+ | <user.vocabulary_german> | <word>)')
def wort(m) -> str:
    """Return the match with all whitespace removed, followed by one space.

    The rule accepts a run of number keys, a German vocabulary entry or a
    plain word.
    """
    # TODO: apply capitalization to the word here.
    compact = ''.join(str(m).split())
    return f"{compact} "
@ctx.capture("user.gk_wort", rule='[{user.modifier}] <user.wort>')
def gk_wort(m) -> str:
    """Return a word, optionally re-cased by a leading spoken modifier.

    With a "CAP", "ALLCAPS" or "LOWER" modifier the remaining parts are passed
    through `actions.user.formatted_text` with the matching formatter.
    Without a modifier the word is looked up (spaces removed) in
    `dict_of_words`; a hit yields the dictionary spelling plus a trailing
    space, a miss yields the text unchanged.
    """
    formatter_for_modifier = {
        "CAP": "CAPITALIZE_ALL_WORDS",
        "ALLCAPS": "ALL_CAPS",
        "LOWER": "ALL_LOWERCASE",
    }
    remainder = " ".join(m[1:])
    formatter = formatter_for_modifier.get(m[0])
    if formatter is not None:
        return actions.user.formatted_text(remainder, formatter)

    # No modifier: fall back to the capitalization dictionary.
    spoken = str(m)
    lookup_key = spoken.replace(" ", "")
    if lookup_key not in dict_of_words:
        return spoken
    return dict_of_words[lookup_key] + " "
@ctx.capture("user.satzglied", rule='(<user.gk_wort> | {user.punctuation} | {user.symbol_key})')
def satzglied(m) -> str:
    """Return one sentence part: a word, a punctuation mark or a symbol.

    A part whose first character is listed in `_space_after` is returned with
    a trailing space; any other part is returned unchanged.
    """
    part = str(m)
    wants_space = part[0] in _space_after
    return part + ' ' if wants_space else part
@ctx.capture("user.satz", rule='<user.satzglied>+')
def satz(m) -> str:
    """Join the captured sentence parts into one sentence.

    A single trailing space of a part is dropped when the following part
    starts with a character from `_no_space_before`, and one trailing space is
    removed from the finished sentence.  Finally every "␣" placeholder is
    replaced by a real space.  Empty parts are passed through instead of
    raising `IndexError`.
    """
    pieces = []
    for part in m:
        text = str(part)
        attaches_to_previous = bool(text) and text[0] in _no_space_before
        if pieces and attaches_to_previous and pieces[-1].endswith(' '):
            # Pull this part up against the previous one.
            pieces[-1] = pieces[-1][:-1]
        pieces.append(text)

    sentence = ''.join(pieces)
    if sentence.endswith(' '):
        sentence = sentence[:-1]
    # "␣" marks an explicitly requested space; it is substituted last so the
    # space handling above cannot strip it.
    return sentence.replace('␣', ' ')
@ctx.capture("user.weg", rule='weg+')
def weg(m) -> str:
    """Return the text of one or more consecutive spoken "weg"s."""
    spoken = str(m)
    return spoken
@ctx.capture("user.acronym", rule="{user.letter}+")
def acronym(m: str) -> str:
    """Return the captured letters concatenated and upper-cased."""
    # NOTE(review): `m` is annotated as `str`, yet `.letter_list` is read
    # from it — confirm the annotation.
    letters = "".join(m.letter_list)
    return letters.upper()