forked from Vocab-Apps/anki-hyper-tts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_text_utils.py
133 lines (107 loc) · 5.88 KB
/
test_text_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import text_utils
import errors
import constants
import unittest
import config_models
def test_process_text(qtbot):
    """Check simple and regex replacement rules applied by ``process_text``.

    Both rules are added to the same ``TextProcessing`` object, so the regex
    assertions run with the simple rule still registered.

    ``qtbot`` is not referenced in the body (presumably the pytest-qt
    fixture -- confirm).
    """
    text_processing = config_models.TextProcessing()

    # simple replacement
    rule = config_models.TextReplacementRule(constants.TextReplacementRuleType.Simple)
    rule.source = 'word_a'
    rule.target = 'word_b'
    text_processing.add_text_replacement_rule(rule)
    assert text_utils.process_text('sentence word_a word_c', text_processing) == 'sentence word_b word_c'

    # regex replacement
    rule = config_models.TextReplacementRule(constants.TextReplacementRuleType.Regex)
    # Raw string: '\(' and '\+' are regex escapes, not Python string escapes.
    # A non-raw literal here triggers an invalid-escape-sequence warning.
    rule.source = r'\(etw \+D\)'
    rule.target = 'etwas +Dativ'
    text_processing.add_text_replacement_rule(rule)
    assert text_utils.process_text('unter (etw +D)', text_processing) == 'unter etwas +Dativ'
    # the <b> markup around the first word is not expected in the output
    assert text_utils.process_text('<b>unter</b> (etw +D)', text_processing) == 'unter etwas +Dativ'
def test_replacement_regexp_error(qtbot):
    """Check that unusable regex rules raise ``errors.TextReplacementError``.

    Two cases are covered: a pattern with an unbalanced closing parenthesis,
    and a rule whose source and target are both ``None``.

    ``qtbot`` is not referenced in the body.
    """
    # bare TestCase instance, used only for its assertRaises helper
    checker = unittest.TestCase()

    # regex replacement with error: unbalanced ')'
    malformed_processing = config_models.TextProcessing()
    malformed_rule = config_models.TextReplacementRule(constants.TextReplacementRuleType.Regex)
    malformed_rule.source = 'yoyo)'
    malformed_rule.target = 'rep'
    malformed_processing.add_text_replacement_rule(malformed_rule)
    with checker.assertRaises(errors.TextReplacementError):
        text_utils.process_text('yoyo', malformed_processing)

    # regex rule with neither source nor target set
    empty_processing = config_models.TextProcessing()
    empty_rule = config_models.TextReplacementRule(constants.TextReplacementRuleType.Regex)
    empty_rule.source = None
    empty_rule.target = None
    empty_processing.add_text_replacement_rule(empty_rule)
    with checker.assertRaises(errors.TextReplacementError):
        text_utils.process_text('yoyo', empty_processing)
def test_process_text_rules(qtbot):
    """Check how the ``TextProcessing`` flags interact in ``process_text``.

    Covers HTML processing on and off, the ordering of replacement rules
    relative to HTML processing (``run_replace_rules_after``), and SSML
    character conversion (``ssml_convert_characters``).

    ``qtbot`` is not referenced in the body.
    """
    # by default, html processing enabled
    text_processing = config_models.TextProcessing()
    assert text_utils.process_text('word1<br/>word2', text_processing) == 'word1word2'

    # disable html processing (SSML conversion is switched off as well so the
    # tag comes back untouched)
    text_processing.html_to_text_line = False
    text_processing.ssml_convert_characters = False
    assert text_utils.process_text('word1<br/>word2', text_processing) == 'word1<br/>word2'

    # add a replacement rule which targets the HTML tag
    rule = config_models.TextReplacementRule(constants.TextReplacementRuleType.Simple)
    rule.source = '<br/>'
    rule.target = ' linebreak '
    text_processing.add_text_replacement_rule(rule)
    text_processing.html_to_text_line = True
    # the expected replacement is not done, because text replacement rules
    # run after HTML replacement
    assert text_utils.process_text('word1<br/>word2', text_processing) == 'word1word2'
    text_processing.run_replace_rules_after = False
    # now, our replacement rules will run first
    assert text_utils.process_text('word1<br/>word2', text_processing) == 'word1 linebreak word2'

    # SSML replacements: with conversion enabled, '<' and '&' must come back
    # as entities. Expecting the raw characters here would make these
    # assertions indistinguishable from the disabled case below.
    text_processing = config_models.TextProcessing()
    text_processing.ssml_convert_characters = True
    assert text_utils.process_text('patients age < 30', text_processing) == 'patients age &lt; 30'
    assert text_utils.process_text('M&A', text_processing) == 'M&amp;A'

    # with conversion disabled the text passes through unchanged
    text_processing.ssml_convert_characters = False
    assert text_utils.process_text('patients age < 30', text_processing) == 'patients age < 30'
def test_regex_backref(qtbot):
    """Check that a regex rule's target may use backreferences.

    The pattern captures the text before and inside a trailing parenthesised
    group; the target swaps the two captures.

    ``qtbot`` is not referenced in the body.
    """
    text_processing = config_models.TextProcessing()
    rule = config_models.TextReplacementRule(constants.TextReplacementRuleType.Regex)
    # Raw strings keep the regex escapes (\s, \(, \)) and the backreferences
    # (\2, \1) out of Python's own string-escape processing.
    rule.source = r'(.*)\s+\((.*)\)'
    rule.target = r'\2 \1'
    text_processing.add_text_replacement_rule(rule)

    source_text = 'word1 (word2)'
    expected_result = 'word2 word1'
    assert text_utils.process_text(source_text, text_processing) == expected_result
def test_regex_ignore_case_default(qtbot):
    """Check that a lowercase regex rule leaves uppercase input untouched.

    ``ignore_case`` is not set here, so this exercises the default
    configuration of ``TextProcessing``.

    ``qtbot`` is not referenced in the body.
    """
    processing = config_models.TextProcessing()

    lowercase_rule = config_models.TextReplacementRule(constants.TextReplacementRuleType.Regex)
    lowercase_rule.source = 'abc'
    lowercase_rule.target = 'def'
    processing.add_text_replacement_rule(lowercase_rule)

    # 'abc' does not match 'ABC', so the text comes back unchanged
    actual = text_utils.process_text('ABC123', processing)
    assert actual == 'ABC123'
def test_regex_ignore_case(qtbot):
    """Check that ``ignore_case = True`` lets a lowercase regex match uppercase text.

    ``qtbot`` is not referenced in the body.
    """
    processing = config_models.TextProcessing()
    processing.ignore_case = True

    lowercase_rule = config_models.TextReplacementRule(constants.TextReplacementRuleType.Regex)
    lowercase_rule.source = 'abc'
    lowercase_rule.target = 'def'
    processing.add_text_replacement_rule(lowercase_rule)

    # the uppercase 'ABC' prefix is matched and replaced by the target
    actual = text_utils.process_text('ABC123', processing)
    assert actual == 'def123'
def test_strip_brackets(qtbot):
    """Check the ``strip_brackets`` option of ``TextProcessing``.

    With the option off, bracketed text is kept. With it on (and HTML
    processing off), text enclosed in (), [], {} or <> is removed while the
    surrounding whitespace is left as is.

    ``qtbot`` is not referenced in the body.
    """
    processing = config_models.TextProcessing()

    # option disabled: parentheses and their content are preserved
    processing.strip_brackets = False
    assert text_utils.process_text('word1 (word2)', processing) == 'word1 (word2)'

    # option enabled, HTML processing disabled
    processing.strip_brackets = True
    processing.html_to_text_line = False

    # (input, expected output) pairs, checked in order
    cases = (
        ('word1 (word2)', 'word1 '),
        ('word1 [word2]', 'word1 '),
        ('word1 [word2][word3]', 'word1 '),
        ('word1[word2]', 'word1'),
        ('word1 {word2}', 'word1 '),
        ('word1 <word2>', 'word1 '),
        ('word1 <word2>(word3)[word4]', 'word1 '),
        ('word1 (word2) word3 (word4)', 'word1 word3 '),
        ('word1 [word2] word3 [word4]', 'word1 word3 '),
        ('word1 {word2} word3 {word4}', 'word1 word3 '),
        ('word1 <word2> word3 <word4>', 'word1 word3 '),
    )
    for raw_text, stripped_text in cases:
        assert text_utils.process_text(raw_text, processing) == stripped_text