Skip to content

Commit

Permalink
remove phrases and dups in texthooker
Browse files Browse the repository at this point in the history
  • Loading branch information
mathewthe2 committed May 24, 2021
1 parent 74409e6 commit 68c9b63
Show file tree
Hide file tree
Showing 6 changed files with 131 additions and 95 deletions.
149 changes: 75 additions & 74 deletions config.ini
Original file line number Diff line number Diff line change
@@ -1,74 +1,75 @@
[APPEARANCE]
fontsize = 29
darktheme = true
selection_color = hotpink
selection_line_width = 1

[APPCONFIG]
browser = default
host = localhost
port = 0

[ANKICONFIG]
ankiserver = http://127.0.0.1:8765
deck = 日本語
model = Anime
cardtags = Game2Text
anki_dictionary = jmdict_english

[OCRCONFIG]
engine = Tesseract Default
tesseract_language = jpn
ocr_space_language = jpn
oem = 3
extra_options = "-c chop_enable=T -c use_new_state_cost=F -c segment_segcost_rating=F -c enable_new_segsearch=0 -c language_model_ngram_on=0 -c textord_force_make_prop_words=F -c edges_max_children_per_outline=40"

[TRANSLATIONCONFIG]
translation_service = Papago
source_lang = ja
target_lang = en

[LOGCONFIG]
launchlogwindow = false
currentsessionmaxlogsize = 30
lastsessionmaxlogsize = 15
logimages = true
logimagetype = jpg
logimagequality = 1.0
resize_screenshot = false
resize_screenshot_max_width = 1280
resize_screenshot_max_height = 720
logaudio = false
logaudiotype = mp3
logaudioduration = 7.0
logaudioframes = 512
logaudiohost = Windows WASAPI
logaudiodevice = CABLE Input (VB-Audio Virtual Cable)
gamescriptfile =

[SCRIPTMATCHCONFIG]
confidence_threshold = 85
match_limit = 5

[TEXTHOOKERCONFIG]
remove_repeat = true
remove_spaces = true

[WINDOWS_HOTKEYS]
refresh_ocr = <ctrl>+q
add_to_anki = <shift>+e
record_audio = <ctrl>+l

[MAC_HOTKEYS]
refresh_ocr = <cmd>+b
add_to_anki = <shift>+e
record_audio = <cmd>+l

[LINUX_HOTKEYS]
refresh_ocr = <ctrl>+q
add_to_anki = <shift>+e
record_audio = <ctrl>+l

[PATHS]
textractor = default

[APPEARANCE]
fontsize = 29
darktheme = true
selection_color = hotpink
selection_line_width = 1

[APPCONFIG]
browser = default
host = localhost
port = 0

[ANKICONFIG]
ankiserver = http://127.0.0.1:8765
deck = 日本語
model = Anime
cardtags = Game2Text
anki_dictionary = jmdict_english

[OCRCONFIG]
engine = Tesseract Default
tesseract_language = jpn
ocr_space_language = jpn
oem = 3
extra_options = "-c chop_enable=T -c use_new_state_cost=F -c segment_segcost_rating=F -c enable_new_segsearch=0 -c language_model_ngram_on=0 -c textord_force_make_prop_words=F -c edges_max_children_per_outline=40"

[TRANSLATIONCONFIG]
translation_service = Papago
source_lang = ja
target_lang = en

[LOGCONFIG]
launchlogwindow = false
currentsessionmaxlogsize = 30
lastsessionmaxlogsize = 15
logimages = true
logimagetype = jpg
logimagequality = 1.0
resize_screenshot = false
resize_screenshot_max_width = 1280
resize_screenshot_max_height = 720
logaudio = false
logaudiotype = mp3
logaudioduration = 7.0
logaudioframes = 512
logaudiohost = Windows WASAPI
logaudiodevice = CABLE Input (VB-Audio Virtual Cable)
gamescriptfile =

[SCRIPTMATCHCONFIG]
confidence_threshold = 85
match_limit = 5

[TEXTHOOKERCONFIG]
remove_repeat = true
remove_duplicates = false
remove_spaces = true

[WINDOWS_HOTKEYS]
refresh_ocr = <ctrl>+q
add_to_anki = <shift>+e
record_audio = <ctrl>+l

[MAC_HOTKEYS]
refresh_ocr = <cmd>+b
add_to_anki = <shift>+e
record_audio = <cmd>+l

[LINUX_HOTKEYS]
refresh_ocr = <ctrl>+q
add_to_anki = <shift>+e
record_audio = <ctrl>+l

[PATHS]
textractor = default

7 changes: 5 additions & 2 deletions game2text.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from ocr import detect_and_log
from translate import multi_translate
from hotkeys import hotkey_map
from util import RepeatedTimer, create_directory_if_not_exists, get_default_browser_name, get_PID_list, remove_repeat_phrases, remove_spaces
from util import RepeatedTimer, create_directory_if_not_exists, get_default_browser_name, get_PID_list, remove_duplicate_characters, remove_repeated_phrases, remove_spaces
from textractor import Textractor
from tools import path_to_textractor, open_folder_textractor_path
from audio import get_recommended_device_index
Expand Down Expand Up @@ -215,12 +215,15 @@ def hook_code(code, pids):
def monitor_textractor(output_objects):
    """Clean up hooked text according to the texthooker settings, then send it to the UI.

    The ``'text'`` entry of every item in ``output_objects`` is rewritten in
    place. When at least one clean-up option is enabled the text is first
    stripped of leading/trailing whitespace, then the enabled filters run in
    this order: repeated phrases, duplicate characters, white spaces. The
    (possibly modified) list is finally passed to ``eel.textractorPipe``.

    Args:
        output_objects: iterable of mappings that each carry a ``'text'`` key.
    """
    texthooker_config = r_config_section(TEXTHOOKER_CONFIG)
    # Config values are compared against the literal string 'true'.
    # NOTE(review): indexing raises KeyError if the stored config predates the
    # 'remove_duplicates' key -- confirm r_config_section supplies defaults.
    is_remove_repeat = texthooker_config['remove_repeat'] == 'true'
    is_remove_duplicates = texthooker_config['remove_duplicates'] == 'true'
    is_remove_spaces = texthooker_config['remove_spaces'] == 'true'

    # Every option must take part in the guard; otherwise enabling only
    # "remove duplicates" would silently skip the clean-up loop.
    if is_remove_repeat or is_remove_duplicates or is_remove_spaces:
        for output in output_objects:
            text = output['text'].strip()
            if is_remove_repeat:
                text = remove_repeated_phrases(text)
            if is_remove_duplicates:
                text = remove_duplicate_characters(text)
            if is_remove_spaces:
                text = remove_spaces(text)
            output['text'] = text

    eel.textractorPipe(output_objects)
Expand Down
44 changes: 25 additions & 19 deletions util.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,26 +101,32 @@ def get_PID_list():

return pids

def remove_repeat_characters(s):
    """Reduce a string that is one unit repeated end-to-end to a single unit.

    Despite the name, this works on whole repeating units rather than on
    individual characters: ``"abcabc"`` becomes ``"abc"`` and ``"aaaa"``
    becomes ``"a"``. A string that is not an exact repetition of a shorter
    unit (including the empty string) is returned unchanged.

    Args:
        s: the text to reduce.

    Returns:
        The shortest unit whose repetition yields ``s``, or ``s`` itself.
    """
    # s occurs inside s+s at an offset strictly between 0 and len(s) only
    # when s equals a rotation of itself, i.e. when it is periodic; the first
    # such offset is the length of the repeating unit.
    i = (s + s).find(s, 1, -1)
    return s if i == -1 else s[:i]

def remove_spaces(s):
    """Return ``s`` with every run of whitespace removed.

    ``str.split()`` with no argument splits on any whitespace (spaces, tabs,
    newlines, and Unicode spaces such as the ideographic space), so joining
    the pieces drops leading, trailing and interior whitespace alike.

    Args:
        s: the text to compact.

    Returns:
        The text with all whitespace characters stripped out.
    """
    return "".join(s.split())

# TODO: use algorithm in textractor
def remove_repeat_phrases(s):
prefix_array=[]
for i in range(len(s)):
prefix_array.append(s[:i])

#stop at 1st element to avoid checking for the ' ' char
for i in prefix_array[:1:-1]:
if s.count(i) > 1 :
#find where the next repetition starts
offset = s[len(i):].find(i)

return s[:len(i)+offset]
def remove_duplicate_characters(sentence):
    """Collapse each run of identical consecutive characters to one character.

    Only adjacent duplicates are removed; a character that reappears later,
    after a different character, is kept (``"abab"`` stays ``"abab"``).

    Args:
        sentence: the text to clean up.

    Returns:
        The text with consecutive repeated characters collapsed, e.g.
        ``"aabbcc"`` -> ``"abc"``.
    """
    deduped = []
    for char in sentence:
        # Keep a character only when it differs from the one just kept.
        if not deduped or deduped[-1] != char:
            deduped.append(char)

    return ''.join(deduped)

def remove_repeated_phrases(sentence):
    """Strip leading text that is repeated later in the sentence.

    Starting with the first character and growing one character at a time,
    the current prefix is looked up in the remainder of the sentence. When it
    occurs there again, the leading copy is removed and the scan restarts
    from a one-character prefix. For example ``"abcabc"`` becomes ``"abc"``
    and ``"ababab"`` becomes ``"ab"``.

    Note that any prefix recurring later is dropped, not only whole-phrase
    repetitions: ``"abca"`` becomes ``"bca"``.

    Args:
        sentence: the text to clean up.

    Returns:
        The text with repeated leading prefixes removed. Empty and
        one-character input is returned unchanged.
    """
    head = 1
    # The bound is checked before each pass so that empty and one-character
    # input falls straight through instead of looping forever.
    while head < len(sentence):
        prefix = sentence[:head]
        if prefix in sentence[head:]:
            # Drop the leading copy; the next line brings head back to 1.
            sentence = sentence.replace(prefix, '', 1)
            head = 0
        head += 1

    return sentence
10 changes: 10 additions & 0 deletions web/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -729,6 +729,16 @@ <h4 class="mdl-dialog__title">Visual Novel Hooker</h4>
</label>
</span>
</li>
<li class="mdl-list__item">
<span class="mdl-list__item-primary-content">
<span>Remove Duplicate Characters</span>
</span>
<span class="mdl-list__item-secondary-action">
<label style="margin-right: 20px" class="mdl-switch mdl-js-switch mdl-js-ripple-effect" for="removeDuplicateCharactersSwitch">
<input type="checkbox" onclick="toggleRemoveDuplicateCharactersAndPersist()" id="removeDuplicateCharactersSwitch" class="mdl-switch__input" />
</label>
</span>
</li>
<li class="mdl-list__item">
<span class="mdl-list__item-primary-content">
<span>Remove White Spaces</span>
Expand Down
1 change: 1 addition & 0 deletions web/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ let resizeScreenshotMaxHeight = 720;

// Texthooker
// Current on/off state of each texthooker clean-up switch; all start disabled.
let isRemoveRepeatedSentences = false
let isRemoveDuplicateCharacters = false
let isRemoveWhiteSpaces = false

const videoElement = document.getElementById("video");
Expand Down
15 changes: 15 additions & 0 deletions web/settings.js
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ function initConfig () {
// Texthooker
const texthookerConfig = config[TEXTHOOKER_CONFIG];
initSetRemoveRepeatedSentencesSwitch(texthookerConfig['remove_repeat']);
initSetRemoveDuplicateCharactersSwitch(texthookerConfig['remove_duplicates']);
initSetRemoveWhiteSpacesSwitch(texthookerConfig['remove_spaces']);
initSetTextractorPath();
// Hotkeys
Expand Down Expand Up @@ -573,6 +574,20 @@ function initSetRemoveRepeatedSentencesSwitch(isRemoveRepeatedSentences) {
document.getElementById("removeRepeatSentencesSwitch").parentElement.MaterialSwitch.on();
}
}
/** Flip the in-memory "remove duplicate characters" flag. */
function toggleRemoveDuplicateCharacters() {
  const wasEnabled = Boolean(isRemoveDuplicateCharacters);
  isRemoveDuplicateCharacters = !wasEnabled;
}
/**
 * Flip the "remove duplicate characters" flag and store the new value
 * under the texthooker config section as the string 'true' or 'false'.
 */
function toggleRemoveDuplicateCharactersAndPersist() {
  toggleRemoveDuplicateCharacters();
  const persistedValue = isRemoveDuplicateCharacters ? 'true' : 'false';
  eel.update_config(TEXTHOOKER_CONFIG, {'remove_duplicates': persistedValue})();
}
/**
 * Apply the stored "remove duplicate characters" setting at start-up.
 * Only the exact string 'true' turns the flag and the UI switch on;
 * any other value leaves both untouched.
 *
 * @param {string} isRemoveDuplicateCharacters stored config value
 */
function initSetRemoveDuplicateCharactersSwitch(isRemoveDuplicateCharacters) {
  if (isRemoveDuplicateCharacters !== 'true') {
    return;
  }
  toggleRemoveDuplicateCharacters();
  const switchInput = document.getElementById("removeDuplicateCharactersSwitch");
  switchInput.parentElement.MaterialSwitch.on();
}
/** Flip the in-memory "remove white spaces" flag. */
function toggleRemoveWhiteSpaces() {
  const wasEnabled = Boolean(isRemoveWhiteSpaces);
  isRemoveWhiteSpaces = !wasEnabled;
}
Expand Down

0 comments on commit 68c9b63

Please sign in to comment.