Skip to content

Commit

Permalink
add remove repeated phrases modes
Browse files Browse the repository at this point in the history
  • Loading branch information
mathewthe2 committed May 26, 2021
1 parent 68c9b63 commit 6d0492b
Show file tree
Hide file tree
Showing 5 changed files with 163 additions and 122 deletions.
150 changes: 75 additions & 75 deletions config.ini
Original file line number Diff line number Diff line change
@@ -1,75 +1,75 @@
[APPEARANCE]
fontsize = 29
darktheme = true
selection_color = hotpink
selection_line_width = 1

[APPCONFIG]
browser = default
host = localhost
port = 0

[ANKICONFIG]
ankiserver = http://127.0.0.1:8765
deck = 日本語
model = Anime
cardtags = Game2Text
anki_dictionary = jmdict_english

[OCRCONFIG]
engine = Tesseract Default
tesseract_language = jpn
ocr_space_language = jpn
oem = 3
extra_options = "-c chop_enable=T -c use_new_state_cost=F -c segment_segcost_rating=F -c enable_new_segsearch=0 -c language_model_ngram_on=0 -c textord_force_make_prop_words=F -c edges_max_children_per_outline=40"

[TRANSLATIONCONFIG]
translation_service = Papago
source_lang = ja
target_lang = en

[LOGCONFIG]
launchlogwindow = false
currentsessionmaxlogsize = 30
lastsessionmaxlogsize = 15
logimages = true
logimagetype = jpg
logimagequality = 1.0
resize_screenshot = false
resize_screenshot_max_width = 1280
resize_screenshot_max_height = 720
logaudio = false
logaudiotype = mp3
logaudioduration = 7.0
logaudioframes = 512
logaudiohost = Windows WASAPI
logaudiodevice = CABLE Input (VB-Audio Virtual Cable)
gamescriptfile =

[SCRIPTMATCHCONFIG]
confidence_threshold = 85
match_limit = 5

[TEXTHOOKERCONFIG]
remove_repeat = true
remove_duplicates = false
remove_spaces = true

[WINDOWS_HOTKEYS]
refresh_ocr = <ctrl>+q
add_to_anki = <shift>+e
record_audio = <ctrl>+l

[MAC_HOTKEYS]
refresh_ocr = <cmd>+b
add_to_anki = <shift>+e
record_audio = <cmd>+l

[LINUX_HOTKEYS]
refresh_ocr = <ctrl>+q
add_to_anki = <shift>+e
record_audio = <ctrl>+l

[PATHS]
textractor = default

[APPEARANCE]
fontsize = 29
darktheme = true
selection_color = hotpink
selection_line_width = 1

[APPCONFIG]
browser = default
host = localhost
port = 0

[ANKICONFIG]
ankiserver = http://127.0.0.1:8765
deck = 日本語
model = Anime
cardtags = Game2Text
anki_dictionary = jmdict_english

[OCRCONFIG]
engine = Tesseract Default
tesseract_language = jpn
ocr_space_language = jpn
oem = 3
extra_options = "-c chop_enable=T -c use_new_state_cost=F -c segment_segcost_rating=F -c enable_new_segsearch=0 -c language_model_ngram_on=0 -c textord_force_make_prop_words=F -c edges_max_children_per_outline=40"

[TRANSLATIONCONFIG]
translation_service = Papago
source_lang = ja
target_lang = en

[LOGCONFIG]
launchlogwindow = false
currentsessionmaxlogsize = 30
lastsessionmaxlogsize = 15
logimages = true
logimagetype = jpg
logimagequality = 1.0
resize_screenshot = false
resize_screenshot_max_width = 1280
resize_screenshot_max_height = 720
logaudio = false
logaudiotype = mp3
logaudioduration = 7.0
logaudioframes = 512
logaudiohost = Windows WASAPI
logaudiodevice = CABLE Input (VB-Audio Virtual Cable)
gamescriptfile =

[SCRIPTMATCHCONFIG]
confidence_threshold = 85
match_limit = 5

[TEXTHOOKERCONFIG]
remove_repeat_mode = quick
remove_duplicates = false
remove_spaces = true

[WINDOWS_HOTKEYS]
refresh_ocr = <ctrl>+q
add_to_anki = <shift>+e
record_audio = <ctrl>+l

[MAC_HOTKEYS]
refresh_ocr = <cmd>+b
add_to_anki = <shift>+e
record_audio = <cmd>+l

[LINUX_HOTKEYS]
refresh_ocr = <ctrl>+q
add_to_anki = <shift>+e
record_audio = <ctrl>+l

[PATHS]
textractor = default

19 changes: 6 additions & 13 deletions game2text.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from ocr import detect_and_log
from translate import multi_translate
from hotkeys import hotkey_map
from util import RepeatedTimer, create_directory_if_not_exists, get_default_browser_name, get_PID_list, remove_duplicate_characters, remove_repeated_phrases, remove_spaces
from util import RepeatedTimer, create_directory_if_not_exists, get_default_browser_name, get_PID_list, format_output, remove_duplicate_characters, remove_spaces
from textractor import Textractor
from tools import path_to_textractor, open_folder_textractor_path
from audio import get_recommended_device_index
Expand Down Expand Up @@ -214,18 +214,11 @@ def hook_code(code, pids):

# Clean up the text of each Textractor output object according to the
# text-hooker settings, then hand the objects to eel.textractorPipe.
# NOTE(review): this span reads like a rendered diff with indentation removed;
# the flag-based clean-up loop and the format_output(...) call look like the
# old and new versions of the same step -- confirm which side is live.
def monitor_textractor(output_objects):
# Load the config section identified by TEXTHOOKER_CONFIG.
texthooker_config = r_config_section(TEXTHOOKER_CONFIG)
# Settings are compared against the literal string 'true'.
is_remove_repeat = texthooker_config['remove_repeat'] == 'true'
is_remove_duplicates = texthooker_config['remove_duplicates'] == 'true'
is_remove_spaces = texthooker_config['remove_spaces'] == 'true'


# Note: this guard does not look at is_remove_duplicates.
if is_remove_repeat or is_remove_spaces:
for output in output_objects:
output['text'] = output['text'].strip()
output['text'] = remove_repeated_phrases(output['text']) if is_remove_repeat else output['text']
output['text'] = remove_duplicate_characters(output['text']) if is_remove_duplicates else output['text']
output['text'] = remove_spaces(output['text']) if is_remove_spaces else output['text']

# The repeat-removal mode is lower-cased before being passed on.
output_objects = format_output(
output_objects=output_objects,
remove_repeat_mode=texthooker_config['remove_repeat_mode'].lower(),
is_remove_duplicates= texthooker_config['remove_duplicates'] == 'true',
is_remove_spaces=texthooker_config['remove_spaces'] == 'true')
# Pass the processed objects to eel (presumably the JavaScript front end).
eel.textractorPipe(output_objects)

@eel.expose
Expand Down
35 changes: 34 additions & 1 deletion util.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,23 @@ def remove_duplicate_characters(sentence):

return ''.join(chars[:k])

def remove_repeated_phrases(sentence):
def quick_remove_repeated_phrases(s):
    """Cut *s* off where its longest repeated prefix starts again.

    Prefixes are tried from the longest (``len(s) - 1`` characters) down to
    two characters. For the first prefix that occurs more than once in *s*
    (non-overlapping occurrences, as counted by ``str.count``), everything
    from the start of its second occurrence onwards is dropped.

    Returns *s* unchanged when no prefix of two or more characters repeats,
    which includes every string shorter than three characters.
    """
    # Prefixes of length 0 and 1 are deliberately not tried: an empty or
    # single-character prefix would "repeat" in almost any text.
    for length in range(len(s) - 1, 1, -1):
        prefix = s[:length]
        if s.count(prefix) > 1:
            # The first occurrence is the prefix itself, so the next
            # non-overlapping one starts at or after index ``length``.
            return s[:s.find(prefix, length)]
    return s

def brute_remove_repeated_phrases(sentence):
head = 1
while 1:
scan_sentence = sentence[head:len(sentence)]
Expand All @@ -130,3 +146,20 @@ def remove_repeated_phrases(sentence):
if head == len(sentence):
break
return sentence

def quick_and_brute_remove_repeated_phrases(sentence):
    """Run the quick pass on *sentence*, then the brute-force pass on its result."""
    quick_result = quick_remove_repeated_phrases(sentence)
    return brute_remove_repeated_phrases(quick_result)

def format_output(output_objects, remove_repeat_mode, is_remove_duplicates, is_remove_spaces):
    """Clean up the ``'text'`` entry of every output object in place.

    Args:
        output_objects: iterable of dicts, each holding a ``'text'`` string.
        remove_repeat_mode: ``'quick'``, ``'brute force'`` or
            ``'quick + brute force'``. Any other value (for example
            ``'none'``) skips repeated-phrase removal.
        is_remove_duplicates: when true, apply ``remove_duplicate_characters``.
        is_remove_spaces: when true, apply ``remove_spaces``.

    Returns:
        The ``output_objects`` that was passed in; its entries are mutated.
    """
    remove_repeat_dict = {
        'quick': quick_remove_repeated_phrases,
        'brute force': brute_remove_repeated_phrases,
        'quick + brute force': quick_and_brute_remove_repeated_phrases,
    }
    remove_repeat = remove_repeat_dict.get(remove_repeat_mode)
    for output in output_objects:
        text = output['text'].strip()
        if remove_repeat is not None:
            # The helpers return a new string; the result has to be kept,
            # otherwise the selected mode has no effect on the output.
            text = remove_repeat(text)
        if is_remove_duplicates:
            text = remove_duplicate_characters(text)
        if is_remove_spaces:
            text = remove_spaces(text)
        output['text'] = text
    return output_objects
37 changes: 22 additions & 15 deletions web/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -649,13 +649,14 @@ <h4 class="mdl-dialog__title">Visual Novel Hooker</h4>
<a href="#texthookers_advanced" class="mdl-tabs__tab">Advanced</a>
</div>
<div class="mdl-tabs__panel is-active" id="texthookers_general">
<div class="settingsCard mdl-card mdl-shadow--2dp">
<ul class="mdl-list">
<li class="mdl-list__item">
<span class="mdl-list__item-primary-content">
<i style="color: #2e2e2e; background: white" class="material-icons mdl-list__item-avatar">sports_esports</i>
<span>Application</span>
<span class="tab_select_wrapper" >
<input style="margin-left: 10px; width: 355px" onkeyup="handleInputApplication()" onchange="selectTextHookerApplication(this)" type="text" name="application" list="pidNameSelect" placeholder="Select Application">
<input style="margin-left: 10px; width: 300px" onkeyup="handleInputApplication()" onchange="selectTextHookerApplication(this)" type="text" name="application" list="pidNameSelect" placeholder="Select Application">
<datalist id="pidNameSelect">
</datalist>
</span>
Expand All @@ -670,11 +671,14 @@ <h4 class="mdl-dialog__title">Visual Novel Hooker</h4>
<i style="color: #2e2e2e; background: white" class="material-icons mdl-list__item-avatar">add_task</i>
<span>Hooks</span>
<span style="padding-left: 63px" >
<select style="width: 360px" size="6" onchange="selectHook(this)" onfocus="this.selectedIndex=-1;this.blur()" id="hookSelect" name="hooks">
<select style="width: 308px" size="6" onchange="selectHook(this)" onfocus="this.selectedIndex=-1;this.blur()" id="hookSelect" name="hooks">
</select>
</span>
</span>
<li>
</li>
</ul>
</div>
<ul class="mdl-list">
<li class="mdl-list__item">
<span class="mdl-list__item-primary-content">
<span>Hook Code</span>
Expand All @@ -699,15 +703,15 @@ <h4 class="mdl-dialog__title">Visual Novel Hooker</h4>
<span class="mdl-list__item-primary-content">
<span>Custom Hooks</span>
<span style="padding-left: 50px" >
<input style="width: 300px" placeholder="Enter Hook Code" id="customHookCodeInput"></input>
<input style="width: 280px" placeholder="Enter Hook Code" id="customHookCodeInput"></input>
<button onclick="addCustomHook()">Add</button>
</span>
</li>
<li class="mdl-list__item">
<span class="mdl-list__item-primary-content">
<span>TextractorCLI.exe</span>
<span style="padding-left: 25px" >
<input disabled style="width: 300px" id="textractorPathInput"></input>
<input disabled style="width: 280px" id="textractorPathInput"></input>
<button id="selectTextractorExecutableButton" onclick="changeTextractorExecutablePath()" class="mdl-button mdl-js-button mdl-button--icon mdl-button--colored">
<i class="material-icons">folder</i>
</button>
Expand All @@ -719,16 +723,19 @@ <h4 class="mdl-dialog__title">Visual Novel Hooker</h4>
<li class="sectionTitle mdl-list__item">
Text Parsing
</li>
<li class="mdl-list__item">
<span class="mdl-list__item-primary-content">
<span>Remove Repeat Sentences</span>
</span>
<span class="mdl-list__item-secondary-action">
<label style="margin-right: 20px" class="mdl-switch mdl-js-switch mdl-js-ripple-effect" for="removeRepeatSentencesSwitch">
<input type="checkbox" onclick="toggleRemoveRepeatedSentencesAndPersist()" id="removeRepeatSentencesSwitch" class="mdl-switch__input" />
</label>
</span>
</li>
<li class="mdl-list__item">
<span class="mdl-list__item-primary-content">
<span>Remove Repeat Sentences</span>
</span>
<span class="mdl-list__item-secondary-action">
<select onchange="updateRemoveRepeatedModeAndPersist()" id="removeRepeatSelect" name="removeRepeat">
<option>None</option>
<option>Quick</option>
<option>Brute Force</option>
<option>Quick + Brute Force</option>
</select>
</span>
</li>
<li class="mdl-list__item">
<span class="mdl-list__item-primary-content">
<span>Remove Duplicate Characters</span>
Expand Down
44 changes: 26 additions & 18 deletions web/settings.js
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ const resizeScreenshotSwitch = document.getElementById('resizeScreenshotSwitch')
// Texthooker Settings Elements
const removeRepeatSentencesSwitch = document.getElementById('removeRepeatSentencesSwitch');
const textractorPathInput = document.getElementById('textractorPathInput');
const removeRepeatSelect = document.getElementById('removeRepeatSelect');

// Hotkeys
const refreshHotkeyInput = document.getElementById('refreshHotkeyInput');
Expand Down Expand Up @@ -95,7 +96,8 @@ function initConfig () {
initSetAnkiDictionaries(ankiConfig['anki_dictionary']);
// Texthooker
const texthookerConfig = config[TEXTHOOKER_CONFIG];
initSetRemoveRepeatedSentencesSwitch(texthookerConfig['remove_repeat']);
// initSetRemoveRepeatedSentencesSwitch(texthookerConfig['remove_repeat_mode']);
initSetRemoveRepeatedMode(texthookerConfig['remove_repeat_mode']);
initSetRemoveDuplicateCharactersSwitch(texthookerConfig['remove_duplicates']);
initSetRemoveWhiteSpacesSwitch(texthookerConfig['remove_spaces']);
initSetTextractorPath();
Expand Down Expand Up @@ -169,6 +171,13 @@ function initSetTranslationLanguages({sourceLang, targetLang}) {
targetLanguageInput.parentElement.MaterialTextfield.change(targetLang);
}

/**
 * Record the translation service chosen in the select element and persist it
 * through eel when it differs from the value in the loaded config.
 */
function updateTranslationServiceAndPersist() {
  translationService = translationSelect.value;
  const savedService = currentConfig[TRANSLATION_CONFIG]['translation_service'];
  if (savedService === translationService) {
    return;
  }
  eel.update_config(TRANSLATION_CONFIG, {'translation_service': translationService})();
}

function changeSourceLanguage() {
if (sourceLanguageInput.value){
eel.update_config(TRANSLATION_CONFIG, {'source_lang':sourceLanguageInput.value })();
Expand Down Expand Up @@ -270,13 +279,6 @@ function updateOCREngineAndPersist() {
}
}
}

/**
 * Record the translation service chosen in the select element and persist it
 * through eel when it differs from the value in the loaded config.
 */
function updateTranslationServiceAndPersist() {
// translationService is not declared here; it lives in an outer scope.
translationService = translationSelect.value;
// Only write the config when the selection actually changed.
if (currentConfig[TRANSLATION_CONFIG]['translation_service'] !== translationService) {
eel.update_config(TRANSLATION_CONFIG, {'translation_service':translationService})();
}
}

function toggleTextOrientation() {
verticalText = !verticalText;
Expand Down Expand Up @@ -561,17 +563,23 @@ async function selectDictionary() {
*
* Texthooker
*/
// Flip the isRemoveRepeatedSentences flag (declared outside this function).
function toggleRemoveRepeatedSentences() {
isRemoveRepeatedSentences = !isRemoveRepeatedSentences;
}
async function toggleRemoveRepeatedSentencesAndPersist() {
toggleRemoveRepeatedSentences();
eel.update_config(TEXTHOOKER_CONFIG, {'remove_repeat': isRemoveRepeatedSentences ? 'true' : 'false'})();
/**
 * Pre-select the option of the remove-repeat <select> that matches the saved
 * mode.
 *
 * Option labels are compared case-insensitively with the saved mode. When no
 * option matches, the "quick" option is selected instead. An empty or
 * undefined mode leaves the select untouched.
 *
 * @param {string} removeRepeatedMode Saved `remove_repeat_mode` config value.
 */
function initSetRemoveRepeatedMode(removeRepeatedMode) {
  if (!removeRepeatedMode) {
    return;
  }
  const removeRepeatOptions = Array.from(removeRepeatSelect.querySelectorAll("option"));
  const findByLabel = (label) =>
    removeRepeatOptions.find((option) => option.innerText.toLowerCase() === label);
  // Fall back to quick when the saved mode is not one of the options.
  const optionToSelect = findByLabel(removeRepeatedMode.toLowerCase()) || findByLabel("quick");
  // Guard against a select that has neither the saved mode nor "quick".
  if (optionToSelect) {
    // Set the live property: the `selected` attribute only describes the
    // default selection and is ignored once the selection has been changed.
    optionToSelect.selected = true;
  }
}
function initSetRemoveRepeatedSentencesSwitch(isRemoveRepeatedSentences) {
if (isRemoveRepeatedSentences === 'true') {
toggleRemoveRepeatedSentences();
document.getElementById("removeRepeatSentencesSwitch").parentElement.MaterialSwitch.on();
/**
 * Persist the lower-cased repeat-removal mode chosen in the select through
 * eel when it differs from the value in the loaded config.
 */
function updateRemoveRepeatedModeAndPersist() {
  const selectedMode = removeRepeatSelect.value.toLowerCase();
  const savedMode = currentConfig[TEXTHOOKER_CONFIG]['remove_repeat_mode'];
  if (savedMode === selectedMode) {
    return;
  }
  eel.update_config(TEXTHOOKER_CONFIG, {'remove_repeat_mode': selectedMode})();
}
function toggleRemoveDuplicateCharacters() {
Expand Down

0 comments on commit 6d0492b

Please sign in to comment.