Skip to content

Commit

Permalink
Fix bug
Browse files Browse the repository at this point in the history
  • Loading branch information
martinakaduc committed Mar 15, 2024
1 parent bd7a71e commit 37b81d7
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 61 deletions.
22 changes: 11 additions & 11 deletions apis/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
app.mount("/to-speech", StaticFiles(directory=os.path.abspath("to-speech")), name="to-speech")

# app.include_router(GraphTranslateRoute().router)
# read area.yaml file to check for current area. If KonTum then keep
# read area.yaml file to check for current area. If BinhDinh then keep
# else delete current area.yaml file

if not os.path.exists('data/cache/VIBA'):
Expand All @@ -42,10 +42,10 @@
os.mkdir('data/cache/BAVI/')

delete = False
determined_json_graph = 'data/cache/VIBA/{area}-graph.json'.format(area='KonTum')
determined_json_graph = 'data/cache/VIBA/{area}-graph.json'.format(area='BinhDinh')
if os.path.exists(determined_json_graph):
with open('data/cache/info.yaml', 'r+') as f:
# if the "area" field is not KonTum then delete
# if the "area" field is not BinhDinh then delete
data = yaml.safe_load(f)
area = data.get('area', None)
src = data.get('SRC', None)
Expand All @@ -57,13 +57,13 @@
if os.path.exists("data/cache/info.yaml"):
os.remove("data/cache/info.yaml")

yaml.dump({"area": "KonTum"}, open("data/cache/info.yaml", "w"))
yaml.dump({"area": "BinhDinh"}, open("data/cache/info.yaml", "w"))
# append SRC into info.yaml
yaml.dump({"SRC": Languages.SRC}, open("data/cache/info.yaml", "a"))
# append DST into info.yaml
yaml.dump({"DST": Languages.DST}, open("data/cache/info.yaml", "a"))

datapath = "data/" + 'KonTum/'
datapath = "data/" + 'BinhDinh/'
# count number of sentences in train, valid, test of the area
with open(datapath + Config.src_monolingual_paths[0], "r", encoding='utf-8') as f1:
src_train_count = len(f1.readlines())
Expand Down Expand Up @@ -91,10 +91,10 @@
with open("data/cache/info.yaml", "r") as f:
print(f.read())

app.include_router(VIBA_translate("KonTum").router)
app.include_router(VIBA_translate("BinhDinh").router)
app.include_router(SpeakRoute().router)
app.include_router(addWord("KonTum").router)
app.include_router(updateWord("KonTum").router)
app.include_router(changeCorpus("KonTum").router)
app.include_router(deleteWord("KonTum").router)
app.include_router(BAVI_translate("KonTum").router)
app.include_router(addWord("BinhDinh").router)
app.include_router(updateWord("BinhDinh").router)
app.include_router(changeCorpus("BinhDinh").router)
app.include_router(deleteWord("BinhDinh").router)
app.include_router(BAVI_translate("BinhDinh").router)
5 changes: 4 additions & 1 deletion objects/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@

class Word:
def __init__(self, text, language: Languages, ner_label=None):
self.is_upper = False if text is None else (text[0].isupper() or (len(text) > 1 and text[1].isupper()))
try:
self.is_upper = False if not text else (text[0].isupper() or (len(text) > 1 and text[1].isupper()))
except:
print(text)
self._text = text.lower() if text is not None else text
self.language = language
self._ner_label = ner_label
Expand Down
66 changes: 17 additions & 49 deletions pipeline/translation.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,17 @@ def __init__(self, area):
@staticmethod
def post_process(text):
    """Tidy a raw translated string for display.

    Steps, in order:
      1. Split on whitespace and drop any token that is identical to the
         token immediately before it.
      2. Re-join the tokens with single spaces.
      3. Remove the space that precedes ',', '.', ':', '?' or '!'.
      4. Capitalize the first character.

    Args:
        text: The string to clean up.

    Returns:
        The cleaned string, or "" when ``text`` is empty or contains only
        whitespace (that case previously raised IndexError on the
        first-character capitalization).
    """
    tokens = []
    for word in text.split():
        # Keep a token only if it differs from the one just kept, so
        # immediate repeats ("the the") collapse to a single occurrence.
        if not tokens or tokens[-1] != word:
            tokens.append(word)

    # Nothing to capitalize: indexing output[0] below would fail.
    if not tokens:
        return ""

    output = " ".join(tokens)
    # str.replace is a no-op when the pattern is absent, so no membership
    # test is needed before each call.
    for mark in (',', '.', ':', '?', '!'):
        output = output.replace(' ' + mark, mark)
    return output[0].capitalize() + output[1:]

def printMenu(self, list_of_words: list, output: str):
Expand Down Expand Up @@ -73,20 +74,17 @@ def __call__(self, text: str, model: str = "BART_CHUNK"):
sentence = self.graph_translator.graph_service.add_info_node(sentence) # Update info about the NER
# print(sentence.mapped_words)
translation_graph = TranslationGraph(src_sent=sentence)

# print("Mapped words", sentence.mapped_words)
# print(type(sentence.mapped_words))


control_mapped = translation_graph.update_src_sentence() # Vị trí cần thực hiện việc translate các token trong dictionary

if model == "BART_CHUNK":
mapped_words = [w for w in translation_graph.src_sent if len(w.translations) > 0 or w.is_ner
or w.is_end_sent or w.is_end_paragraph or w.is_punctuation or w.is_conjunction or w.is_in_dictionary]
or w.is_end_sent or w.is_end_paragraph or w.is_punctuation or w.is_conjunction]
else:
mapped_words = [w for w in translation_graph.src_sent if w.is_ner
or w.is_end_sent or w.is_end_paragraph or w.is_punctuation or w.is_conjunction]
# print("Mapped words", sentence.mapped_words)
# print(mapped_words)
control_mapped = translation_graph.update_src_sentence() # Vị trí cần thực hiện việc translate các token trong dictionary
# print(control_mapped)

result = []
Expand All @@ -101,14 +99,12 @@ def __call__(self, text: str, model: str = "BART_CHUNK"):
result.append(ner_text.lower())
else:
result.append(ner_text)
else: # Apply the token không phải NER
translations = src_from_node.dst_word
#print("Translations", translations)
result.append(translations)
# if len(translations) == 1:
# result.append(translations[0].text)
# else:
# result.append(translations)
else:
translations = src_from_node.translations
if len(translations) == 1:
result.append(translations[0].text)
else:
result.append(translations)

src_mapping.append([src_from_node])
if(i == len(mapped_words) - 1):
Expand All @@ -128,8 +124,6 @@ def __call__(self, text: str, model: str = "BART_CHUNK"):
print(f"CHUNK TRANSLATE {chunk.text} -> {translated_chunk} : {time.time() - s}")
i += 1

print("Result before scoring", result)
## Phần dưới này không có tác dụng
if len(result) >= 3:
for i in range(len(result)):
if not isinstance(result[i], str):
Expand Down Expand Up @@ -183,34 +177,8 @@ def __call__(self, text: str, model: str = "BART_CHUNK"):
output = " ".join(output).replace("//@", "\n").replace("/@", ".").replace("@", "")
while " " in output or ". ." in output:
output = output.replace(" ", " ").replace(". .", ".")
candidate_output = self.post_process(output.strip())
return candidate_output

# print("Our suggested candidate:", candidate_output)
# ask if user is happy with this candidate
# if not, ask for a correction

# while True:
# reply = input("Happy with this translation? y/n: ")
# if reply=='y':
# break
# else:
# choosable = False
# for items in control_mapped:
# if len(items[1]) > 1:
# choosable = True
# break
# # find words in control_mapped
# if choosable:
# candidate_output = self.printMenu(control_mapped, candidate_output)
# else:
# print("Sorry, that's the best we can do now")
# break

# output = candidate_output
# output = output[0].capitalize() + output[1:]
# return self.post_process(output)

return self.post_process(output.strip())

else:
output = self.model_translator.translate(text)
output = output[0].capitalize() + output[1:]
Expand Down

0 comments on commit 37b81d7

Please sign in to comment.