Fix bug

nhatkhangcs · Mar 15, 2024 · 37b81d7 · 37b81d7
1 parent bd7a71e
commit 37b81d7
Show file tree

Hide file tree

Showing 3 changed files with 32 additions and 61 deletions.
diff --git a/apis/api.py b/apis/api.py
@@ -32,7 +32,7 @@
 app.mount("/to-speech", StaticFiles(directory=os.path.abspath("to-speech")), name="to-speech")
 
 # app.include_router(GraphTranslateRoute().router)
-# read area.yaml file to check for current area. If KonTum then keep
+# read info.yaml file to check for current area. If BinhDinh then keep
 # else delete current info.yaml file
 
 if not os.path.exists('data/cache/VIBA'):
@@ -42,10 +42,10 @@
     os.mkdir('data/cache/BAVI/')
 
 delete = False
-determined_json_graph = 'data/cache/VIBA/{area}-graph.json'.format(area='KonTum')
+determined_json_graph = 'data/cache/VIBA/{area}-graph.json'.format(area='BinhDinh')
 if os.path.exists(determined_json_graph):
     with open('data/cache/info.yaml', 'r+') as f:
-        # if the "area" field is not KonTum then delete
+        # if the "area" field is not BinhDinh then delete
         data = yaml.safe_load(f)
         area = data.get('area', None)
         src = data.get('SRC', None)
@@ -57,13 +57,13 @@
     if os.path.exists("data/cache/info.yaml"):
         os.remove("data/cache/info.yaml")
 
-yaml.dump({"area": "KonTum"}, open("data/cache/info.yaml", "w"))
+yaml.dump({"area": "BinhDinh"}, open("data/cache/info.yaml", "w"))
 # append SRC into info.yaml
 yaml.dump({"SRC": Languages.SRC}, open("data/cache/info.yaml", "a"))
 # append DST into info.yaml
 yaml.dump({"DST": Languages.DST}, open("data/cache/info.yaml", "a"))
 
-datapath = "data/" + 'KonTum/'
+datapath = "data/" + 'BinhDinh/'
 # count number of sentences in train, valid, test of the area
 with open(datapath + Config.src_monolingual_paths[0], "r", encoding='utf-8') as f1:
     src_train_count = len(f1.readlines())
@@ -91,10 +91,10 @@
 with open("data/cache/info.yaml", "r") as f:
     print(f.read())
 
-app.include_router(VIBA_translate("KonTum").router)
+app.include_router(VIBA_translate("BinhDinh").router)
 app.include_router(SpeakRoute().router)
-app.include_router(addWord("KonTum").router)
-app.include_router(updateWord("KonTum").router)
-app.include_router(changeCorpus("KonTum").router)
-app.include_router(deleteWord("KonTum").router)
-app.include_router(BAVI_translate("KonTum").router)
+app.include_router(addWord("BinhDinh").router)
+app.include_router(updateWord("BinhDinh").router)
+app.include_router(changeCorpus("BinhDinh").router)
+app.include_router(deleteWord("BinhDinh").router)
+app.include_router(BAVI_translate("BinhDinh").router)
diff --git a/objects/graph.py b/objects/graph.py
@@ -17,7 +17,10 @@
 
 class Word:
     def __init__(self, text, language: Languages, ner_label=None):
-        self.is_upper = False if text is None else (text[0].isupper() or (len(text) > 1 and text[1].isupper()))
+        try:
+            self.is_upper = False if not text else (text[0].isupper() or (len(text) > 1 and text[1].isupper()))
+        except:
+            print(text)
         self._text = text.lower() if text is not None else text
         self.language = language
         self._ner_label = ner_label

diff --git a/pipeline/translation.py b/pipeline/translation.py
@@ -23,16 +23,17 @@ def __init__(self, area):
     @staticmethod
     def post_process(text):
         words = text.split()
-        # output = []
-        # for w in words:
-        #     if len(output) == 0 or output[-1] != w:
-        #         output.append(w)
+        output = []
+        for w in words:
+            if len(output) == 0 or output[-1] != w:
+                output.append(w)
 
-        output = " ".join(words)
+        output = " ".join(output)
         special_chars = [',', '.', ':', '?', '!']
         for char in special_chars:
             if char in output:
                 output = output.replace(' '+char, char)
+        output = output[0].capitalize() + output[1:]
         return output
 
     def printMenu(self, list_of_words: list, output: str):
@@ -73,20 +74,17 @@ def __call__(self, text: str, model: str = "BART_CHUNK"):
             sentence = self.graph_translator.graph_service.add_info_node(sentence) # Update info about the NER
             # print(sentence.mapped_words)
             translation_graph = TranslationGraph(src_sent=sentence)
-
-            # print("Mapped words", sentence.mapped_words)
-            # print(type(sentence.mapped_words))
-
+
+            control_mapped = translation_graph.update_src_sentence()         # Where the tokens found in the dictionary need to be translated
 
             if model == "BART_CHUNK":
                 mapped_words = [w for w in translation_graph.src_sent if len(w.translations) > 0 or w.is_ner
-                                or w.is_end_sent or w.is_end_paragraph or w.is_punctuation or w.is_conjunction or w.is_in_dictionary]
+                                or w.is_end_sent or w.is_end_paragraph or w.is_punctuation or w.is_conjunction]
             else:
                 mapped_words = [w for w in translation_graph.src_sent if w.is_ner
                                 or w.is_end_sent or w.is_end_paragraph or w.is_punctuation or w.is_conjunction]
             # print("Mapped words", sentence.mapped_words)
             # print(mapped_words)
-            control_mapped = translation_graph.update_src_sentence()         # Vị trí cần thực hiện việc translate các token trong dictionary
             # print(control_mapped)
 
             result = []
@@ -101,14 +99,12 @@ def __call__(self, text: str, model: str = "BART_CHUNK"):
                         result.append(ner_text.lower())
                     else:
                         result.append(ner_text)
-                else:   # Apply the token không phải NER
-                    translations = src_from_node.dst_word
-                    #print("Translations", translations)
-                    result.append(translations)
-                    # if len(translations) == 1:
-                    #     result.append(translations[0].text)
-                    # else:
-                    #     result.append(translations)
+                else:
+                    translations = src_from_node.translations
+                    if len(translations) == 1:
+                        result.append(translations[0].text)
+                    else:
+                        result.append(translations)
 
                 src_mapping.append([src_from_node])
                 if(i == len(mapped_words) - 1):
@@ -128,8 +124,6 @@ def __call__(self, text: str, model: str = "BART_CHUNK"):
                             print(f"CHUNK TRANSLATE {chunk.text} -> {translated_chunk} : {time.time() - s}")
                 i += 1
 
-            print("Result before scoring", result)
-            ## Phần dưới này không có tác dụng
             if len(result) >= 3:
                 for i in range(len(result)):
                     if not isinstance(result[i], str):
@@ -183,34 +177,8 @@ def __call__(self, text: str, model: str = "BART_CHUNK"):
             output = "  ".join(output).replace("//@", "\n").replace("/@", ".").replace("@", "")
             while "  " in output or ". ." in output:
                 output = output.replace("  ", " ").replace(". .", ".")
-            candidate_output = self.post_process(output.strip())
-            return candidate_output
-
-#             print("Our suggested candidate:", candidate_output)
-            # ask if user is happy with this candidate
-            # if not, ask for a correction
-
-#             while True:
-#                 reply = input("Happy with this translation? y/n: ")
-#                 if reply=='y':
-#                     break
-#                 else:
-#                     choosable = False
-#                     for items in control_mapped:
-#                         if len(items[1]) > 1:
-#                             choosable = True
-#                             break
-#                     # find words in control_mapped
-#                     if choosable:
-#                         candidate_output = self.printMenu(control_mapped, candidate_output)
-#                     else:
-#                         print("Sorry, that's the best we can do now")
-#                         break
-
-#             output = candidate_output
-#             output = output[0].capitalize() + output[1:]
-#             return self.post_process(output)
-
+            return self.post_process(output.strip())
+
         else:
             output = self.model_translator.translate(text)
             output = output[0].capitalize() + output[1:]