Delete unnecessary part & Update Optimize for better use
didiforgithub committed Oct 25, 2024
1 parent 06c1915 commit 38c825d
Showing 6 changed files with 64 additions and 45 deletions.
30 changes: 17 additions & 13 deletions examples/aflow/README.md
@@ -38,14 +38,15 @@ For custom tasks, you can reference the code in the `metagpt/ext/aflow/benchmark`
- Open `examples/aflow/optimize.py`
- Set the following parameters:
```python
dataset = "HumanEval" # Choose from: "HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP" or your custom dataset name
question_type = "code" # Choose from: "math", "code", "qa"
sample = 4 # Number of samples to use for optimization
check_convergence = True # Whether to check for convergence
optimized_path = "path/to/optimized/workflows" # Path to save optimized workflows, defaults to metagpt/ext/aflow/scripts/optimized
initial_round = 1 # Starting round number
max_rounds = 20 # Maximum number of optimization rounds
validation_rounds = 5 # The validation rounds of AFLOW.
dataset: DatasetType = "MATH"  # Ensure the type is consistent with DatasetType
sample: int = 4  # Sample count: how many workflows are resampled from the generated workflows
question_type: QuestionType = "math"  # Ensure the type is consistent with QuestionType
optimized_path: str = "metagpt/ext/aflow/scripts/optimized"  # Path to save the optimized results
initial_round: int = 1  # Starting round number
max_rounds: int = 20  # Maximum number of AFLOW optimization iterations
check_convergence: bool = True  # Whether to stop early once results converge
validation_rounds: int = 5  # Number of validation rounds in AFLOW
if_fisrt_optimize = True  # Set this to False after the first optimization run
```
- Adjust these parameters according to your specific requirements and dataset (a short sketch of how the new `if_fisrt_optimize` flag is consumed follows just below)
2. Set up parameters in `config/config2.yaml` (see `examples/aflow/config2.example.yaml` for reference)
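As a quick orientation, here is a minimal sketch of how the new `if_fisrt_optimize` flag is meant to be used. It mirrors the committed code in `examples/aflow/optimize.py` and `download_data.py` shown below; the flag name keeps the repository's own spelling:

```python
from metagpt.ext.aflow.data.download_data import download

if_fisrt_optimize = True  # set to False after the first optimization run

# First run: download and extract the datasets and initial rounds.
# Later runs: download() only logs that downloading is skipped.
download(["datasets", "initial_rounds"], if_first_download=if_fisrt_optimize)
```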
@@ -66,10 +67,13 @@ For custom tasks, you can reference the code in the `metagpt/ext/aflow/benchmark`
If you use AFlow in your research, please cite our paper:

```
@article{zhang2024aflow,
  title={AFlow: Automating Agentic Workflow Generation},
  author={Zhang, Jiayi and Xiang, Jinyu and Yu, Zhaoyang and Teng, Fengwei and Chen, Xionghui and Chen, Jiaqi and Zhuge, Mingchen and Cheng, Xin and Hong, Sirui and Wang, Jinlin and others},
  journal={arXiv preprint arXiv:2410.10762},
  year={2024}
}
@misc{zhang2024aflow,
  title={AFlow: Automating Agentic Workflow Generation},
  author={Jiayi Zhang and Jinyu Xiang and Zhaoyang Yu and Fengwei Teng and Xionghui Chen and Jiaqi Chen and Mingchen Zhuge and Xin Cheng and Sirui Hong and Jinlin Wang and Bingnan Zheng and Bang Liu and Yuyu Luo and Chenglin Wu},
  year={2024},
  eprint={2410.10762},
  archivePrefix={arXiv},
  primaryClass={cs.AI},
  url={https://arxiv.org/abs/2410.10762}
}
```
18 changes: 15 additions & 3 deletions examples/aflow/optimize.py
@@ -3,6 +3,18 @@
# @Author : didi
# @Desc : Entrance of AFlow.

import os
import sys


def setup_environment():
current_path = os.path.abspath(__file__)
root_path = os.path.dirname(os.path.dirname(os.path.dirname(current_path)))
sys.path.insert(0, root_path)
os.chdir(root_path)


setup_environment()
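# Note: setup_environment() must run before the metagpt imports below; it puts
# the repository root on sys.path so the imports resolve without installing the
# package, and chdir() makes relative data paths resolve against the repo root.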

from metagpt.configs.models_config import ModelsConfig
from metagpt.ext.aflow.data.download_data import download
@@ -13,9 +25,6 @@
# QuestionType = Literal["math", "code", "qa"]
# OptimizerType = Literal["Graph", "Test"]

# When you first use AFlow, please download the datasets and initial rounds; if you want a look at the results, please download the results.
download(["datasets", "initial_rounds"])

# Crucial Parameters
dataset: DatasetType = "MATH"  # Ensure the type is consistent with DatasetType
sample: int = 4  # Sample count: how many workflows are resampled from the generated workflows
@@ -25,6 +34,7 @@
max_rounds: int = 20  # Maximum number of AFLOW optimization iterations
check_convergence: bool = True  # Whether to stop early once results converge
validation_rounds: int = 5  # Number of validation rounds in AFLOW
if_fisrt_optimize = True  # Set this to False after the first optimization run

# Configure the LLM models; you can modify `config/config2.yaml` to use more LLMs.
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
@@ -56,6 +66,8 @@
)

if __name__ == "__main__":
    # When you first use AFlow, please download the datasets and initial rounds; if you want a look at the results, download the results as well.
download(["datasets", "initial_rounds"], if_first_download=if_fisrt_optimize)
# Optimize workflow via setting the optimizer's mode to 'Graph'
optimizer.optimize("Graph")
# Test workflow via setting the optimizer's mode to 'Test'
20 changes: 9 additions & 11 deletions metagpt/ext/aflow/benchmark/benchmark.py
@@ -23,38 +23,39 @@ async def load_data(self, specific_indices: List[int] = None) -> List[dict]:
async with aiofiles.open(self.file_path, mode="r", encoding="utf-8") as file:
async for line in file:
data.append(json.loads(line))

if specific_indices is not None:
filtered_data = [data[i] for i in specific_indices if i < len(data)]
return filtered_data

return data

def save_results_to_csv(self, results: List[Tuple[Any, ...]], columns: List[str]):
df = pd.DataFrame(results, columns=columns)
avg_score = df["score"].mean()
t_cost = df["cost"].max()
a_cost = t_cost / len(df) if len(df) > 0 else 0

current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"{avg_score:.5f}_{current_time}.csv"
output_file = os.path.join(self.log_path, filename)

df.to_csv(output_file, index=False)
logger.info(f"Results saved to {output_file}")

return avg_score, a_cost, t_cost
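A side note on the cost bookkeeping above: taking `max` of the `cost` column as the total only makes sense if that column is cumulative, and the per-problem average then falls out by division. A toy illustration with invented numbers:

```python
import pandas as pd

df = pd.DataFrame({"score": [1, 0, 1], "cost": [0.01, 0.03, 0.06]})  # cumulative cost

avg_score = df["score"].mean()  # 0.667, used in the CSV filename
t_cost = df["cost"].max()       # 0.06, total spend so far
a_cost = t_cost / len(df)       # 0.02, average cost per problem
```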

def log_mismatch(self, problem: str, expected_output: Any, prediction: str, extracted_output: Any):
def log_mismatch(
self,
problem: str,
expected_output: Any,
prediction: str,
extracted_output: Any,
extract_answer_code: str = "None",
):
log_data = {
"question": problem,
"right_answer": expected_output,
"model_output": prediction,
"extracted_output": extracted_output,
"extract_answer_code": extract_answer_code,
}

log_file = os.path.join(self.log_path, "log.json")

if os.path.exists(log_file):
with open(log_file, "r", encoding="utf-8") as f:
try:
@@ -63,9 +64,7 @@ def log_mismatch(self, problem: str, expected_output: Any, prediction: str, extr
data = []
else:
data = []

data.append(log_data)

with open(log_file, "w", encoding="utf-8") as f:
json.dump(data, f, indent=4, ensure_ascii=False)
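For context on the new `extract_answer_code` field: each mismatch is appended to `log.json` as one dictionary, so a record now carries the extractor's source code alongside the outputs. A hypothetical entry, with all values invented for illustration:

```python
entry = {
    "question": "What is 2 + 2?",
    "right_answer": "4",
    "model_output": "The answer is 5.",
    "extracted_output": "5",
    "extract_answer_code": "def extract_model_answer(self, ...):\n    ...",
}
```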

@@ -89,7 +88,6 @@ async def sem_evaluate(problem):
return await self.evaluate_problem(problem, graph)

tasks = [sem_evaluate(problem) for problem in data]

return await tqdm_asyncio.gather(*tasks, desc=f"Evaluating {self.name} problems", total=len(data))

async def run_evaluation(self, graph: Callable, va_list: List[int], max_concurrent_tasks: int = 50):
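The hunk above shows the tail of the evaluation pipeline: one `sem_evaluate` coroutine per problem, gathered with a progress bar via `tqdm_asyncio`. The semaphore itself sits in a collapsed part of `run_evaluation`, so this is only a generic sketch of the bounded-concurrency pattern implied by `max_concurrent_tasks`; the names are hypothetical, not this module's API:

```python
import asyncio

async def bounded_gather(items, worker, max_concurrent: int = 50):
    """Run worker(item) for every item, at most max_concurrent at a time."""
    semaphore = asyncio.Semaphore(max_concurrent)

    async def guarded(item):
        async with semaphore:  # waits while max_concurrent tasks are in flight
            return await worker(item)

    return await asyncio.gather(*(guarded(item) for item in items))
```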
16 changes: 15 additions & 1 deletion metagpt/ext/aflow/benchmark/math.py
@@ -1,3 +1,4 @@
import inspect
import re
from math import isclose
from typing import Any, Callable, List, Tuple
@@ -98,6 +99,13 @@ def _parse(s):
pass
return False

def get_function_code(self, func):
try:
source_code = inspect.getsource(func)
return source_code
except OSError:
return "no code"

@retry(stop=stop_after_attempt(5), wait=wait_fixed(1), retry=retry_if_exception_type(Exception), reraise=True)
async def _generate_output(self, graph, input_text):
return await graph(input_text)
@@ -111,7 +119,13 @@ async def evaluate_problem(self, problem: dict, graph: Callable) -> Tuple[str, s
uni_score, extracted_output = self.calculate_score(expected_output, output)

if uni_score == 0:
self.log_mismatch(input_text, expected_output, output, extracted_output)
self.log_mismatch(
input_text,
expected_output,
output,
extracted_output,
extract_answer_code=self.get_function_code(self.extract_model_answer),
)

return input_text, output, expected_output, uni_score, cost

25 changes: 8 additions & 17 deletions metagpt/ext/aflow/data/download_data.py
@@ -68,21 +68,12 @@ def process_dataset(url: str, filename: str, extract_path: str) -> None:
}


def is_directory_empty(path: str) -> bool:
"""Check if the directory is empty"""
return len(os.listdir(path)) == 0


def download(datasets):
def download(required_datasets, if_first_download: bool = True):
"""Main function to process all selected datasets"""
for dataset_name in datasets:
dataset = datasets_to_download[dataset_name]
extract_path = dataset["extract_path"]

if os.path.exists(extract_path) and not is_directory_empty(extract_path):
logger.info(
f"Target folder {extract_path} for {dataset_name} is not empty, skipping download and extraction."
)
continue

process_dataset(dataset["url"], dataset["filename"], extract_path)
if if_first_download:
for dataset_name in required_datasets:
dataset = datasets_to_download[dataset_name]
extract_path = dataset["extract_path"]
process_dataset(dataset["url"], dataset["filename"], extract_path)
else:
logger.info("Skip downloading datasets")
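Because the old emptiness check is gone, callers now own the first-run bookkeeping (hence `if_fisrt_optimize` in `optimize.py`, which the README asks users to flip by hand). One way to automate that, sketched here purely as an illustration and not part of the commit, is a marker file:

```python
import os

def is_first_optimize(marker: str = ".aflow_initialized") -> bool:
    """Return True exactly once per checkout, then leave a marker file behind."""
    if os.path.exists(marker):
        return False
    open(marker, "w").close()  # create the empty marker
    return True

# download(["datasets", "initial_rounds"], if_first_download=is_first_optimize())
```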
Binary file removed metagpt/ext/aflow/scripts/optimized/optimized.zip