Delete unnecessary part & Update Optimize for better use
didiforgithub committed Oct 25, 2024
1 parent 06c1915 commit 38c825d
Showing 6 changed files with 64 additions and 45 deletions.
30 changes: 17 additions & 13 deletions examples/aflow/README.md
@@ -38,14 +38,15 @@ For custom tasks, you can reference the code in the `metagpt/ext/aflow/benchmark`
- Open `examples/aflow/optimize.py`
- Set the following parameters:
```python
dataset = "HumanEval" # Choose from: "HumanEval", "MBPP", "GSM8K", "MATH", "HotpotQA", "DROP" or your custom dataset name
question_type = "code" # Choose from: "math", "code", "qa"
sample = 4 # Number of samples to use for optimization
check_convergence = True # Whether to check for convergence
optimized_path = "path/to/optimized/workflows" # Path to save optimized workflows, defaults to metagpt/ext/aflow/scripts/optimized
initial_round = 1 # Starting round number
max_rounds = 20 # Maximum number of optimization rounds
validation_rounds = 5 # The validation rounds of AFLOW.
dataset: DatasetType = "MATH"  # Ensure the type is consistent with DatasetType
sample: int = 4  # Sample count: how many workflows are resampled from the generated workflows
question_type: QuestionType = "math"  # Ensure the type is consistent with QuestionType
optimized_path: str = "metagpt/ext/aflow/scripts/optimized"  # Path to save the optimized results
initial_round: int = 1  # Starting round number
max_rounds: int = 20  # Maximum number of AFLOW optimization iterations
check_convergence: bool = True  # Whether to stop early once results converge
validation_rounds: int = 5  # Number of validation rounds in AFLOW
if_fisrt_optimize = True  # Set this to False after the first optimization run
```
- Adjust these parameters according to your specific requirements and dataset (a short sketch of how the new `if_fisrt_optimize` flag is consumed follows just below)
2. Set up parameters in `config/config2.yaml` (see `examples/aflow/config2.example.yaml` for reference)
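As a quick orientation, here is a minimal sketch of how the new `if_fisrt_optimize` flag is meant to be used. It mirrors the committed code in `examples/aflow/optimize.py` and `download_data.py` shown below; the flag name keeps the repository's own spelling:

```python
from metagpt.ext.aflow.data.download_data import download

if_fisrt_optimize = True  # set to False after the first optimization run

# First run: download and extract the datasets and initial rounds.
# Later runs: download() only logs that downloading is skipped.
download(["datasets", "initial_rounds"], if_first_download=if_fisrt_optimize)
```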
@@ -66,10 +67,13 @@ For custom tasks, you can reference the code in the `metagpt/ext/aflow/benchmark`
If you use AFlow in your research, please cite our paper:

```
@article{zhang2024aflow,
  title={AFlow: Automating Agentic Workflow Generation},
  author={Zhang, Jiayi and Xiang, Jinyu and Yu, Zhaoyang and Teng, Fengwei and Chen, Xionghui and Chen, Jiaqi and Zhuge, Mingchen and Cheng, Xin and Hong, Sirui and Wang, Jinlin and others},
  journal={arXiv preprint arXiv:2410.10762},
  year={2024}
}
@misc{zhang2024aflow,
  title={AFlow: Automating Agentic Workflow Generation},
  author={Jiayi Zhang and Jinyu Xiang and Zhaoyang Yu and Fengwei Teng and Xionghui Chen and Jiaqi Chen and Mingchen Zhuge and Xin Cheng and Sirui Hong and Jinlin Wang and Bingnan Zheng and Bang Liu and Yuyu Luo and Chenglin Wu},
  year={2024},
  eprint={2410.10762},
  archivePrefix={arXiv},
  primaryClass={cs.AI},
  url={https://arxiv.org/abs/2410.10762}
}
```
18 changes: 15 additions & 3 deletions examples/aflow/optimize.py
@@ -3,6 +3,18 @@
# @Author : didi
# @Desc : Entrance of AFlow.

import os
import sys


def setup_environment():
current_path = os.path.abspath(__file__)
root_path = os.path.dirname(os.path.dirname(os.path.dirname(current_path)))
sys.path.insert(0, root_path)
os.chdir(root_path)


setup_environment()
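# Note: setup_environment() must run before the metagpt imports below; it puts
# the repository root on sys.path so the imports resolve without installing the
# package, and chdir() makes relative data paths resolve against the repo root.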

from metagpt.configs.models_config import ModelsConfig
from metagpt.ext.aflow.data.download_data import download
@@ -13,9 +25,6 @@
# QuestionType = Literal["math", "code", "qa"]
# OptimizerType = Literal["Graph", "Test"]

# When you first use AFlow, please download the datasets and initial rounds; if you want a look at the results, please download the results.
download(["datasets", "initial_rounds"])

# Crucial Parameters
dataset: DatasetType = "MATH"  # Ensure the type is consistent with DatasetType
sample: int = 4  # Sample count: how many workflows are resampled from the generated workflows
@@ -25,6 +34,7 @@
max_rounds: int = 20  # Maximum number of AFLOW optimization iterations
check_convergence: bool = True  # Whether to stop early once results converge
validation_rounds: int = 5  # Number of validation rounds in AFLOW
if_fisrt_optimize = True  # Set this to False after the first optimization run

# Configure the LLM models; you can modify `config/config2.yaml` to use more LLMs.
mini_llm_config = ModelsConfig.default().get("gpt-4o-mini")
@@ -56,6 +66,8 @@
)

if __name__ == "__main__":
    # When you first use AFlow, please download the datasets and initial rounds; if you want a look at the results, download the results as well.
download(["datasets", "initial_rounds"], if_first_download=if_fisrt_optimize)
# Optimize workflow via setting the optimizer's mode to 'Graph'
optimizer.optimize("Graph")
# Test workflow via setting the optimizer's mode to 'Test'
20 changes: 9 additions & 11 deletions metagpt/ext/aflow/benchmark/benchmark.py
@@ -23,38 +23,39 @@ async def load_data(self, specific_indices: List[int] = None) -> List[dict]:
async with aiofiles.open(self.file_path, mode="r", encoding="utf-8") as file:
async for line in file:
data.append(json.loads(line))

if specific_indices is not None:
filtered_data = [data[i] for i in specific_indices if i < len(data)]
return filtered_data

return data

def save_results_to_csv(self, results: List[Tuple[Any, ...]], columns: List[str]):
df = pd.DataFrame(results, columns=columns)
avg_score = df["score"].mean()
t_cost = df["cost"].max()
a_cost = t_cost / len(df) if len(df) > 0 else 0

current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
filename = f"{avg_score:.5f}_{current_time}.csv"
output_file = os.path.join(self.log_path, filename)

df.to_csv(output_file, index=False)
logger.info(f"Results saved to {output_file}")

return avg_score, a_cost, t_cost
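A side note on the cost bookkeeping above: taking `max` of the `cost` column as the total only makes sense if that column is cumulative, and the per-problem average then falls out by division. A toy illustration with invented numbers:

```python
import pandas as pd

df = pd.DataFrame({"score": [1, 0, 1], "cost": [0.01, 0.03, 0.06]})  # cumulative cost

avg_score = df["score"].mean()  # 0.667, used in the CSV filename
t_cost = df["cost"].max()       # 0.06, total spend so far
a_cost = t_cost / len(df)       # 0.02, average cost per problem
```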

def log_mismatch(self, problem: str, expected_output: Any, prediction: str, extracted_output: Any):
def log_mismatch(
self,
problem: str,
expected_output: Any,
prediction: str,
extracted_output: Any,
extract_answer_code: str = "None",
):
log_data = {
"question": problem,
"right_answer": expected_output,
"model_output": prediction,
"extracted_output": extracted_output,
"extract_answer_code": extract_answer_code,
}

log_file = os.path.join(self.log_path, "log.json")

if os.path.exists(log_file):
with open(log_file, "r", encoding="utf-8") as f:
try:
@@ -63,9 +64,7 @@ def log_mismatch(self, problem: str, expected_output: Any, prediction: str, extr
data = []
else:
data = []

data.append(log_data)

with open(log_file, "w", encoding="utf-8") as f:
json.dump(data, f, indent=4, ensure_ascii=False)
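For context on the new `extract_answer_code` field: each mismatch is appended to `log.json` as one dictionary, so a record now carries the extractor's source code alongside the outputs. A hypothetical entry, with all values invented for illustration:

```python
entry = {
    "question": "What is 2 + 2?",
    "right_answer": "4",
    "model_output": "The answer is 5.",
    "extracted_output": "5",
    "extract_answer_code": "def extract_model_answer(self, ...):\n    ...",
}
```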

@@ -89,7 +88,6 @@ async def sem_evaluate(problem):
return await self.evaluate_problem(problem, graph)

tasks = [sem_evaluate(problem) for problem in data]

return await tqdm_asyncio.gather(*tasks, desc=f"Evaluating {self.name} problems", total=len(data))

async def run_evaluation(self, graph: Callable, va_list: List[int], max_concurrent_tasks: int = 50):
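The hunk above shows the tail of the evaluation pipeline: one `sem_evaluate` coroutine per problem, gathered with a progress bar via `tqdm_asyncio`. The semaphore itself sits in a collapsed part of `run_evaluation`, so this is only a generic sketch of the bounded-concurrency pattern implied by `max_concurrent_tasks`; the names are hypothetical, not this module's API:

```python
import asyncio

async def bounded_gather(items, worker, max_concurrent: int = 50):
    """Run worker(item) for every item, at most max_concurrent at a time."""
    semaphore = asyncio.Semaphore(max_concurrent)

    async def guarded(item):
        async with semaphore:  # waits while max_concurrent tasks are in flight
            return await worker(item)

    return await asyncio.gather(*(guarded(item) for item in items))
```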
16 changes: 15 additions & 1 deletion metagpt/ext/aflow/benchmark/math.py
@@ -1,3 +1,4 @@
import inspect
import re
from math import isclose
from typing import Any, Callable, List, Tuple
@@ -98,6 +99,13 @@ def _parse(s):
pass
return False

def get_function_code(self, func):
try:
source_code = inspect.getsource(func)
return source_code
except OSError:
return "no code"

@retry(stop=stop_after_attempt(5), wait=wait_fixed(1), retry=retry_if_exception_type(Exception), reraise=True)
async def _generate_output(self, graph, input_text):
return await graph(input_text)
@@ -111,7 +119,13 @@ async def evaluate_problem(self, problem: dict, graph: Callable) -> Tuple[str, s
uni_score, extracted_output = self.calculate_score(expected_output, output)

if uni_score == 0:
self.log_mismatch(input_text, expected_output, output, extracted_output)
self.log_mismatch(
input_text,
expected_output,
output,
extracted_output,
extract_answer_code=self.get_function_code(self.extract_model_answer),
)

return input_text, output, expected_output, uni_score, cost

25 changes: 8 additions & 17 deletions metagpt/ext/aflow/data/download_data.py
@@ -68,21 +68,12 @@ def process_dataset(url: str, filename: str, extract_path: str) -> None:
}


def is_directory_empty(path: str) -> bool:
"""Check if the directory is empty"""
return len(os.listdir(path)) == 0


def download(datasets):
def download(required_datasets, if_first_download: bool = True):
"""Main function to process all selected datasets"""
for dataset_name in datasets:
dataset = datasets_to_download[dataset_name]
extract_path = dataset["extract_path"]

if os.path.exists(extract_path) and not is_directory_empty(extract_path):
logger.info(
f"Target folder {extract_path} for {dataset_name} is not empty, skipping download and extraction."
)
continue

process_dataset(dataset["url"], dataset["filename"], extract_path)
if if_first_download:
for dataset_name in required_datasets:
dataset = datasets_to_download[dataset_name]
extract_path = dataset["extract_path"]
process_dataset(dataset["url"], dataset["filename"], extract_path)
else:
logger.info("Skip downloading datasets")
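Because the old emptiness check is gone, callers now own the first-run bookkeeping (hence `if_fisrt_optimize` in `optimize.py`, which the README asks users to flip by hand). One way to automate that, sketched here purely as an illustration and not part of the commit, is a marker file:

```python
import os

def is_first_optimize(marker: str = ".aflow_initialized") -> bool:
    """Return True exactly once per checkout, then leave a marker file behind."""
    if os.path.exists(marker):
        return False
    open(marker, "w").close()  # create the empty marker
    return True

# download(["datasets", "initial_rounds"], if_first_download=is_first_optimize())
```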
Binary file removed metagpt/ext/aflow/scripts/optimized/optimized.zip