-
Notifications
You must be signed in to change notification settings - Fork 2
/
update_result.py
72 lines (65 loc) · 3.37 KB
/
update_result.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
from collections import OrderedDict
# from evaluator.utils import parse_data
from evaluator.utils_win import parse_data
def merge_result(model_names):
    """Merge per-model result files into two tab-separated summary tables.

    For each name in ``model_names`` the file ``results/<name>/results.txt``
    is read and passed to ``parse_data``. The parsed value is used as a
    mapping ``{software: {label: score}}``; each software is expected to
    carry two scores, matching the ``Basic``/``Advanced`` sub-headers
    (NOTE(review): confirm against ``parse_data``).

    Two files are written:

    * ``results/results_col.txt`` -- one row per software, two columns per
      model, with the ``Overall`` row last.
    * ``results/results.txt`` -- one row per model, two columns per
      software, with the ``Overall`` columns first.

    Softwares are listed in the order they are first seen while reading the
    models in ``model_names`` order, so repeated runs produce identical
    files. A model that has no entry for a software gets ``-`` in both of
    its cells. ``Overall`` is only emitted if at least one model reports it.

    Args:
        model_names: Iterable of result directory names under ``results/``.

    Raises:
        OSError: If a per-model results file cannot be read or an output
            file cannot be written.
    """
    sub_headers = ['Basic', 'Advanced']
    missing_cells = ['-', '-']
    # Materialize once: the names are iterated several times below.
    model_names = list(model_names)

    def _load_result(result_dir):
        """Return the lines of the text file at ``result_dir``."""
        with open(result_dir, 'r') as f:
            return f.readlines()

    def _save_result(result, result_dir):
        """Write the rows in ``result`` to ``result_dir``, newline-separated."""
        with open(result_dir, 'w') as f:
            # One write of the joined text; no trailing newline is added.
            f.write('\n'.join(result))

    overall_dict = OrderedDict()
    # Used as an ordered set: keeps softwares in first-seen order instead of
    # the run-dependent order a plain set() would give.
    seen_softwares = OrderedDict()
    for model_name in model_names:
        result_dict = parse_data(_load_result(f'results/{model_name}/results.txt'))
        overall_dict[model_name] = result_dict
        for software in result_dict.keys():
            seen_softwares.setdefault(software, None)

    # 'Overall' is positioned explicitly (last or first), and only if present.
    overall_part = ['Overall'] if 'Overall' in seen_softwares else []
    other_softwares = [s for s in seen_softwares if s != 'Overall']

    def _cells(model_name, software):
        """Return the score cells of ``software`` for ``model_name`` as strings."""
        model_result = overall_dict[model_name]
        if software in model_result:
            # str() is a no-op for string scores and keeps the join below from
            # failing should parse_data return numbers.
            return [str(value) for value in model_result[software].values()]
        return list(missing_cells)

    def _render(head_line, start_line, body_lines):
        """Join every row of the table with tabs."""
        return ['\t'.join(row) for row in [head_line, start_line] + body_lines]

    # Table 1: softwares as rows, models as column pairs, 'Overall' last.
    col_softwares = other_softwares + overall_part
    head_line = ['Model']
    start_line = ['Task']
    for model_name in model_names:
        head_line.extend([model_name, model_name])
        start_line.extend(sub_headers)
    software_lines = []
    for software in col_softwares:
        row = [software]
        for model_name in model_names:
            row.extend(_cells(model_name, software))
        software_lines.append(row)
    _save_result(_render(head_line, start_line, software_lines), 'results/results_col.txt')

    # Table 2: models as rows, softwares as column pairs, 'Overall' first.
    row_softwares = overall_part + other_softwares
    head_line = ['Task']
    start_line = ['Model']
    for software in row_softwares:
        head_line.extend([software, software])
        start_line.extend(sub_headers)
    model_lines = []
    for model_name in model_names:
        row = [model_name]
        for software in row_softwares:
            row.extend(_cells(model_name, software))
        model_lines.append(row)
    _save_result(_render(head_line, start_line, model_lines), 'results/results.txt')
if __name__ == '__main__':
    # Script entry point: merge the results of the model runs listed below.
    # Model sets from other runs, kept for reference:
    # merge_result(['gemini-1.5-flash', 'gpt-4', 'gpt-4o-mini', 'MetaGPT', 'ChatDev'])
    # merge_result(['res1', 'res2', 'res3'])
    # merge_result(['ChatDev-BackendFirst', 'ChatDev-BackendFirst-Updating-0', 'ChatDev-BackendFirst-Updating-1', 'ChatDev-Coding-0', 'ChatDev-Coding-1', 'ChatDev-Coding-Updating', 'ChatDev-Coding-Updating2', 'ChatDev-Coding-Updating3','ChatDev-Coding-Reviewing','ChatDev-Coding-Reviewing2','ChatDev-Coding-Reviewing3', 'ChatDev-Coding-Reviewing-Updating'])
    selected_models = [
        'ChatDev-updating-0',
        'ChatDev-updating-1',
        'ChatDev-updating-2',
        'ChatDev-updating-3',
        'ChatDev-updating-4',
        'ChatDev-updating-5',
    ]
    merge_result(selected_models)