-
Notifications
You must be signed in to change notification settings - Fork 0
/
compute_top_accuracy.py
92 lines (73 loc) · 3.12 KB
/
compute_top_accuracy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
def makedirs(path: str, isfile: bool = False) -> None:
    """
    Creates a directory given a path to either a directory or file.

    If a directory is provided, creates that directory. If a file is provided
    (i.e. isfile == True), creates the parent directory for that file.
    Directories that already exist are left untouched.

    :param path: Path to a directory or file.
    :param isfile: True if ``path`` points to a file, in which case its parent
        directory is created; False if ``path`` is itself the directory.
    """
    if isfile:
        path = os.path.dirname(path)
    # A bare file name has an empty dirname; there is nothing to create then,
    # and os.makedirs('') would raise.
    if path != '':
        os.makedirs(path, exist_ok=True)
def plot_results(complex_name, top_accuracy_list, path):
    """
    Plots a hit-rate curve and saves it as a PDF inside ``path``.

    The output file is always named "Top ranked considered.pdf", so repeated
    calls with the same ``path`` overwrite the previous plot.

    :param complex_name: Title of the plot.
    :param top_accuracy_list: Sequence of hit rates; entry ``k`` is drawn at
        x = k + 1, i.e. the hit rate when the top ``k + 1`` entries are considered.
    :param path: Directory the PDF is written to (created if missing).
    """
    makedirs(path)
    # Ranks are 1-based: the first entry is the hit rate of the top-1 result.
    ranks = range(1, len(top_accuracy_list) + 1)
    fig, ax = plt.subplots()
    try:
        ax.plot(ranks, top_accuracy_list, '.-')
        ax.set_ylim([0, 1.0])
        ax.grid(True)
        ax.set_xlabel('Top ranked considered')
        ax.set_ylabel('Hit Rate')
        ax.set_title(complex_name)
        # Save this figure explicitly rather than whatever pyplot considers current.
        fig.savefig(os.path.join(path, "Top ranked considered.pdf"))
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
def complex_hit_rate_curve(decoys, cut_off):
    """
    Computes the running hit rate over the ranked decoys of one complex.

    Decoys are ranked by descending 'prediction'; only decoys whose prediction
    is strictly greater than ``cut_off`` are kept.

    :param decoys: DataFrame with 'prediction' and 'label' columns
        (presumably label is 1 for a hit and 0 otherwise -- confirm against the data).
    :param cut_off: Prediction threshold; decoys at or below it are discarded.
    :return: List whose entry ``k`` is sum(label of the top k + 1 decoys) / (k + 1).
        Empty if no decoy passes the cut-off.
    """
    # NOTE(review): ties in 'prediction' are broken by descending 'label', which
    # ranks hits first among equally scored decoys (an optimistic tie-break).
    ranked = decoys.sort_values(by=['prediction', 'label'], ascending=False)
    ranked = ranked.loc[ranked['prediction'] > cut_off]
    labels = ranked['label'].to_numpy()
    # One cumulative sum replaces re-slicing and re-summing for every k.
    return (np.cumsum(labels) / np.arange(1, labels.size + 1)).tolist()


def fold_hit_rate_curves(data, cut_off):
    """
    Computes one hit-rate curve per complex for the predictions of a fold.

    The complex name is the part of 'decoy_no' before the first '.'. Rows are
    grouped by exact equality of that name, so a complex whose name is a
    substring of another (e.g. '1A2' and '1A2K') is not mixed with it.

    :param data: DataFrame with 'decoy_no', 'prediction' and 'label' columns.
    :param cut_off: Prediction threshold passed to complex_hit_rate_curve.
    :return: Dict mapping complex name to its hit-rate curve, in order of
        first appearance in ``data``.
    """
    complex_names = data['decoy_no'].str.split('.', n=1).str[0]
    return {
        name: complex_hit_rate_curve(group, cut_off)
        for name, group in data.groupby(complex_names, sort=False)
    }


def mean_curve(curves, top_high):
    """
    Averages hit-rate curves of different lengths rank by rank.

    At each rank only the curves that are long enough contribute to the mean.

    :param curves: Iterable of hit-rate curves (sequences of floats).
    :param top_high: Number of ranks to report.
    :return: Array of length ``top_high``; NaN at ranks no curve reaches.
    """
    totals = np.zeros(top_high)
    counts = np.zeros(top_high)
    for curve in curves:
        n = min(len(curve), top_high)
        totals[:n] += curve[:n]
        counts[:n] += 1
    # Ranks without any contributing curve are undefined: report NaN explicitly
    # instead of relying on a 0/0 division.
    mean = np.full(top_high, np.nan)
    covered = counts > 0
    mean[covered] = totals[covered] / counts[covered]
    return mean


def main():
    """
    Averages the top-k hit rate over all cross-validation folds and plots it.

    For every fold the predictions CSV is read, a hit-rate curve is computed
    per complex, the curves are averaged per rank, and finally the per-fold
    averages are averaged over the folds and plotted.
    """
    fir_dir = "/home/fei/Research/Dataset/decoy/1_decoys_bm4_zd3.0.2_15deg/exp_3/9_test_results"
    run_name = '20200327-3'
    num_folds = 5
    top_high = 50
    cut_off = 0.7

    fold_means = []
    for fold in range(num_folds):
        preds_path = os.path.join(fir_dir, f'fold_{fold}', run_name, 'predsLogModel.csv')
        data = pd.read_csv(preds_path)
        curves = fold_hit_rate_curves(data, cut_off)
        fold_means.append(mean_curve(curves.values(), top_high))

    # A rank that is NaN in any fold stays NaN in the overall average.
    average_top = np.mean(fold_means, axis=0)
    # The output directory name carries the cut-off that was actually used.
    plot_results("average", average_top, os.path.join(fir_dir, f'{run_name}-{cut_off}'))


if __name__ == '__main__':
    main()