_model_standardizer.py
"""
This module has the functions which take a model prediction function (which returns probability) and return
a standardized function that can be used by the CF generator.
"""
import copy
import numpy as np
import pandas as pd
from ._data_standardizer import _seg_to_img


def _standardize_predictor(factual, model_predict_proba):
    # Convert the output of the prediction function to something that can be treated uniformly:
    # _mp1 always returns the class 1 probability, as [Num] for a single row or
    # [Num, Num, Num] for multiple rows
    prob_fact = model_predict_proba(factual.to_frame().T)

    # Check what the output looks like for multiple rows
    prob_fact_multiple = model_predict_proba(pd.concat([factual.to_frame().T, factual.to_frame().T]))

    if str(prob_fact).isnumeric():
        # The prediction function returns a number directly
        if len(np.array(prob_fact_multiple).shape) == 1:
            # Single: Num
            # Multiple: [Num, Num, Num]
            def _mp1(x):
                return np.array([model_predict_proba(x)]) if x.shape[0] == 1 else \
                    np.array(model_predict_proba(x))
        else:
            # Single: Num
            # Multiple: [[Num], [Num], [Num]]
            def _mp1(x):
                return np.array([model_predict_proba(x)]) if x.shape[0] == 1 else \
                    np.array(model_predict_proba(x))[:, 0]
    elif len(np.array(prob_fact).shape) == 1:
        if len(np.array(prob_fact_multiple).shape) == 1:
            # Single: [Num]
            # Multiple: [Num, Num, Num]
            def _mp1(x):
                return np.array(model_predict_proba(x))
        else:
            # Single: [Num]
            # Multiple: [[Num], [Num], [Num]]
            if len(np.array(prob_fact_multiple)[0]) > 1:
                # If there is more than one class, the multiclass nonspecific strategy is used,
                # where the factual class (the largest initial output) is compared with the
                # highest scoring other class
                # TODO: This can be improved with other strategies, like second best or even a specific class
                _adjusted_nonspecific_mp1 = _adjust_multiclass_nonspecific(
                    np.array([factual.to_numpy()]),
                    lambda z: np.array([model_predict_proba(z)]) if np.array(z).shape[0] == 1
                    else np.array(model_predict_proba(z)))

                def _mp1(x):
                    return _adjusted_nonspecific_mp1(x)
            else:
                def _mp1(x):
                    return np.array([model_predict_proba(x)[0]]) if x.shape[0] == 1 else \
                        np.array(model_predict_proba(x))[:, 0]
    else:
        # Single: [[Num]]
        # Multiple: [[Num], [Num], [Num]]
        if len(prob_fact[0]) > 1:
            # If there is more than one class, the multiclass nonspecific strategy is used,
            # where the factual class (the largest initial output) is compared with the
            # highest scoring other class
            # TODO: This can be improved with other strategies, like second best or even a specific class
            _adjusted_nonspecific_mp1 = _adjust_multiclass_nonspecific(
                np.array([factual.to_numpy()]),
                lambda z: np.array(model_predict_proba(z)))

            def _mp1(x):
                return _adjusted_nonspecific_mp1(x)
        else:
            # This function gives an array containing the class 1 probability
            def _mp1(x):
                return np.array(model_predict_proba(x))[:, 0]

    return _mp1
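

# Illustrative sketch added for documentation, not part of the original module: a hypothetical
# predict_proba-style model returning an (n_rows, 2) array is wrapped so that the standardized
# function yields one probability-like value per candidate row (here via the multiclass
# nonspecific strategy, since the model outputs more than one column). The data and model below
# are made-up placeholders.
def _example_standardize_predictor():
    factual = pd.Series({'age': 30.0, 'income': 1500.0})

    def toy_predict_proba(df):
        # Hypothetical binary model: class 1 probability rises with income
        p1 = 1 / (1 + np.exp(-(np.asarray(df, dtype=float)[:, 1] - 1000.0) / 1000.0))
        return np.column_stack([1 - p1, p1])

    mp1 = _standardize_predictor(factual, toy_predict_proba)
    # Returns one value per row of the candidate DataFrame
    return mp1(pd.DataFrame([factual, factual]))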


def _adjust_model_class(factual, mp1):
    # Define the CF try
    cf_try = copy.copy(factual).to_numpy()
    _mp1c = mp1
    # Adjust the class: the problem must be binary and the factual probability must be below 0.5,
    # so if the factual scores above 0.5, flip the output
    if mp1(np.array([cf_try]))[0] > 0.5:
        def _mp1c(x):
            return 1 - mp1(x)
    return _mp1c
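

# Illustrative sketch added for documentation, not part of the original module: when the factual
# row already scores above 0.5, the adjusted function flips the probability, so the CF search can
# always aim for values above 0.5. The data and scorer below are made-up placeholders.
def _example_adjust_model_class():
    factual = pd.Series({'x0': 2.0, 'x1': 0.0})

    def toy_mp1(arr):
        # Hypothetical standardized predictor: one probability per row
        return 1 / (1 + np.exp(-np.asarray(arr, dtype=float)[:, 0]))

    mp1c = _adjust_model_class(factual, toy_mp1)
    # The factual scores ~0.88 (> 0.5), so mp1c returns 1 - toy_mp1(...) (~0.12 here)
    return mp1c(np.array([factual.to_numpy()]))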


def _adjust_image_model(img, model_predict, segments, replace_img):
    # seg_arr is the array where the index represents the segmentation area number and the value
    # 1 means the original image and 0 the replaced image
    def _mic(seg_arr):
        converted_images = _seg_to_img(seg_arr, img, segments, replace_img)
        return model_predict(np.array(converted_images))

    return _mic


def _convert_to_numpy(data):
    if type(data) == pd.DataFrame:
        return data.to_numpy()
    if type(data) == np.ndarray:
        return data


def _adjust_multiclass_nonspecific(factual: np.ndarray, mic):
    # Compare the factual class value to the highest of the other classes
    pred_factual = mic(np.array([factual]) if len(factual.shape) == 1 else factual)
    factual_idx = np.argmax(pred_factual)

    def _mimns(cf_candidates):
        # Calculate the prediction of the candidates
        pred_cfs = mic(_convert_to_numpy(cf_candidates)).astype('float')
        # Get the value of the factual class
        pred_factual_class = np.copy(pred_cfs[:, factual_idx])
        # To guarantee we get the best non-factual value, set the factual index to -infinity
        pred_cfs[:, factual_idx] = -np.inf
        # Now, get the best value for each candidate
        pred_best_cf_class = np.max(pred_cfs, axis=1)
        # Make the comparison
        class_dif = pred_factual_class - pred_best_cf_class
        # Return a probability-like value, where 0 means factual and 1 counterfactual
        return 1 / (1 + np.e ** class_dif)

    return _mimns
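

# Illustrative sketch added for documentation, not part of the original module: for a made-up
# 3-class scorer where class 2 is the factual class, the returned value is the sigmoid of
# (best other class score - factual class score), so candidates where another class overtakes
# the factual one score above 0.5. The scores below need not be calibrated probabilities.
def _example_adjust_multiclass_nonspecific():
    def toy_mic(arr):
        # Hypothetical 3-class scorer keyed on the first feature
        first = np.asarray(arr, dtype=float)[:, 0]
        return np.column_stack([first * 0.1, 0.3 - first * 0.05, 0.6 - first * 0.05])

    factual = np.array([0.0, 1.0])  # class 2 scores highest (0.6) for this factual
    mimns = _adjust_multiclass_nonspecific(factual, toy_mic)
    # For a candidate with first feature 8.0, class 0 (0.8) overtakes class 2 (0.2), so the
    # result is 1 / (1 + e ** (0.2 - 0.8)) ~= 0.65, i.e. a counterfactual-like score
    return mimns(np.array([[8.0, 1.0]]))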


def _adjust_multiclass_second_best(factual: np.ndarray, mic):
    # In this function, we take the second-highest scoring class and make it the CF class to be
    # found. Then, the score of this target (the originally second-highest scoring class) is
    # compared with the best of the remaining classes.
    # Find the factual class and, among the remaining classes, the CF target class
    pred_factual = mic(np.array([factual]) if len(factual.shape) == 1 else factual)
    factual_idx = copy.copy(np.argmax(pred_factual))
    pred_factual[0][factual_idx] = -np.inf
    cf_idx = copy.copy(np.argmax(pred_factual))

    def _mimns(cf_candidates):
        # Calculate the prediction of the candidates
        # Sometimes this can receive a DataFrame; if so, convert it accordingly
        pred_cfs = mic(_convert_to_numpy(cf_candidates))
        # Get the value of the CF class
        pred_cf_class = np.copy(pred_cfs[:, cf_idx])
        # To guarantee we get the best non-CF value, set the CF index to -infinity
        pred_cfs[:, cf_idx] = -np.inf
        # Now, get the best value which is not the CF
        pred_best_ncf_class = np.max(pred_cfs, axis=1)
        # Make the comparison
        class_dif = pred_best_ncf_class - pred_cf_class
        # Return a probability-like value, where 0 means factual and 1 counterfactual
        return 1 / (1 + np.e ** class_dif)

    return _mimns


def _adjust_textual_classifier(textual_classifier, converter, original_text_classification):
    # If the original text scores below 0.5, keep the classifier output as-is; otherwise flip it,
    # so that higher values always point towards the counterfactual class
    return lambda array_texts: textual_classifier(converter(array_texts)) \
        if original_text_classification < 0.5 else 1 - textual_classifier(converter(array_texts))
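

# Illustrative sketch added for documentation, not part of the original module: a made-up text
# scorer whose output is flipped because the original text already scores above 0.5; lower raw
# scores then map to higher, more counterfactual-like values. The classifier and converter are
# hypothetical placeholders.
def _example_adjust_textual_classifier():
    def toy_classifier(texts):
        # Hypothetical scorer: probability grows with the number of words
        return np.array([min(len(t.split()) / 10.0, 1.0) for t in texts])

    def identity_converter(texts):
        return texts

    original_score = toy_classifier(['a long positive example text with many many words'])[0]
    adjusted = _adjust_textual_classifier(toy_classifier, identity_converter, original_score)
    # original_score is 0.9 (>= 0.5), so the adjusted output is 1 - 0.2 = 0.8 for this short text
    return adjusted(['short text'])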