Merge pull request Trusted-AI#37 from IBM/test-meta-bugfix
added random seed for meta classifier test script
nrkarthikeyan authored Oct 18, 2018
2 parents 31df2ad + 1b65873 commit ce25a63
Showing 3 changed files with 103 additions and 121 deletions.
94 changes: 33 additions & 61 deletions aif360/algorithms/inprocessing/meta_fair_classifier.py
@@ -1,46 +1,46 @@
 # The code for Meta-Classification-Algorithm is based on, the paper https://arxiv.org/abs/1806.06055
 # See: https://github.com/vijaykeswani/FairClassification

 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 from __future__ import unicode_literals

 import numpy as np
 import pandas as pd
 import tempfile
 import os
 import subprocess

 from aif360.algorithms import Transformer
 from aif360.algorithms.inprocessing.celisMeta.FalseDiscovery import FalseDiscovery
 from aif360.algorithms.inprocessing.celisMeta.StatisticalRate import StatisticalRate

 class MetaFairClassifier(Transformer):
     """The meta algorithm here takes the fairness metric as part of the input
-    and returns a classifier optimized w.r.t. that fairness metric.
+    and returns a classifier optimized w.r.t. that fairness metric [11]_.

     References:
-        Celis, L. E., Huang, L., Keswani, V., & Vishnoi, N. K. (2018).
-        "Classification with Fairness Constraints: A Meta-Algorithm with Provable Guarantees."
+        .. [11] L. E. Celis, L. Huang, V. Keswani, and N. K. Vishnoi.
+           "Classification with Fairness Constraints: A Meta-Algorithm with
+           Provable Guarantees," 2018.
     """

     def __init__(self, tau=0.8, sensitive_attr="", type="fdr"):
         """
         Args:
-            tau (double, optional): fairness penalty parameter
-            sensitive_attr (str, optional): name of protected attribute
-            type (str, optional): the type of fairness metric to be used. Currently "fdr" and "sr" are supported.
-                To use another type, the corresponding optimization class has to be implemented.
+            tau (double, optional): Fairness penalty parameter.
+            sensitive_attr (str, optional): Name of protected attribute.
+            type (str, optional): The type of fairness metric to be used.
+                Currently "fdr" (false discovery rate ratio) and "sr"
+                (statistical rate/disparate impact) are supported. To use
+                another type, the corresponding optimization class has to be
+                implemented.
         """
         super(MetaFairClassifier, self).__init__(tau=tau,
             sensitive_attr=sensitive_attr)

         self.tau = tau
         self.sensitive_attr = sensitive_attr
         if type == "fdr":
             self.obj = FalseDiscovery()
-        if type == "sr":
+        elif type == "sr":
             self.obj = StatisticalRate()
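A note on the if → elif fix: the two conditions are mutually exclusive either way, but with either spelling an unrecognized type silently leaves self.obj unset, and fit would then fail later with an AttributeError. A stricter dispatch might look like the sketch below (hypothetical, not part of this commit):

    if type == "fdr":
        self.obj = FalseDiscovery()
    elif type == "sr":
        self.obj = StatisticalRate()
    else:
        # Fail fast on a typo instead of deferring the error to fit().
        raise ValueError("type must be 'fdr' or 'sr', got %r" % (type,))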

def fit(self, dataset):
@@ -50,69 +50,41 @@ def fit(self, dataset):
             dataset (BinaryLabelDataset): Dataset containing true labels.

         Returns:
-            PrejudiceRemover: Returns self.
-        """
-
-        data = np.column_stack([dataset.features, dataset.labels])
-        columns = dataset.feature_names + dataset.label_names
-        train_df = pd.DataFrame(data=data, columns=columns)
-
-        x_train = dataset.features
-        #print([dataset.favorable_label])
-        y_train = np.array([1 if y == [dataset.favorable_label] else -1 for y in dataset.labels])
-        x_control_train = np.array(train_df[self.sensitive_attr])
-        #print(x_train, y_train, x_control_train)
-
-        all_sensitive_attributes = dataset.protected_attribute_names
-
+            MetaFairClassifier: Returns self.
+        """
         if not self.sensitive_attr:
-            self.sensitive_attr = all_sensitive_attributes[0]
+            self.sensitive_attr = dataset.protected_attribute_names[0]
+        sens_index = dataset.feature_names.index(self.sensitive_attr)

-        model_name = self.obj.getModel(self.tau, x_train, y_train, x_control_train)
+        x_train = dataset.features
+        y_train = np.array([1 if y == [dataset.favorable_label] else
+                            -1 for y in dataset.labels])
+        x_control_train = x_train[:, sens_index].copy()
+
+        self.model = self.obj.getModel(self.tau, x_train, y_train,
+                                       x_control_train)

-        self.model_name = model_name
         return self

     def predict(self, dataset):
-        """Obtain the predictions for the provided dataset using the learned classifier
-        model
+        """Obtain the predictions for the provided dataset using the learned
+        classifier model.

         Args:
             dataset (BinaryLabelDataset): Dataset containing labels that needs
                 to be transformed.

         Returns:
-            dataset (BinaryLabelDataset): Transformed dataset.
+            BinaryLabelDataset: Transformed dataset.
         """
-
-        data = np.column_stack([dataset.features, dataset.labels])
-        columns = dataset.feature_names + dataset.label_names
-        test_df = pd.DataFrame(data=data, columns=columns)
-        x_test = dataset.features
-        #y_test = np.array([1 if y == [dataset.favorable_label] else -1 for y in dataset.labels])
-        #x_control_test = np.array(test_df[self.sensitive_attr])
-
-        all_sensitive_attributes = dataset.protected_attribute_names
-
-        model = self.model_name
-        predictions, y_res, scores = [], [], []
-        for x in x_test:
-            t = model(x)
-            if t > 0 :
-                predictions.append(1)
-                y_res.append(1)
-            else:
-                predictions.append(0)
-                y_res.append(-1)
+        predictions, scores = [], []
+        for x in dataset.features:
+            t = self.model(x)
+            predictions.append(int(t > 0))
+            scores.append((t+1)/2)

-        #print("Gamma: ", self.obj.getGamma(y_test, y_res, x_control_test))
         pred_dataset = dataset.copy()
-        pred_dataset.labels = np.array(predictions)
-        pred_dataset.scores = np.array(scores)
+        pred_dataset.labels = np.array([predictions])
+        pred_dataset.scores = np.array([scores])

         return pred_dataset
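Taken together, the rewritten fit/predict pair gives the class a conventional scikit-style flow: fit learns a scoring function via self.obj.getModel, and predict thresholds the raw score t at 0 for labels and rescales it via (t + 1) / 2 for scores. Note that labels and scores are stored as np.array([...]), i.e. a 1×n row, which is why the test script below flattens them with .ravel(). A minimal usage sketch, assuming train and test are BinaryLabelDataset splits like those built in the test script:

    from aif360.algorithms.inprocessing import MetaFairClassifier

    # tau=0.8 and type="fdr" are the defaults from __init__ above.
    model = MetaFairClassifier(tau=0.8, sensitive_attr='sex', type='fdr')
    model.fit(train)              # learns self.model via self.obj.getModel
    preds = model.predict(test)   # copy of test with new labels and scores
    print(preds.labels.ravel()[:10])   # labels in {0, 1} from int(t > 0)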


7 changes: 7 additions & 0 deletions docs/source/modules/inprocessing.rst
@@ -19,6 +19,13 @@ ART Classifier
    :inherited-members:
    :exclude-members: transform, fit_transform

+Meta Fair Classifier
+--------------------
+.. autoclass:: MetaFairClassifier
+   :members:
+   :inherited-members:
+   :exclude-members: transform, fit_transform
+
 Prejudice Remover
 -----------------
123 changes: 63 additions & 60 deletions tests/test_meta_classifier.py
@@ -3,67 +3,70 @@
 from __future__ import print_function
 from __future__ import unicode_literals

-import sys
-sys.path.append("../")
-from aif360.datasets import BinaryLabelDataset
-from aif360.datasets import AdultDataset, GermanDataset, CompasDataset
-from aif360.metrics import BinaryLabelDatasetMetric
-from aif360.metrics import ClassificationMetric
-from aif360.metrics.utils import compute_boolean_conditioning_vector
-from sklearn.linear_model import LogisticRegression
-from sklearn.preprocessing import StandardScaler, MaxAbsScaler
-from sklearn.metrics import accuracy_score
-
-from aif360.algorithms.preprocessing.optim_preproc_helpers.data_preproc_functions import load_preproc_data_adult, load_preproc_data_compas, load_preproc_data_german
-
-from aif360.algorithms.inprocessing.meta_fair_classifier import MetaFairClassifier
-from aif360.algorithms.inprocessing.celisMeta.utils import getStats
-from IPython.display import Markdown, display
-import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd

-def test_adult():
-    protected = 'sex'
-    ad = AdultDataset(protected_attribute_names=[protected],
-        privileged_classes=[['Male']], categorical_features=[],
-        features_to_keep=['age', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week'])
-
-    #scaler = MinMaxScaler(copy=False)
-    # ad.features = scaler.fit_transform(ad.features)
-
-    train, test = ad.split([32562])
-    assert np.any(test.labels)
-
-    #print(test.labels)
-
-    biased_model = MetaFairClassifier(tau=0, sensitive_attr=protected)
-    biased_model.fit(train)
-
-    dataset_bias_test = biased_model.predict(test)
-
-    predictions = [1 if y == train.favorable_label else -1 for y in list(dataset_bias_test.labels)]
-    y_test = np.array([1 if y == [train.favorable_label] else -1 for y in test.labels])
-    x_control_test = pd.DataFrame(data=test.features, columns=test.feature_names)[protected]
-
-    acc, sr, unconstrainedFDR = getStats(y_test, predictions, x_control_test)
-    #print(unconstrainedFDR)
-
-    tau = 0.9
-    debiased_model = MetaFairClassifier(tau=tau, sensitive_attr=protected)
-    debiased_model.fit(train)
-
-    #dataset_debiasing_train = debiased_model.predict(dataset_orig_train)
-    dataset_debiasing_test = debiased_model.predict(test)
-
-    predictions = list(dataset_debiasing_test.labels)
-    predictions = [1 if y == train.favorable_label else -1 for y in predictions]
-    y_test = np.array([1 if y == [train.favorable_label] else -1 for y in test.labels])
-    x_control_test = pd.DataFrame(data=test.features, columns=test.feature_names)[protected]
-
-    acc, sr, fdr = getStats(y_test, predictions, x_control_test)
-    #print(fdr, unconstrainedFDR)
-    assert(fdr >= unconstrainedFDR)
-
-#test_adult()
+from aif360.datasets import AdultDataset
+from aif360.metrics import ClassificationMetric
+from aif360.algorithms.inprocessing import MetaFairClassifier
+from aif360.algorithms.inprocessing.celisMeta.utils import getStats
+
+def test_adult():
+    np.random.seed(1)
+    # np.random.seed(9876)
+
+    protected = 'sex'
+    ad = AdultDataset(protected_attribute_names=[protected],
+        privileged_classes=[['Male']], categorical_features=[],
+        features_to_keep=['age', 'education-num', 'capital-gain',
+                          'capital-loss', 'hours-per-week'])
+
+    #scaler = MinMaxScaler(copy=False)
+    # ad.features = scaler.fit_transform(ad.features)
+
+    train, test = ad.split([32561])
+
+    biased_model = MetaFairClassifier(tau=0, sensitive_attr=protected)
+    biased_model.fit(train)
+
+    dataset_bias_test = biased_model.predict(test)
+
+    biased_cm = ClassificationMetric(test, dataset_bias_test,
+        unprivileged_groups=[{protected: 0}], privileged_groups=[{protected: 1}])
+    unconstrainedFDR2 = biased_cm.false_discovery_rate_ratio()
+    unconstrainedFDR2 = min(unconstrainedFDR2, 1/unconstrainedFDR2)
+
+    predictions = [1 if y == train.favorable_label else
+                   -1 for y in dataset_bias_test.labels.ravel()]
+    y_test = np.array([1 if y == train.favorable_label else
+                       -1 for y in test.labels.ravel()])
+    x_control_test = pd.DataFrame(data=test.features,
+                                  columns=test.feature_names)[protected]
+
+    acc, sr, unconstrainedFDR = getStats(y_test, predictions, x_control_test)
+    assert np.isclose(unconstrainedFDR, unconstrainedFDR2)
+
+    tau = 0.9
+    debiased_model = MetaFairClassifier(tau=tau, sensitive_attr=protected)
+    debiased_model.fit(train)
+
+    #dataset_debiasing_train = debiased_model.predict(dataset_orig_train)
+    dataset_debiasing_test = debiased_model.predict(test)
+
+    predictions = list(dataset_debiasing_test.labels)
+    predictions = [1 if y == train.favorable_label else
+                   -1 for y in dataset_debiasing_test.labels.ravel()]
+    y_test = np.array([1 if y == train.favorable_label else
+                       -1 for y in test.labels.ravel()])
+    x_control_test = pd.DataFrame(data=test.features,
+                                  columns=test.feature_names)[protected]
+
+    acc, sr, fdr = getStats(y_test, predictions, x_control_test)
+
+    debiased_cm = ClassificationMetric(test, dataset_debiasing_test,
+        unprivileged_groups=[{protected: 0}], privileged_groups=[{protected: 1}])
+    fdr2 = debiased_cm.false_discovery_rate_ratio()
+    fdr2 = min(fdr2, 1/fdr2)
+    assert np.isclose(fdr, fdr2)
+    #print(fdr, unconstrainedFDR)
+    assert(fdr2 >= unconstrainedFDR2)
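The test now cross-checks getStats against ClassificationMetric.false_discovery_rate_ratio(), and the np.random.seed(1) at the top is the fix named in the commit message: the meta classifier's training involves randomness, so pinning the seed makes the np.isclose and ratio comparisons reproducible. The min(r, 1/r) step folds the group ratio onto (0, 1] so that disparities in either direction compare on the same scale, with 1.0 meaning parity. A standalone sketch of that folding (illustrative only):

    # min(r, 1/r): a ratio of 0.8 and its reciprocal 1.25 describe the
    # same disparity, so both fold to 0.8; 1.0 means the groups are at parity.
    def symmetric_ratio(r):
        return min(r, 1.0 / r)

    assert abs(symmetric_ratio(1.25) - 0.8) < 1e-12
    assert symmetric_ratio(1.0) == 1.0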
