
Merge pull request Trusted-AI#249 from Viktour19/master
Update mdss definition of privileged group
nrkarthikeyan authored Jun 15, 2021
2 parents aa3d077 + 56864ee commit 5a5f54c
Showing 6 changed files with 253 additions and 137 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -56,7 +56,7 @@ Get in touch with us on [Slack](https://aif360.slack.com) (invitation
* Comprehensive set of sample distortion metrics
* Generalized Entropy Index ([Speicher et al., 2018](https://doi.org/10.1145/3219819.3220046))
* Differential Fairness and Bias Amplification ([Foulds et al., 2018](https://arxiv.org/pdf/1807.08362))
-* Bias Scan with Multi-Dimensional Subset Scan ([Zhang et al., 2017](https://arxiv.org/abs/1611.08292))
+* Bias Scan with Multi-Dimensional Subset Scan ([Zhang, Neill, 2017](https://arxiv.org/abs/1611.08292))

## Setup

4 changes: 2 additions & 2 deletions aif360/metrics/mdss/MDSS.py
@@ -196,12 +196,12 @@ def score_current_subset(self, coordinates: pd.DataFrame, probs: pd.Series, outc
penalized_score = scoring_function.score(observed_sum, probs, total_penalty, current_q_mle)
return penalized_score

-def scan(self, coordinates: pd.DataFrame, outcomes: pd.Series, probs: pd.Series, penalty: float,
+def scan(self, coordinates: pd.DataFrame, probs: pd.Series, outcomes: pd.Series, penalty: float,
num_iters: int, verbose: bool = False, seed: int = 0):
"""
:param coordinates: data frame containing as columns the covariates/features
-:param outcomes: data series containing the outcomes/observed outcomes
:param probs: data series containing the probabilities/expected outcomes
+:param outcomes: data series containing the outcomes/observed outcomes
:param penalty: penalty coefficient
:param num_iters: number of iterations
:param verbose: logging flag
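For orientation, here is a minimal sketch of calling the reordered `scan` signature (probabilities now precede observed outcomes). The data, column names, and variable names below are invented for illustration and are not part of this commit.

```python
# Hypothetical usage of the reordered scan() signature (probs before outcomes).
# The data below is synthetic and only illustrates argument placement.
import numpy as np
import pandas as pd

from aif360.metrics.mdss.ScoringFunctions import Bernoulli
from aif360.metrics.mdss.MDSS import MDSS

rng = np.random.default_rng(0)
coordinates = pd.DataFrame({'age_bin': rng.integers(0, 3, 100),
                            'sex': rng.integers(0, 2, 100)})
probs = pd.Series(rng.uniform(0.2, 0.8, 100))    # expected outcomes (probabilities)
outcomes = pd.Series(rng.integers(0, 2, 100))    # observed binary outcomes

scanner = MDSS(Bernoulli(direction='positive'))
# probs now comes second, outcomes third
result = scanner.scan(coordinates, probs, outcomes, penalty=1e-17, num_iters=10)
```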
25 changes: 14 additions & 11 deletions aif360/metrics/mdss_classification_metric.py
@@ -2,7 +2,7 @@
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import ClassificationMetric

-from aif360.metrics.mdss.ScoringFunctions import Bernoulli
+from aif360.metrics.mdss.ScoringFunctions import Bernoulli, ScoringFunction
from aif360.metrics.mdss.MDSS import MDSS

import pandas as pd
@@ -15,15 +15,15 @@ class MDSSClassificationMetric(ClassificationMetric):
.. [1] Zhang, Z., & Neill, D. B. (2016). Identifying significant predictive bias in classifiers. arXiv preprint arXiv:1611.08292.
"""
def __init__(self, dataset: BinaryLabelDataset, classified_dataset: BinaryLabelDataset,
-scoring_function: Bernoulli, unprivileged_groups: dict = None, privileged_groups:dict = None):
+scoring_function: ScoringFunction = Bernoulli(direction='positive'), unprivileged_groups: dict = None, privileged_groups:dict = None):

super(MDSSClassificationMetric, self).__init__(dataset, classified_dataset,
unprivileged_groups=unprivileged_groups,
privileged_groups=privileged_groups)

self.scanner = MDSS(scoring_function)

-def score_groups(self, privileged=True, penalty = 0.0):
+def score_groups(self, privileged=True, penalty = 1e-17):
"""
compute the bias score for a prespecified group of records.
@@ -36,14 +36,17 @@ def score_groups(self, privileged=True, penalty = 0.0):
:returns: the score for the group
"""
groups = self.privileged_groups if privileged else self.unprivileged_groups
-subset = defaultdict(list)
+subset = dict()

xor_op = privileged ^ bool(self.classified_dataset.favorable_label)
-direction = 'negative' if xor_op else 'positive'
+direction = 'positive' if xor_op else 'negative'

for g in groups:
for k, v in g.items():
-subset[k].append(v)
+if k in subset.keys():
+    subset[k].append(v)
+else:
+    subset[k] = [v]

coordinates = pd.DataFrame(self.dataset.features, columns=self.dataset.feature_names)
expected = pd.Series(self.classified_dataset.scores.flatten())
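As a standalone illustration of what the rewritten grouping loop above builds — a mapping from feature name to the list of values defining the subset — the snippet below uses hypothetical group specifications, not data from this commit.

```python
# Standalone illustration of the new subset-building loop; the group
# specifications here are hypothetical examples, not taken from the diff.
groups = [{'sex': 1}, {'sex': 0, 'race': 1}]

subset = dict()
for g in groups:
    for k, v in g.items():
        if k in subset.keys():
            subset[k].append(v)
        else:
            subset[k] = [v]

print(subset)  # {'sex': [1, 0], 'race': [1]}
```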
@@ -52,7 +55,7 @@ def score_groups(self, privileged=True, penalty = 0.0):
self.scanner.scoring_function.kwargs['direction'] = direction
return self.scanner.score_current_subset(coordinates, expected, outcomes, dict(subset), penalty)

-def bias_scan(self, privileged=True, num_iters = 10, penalty = 0.0):
+def bias_scan(self, privileged=True, num_iters = 10, penalty = 1e-17):
"""
scan to find the highest scoring subset of records
@@ -67,12 +70,12 @@
"""

xor_op = privileged ^ bool(self.classified_dataset.favorable_label)
-direction = 'negative' if xor_op else 'positive'
+direction = 'positive' if xor_op else 'negative'
self.scanner.scoring_function.kwargs['direction'] = direction

coordinates = pd.DataFrame(self.classified_dataset.features, columns=self.classified_dataset.feature_names)

expected = pd.Series(self.classified_dataset.scores.flatten())
outcomes = pd.Series(self.dataset.labels.flatten())

-return self.scanner.scan(coordinates, outcomes, expected, penalty, num_iters)
+return self.scanner.scan(coordinates, expected, outcomes, penalty, num_iters)
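A hedged sketch of using the metric after this change: `scoring_function` can now be omitted thanks to the `Bernoulli(direction='positive')` default. The dataset below is fabricated, and the import paths follow the file locations shown in this diff.

```python
# Hedged sketch: MDSSClassificationMetric with the new default scoring function.
# The tiny dataset below is fabricated purely for illustration.
import numpy as np
import pandas as pd

from aif360.datasets import BinaryLabelDataset
from aif360.metrics.mdss_classification_metric import MDSSClassificationMetric

rng = np.random.default_rng(0)
df = pd.DataFrame({'sex': rng.integers(0, 2, 200),
                   'age_bin': rng.integers(0, 3, 200),
                   'label': rng.integers(0, 2, 200)})

dataset = BinaryLabelDataset(df=df, label_names=['label'],
                             protected_attribute_names=['sex'])

# Pretend these are a classifier's predicted probabilities on the same records.
classified = dataset.copy(deepcopy=True)
classified.scores = rng.uniform(0.2, 0.8, (200, 1))

metric = MDSSClassificationMetric(dataset, classified,
                                  unprivileged_groups=[{'sex': 0}],
                                  privileged_groups=[{'sex': 1}])

print(metric.score_groups(privileged=True))  # bias score for the privileged group
print(metric.bias_scan(privileged=True))     # highest-scoring subset found by the scan
```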
6 changes: 3 additions & 3 deletions aif360/sklearn/metrics/metrics.py
@@ -431,7 +431,7 @@ def mdss_bias_score(y_true, y_pred, pos_label=1, privileged=True, num_iters = 10
:param num_iters (scalar, optional): number of iterations
"""
xor_op = privileged ^ bool(pos_label)
-direction = 'negative' if xor_op else 'positive'
+direction = 'positive' if xor_op else 'negative'

dummy_subset = dict({'index': range(len(y_true))})
expected = pd.Series(y_pred)
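The flipped conditional above is the heart of this commit. Here is a tiny standalone check of how `(privileged, pos_label)` now maps to the scan direction; it only evaluates the expression as written.

```python
# Standalone check of the flipped mapping from (privileged, pos_label)
# to the scan direction handed to the scoring function.
for privileged in (True, False):
    for pos_label in (1, 0):
        xor_op = privileged ^ bool(pos_label)
        direction = 'positive' if xor_op else 'negative'
        print(privileged, pos_label, direction)
# With pos_label=1: privileged -> 'negative', unprivileged -> 'positive'
```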
@@ -458,7 +458,7 @@ def mdss_bias_scan(y_true, y_pred, dataset=None, pos_label=1, privileged=True, n
"""

xor_op = privileged ^ bool(pos_label)
-direction = 'negative' if xor_op else 'positive'
+direction = 'positive' if xor_op else 'negative'

expected = pd.Series(y_pred)
outcomes = pd.Series(y_true)
@@ -473,7 +473,7 @@ def mdss_bias_scan(y_true, y_pred, dataset=None, pos_label=1, privileged=True, n
scoring_function = Bernoulli(direction=direction)
scanner = MDSS(scoring_function)

-return scanner.scan(coordinates, outcomes, expected, penalty, num_iters)
+return scanner.scan(coordinates, expected, outcomes, penalty, num_iters)


# ========================== INDIVIDUAL FAIRNESS ===============================
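Finally, a hedged example of calling the function-style helper touched above, imported via the module path shown in this diff. Only the parameters visible in this view are used, the inputs are fabricated, and this is a sketch rather than definitive usage.

```python
# Hedged example of mdss_bias_score from aif360/sklearn/metrics/metrics.py;
# y_true/y_pred are fabricated and passed as pandas Series.
import numpy as np
import pandas as pd

from aif360.sklearn.metrics.metrics import mdss_bias_score

rng = np.random.default_rng(0)
y_true = pd.Series(rng.integers(0, 2, 500))     # observed binary outcomes
y_pred = pd.Series(rng.uniform(0.2, 0.8, 500))  # predicted probabilities

# With pos_label=1, privileged=True now maps to direction='negative'.
score = mdss_bias_score(y_true, y_pred, pos_label=1, privileged=True)
print(score)
```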