
Merge pull request Trusted-AI#249 from Viktour19/master
Update mdss definition of privileged group
nrkarthikeyan authored Jun 15, 2021
2 parents aa3d077 + 56864ee commit 5a5f54c
Showing 6 changed files with 253 additions and 137 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -56,7 +56,7 @@ Get in touch with us on [Slack](https://aif360.slack.com) (invitation
* Comprehensive set of sample distortion metrics
* Generalized Entropy Index ([Speicher et al., 2018](https://doi.org/10.1145/3219819.3220046))
* Differential Fairness and Bias Amplification ([Foulds et al., 2018](https://arxiv.org/pdf/1807.08362))
-* Bias Scan with Multi-Dimensional Subset Scan ([Zhang et al., 2017](https://arxiv.org/abs/1611.08292))
+* Bias Scan with Multi-Dimensional Subset Scan ([Zhang, Neill, 2017](https://arxiv.org/abs/1611.08292))

## Setup

4 changes: 2 additions & 2 deletions aif360/metrics/mdss/MDSS.py
@@ -196,12 +196,12 @@ def score_current_subset(self, coordinates: pd.DataFrame, probs: pd.Series, outc
penalized_score = scoring_function.score(observed_sum, probs, total_penalty, current_q_mle)
return penalized_score

-def scan(self, coordinates: pd.DataFrame, outcomes: pd.Series, probs: pd.Series, penalty: float,
+def scan(self, coordinates: pd.DataFrame, probs: pd.Series, outcomes: pd.Series, penalty: float,
num_iters: int, verbose: bool = False, seed: int = 0):
"""
:param coordinates: data frame containing as columns the covariates/features
-:param outcomes: data series containing the outcomes/observed outcomes
:param probs: data series containing the probabilities/expected outcomes
+:param outcomes: data series containing the outcomes/observed outcomes
:param penalty: penalty coefficient
:param num_iters: number of iterations
:param verbose: logging flag
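For orientation, here is a minimal sketch of calling the reordered `scan` signature (probabilities now precede observed outcomes). The data, column names, and variable names below are invented for illustration and are not part of this commit.

```python
# Hypothetical usage of the reordered scan() signature (probs before outcomes).
# The data below is synthetic and only illustrates argument placement.
import numpy as np
import pandas as pd

from aif360.metrics.mdss.ScoringFunctions import Bernoulli
from aif360.metrics.mdss.MDSS import MDSS

rng = np.random.default_rng(0)
coordinates = pd.DataFrame({'age_bin': rng.integers(0, 3, 100),
                            'sex': rng.integers(0, 2, 100)})
probs = pd.Series(rng.uniform(0.2, 0.8, 100))    # expected outcomes (probabilities)
outcomes = pd.Series(rng.integers(0, 2, 100))    # observed binary outcomes

scanner = MDSS(Bernoulli(direction='positive'))
# probs now comes second, outcomes third
result = scanner.scan(coordinates, probs, outcomes, penalty=1e-17, num_iters=10)
```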
25 changes: 14 additions & 11 deletions aif360/metrics/mdss_classification_metric.py
@@ -2,7 +2,7 @@
from aif360.datasets import BinaryLabelDataset
from aif360.metrics import ClassificationMetric

-from aif360.metrics.mdss.ScoringFunctions import Bernoulli
+from aif360.metrics.mdss.ScoringFunctions import Bernoulli, ScoringFunction
from aif360.metrics.mdss.MDSS import MDSS

import pandas as pd
@@ -15,15 +15,15 @@ class MDSSClassificationMetric(ClassificationMetric):
.. [1] Zhang, Z., & Neill, D. B. (2016). Identifying significant predictive bias in classifiers. arXiv preprint arXiv:1611.08292.
"""
def __init__(self, dataset: BinaryLabelDataset, classified_dataset: BinaryLabelDataset,
-scoring_function: Bernoulli, unprivileged_groups: dict = None, privileged_groups:dict = None):
+scoring_function: ScoringFunction = Bernoulli(direction='positive'), unprivileged_groups: dict = None, privileged_groups:dict = None):

super(MDSSClassificationMetric, self).__init__(dataset, classified_dataset,
unprivileged_groups=unprivileged_groups,
privileged_groups=privileged_groups)

self.scanner = MDSS(scoring_function)

-def score_groups(self, privileged=True, penalty = 0.0):
+def score_groups(self, privileged=True, penalty = 1e-17):
"""
compute the bias score for a prespecified group of records.
@@ -36,14 +36,17 @@ def score_groups(self, privileged=True, penalty = 0.0):
:returns: the score for the group
"""
groups = self.privileged_groups if privileged else self.unprivileged_groups
-subset = defaultdict(list)
+subset = dict()

xor_op = privileged ^ bool(self.classified_dataset.favorable_label)
-direction = 'negative' if xor_op else 'positive'
+direction = 'positive' if xor_op else 'negative'

for g in groups:
for k, v in g.items():
-subset[k].append(v)
+if k in subset.keys():
+    subset[k].append(v)
+else:
+    subset[k] = [v]

coordinates = pd.DataFrame(self.dataset.features, columns=self.dataset.feature_names)
expected = pd.Series(self.classified_dataset.scores.flatten())
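As a standalone illustration of what the rewritten grouping loop above builds — a mapping from feature name to the list of values defining the subset — the snippet below uses hypothetical group specifications, not data from this commit.

```python
# Standalone illustration of the new subset-building loop; the group
# specifications here are hypothetical examples, not taken from the diff.
groups = [{'sex': 1}, {'sex': 0, 'race': 1}]

subset = dict()
for g in groups:
    for k, v in g.items():
        if k in subset.keys():
            subset[k].append(v)
        else:
            subset[k] = [v]

print(subset)  # {'sex': [1, 0], 'race': [1]}
```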
@@ -52,7 +55,7 @@ def score_groups(self, privileged=True, penalty = 0.0):
self.scanner.scoring_function.kwargs['direction'] = direction
return self.scanner.score_current_subset(coordinates, expected, outcomes, dict(subset), penalty)

-def bias_scan(self, privileged=True, num_iters = 10, penalty = 0.0):
+def bias_scan(self, privileged=True, num_iters = 10, penalty = 1e-17):
"""
scan to find the highest scoring subset of records
@@ -67,12 +70,12 @@
"""

xor_op = privileged ^ bool(self.classified_dataset.favorable_label)
-direction = 'negative' if xor_op else 'positive'
+direction = 'positive' if xor_op else 'negative'
self.scanner.scoring_function.kwargs['direction'] = direction

coordinates = pd.DataFrame(self.classified_dataset.features, columns=self.classified_dataset.feature_names)

expected = pd.Series(self.classified_dataset.scores.flatten())
outcomes = pd.Series(self.dataset.labels.flatten())

-return self.scanner.scan(coordinates, outcomes, expected, penalty, num_iters)
+return self.scanner.scan(coordinates, expected, outcomes, penalty, num_iters)
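A hedged sketch of using the metric after this change: `scoring_function` can now be omitted thanks to the `Bernoulli(direction='positive')` default. The dataset below is fabricated, and the import paths follow the file locations shown in this diff.

```python
# Hedged sketch: MDSSClassificationMetric with the new default scoring function.
# The tiny dataset below is fabricated purely for illustration.
import numpy as np
import pandas as pd

from aif360.datasets import BinaryLabelDataset
from aif360.metrics.mdss_classification_metric import MDSSClassificationMetric

rng = np.random.default_rng(0)
df = pd.DataFrame({'sex': rng.integers(0, 2, 200),
                   'age_bin': rng.integers(0, 3, 200),
                   'label': rng.integers(0, 2, 200)})

dataset = BinaryLabelDataset(df=df, label_names=['label'],
                             protected_attribute_names=['sex'])

# Pretend these are a classifier's predicted probabilities on the same records.
classified = dataset.copy(deepcopy=True)
classified.scores = rng.uniform(0.2, 0.8, (200, 1))

metric = MDSSClassificationMetric(dataset, classified,
                                  unprivileged_groups=[{'sex': 0}],
                                  privileged_groups=[{'sex': 1}])

print(metric.score_groups(privileged=True))  # bias score for the privileged group
print(metric.bias_scan(privileged=True))     # highest-scoring subset found by the scan
```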
6 changes: 3 additions & 3 deletions aif360/sklearn/metrics/metrics.py
@@ -431,7 +431,7 @@ def mdss_bias_score(y_true, y_pred, pos_label=1, privileged=True, num_iters = 10
:param num_iters (scalar, optional): number of iterations
"""
xor_op = privileged ^ bool(pos_label)
-direction = 'negative' if xor_op else 'positive'
+direction = 'positive' if xor_op else 'negative'

dummy_subset = dict({'index': range(len(y_true))})
expected = pd.Series(y_pred)
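The flipped conditional above is the heart of this commit. Here is a tiny standalone check of how `(privileged, pos_label)` now maps to the scan direction; it only evaluates the expression as written.

```python
# Standalone check of the flipped mapping from (privileged, pos_label)
# to the scan direction handed to the scoring function.
for privileged in (True, False):
    for pos_label in (1, 0):
        xor_op = privileged ^ bool(pos_label)
        direction = 'positive' if xor_op else 'negative'
        print(privileged, pos_label, direction)
# With pos_label=1: privileged -> 'negative', unprivileged -> 'positive'
```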
@@ -458,7 +458,7 @@ def mdss_bias_scan(y_true, y_pred, dataset=None, pos_label=1, privileged=True, n
"""

xor_op = privileged ^ bool(pos_label)
-direction = 'negative' if xor_op else 'positive'
+direction = 'positive' if xor_op else 'negative'

expected = pd.Series(y_pred)
outcomes = pd.Series(y_true)
@@ -473,7 +473,7 @@ def mdss_bias_scan(y_true, y_pred, dataset=None, pos_label=1, privileged=True, n
scoring_function = Bernoulli(direction=direction)
scanner = MDSS(scoring_function)

-return scanner.scan(coordinates, outcomes, expected, penalty, num_iters)
+return scanner.scan(coordinates, expected, outcomes, penalty, num_iters)


# ========================== INDIVIDUAL FAIRNESS ===============================
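Finally, a hedged example of calling the function-style helper touched above, imported via the module path shown in this diff. Only the parameters visible in this view are used, the inputs are fabricated, and this is a sketch rather than definitive usage.

```python
# Hedged example of mdss_bias_score from aif360/sklearn/metrics/metrics.py;
# y_true/y_pred are fabricated and passed as pandas Series.
import numpy as np
import pandas as pd

from aif360.sklearn.metrics.metrics import mdss_bias_score

rng = np.random.default_rng(0)
y_true = pd.Series(rng.integers(0, 2, 500))     # observed binary outcomes
y_pred = pd.Series(rng.uniform(0.2, 0.8, 500))  # predicted probabilities

# With pos_label=1, privileged=True now maps to direction='negative'.
score = mdss_bias_score(y_true, y_pred, pos_label=1, privileged=True)
print(score)
```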