Merge pull request ivadomed#383 from ivadomed/cg/roc

charleygros · web-flow · commit 0b2f02283960 · 2020-07-28T11:23:21.000+10:00
Find optimal threshold with ROC analysis
diff --git a/docs/source/configuration_file.rst b/docs/source/configuration_file.rst
@@ -303,9 +303,9 @@ UNet3D (Optional)
 Testing parameters
 ------------------
 
--  ``binarize_prediction``: Bool. Binarize output predictions using a
-   threshold of 0.5. If ``false``, output predictions are float between
-   0 and 1.
+- ``binarize_prediction``: Float. Threshold (between 0 and 1) used to binarize
+    the predictions before computing the validation metrics. To use soft predictions
+    (i.e. no binarization, floats between 0 and 1) for metric computation, set the value to -1.
 
 uncertainty
 ^^^^^^^^^^^
diff --git a/ivadomed/config/config.json b/ivadomed/config/config.json
@@ -72,7 +72,7 @@
         "film_layers": [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
     },
     "testing_parameters": {
-        "binarize_prediction": true,
+        "binarize_prediction": -1,
         "uncertainty": {
             "epistemic": false,
             "aleatoric": false,
diff --git a/ivadomed/config/config_classification.json b/ivadomed/config/config_classification.json
@@ -67,7 +67,7 @@
         "applied": true
     },
     "testing_parameters": {
-        "binarize_prediction": true,
+        "binarize_prediction": -1,
         "uncertainty": {
             "epistemic": false,
             "aleatoric": false,
diff --git a/ivadomed/config/config_sctTesting.json b/ivadomed/config/config_sctTesting.json
@@ -64,7 +64,7 @@
         "depth": 2
     },
     "testing_parameters": {
-        "binarize_prediction": true,
+        "binarize_prediction": -1,
         "uncertainty": {
             "epistemic": false,
             "aleatoric": false,
diff --git a/ivadomed/config/config_small.json b/ivadomed/config/config_small.json
@@ -69,7 +69,7 @@
         "film_layers": [0, 1, 0, 0, 0, 0, 0, 0]
     },
     "testing_parameters": {
-        "binarize_prediction": true,
+        "binarize_prediction": -1,
         "uncertainty": {
             "epistemic": false,
             "aleatoric": false,
diff --git a/ivadomed/config/config_spineGeHemis.json b/ivadomed/config/config_spineGeHemis.json
@@ -82,7 +82,7 @@
         "n_filters": 1
     },
     "testing_parameters": {
-        "binarize_prediction": true,
+        "binarize_prediction": -1,
         "uncertainty": {
             "epistemic": false,
             "aleatoric": false,
diff --git a/ivadomed/config/config_tumorSeg.json b/ivadomed/config/config_tumorSeg.json
@@ -75,7 +75,7 @@
         "n_filters": 8
     },
     "testing_parameters": {
-        "binarize_prediction": true,
+        "binarize_prediction": -1,
         "uncertainty": {
             "epistemic": false,
             "aleatoric": false,
diff --git a/ivadomed/config/config_vertebral_labeling.json b/ivadomed/config/config_vertebral_labeling.json
@@ -68,7 +68,7 @@
         "film_layers": [0, 1, 0, 0, 0, 0, 0, 0, 0, 0]
     },
     "testing_parameters": {
-        "binarize_prediction": false,
+        "binarize_prediction": -1,
         "uncertainty": {
             "epistemic": false,
             "aleatoric": false,
diff --git a/ivadomed/evaluation.py b/ivadomed/evaluation.py
@@ -50,6 +50,7 @@ def evaluate(bids_path, log_directory, path_preds, target_suffix, eval_params):
         # 3D evaluation
         nib_pred = nib.load(fname_pred)
         data_pred = nib_pred.get_fdata()
+
         h, w, d = data_pred.shape[:3]
         n_classes = len(fname_gt)
         data_gt = np.zeros((h, w, d, n_classes))
diff --git a/ivadomed/main.py b/ivadomed/main.py
@@ -35,13 +35,20 @@ def get_parser():
                                     ' The parameter indicates the number of 2D slices used to generate GIFs, one GIF '
                                     'per slice. A GIF shows predictions of a given slice from the validation '
                                     'sub-dataset. They are saved within the log directory.')
+    optional_args.add_argument('-t', '--thr-increment', dest="thr_increment", required=False, type=float,
+                               help='A threshold analysis is performed at the end of the training using the trained '
+                                    'model and the validation sub-dataset to find the optimal binarization threshold. '
+                                    'The specified value indicates the increment between 0 and 1 used during the '
+                                    'analysis (e.g. 0.1). Plot is saved under "log_directory/thr.png" and the '
+                                    'optimal threshold in "log_directory/config_file.json as "binarize_prediction" '
+                                    'parameter.')
     optional_args.add_argument('-h', '--help', action='help', default=argparse.SUPPRESS,
                                help='Shows function documentation.')
 
     return parser
 
 
-def run_command(context, n_gif=0):
+def run_command(context, n_gif=0, thr_increment=None):
     """Run main command.
 
     This function is central in the ivadomed project as training / testing / evaluation commands are run via this
@@ -53,7 +60,9 @@ def run_command(context, n_gif=0):
         n_gif (int): Generates a GIF during training if larger than zero, one frame per epoch for a given slice. The
             parameter indicates the number of 2D slices used to generate GIFs, one GIF per slice. A GIF shows
             predictions of a given slice from the validation sub-dataset. They are saved within the log directory.
-
+        thr_increment (float): A threshold analysis is performed at the end of the training using the trained model and
+            the validation sub-dataset to find the optimal binarization threshold. The specified value indicates the
+            increment between 0 and 1 used during the ROC analysis (e.g. 0.1).
     Returns:
         If "train" command: Returns floats: best loss score for both training and validation.
         If "test" command: Returns dict: of averaged metrics computed on the testing sub dataset.
@@ -172,7 +181,7 @@ def run_command(context, n_gif=0):
             print('Model directory already exists: {}'.format(path_model))
 
         # RUN TRAINING
-        best_training_dice, best_training_loss, best_validation_dice, best_validation_loss = imed_training.train(
+        best_training_dice, best_training_loss, best_validation_dice, best_validation_loss, thr = imed_training.train(
             model_params=model_params,
             dataset_train=ds_train,
             dataset_val=ds_valid,
@@ -182,8 +191,13 @@ def run_command(context, n_gif=0):
             cuda_available=cuda_available,
             metric_fns=metric_fns,
             n_gif=n_gif,
+            thr_increment=thr_increment,
             debugging=context["debugging"])
 
+        # Update threshold in config file
+        if thr_increment:
+            context["testing_parameters"]["binarize_prediction"] = thr
+
         # Save config file within log_directory and log_directory/model_name
         with open(os.path.join(log_directory, "config_file.json"), 'w') as fp:
             json.dump(context, fp, indent=4)
@@ -265,7 +279,9 @@ def run_main():
         context = json.load(fhandle)
 
     # Run command
-    run_command(context=context, n_gif=args.gif)
+    run_command(context=context,
+                n_gif=args.gif if args.gif is not None else 0,
+                thr_increment=args.thr_increment if args.thr_increment else None)
 
 
 if __name__ == "__main__":
diff --git a/ivadomed/metrics.py b/ivadomed/metrics.py
@@ -1,7 +1,8 @@
 from collections import defaultdict
 
-from scipy import spatial
+import matplotlib.pyplot as plt
 import numpy as np
+from scipy import spatial
 
 
 class MetricManager(object):
@@ -15,7 +16,7 @@ class MetricManager(object):
         result_dict (dict): Dictionary storing metrics.
         num_samples (int): Number of samples.
     """
-    
+
     def __init__(self, metric_fns):
         self.metric_fns = metric_fns
         self.num_samples = 0
@@ -275,3 +276,46 @@ def multi_class_dice_score(im1, im2):
         dice_per_class += dice_score(im1[i,], im2[i,], empty_score=1.0)
 
     return dice_per_class / n_classes
+
+
+def plot_roc_curve(tpr, fpr, opt_thr_idx, fname_out):
+    """Plot the ROC curve, highlight the optimal operating point and save the figure to ``fname_out``.
+
+    Args:
+        tpr (list): True positive rates, indexed like ``fpr``.
+        fpr (list): False positive rates, indexed like ``tpr``.
+        opt_thr_idx (int): Index of the optimal threshold, used to pick the highlighted point in both lists.
+        fname_out (str): Output filename.
+    """
+    fig = plt.figure()
+    plt.plot(fpr, tpr, color='darkorange', lw=2, marker='o')
+    plt.plot([fpr[opt_thr_idx]], [tpr[opt_thr_idx]], color="darkgreen", marker="o", linestyle="None")
+    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')  # diagonal reference line
+    plt.xlim([0.0, 1.0])
+    plt.ylim([0.0, 1.05])
+    plt.xlabel('False Positive Rate')
+    plt.ylabel('True Positive Rate')
+    plt.title('ROC curve')
+    plt.savefig(fname_out)
+    plt.close(fig)  # pyplot keeps figures alive until closed; release it so repeated calls do not leak
+
+
+def plot_dice_thr(thr_list, dice_list, opt_thr_idx, fname_out):
+    """Plot Dice results against thresholds, highlight the optimal one and save the figure to ``fname_out``.
+
+    Args:
+        thr_list (list): Thresholds list.
+        dice_list (list): Dice results, indexed like ``thr_list``. Must be non-empty (``min``/``max`` are taken).
+        opt_thr_idx (int): Index of the optimal threshold, used to pick the highlighted point in both lists.
+        fname_out (str): Output filename.
+    """
+    fig = plt.figure()
+    plt.plot(thr_list, dice_list, color='darkorange', lw=2, marker='o')
+    plt.plot([thr_list[opt_thr_idx]], [dice_list[opt_thr_idx]], color="darkgreen", marker="o", linestyle="None")
+    plt.xlim([0.0, 1.0])
+    plt.ylim([min(dice_list) - 0.02, max(dice_list) + 0.02])  # small margin around the observed Dice range
+    plt.xlabel('Thresholds')
+    plt.ylabel('Dice')
+    plt.title('Threshold analysis')
+    plt.savefig(fname_out)
+    plt.close(fig)  # pyplot keeps figures alive until closed; release it so repeated calls do not leak
diff --git a/ivadomed/models.py b/ivadomed/models.py
@@ -798,7 +798,7 @@ def forward(self, x):
             out = out[:, 1:, ]
         else:
             if self.relu_activation:
-                out = nn.ReLU()(x) / nn.ReLU()(x).max() if bool(nn.ReLU()(x).max()) else nn.ReLU()(x)
+                out = nn.ReLU()(seg_layer) / nn.ReLU()(seg_layer).max() if bool(nn.ReLU()(seg_layer).max()) else nn.ReLU()(seg_layer)
             else:
                 out = torch.sigmoid(seg_layer)
         return out
diff --git a/ivadomed/scripts/automate_training.py b/ivadomed/scripts/automate_training.py
@@ -9,6 +9,7 @@
 
 import argparse
 import copy
+from functools import partial
 import json
 import logging
 import os
@@ -44,11 +45,15 @@ def get_parser():
                         help="Keep a constant dataset split for all configs and iterations")
     parser.add_argument("-l", "--all-logs", dest="all_logs", action='store_true',
                         help="Keep all log directories for each iteration.")
+    parser.add_argument('-t', '--thr-increment', dest="thr_increment", required=False, type=float,
+                        help="A threshold analysis is performed at the end of the training using the trained model and "
+                             "the validation sub-dataset to find the optimal binarization threshold. The specified "
+                             "value indicates the increment between 0 and 1 used during the analysis (e.g. 0.1).")
 
     return parser
 
 
-def train_worker(config):
+def train_worker(config, thr_incr):
     current = mp.current_process()
     # ID of process used to assign a GPU
     ID = int(current.name[-1]) - 1
@@ -59,7 +64,8 @@ def train_worker(config):
     # Call ivado cmd_train
     try:
         # Save best validation score
-        best_training_dice, best_training_loss, best_validation_dice, best_validation_loss = ivado.run_command(config)
+        best_training_dice, best_training_loss, best_validation_dice, best_validation_loss = \
+            ivado.run_command(config, thr_increment=thr_incr)
 
     except:
         logging.exception('Got exception on main handler')
@@ -74,13 +80,6 @@ def train_worker(config):
 
 
 def test_worker(config):
-    current = mp.current_process()
-    # ID of process used to assign a GPU
-    ID = int(current.name[-1]) - 1
-
-    # Use GPU i from the array specified in the config file
-    config["gpu"] = config["gpu"][ID]
-
     # Call ivado cmd_eval
     try:
         # Save best test score
@@ -130,7 +129,8 @@ def make_category(base_item, keys, values, is_all_combin=False):
     return items, names
 
 
-def automate_training(config, param, fixed_split, all_combin, n_iterations=1, run_test=False, all_logs=False):
+def automate_training(config, param, fixed_split, all_combin, n_iterations=1, run_test=False, all_logs=False,
+                      thr_increment=None):
     """Automate multiple training processes on multiple GPUs.
 
     Hyperparameter optimization of models is tedious and time-consuming. This function automatizes this optimization
@@ -157,6 +157,9 @@ def automate_training(config, param, fixed_split, all_combin, n_iterations=1, ru
                             Flag: --n-iteration, -n
         run_test (bool): If True, the trained model is also run on the testing subdataset. flag: --run-test
         all_logs (bool): If True, all the log directories are kept for every iteration. Flag: --all-logs, -l
+        thr_increment (float): A threshold analysis is performed at the end of the training using the trained model and
+            the validation sub-dataset to find the optimal binarization threshold. The specified value indicates the
+            increment between 0 and 1 used during the ROC analysis (e.g. 0.1). Flag: -t, --thr-increment
     """
     # Load initial config
     with open(config, "r") as fhandle:
@@ -240,12 +243,13 @@ def automate_training(config, param, fixed_split, all_combin, n_iterations=1, ru
                                                                                   "_n=" + str(i).zfill(2))
                     else:
                         config["log_directory"] += "_n=" + str(i).zfill(2)
-        validation_scores = pool.map(train_worker, config_list)
+        validation_scores = pool.map(partial(train_worker, thr_incr=thr_increment), config_list)
         val_df = pd.DataFrame(validation_scores, columns=[
             'log_directory', 'best_training_dice', 'best_training_loss', 'best_validation_dice',
             'best_validation_loss'])
 
         if run_test:
+            new_config_list = []
             for config in config_list:
                 # Delete path_pred
                 path_pred = os.path.join(config['log_directory'], 'pred_masks')
@@ -255,7 +259,13 @@ def automate_training(config, param, fixed_split, all_combin, n_iterations=1, ru
                     except OSError as e:
                         print("Error: %s - %s." % (e.filename, e.strerror))
 
-            test_results = pool.map(test_worker, config_list)
+                # Take the config file within the log_directory because binarize_prediction may have been updated
+                json_path = os.path.join(config['log_directory'], 'config_file.json')
+                with open(json_path) as f:
+                    config = json.load(f)
+                new_config_list.append(config)
+
+            test_results = pool.map(test_worker, new_config_list)
 
             df_lst = []
             # Merge all eval df together to have a single excel file
@@ -318,9 +328,13 @@ def automate_training(config, param, fixed_split, all_combin, n_iterations=1, ru
 def main():
     parser = get_parser()
     args = parser.parse_args()
+
+    # Get thr increment if available
+    thr_increment = args.thr_increment if args.thr_increment else None
+
     # Run automate training
     automate_training(args.config, args.params, bool(args.fixed_split), bool(args.all_combin), int(args.n_iterations),
-                      bool(args.run_test), args.all_logs)
+                      bool(args.run_test), args.all_logs, thr_increment)
 
 
 if __name__ == '__main__':
diff --git a/ivadomed/testing.py b/ivadomed/testing.py
@@ -175,15 +175,17 @@ def run_inference(test_loader, model, model_params, testing_params, ofolder, cud
                                                         fname_out=fname_pred,
                                                         slice_axis=slice_axis,
                                                         kernel_dim='2d',
-                                                        bin_thr=0.9 if testing_params["binarize_prediction"] else -1)
+                                                        bin_thr=testing_params["binarize_prediction"])
                     # TODO: Adapt to multilabel
-                    preds_npy_list.append(output_nii.get_fdata()[:, :, :, 0])
+                    output_data = output_nii.get_fdata()[:, :, :, 0]
+                    preds_npy_list.append(output_data)
+
                     gt_npy_list.append(nib.load(fname_tmp).get_fdata())
 
                     output_nii_shape = output_nii.get_fdata().shape
                     if len(output_nii_shape) == 4 and output_nii_shape[-1] > 1:
                         imed_utils.save_color_labels(np.stack(pred_tmp_lst, -1),
-                                                     testing_params["binarize_prediction"],
+                                                     testing_params["binarize_prediction"] > 0,
                                                      fname_tmp,
                                                      fname_pred.split(".nii.gz")[0] + '_color.nii.gz',
                                                      imed_utils.AXIS_DCT[testing_params['slice_axis']])
@@ -221,8 +223,10 @@ def run_inference(test_loader, model, model_params, testing_params, ofolder, cud
                                                         fname_out=fname_pred,
                                                         slice_axis=slice_axis,
                                                         kernel_dim='3d',
-                                                        bin_thr=0.5 if testing_params["binarize_prediction"] else -1)
-                    preds_npy_list.append(output_nii.get_fdata().transpose(3, 0, 1, 2))
+                                                        bin_thr=testing_params["binarize_prediction"])
+                    output_data = output_nii.get_fdata().transpose(3, 0, 1, 2)
+                    preds_npy_list.append(output_data)
+
                     gt_lst = []
                     for gt in metadata[0]['gt_filenames']:
                         # For multi-label, if all labels are not in every image
@@ -236,7 +240,7 @@ def run_inference(test_loader, model, model_params, testing_params, ofolder, cud
 
                     if pred_undo.shape[0] > 1:
                         imed_utils.save_color_labels(pred_undo,
-                                                     testing_params['binarize_prediction'],
+                                                     testing_params['binarize_prediction'] > 0,
                                                      batch['input_metadata'][smp_idx][0]['input_filenames'],
                                                      fname_pred.split(".nii.gz")[0] + '_color.nii.gz',
                                                      slice_axis)
diff --git a/ivadomed/training.py b/ivadomed/training.py
diff --git a/ivadomed/transforms.py b/ivadomed/transforms.py
diff --git a/setup.py b/setup.py
diff --git a/testing/test_inference.py b/testing/test_inference.py