diff --git a/deeplabcut/__init__.py b/deeplabcut/__init__.py
index 128239c19..09b650606 100644
--- a/deeplabcut/__init__.py
+++ b/deeplabcut/__init__.py
@@ -28,6 +28,7 @@
 try:
     import wx
+    mpl.use("WxAgg")
     from deeplabcut import generate_training_dataset
     from deeplabcut import refine_training_dataset
diff --git a/deeplabcut/__main__.py b/deeplabcut/__main__.py
index 89609024f..e11ec3826 100644
--- a/deeplabcut/__main__.py
+++ b/deeplabcut/__main__.py
@@ -9,6 +9,7 @@
 """
 try:
     import wx
+
     lite = False
 except ModuleNotFoundError:
     lite = True
@@ -20,4 +21,6 @@
     deeplabcut.launch_dlc()
 else:
-    print("You installed DLC lite, thus GUI's cannot be used. If you need GUI support please: pip install deeplabcut[gui]")
+    print(
+        "You installed DLC lite, thus GUIs cannot be used. If you need GUI support please: pip install deeplabcut[gui]"
+    )
diff --git a/deeplabcut/generate_training_dataset/trainingsetmanipulation.py b/deeplabcut/generate_training_dataset/trainingsetmanipulation.py
index 1d71fde10..311d2e73d 100755
--- a/deeplabcut/generate_training_dataset/trainingsetmanipulation.py
+++ b/deeplabcut/generate_training_dataset/trainingsetmanipulation.py
@@ -397,7 +397,9 @@ def cropimagesandlabels(
         # moving old entry to _original, dropping it from video_set and update crop parameters
         video_orig = sep.join((vidpath, vidname + videotype))
         if video_orig not in cfg["video_sets_original"]:
-            cfg["video_sets_original"][video_orig] = cfg["video_sets"][video_orig]
+            cfg["video_sets_original"][video_orig] = cfg["video_sets"][
+                video_orig
+            ]
         cfg["video_sets"].pop(video_orig)
         cfg["video_sets"][sep.join((vidpath, new_vidname + videotype))] = {
             "crop": ", ".join(map(str, [0, temp_size[1], 0, temp_size[0]]))
@@ -461,7 +463,7 @@ def check_labels(
     for folder in folders:
         try:
             DataCombined = pd.read_hdf(
-                os.path.join(str(folder), "CollectedData_" + cfg["scorer"] + ".h5"),
+                os.path.join(str(folder), "CollectedData_" + cfg["scorer"] + ".h5")
             )
             if cfg.get("multianimalproject", False):
                 color_by = "individual" if visualizeindividuals else "bodypart"
@@ -887,7 +889,11 @@ def create_training_dataset(
     if net_type is None:  # loading & linking pretrained models
         net_type = cfg.get("default_net_type", "resnet_50")
     else:
-        if "resnet" in net_type or "mobilenet" in net_type or "efficientnet" in net_type:
+        if (
+            "resnet" in net_type
+            or "mobilenet" in net_type
+            or "efficientnet" in net_type
+        ):
             pass
         else:
             raise ValueError("Invalid network type:", net_type)
diff --git a/deeplabcut/gui/auxfun_drag.py b/deeplabcut/gui/auxfun_drag.py
index cefa6477b..f80827643 100644
--- a/deeplabcut/gui/auxfun_drag.py
+++ b/deeplabcut/gui/auxfun_drag.py
@@ -89,11 +89,7 @@ def on_press(self, event):
             message = f"Do you want to remove the label {self.bodyParts}?"
             if self.likelihood is not None:
                 message += " You cannot undo this step!"
-            msg = wx.MessageBox(
-                message,
-                "Remove!",
-                wx.YES_NO | wx.ICON_WARNING,
-            )
+            msg = wx.MessageBox(message, "Remove!", wx.YES_NO | wx.ICON_WARNING)
             if msg == 2:
                 self.delete_data()
diff --git a/deeplabcut/gui/create_training_dataset.py b/deeplabcut/gui/create_training_dataset.py
index 153d64904..2285094d6 100644
--- a/deeplabcut/gui/create_training_dataset.py
+++ b/deeplabcut/gui/create_training_dataset.py
@@ -146,7 +146,9 @@ def __init__(self, parent, gui_size, cfg):
         )
         self.cropandlabel.Bind(wx.EVT_RADIOBOX, self.input_crop_size)
         self.cropandlabel.SetSelection(0)
-        self.crop_text = wx.StaticBox(self, label="Crop settings (set to smaller than your input images)")
+        self.crop_text = wx.StaticBox(
+            self, label="Crop settings (set to smaller than your input images)"
+        )
         self.crop_sizer = wx.StaticBoxSizer(self.crop_text, wx.VERTICAL)
         self.crop_widgets = []
         for name, val in [
@@ -171,7 +173,9 @@ def __init__(self, parent, gui_size, cfg):
         self.hbox3.Add(self.userfeedback, 10, wx.EXPAND | wx.TOP | wx.BOTTOM, 5)
         if config_file.get("multianimalproject", False):
-            print("more networks are available soon for maDLC, but currenlty this uses DLC-ResNet50 only")
+            print(
+                "more networks are available soon for maDLC, but currently this uses DLC-ResNet50 only"
+            )
             self.model_comparison_choice = "No"
         else:
             self.model_comparison_choice = wx.RadioBox(
diff --git a/deeplabcut/gui/frame_extraction_toolbox.py b/deeplabcut/gui/frame_extraction_toolbox.py
index ff8263132..9fc739d9d 100644
--- a/deeplabcut/gui/frame_extraction_toolbox.py
+++ b/deeplabcut/gui/frame_extraction_toolbox.py
@@ -45,7 +45,7 @@ def getColorIndices(self, img, bodyparts):
 class MainFrame(BaseFrame):
     def __init__(self, parent, config, slider_width=25):
         super(MainFrame, self).__init__(
-            "DeepLabCut2.0 - Manual Frame Extraction", parent,
+            "DeepLabCut2.0 - Manual Frame Extraction", parent
         )
 ###################################################################################################################################################
@@ -158,8 +158,9 @@ def __init__(self, parent, config, slider_width=25):
         self.date = self.cfg["date"]
         self.trainFraction = self.cfg["TrainingFraction"]
         self.trainFraction = self.trainFraction[0]
-        self.videos = list(self.cfg.get("video_sets_original")
-                           or self.cfg["video_sets"])
+        self.videos = list(
+            self.cfg.get("video_sets_original") or self.cfg["video_sets"]
+        )
         self.bodyparts = self.cfg["bodyparts"]
         self.colormap = plt.get_cmap(self.cfg["colormap"])
         self.colormap = self.colormap.reversed()
diff --git a/deeplabcut/gui/label_frames.py b/deeplabcut/gui/label_frames.py
index 1d26bd824..9399280d2 100644
--- a/deeplabcut/gui/label_frames.py
+++ b/deeplabcut/gui/label_frames.py
@@ -71,9 +71,7 @@ def label_frames(
     os.chdir(str(wd))
     cfg = auxiliaryfunctions.read_config(config)
     if cfg.get("multianimalproject", False) or multiple_individualsGUI:
-        from deeplabcut.gui import (
-            multiple_individuals_labeling_toolbox,
-        )
+        from deeplabcut.gui import multiple_individuals_labeling_toolbox
 
         multiple_individuals_labeling_toolbox.show(config, config3d, sourceCam)
     else:
diff --git a/deeplabcut/gui/labeling_toolbox.py b/deeplabcut/gui/labeling_toolbox.py
index 3b81ec0a9..221a84dfb 100755
--- a/deeplabcut/gui/labeling_toolbox.py
+++ b/deeplabcut/gui/labeling_toolbox.py
@@ -58,7 +58,9 @@ def retrieveData_and_computeEpLines(self, img, imNum):
         if self.sourceCam is None:
             sourceCam = [
                 otherCam for otherCam in cams if cam not in otherCam
-            ][0] #WHY?
+            ][
+                0
+            ]  # WHY?
         else:
             sourceCam = self.sourceCam
@@ -80,11 +82,13 @@ def retrieveData_and_computeEpLines(self, img, imNum):
         try:
             dataFrame = pd.read_hdf(
-                os.path.join(sourceCam_path, "CollectedData_" + scorer + ".h5"),
+                os.path.join(sourceCam_path, "CollectedData_" + scorer + ".h5")
             )
             dataFrame.sort_index(inplace=True)
         except IOError:
-            print("source camera images have not yet been labeled, or you have opened this folder in the wrong mode!")
+            print(
+                "source camera images have not yet been labeled, or you have opened this folder in the wrong mode!"
+            )
             return None, None, None
 
         # Find offset terms for drawing epipolar Lines
@@ -241,7 +245,7 @@ def clearBoxer(self):
 class MainFrame(BaseFrame):
     def __init__(self, parent, config, imtypes, config3d, sourceCam):
         super(MainFrame, self).__init__(
-            "DeepLabCut2.0 - Labeling ToolBox", parent, imtypes,
+            "DeepLabCut2.0 - Labeling ToolBox", parent, imtypes
         )
 
         self.statusbar.SetStatusText(
@@ -370,9 +374,7 @@ def OnKeyPressed(self, event=None):
             inv = self.axes.transData.inverted()
             pos_rel = list(inv.transform(pos_abs))
             y1, y2 = self.axes.get_ylim()
-            pos_rel[1] = (
-                y1 - pos_rel[1] + y2
-            )  # Recall y-axis is inverted
+            pos_rel[1] = y1 - pos_rel[1] + y2  # Recall y-axis is inverted
             i = np.nanargmin(
                 [self.calc_distance(*dp.point.center, *pos_rel) for dp in self.drs]
             )
@@ -619,7 +621,7 @@ def browseDir(self, event):
         # Reading the existing dataset,if already present
         try:
             self.dataFrame = pd.read_hdf(
-                os.path.join(self.dir, "CollectedData_" + self.scorer + ".h5"),
+                os.path.join(self.dir, "CollectedData_" + self.scorer + ".h5")
             )
             self.dataFrame.sort_index(inplace=True)
             self.prev.Enable(True)
@@ -892,9 +894,7 @@ def plot(self, img):
                     )
                 ]
                 self.axes.add_patch(circle[0])
-                self.dr = auxfun_drag.DraggablePoint(
-                    circle[0], self.bodyparts[bpindex]
-                )
+                self.dr = auxfun_drag.DraggablePoint(circle[0], self.bodyparts[bpindex])
                 self.dr.connect()
                 self.dr.coords = MainFrame.getLabels(self, self.iter)[bpindex]
                 self.drs.append(self.dr)
@@ -952,7 +952,6 @@ def onChecked(self, event):
             self.slider.Enable(False)
 
-
 def show(config, config3d, sourceCam, imtypes=["*.png"]):
     app = wx.App()
     frame = MainFrame(None, config, imtypes, config3d, sourceCam).Show()
diff --git a/deeplabcut/gui/multiple_individuals_labeling_toolbox.py b/deeplabcut/gui/multiple_individuals_labeling_toolbox.py
index 18e196ea9..3df4058ea 100755
--- a/deeplabcut/gui/multiple_individuals_labeling_toolbox.py
+++ b/deeplabcut/gui/multiple_individuals_labeling_toolbox.py
@@ -86,11 +86,13 @@ def retrieveData_and_computeEpLines(self, img, imNum):
         try:
             dataFrame = pd.read_hdf(
-                os.path.join(sourceCam_path, "CollectedData_" + scorer + ".h5"),
+                os.path.join(sourceCam_path, "CollectedData_" + scorer + ".h5")
             )
             dataFrame.sort_index(inplace=True)
         except IOError:
-            print("source camera images have not yet been labeled, or you have opened this folder in the wrong mode!")
+            print(
+                "source camera images have not yet been labeled, or you have opened this folder in the wrong mode!"
+            )
             return None, None, None
 
         # Find offset terms for drawing epipolar Lines
@@ -283,7 +285,7 @@ def clearBoxer(self):
 class MainFrame(BaseFrame):
     def __init__(self, parent, config, config3d, sourceCam):
         super(MainFrame, self).__init__(
-            "DeepLabCut2.0 - Multiple Individuals Labeling ToolBox", parent,
+            "DeepLabCut2.0 - Multiple Individuals Labeling ToolBox", parent
         )
 
         self.statusbar.SetStatusText(
@@ -415,9 +417,7 @@ def OnKeyPressed(self, event=None):
             inv = self.axes.transData.inverted()
             pos_rel = list(inv.transform(pos_abs))
             y1, y2 = self.axes.get_ylim()
-            pos_rel[1] = (
-                y1 - pos_rel[1] + y2
-            )  # Recall y-axis is inverted
+            pos_rel[1] = y1 - pos_rel[1] + y2  # Recall y-axis is inverted
             i = np.nanargmin(
                 [self.calc_distance(*dp.point.center, *pos_rel) for dp in self.drs]
             )
@@ -789,7 +789,7 @@ def browseDir(self, event):
         # Reading the existing dataset,if already present
         try:
             self.dataFrame = pd.read_hdf(
-                os.path.join(self.dir, "CollectedData_" + self.scorer + ".h5"),
+                os.path.join(self.dir, "CollectedData_" + self.scorer + ".h5")
             )
             # Handle data previously labeled on a different platform
             sep = "/" if "/" in self.dataFrame.index[0] else "\\"
@@ -1205,9 +1205,7 @@ def plot(self, img):
                     )
                     self.axes.add_patch(circle)
                     self.dr = auxfun_drag.DraggablePoint(
-                        circle,
-                        self.uniquebodyparts[c],
-                        individual_names=ind,
+                        circle, self.uniquebodyparts[c], individual_names=ind
                     )
                     self.dr.connect()
                     self.dr.coords = image_points
@@ -1243,9 +1241,7 @@ def plot(self, img):
                     )
                     self.axes.add_patch(circle)
                     self.dr = auxfun_drag.DraggablePoint(
-                        circle,
-                        self.multibodyparts[c],
-                        individual_names=ind,
+                        circle, self.multibodyparts[c], individual_names=ind
                     )
                     self.dr.connect()
                     self.dr.coords = image_points
diff --git a/deeplabcut/gui/multiple_individuals_refinement_toolbox.py b/deeplabcut/gui/multiple_individuals_refinement_toolbox.py
index 5c4ec2111..225385672 100644
--- a/deeplabcut/gui/multiple_individuals_refinement_toolbox.py
+++ b/deeplabcut/gui/multiple_individuals_refinement_toolbox.py
@@ -75,7 +75,7 @@ def drawplot(
         self.axes.set_xlim(xlim)
         self.axes.set_ylim(ylim)
         self.figure.canvas.draw()
-        if not hasattr(self, 'toolbar'):
+        if not hasattr(self, "toolbar"):
             self.toolbar = NavigationToolbar(self.canvas)
         return (self.figure, self.axes, self.canvas, self.toolbar, self.ax)
 
@@ -137,9 +137,7 @@ def clearBoxer(self):
 class MainFrame(BaseFrame):
     def __init__(self, parent, config):
-        super(MainFrame, self).__init__(
-            "DeepLabCut - Refinement ToolBox", parent,
-        )
+        super(MainFrame, self).__init__("DeepLabCut - Refinement ToolBox", parent)
         self.Bind(wx.EVT_CHAR_HOOK, self.OnKeyPressed)
 ###################################################################################################################################################
@@ -281,9 +279,7 @@ def OnKeyPressed(self, event=None):
             inv = self.axes.transData.inverted()
             pos_rel = list(inv.transform(pos_abs))
             y1, y2 = self.axes.get_ylim()
-            pos_rel[1] = (
-                y1 - pos_rel[1] + y2
-            )  # Recall y-axis is inverted
+            pos_rel[1] = y1 - pos_rel[1] + y2  # Recall y-axis is inverted
             i = np.nanargmin(
                 [self.calc_distance(*dp.point.center, *pos_rel) for dp in self.drs]
             )
@@ -810,7 +806,7 @@ def saveDataSet(self, event):
                 "A training dataset file is already found for this video. The refined machine labels are merged to this data!"
             )
             DataU1 = pd.read_hdf(
-                os.path.join(self.dir, "CollectedData_" + self.humanscorer + ".h5"),
+                os.path.join(self.dir, "CollectedData_" + self.humanscorer + ".h5")
             )
             # combine datasets Original Col. + corrected machinefiles:
             DataCombined = pd.concat([self.Dataframe, DataU1])
diff --git a/deeplabcut/gui/outlier_frame_extraction_toolbox.py b/deeplabcut/gui/outlier_frame_extraction_toolbox.py
index 7c0d8e5de..61753b011 100644
--- a/deeplabcut/gui/outlier_frame_extraction_toolbox.py
+++ b/deeplabcut/gui/outlier_frame_extraction_toolbox.py
@@ -80,7 +80,7 @@ def __init__(
         self, parent, config, video, shuffle, Dataframe, savelabeled, multianimal
     ):
         super(MainFrame, self).__init__(
-            "DeepLabCut2.0 - Manual Outlier Frame Extraction", parent,
+            "DeepLabCut2.0 - Manual Outlier Frame Extraction", parent
         )
 ###################################################################################################################################################
diff --git a/deeplabcut/gui/refine_labels.py b/deeplabcut/gui/refine_labels.py
index bd27bafe5..54a815997 100644
--- a/deeplabcut/gui/refine_labels.py
+++ b/deeplabcut/gui/refine_labels.py
@@ -54,9 +54,7 @@ def refine_labels(config, multianimal=False):
         refinement.show(config)
     else:
         # loading multianimal labeling GUI
-        from deeplabcut.gui import (
-            multiple_individuals_refinement_toolbox,
-        )
+        from deeplabcut.gui import multiple_individuals_refinement_toolbox
 
         multiple_individuals_refinement_toolbox.show(config)
diff --git a/deeplabcut/gui/refine_tracklets.py b/deeplabcut/gui/refine_tracklets.py
index ab2690a25..f9311ddce 100644
--- a/deeplabcut/gui/refine_tracklets.py
+++ b/deeplabcut/gui/refine_tracklets.py
@@ -185,7 +185,6 @@ def __init__(self, parent, gui_size, cfg):
             hbox2, pos=(7, 0), flag=wx.EXPAND | wx.TOP | wx.LEFT | wx.RIGHT, border=10
         )
 
-
         self.inf_cfg_text = wx.Button(self, label="Edit inference_config.yaml")
         sizer.Add(self.inf_cfg_text, pos=(10, 1), flag=wx.BOTTOM | wx.RIGHT, border=10)
         self.inf_cfg_text.Bind(wx.EVT_BUTTON, self.edit_inf_config)
@@ -202,7 +201,9 @@ def __init__(self, parent, gui_size, cfg):
         sizer.Add(self.reset, pos=(8, 1), flag=wx.BOTTOM | wx.RIGHT, border=10)
         self.reset.Bind(wx.EVT_BUTTON, self.reset_refine_tracklets)
 
-        self.filter = wx.Button(self, label=" Step2: Filter Tracks (then you also get a CSV file!)")
+        self.filter = wx.Button(
+            self, label=" Step2: Filter Tracks (then you also get a CSV file!)"
+        )
         sizer.Add(self.filter, pos=(8, 3), flag=wx.BOTTOM | wx.RIGHT, border=10)
         self.filter.Bind(wx.EVT_BUTTON, self.filter_after_refinement)
diff --git a/deeplabcut/gui/refinement.py b/deeplabcut/gui/refinement.py
index b72a27d32..304c81bac 100644
--- a/deeplabcut/gui/refinement.py
+++ b/deeplabcut/gui/refinement.py
@@ -93,7 +93,7 @@ def drawplot(
         self.axes.set_xlim(xlim)
         self.axes.set_ylim(ylim)
         self.figure.canvas.draw()
-        if not hasattr(self, 'toolbar'):
+        if not hasattr(self, "toolbar"):
             self.toolbar = NavigationToolbar(self.canvas)
         return (self.figure, self.axes, self.canvas, self.toolbar)
 
@@ -145,9 +145,7 @@ def clearBoxer(self):
 class MainFrame(BaseFrame):
     def __init__(self, parent, config):
-        super(MainFrame, self).__init__(
-            "DeepLabCut2.0 - Refinement ToolBox", parent,
-        )
+        super(MainFrame, self).__init__("DeepLabCut2.0 - Refinement ToolBox", parent)
         self.Bind(wx.EVT_CHAR_HOOK, self.OnKeyPressed)
 ###################################################################################################################################################
@@ -279,9 +277,7 @@ def OnKeyPressed(self, event=None):
             inv = self.axes.transData.inverted()
             pos_rel = list(inv.transform(pos_abs))
             y1, y2 = self.axes.get_ylim()
-            pos_rel[1] = (
-                y1 - pos_rel[1] + y2
-            )  # Recall y-axis is inverted
+            pos_rel[1] = y1 - pos_rel[1] + y2  # Recall y-axis is inverted
             i = np.nanargmin(
                 [self.calc_distance(*dp.point.center, *pos_rel) for dp in self.drs]
             )
@@ -739,7 +735,7 @@ def saveDataSet(self, event):
                 "A training dataset file is already found for this video. The refined machine labels are merged to this data!"
             )
             DataU1 = pd.read_hdf(
-                os.path.join(self.dir, "CollectedData_" + self.humanscorer + ".h5"),
+                os.path.join(self.dir, "CollectedData_" + self.humanscorer + ".h5")
            )
             # combine datasets Original Col. + corrected machinefiles:
             DataCombined = pd.concat([self.Dataframe, DataU1])
@@ -886,9 +882,7 @@ def plot(self, im):
                 self.axes.add_patch(circle[0])
                 self.dr = auxfun_drag.DraggablePoint(
-                    circle[0],
-                    bp,
-                    likelihood=self.likelihood
+                    circle[0], bp, likelihood=self.likelihood
                 )
                 self.dr.connect()
                 self.dr.coords = MainFrame.getLabels(self, self.iter)[bpindex]
diff --git a/deeplabcut/gui/select_crop_parameters.py b/deeplabcut/gui/select_crop_parameters.py
index 463379e2d..1c7d23b4c 100644
--- a/deeplabcut/gui/select_crop_parameters.py
+++ b/deeplabcut/gui/select_crop_parameters.py
@@ -19,7 +19,7 @@ class MainFrame(BaseFrame):
     def __init__(self, parent, config, image):
         super(MainFrame, self).__init__(
-            "DeepLabCut2.0 - Select Crop Parameters", parent,
+            "DeepLabCut2.0 - Select Crop Parameters", parent
         )
 ###################################################################################################################################################
diff --git a/deeplabcut/gui/tracklet_toolbox.py b/deeplabcut/gui/tracklet_toolbox.py
index dc5e82260..8b67360cc 100644
--- a/deeplabcut/gui/tracklet_toolbox.py
+++ b/deeplabcut/gui/tracklet_toolbox.py
@@ -307,9 +307,7 @@ def toggle_draggable_points(self, *args):
     def add_point(self, center, animal, bodypart, **kwargs):
         circle = patches.Circle(center, **kwargs)
         self.ax1.add_patch(circle)
-        dp = auxfun_drag.DraggablePoint(
-            circle, bodypart, animal,
-        )
+        dp = auxfun_drag.DraggablePoint(circle, bodypart, animal)
         dp.connect()
         self.dps.append(dp)
diff --git a/deeplabcut/gui/video_editing.py b/deeplabcut/gui/video_editing.py
index 614d629eb..22f7ba29b 100644
--- a/deeplabcut/gui/video_editing.py
+++ b/deeplabcut/gui/video_editing.py
@@ -103,7 +103,7 @@ def __init__(self, parent, gui_size, cfg):
             self,
             label="Downsample: rotate video?",
             choices=["Yes", "No", "Arbitrary"],
-            #majorDimension=0,
+            # majorDimension=0,
             style=wx.RA_SPECIFY_COLS,
         )
         self.rotate.SetSelection(1)
@@ -120,7 +120,9 @@ def __init__(self, parent, gui_size, cfg):
         )
         angle = wx.StaticBox(self, label="Angle for arbitrary rotation (deg)")
         vangle_boxsizer = wx.StaticBoxSizer(angle, wx.VERTICAL)
-        self.vangle = FS.FloatSpin(self, value="0.0", min_val=-360.0, max_val=360.0, digits=2)
+        self.vangle = FS.FloatSpin(
+            self, value="0.0", min_val=-360.0, max_val=360.0, digits=2
+        )
         vangle_boxsizer.Add(self.vangle, 1, wx.EXPAND | wx.TOP | wx.BOTTOM, 10)
 
         video_start = wx.StaticBox(self, label="Shorten: start time (sec)")
@@ -155,8 +157,7 @@ def __init__(self, parent, gui_size, cfg):
         self.ok.Bind(wx.EVT_BUTTON, self.crop_video)
 
         self.reset = wx.Button(self, label="Reset")
-        self.sizer.Add(
-            self.reset, pos=(6, 0), flag=wx.LEFT, border=10)
+        self.sizer.Add(self.reset, pos=(6, 0), flag=wx.LEFT, border=10)
         self.reset.Bind(wx.EVT_BUTTON, self.reset_edit_videos)
 
         self.sizer.AddGrowableCol(3)
diff --git a/deeplabcut/gui/welcome.py b/deeplabcut/gui/welcome.py
index 4ce62dfbc..30c8699eb 100644
--- a/deeplabcut/gui/welcome.py
+++ b/deeplabcut/gui/welcome.py
@@ -26,7 +26,9 @@ def __init__(self, parent, gui_size):
         ## design the panel
         sizer = wx.GridBagSizer(10, 7)
         # Add image of DLC
-        icon = wx.StaticBitmap(self, bitmap=wx.Bitmap(os.path.join(MEDIA_PATH, "dlc_1-01.png")))
+        icon = wx.StaticBitmap(
+            self, bitmap=wx.Bitmap(os.path.join(MEDIA_PATH, "dlc_1-01.png"))
+        )
         sizer.Add(icon, pos=(0, 0), span=(0, 8), flag=wx.EXPAND | wx.BOTTOM, border=10)
         line = wx.StaticLine(self)
         sizer.Add(line, pos=(1, 0), span=(1, 8), flag=wx.EXPAND | wx.BOTTOM, border=10)
diff --git a/deeplabcut/pose_estimation_3d/triangulation.py b/deeplabcut/pose_estimation_3d/triangulation.py
index b80bb68a7..7e18c6dec 100644
--- a/deeplabcut/pose_estimation_3d/triangulation.py
+++ b/deeplabcut/pose_estimation_3d/triangulation.py
@@ -203,7 +203,9 @@ def triangulate(
                 )
                 stereo_file = auxiliaryfunctions.read_pickle(path_stereo_file)
                 cam_pair = str(cam_names[0] + "-" + cam_names[1])
-                if_video_analyzed = False  # variable to keep track if the video was already analyzed
+                if_video_analyzed = (
+                    False
+                )  # variable to keep track if the video was already analyzed
                 # Check for the camera matrix
                 for k in metadata_["stereo_matrix"].keys():
                     if np.all(
diff --git a/deeplabcut/pose_estimation_tensorflow/dataset/factory.py b/deeplabcut/pose_estimation_tensorflow/dataset/factory.py
index f13d9b4d0..b9a61f478 100644
--- a/deeplabcut/pose_estimation_tensorflow/dataset/factory.py
+++ b/deeplabcut/pose_estimation_tensorflow/dataset/factory.py
@@ -15,7 +15,7 @@
 def create(cfg):
-    dataset_type = cfg['dataset_type']
+    dataset_type = cfg["dataset_type"]
     if dataset_type == "scalecrop":
         print("Starting with scalecrop pose-dataset loader.")
         from deeplabcut.pose_estimation_tensorflow.dataset.pose_dataset_scalecrop import (
diff --git a/deeplabcut/pose_estimation_tensorflow/dataset/pose_dataset_deterministic.py b/deeplabcut/pose_estimation_tensorflow/dataset/pose_dataset_deterministic.py
index 3b3d60387..ee36213d3 100644
--- a/deeplabcut/pose_estimation_tensorflow/dataset/pose_dataset_deterministic.py
+++ b/deeplabcut/pose_estimation_tensorflow/dataset/pose_dataset_deterministic.py
@@ -38,14 +38,16 @@ def __init__(self, cfg):
         self.cfg = cfg
         self.data = self.load_dataset()
         self.num_images = len(self.data)
-        if self.cfg['mirror']:
-            self.symmetric_joints = mirror_joints_map(cfg['all_joints'], cfg['num_joints'])
+        if self.cfg["mirror"]:
+            self.symmetric_joints = mirror_joints_map(
+                cfg["all_joints"], cfg["num_joints"]
+            )
         self.curr_img = 0
-        self.set_shuffle(cfg['shuffle'])
+        self.set_shuffle(cfg["shuffle"])
 
     def load_dataset(self):
         cfg = self.cfg
-        file_name = os.path.join(self.cfg['project_path'], cfg['dataset'])
+        file_name = os.path.join(self.cfg["project_path"], cfg["dataset"])
         # Load Matlab file dataset annotation
         mlab = sio.loadmat(file_name)
         self.raw_data = mlab
@@ -69,7 +71,7 @@ def load_dataset(self):
             joint_id = joints[:, 0]
             # make sure joint ids are 0-indexed
             if joint_id.size != 0:
-                assert (joint_id < cfg['num_joints']).any()
+                assert (joint_id < cfg["num_joints"]).any()
             joints[:, 0] = joint_id
             item.joints = [joints]
         else:
@@ -88,7 +90,7 @@ def set_test_mode(self, test_mode):
     def set_shuffle(self, shuffle):
         self.shuffle = shuffle
         if not shuffle:
-            assert not self.cfg['mirror']
+            assert not self.cfg["mirror"]
             self.image_indices = np.arange(self.num_images)
 
     def mirror_joint_coords(self, joints, image_width):
@@ -106,10 +108,10 @@ def mirror_joints(self, joints, symmetric_joints, image_width):
         return res
 
     def shuffle_images(self):
-        if self.cfg['deterministic']:
+        if self.cfg["deterministic"]:
             np.random.seed(42)
         num_images = self.num_images
-        if self.cfg['mirror']:
+        if self.cfg["mirror"]:
             image_indices = np.random.permutation(num_images * 2)
             self.mirrored = image_indices >= num_images
             image_indices[self.mirrored] = image_indices[self.mirrored] - num_images
@@ -119,7 +121,7 @@ def shuffle_images(self):
     def num_training_samples(self):
         num = self.num_images
-        if self.cfg['mirror']:
+        if self.cfg["mirror"]:
             num *= 2
         return num
 
@@ -131,7 +133,7 @@ def next_training_sample(self):
         self.curr_img = (self.curr_img + 1) % self.num_training_samples()
 
         imidx = self.image_indices[curr_img]
-        mirror = self.cfg['mirror'] and self.mirrored[curr_img]
+        mirror = self.cfg["mirror"] and self.mirrored[curr_img]
 
         return imidx, mirror
 
@@ -140,11 +142,11 @@ def get_training_sample(self, imidx):
 
     def get_scale(self):
         cfg = self.cfg
-        if cfg['deterministic']:
+        if cfg["deterministic"]:
             rand.seed(42)
-        scale = cfg['global_scale']
+        scale = cfg["global_scale"]
         if hasattr(cfg, "scale_jitter_lo") and hasattr(cfg, "scale_jitter_up"):
-            scale_jitter = rand.uniform(cfg['scale_jitter_lo'], cfg['scale_jitter_up'])
+            scale_jitter = rand.uniform(cfg["scale_jitter_lo"], cfg["scale_jitter_up"])
             scale *= scale_jitter
         return scale
 
@@ -164,11 +166,11 @@ def is_valid_size(self, image_size, scale):
         input_width = image_size[2] * scale
         input_height = image_size[1] * scale
         if (
-            input_height < self.cfg['min_input_size']
-            or input_width < self.cfg['min_input_size']
+            input_height < self.cfg["min_input_size"]
+            or input_width < self.cfg["min_input_size"]
         ):
             return False
-        if input_height * input_width > self.cfg['max_input_size'] ** 2:
+        if input_height * input_width > self.cfg["max_input_size"] ** 2:
             return False
 
         return True
@@ -181,13 +183,13 @@ def make_batch(self, data_item, scale, mirror):
 
         # print(im_file, os.getcwd())
         # print(self.cfg.project_path)
-        image = imread(os.path.join(self.cfg['project_path'], im_file), mode="RGB")
+        image = imread(os.path.join(self.cfg["project_path"], im_file), mode="RGB")
 
         if self.has_gt:
             joints = np.copy(data_item.joints)
 
-        if self.cfg['crop']:  # adapted cropping for DLC
-            if np.random.rand() < self.cfg['cropratio']:
+        if self.cfg["crop"]:  # adapted cropping for DLC
+            if np.random.rand() < self.cfg["cropratio"]:
                 # 1. get center of joints
                 j = np.random.randint(np.shape(joints)[1])  # pick a random joint
                 # draw random crop dimensions & subtract joint points
@@ -219,7 +221,7 @@ def make_batch(self, data_item, scale, mirror):
         batch = {Batch.inputs: img}
 
         if self.has_gt:
-            stride = self.cfg['stride']
+            stride = self.cfg["stride"]
 
             if mirror:
                 joints = [
@@ -259,16 +261,16 @@ def make_batch(self, data_item, scale, mirror):
         return batch
 
     def compute_target_part_scoremap(self, joint_id, coords, data_item, size, scale):
-        stride = self.cfg['stride']
-        dist_thresh = self.cfg['pos_dist_thresh'] * scale
-        num_joints = self.cfg['num_joints']
+        stride = self.cfg["stride"]
+        dist_thresh = self.cfg["pos_dist_thresh"] * scale
+        num_joints = self.cfg["num_joints"]
         half_stride = stride / 2
         scmap = np.zeros(cat([size, arr([num_joints])]))
         locref_size = cat([size, arr([num_joints * 2])])
         locref_mask = np.zeros(locref_size)
         locref_map = np.zeros(locref_size)
 
-        locref_scale = 1.0 / self.cfg['locref_stdev']
+        locref_scale = 1.0 / self.cfg["locref_stdev"]
         dist_thresh_sq = dist_thresh ** 2
 
         width = size[1]
@@ -312,7 +314,7 @@ def compute_target_part_scoremap(self, joint_id, coords, data_item, size, scale):
 
     def compute_scmap_weights(self, scmap_shape, joint_id, data_item):
         cfg = self.cfg
-        if cfg['weigh_only_present_joints']:
+        if cfg["weigh_only_present_joints"]:
             weights = np.zeros(scmap_shape)
             for person_joint_id in joint_id:
                 for j_id in person_joint_id:
diff --git a/deeplabcut/pose_estimation_tensorflow/dataset/pose_dataset_scalecrop.py b/deeplabcut/pose_estimation_tensorflow/dataset/pose_dataset_scalecrop.py
index c189127ec..78c9e1808 100755
--- a/deeplabcut/pose_estimation_tensorflow/dataset/pose_dataset_scalecrop.py
+++ b/deeplabcut/pose_estimation_tensorflow/dataset/pose_dataset_scalecrop.py
@@ -36,31 +36,33 @@ def __init__(self, cfg):
         self.num_images = len(self.data)
         self.max_input_sizesquare = cfg.get("max_input_size", 1500) ** 2
         self.min_input_sizesquare = cfg.get("min_input_size", 64) ** 2
-        self.locref_scale = 1.0 / cfg['locref_stdev']
-        self.stride = cfg['stride']
+        self.locref_scale = 1.0 / cfg["locref_stdev"]
+        self.stride = cfg["stride"]
         self.half_stride = self.stride / 2
-        self.scale = cfg['global_scale']
+        self.scale = cfg["global_scale"]
         self.scale_jitter_lo = cfg.get("scale_jitter_lo", 0.75)
         self.scale_jitter_up = cfg.get("scale_jitter_up", 1.25)
-        self.cfg['crop'] = cfg.get("crop", True)
-        self.cfg['cropratio'] = cfg.get("cropratio", 0.4)
+        self.cfg["crop"] = cfg.get("crop", True)
+        self.cfg["cropratio"] = cfg.get("cropratio", 0.4)
         # what is the minimal frames size for cropping plus/minus ie.. [-100,100]^2 for an arb. joint
-        self.cfg['minsize'] = cfg.get("minsize", 100)
-        self.cfg['leftwidth'] = cfg.get("leftwidth", 400)
-        self.cfg['rightwidth'] = cfg.get("rightwidth", 400)
-        self.cfg['topheight'] = cfg.get("topheight", 400)
-        self.cfg['bottomheight'] = cfg.get("bottomheight", 400)
-
-        if self.cfg['mirror']:
-            self.symmetric_joints = mirror_joints_map(cfg['all_joints'], cfg['num_joints'])
+        self.cfg["minsize"] = cfg.get("minsize", 100)
+        self.cfg["leftwidth"] = cfg.get("leftwidth", 400)
+        self.cfg["rightwidth"] = cfg.get("rightwidth", 400)
+        self.cfg["topheight"] = cfg.get("topheight", 400)
+        self.cfg["bottomheight"] = cfg.get("bottomheight", 400)
+
+        if self.cfg["mirror"]:
+            self.symmetric_joints = mirror_joints_map(
+                cfg["all_joints"], cfg["num_joints"]
+            )
         self.curr_img = 0
-        self.set_shuffle(cfg['shuffle'])
+        self.set_shuffle(cfg["shuffle"])
 
     def load_dataset(self):
         cfg = self.cfg
-        file_name = os.path.join(self.cfg['project_path'], cfg['dataset'])
+        file_name = os.path.join(self.cfg["project_path"], cfg["dataset"])
         # Load Matlab file dataset annotation
         mlab = sio.loadmat(file_name)
         self.raw_data = mlab
@@ -84,7 +86,7 @@ def load_dataset(self):
             joint_id = joints[:, 0]
             # make sure joint ids are 0-indexed
             if joint_id.size != 0:
-                assert (joint_id < cfg['num_joints']).any()
+                assert (joint_id < cfg["num_joints"]).any()
             joints[:, 0] = joint_id
             item.joints = [joints]
         else:
@@ -103,7 +105,7 @@ def set_test_mode(self, test_mode):
     def set_shuffle(self, shuffle):
         self.shuffle = shuffle
         if not shuffle:
-            assert not self.cfg['mirror']
+            assert not self.cfg["mirror"]
             self.image_indices = np.arange(self.num_images)
 
     def mirror_joint_coords(self, joints, image_width):
@@ -122,7 +124,7 @@ def mirror_joints(self, joints, symmetric_joints, image_width):
 
     def shuffle_images(self):
         num_images = self.num_images
-        if self.cfg['mirror']:
+        if self.cfg["mirror"]:
             image_indices = np.random.permutation(num_images * 2)
             self.mirrored = image_indices >= num_images
             image_indices[self.mirrored] = image_indices[self.mirrored] - num_images
@@ -132,7 +134,7 @@ def num_training_samples(self):
         num = self.num_images
-        if self.cfg['mirror']:
+        if self.cfg["mirror"]:
             num *= 2
         return num
 
@@ -144,7 +146,7 @@ def next_training_sample(self):
         self.curr_img = (self.curr_img + 1) % self.num_training_samples()
 
         imidx = self.image_indices[curr_img]
-        mirror = self.cfg['mirror'] and self.mirrored[curr_img]
+        mirror = self.cfg["mirror"] and self.mirrored[curr_img]
 
         return imidx, mirror
 
@@ -181,13 +183,13 @@ def make_batch(self, data_item, scale, mirror):
         im_file = data_item.im_path
         logging.debug("image %s", im_file)
         logging.debug("mirror %r", mirror)
-        image = imread(os.path.join(self.cfg['project_path'], im_file), mode="RGB")
+        image = imread(os.path.join(self.cfg["project_path"], im_file), mode="RGB")
 
         if self.has_gt:
             joints = np.copy(data_item.joints)
 
-        if self.cfg['crop']:  # adapted cropping for DLC
-            if np.random.rand() < self.cfg['cropratio']:
+        if self.cfg["crop"]:  # adapted cropping for DLC
+            if np.random.rand() < self.cfg["cropratio"]:
                 j = np.random.randint(np.shape(joints)[1])  # pick a random joint
                 joints, image = CropImage(
                     joints, image, joints[0, j, 1], joints[0, j, 2], self.cfg
                 )
@@ -211,7 +213,7 @@ def make_batch(self, data_item, scale, mirror):
         batch = {Batch.inputs: img}
 
         if self.has_gt:
-            stride = self.cfg['stride']
+            stride = self.cfg["stride"]
 
             if mirror:
                 joints = [
@@ -251,9 +253,9 @@ def make_batch(self, data_item, scale, mirror):
         return batch
 
     def compute_target_part_scoremap(self, joint_id, coords, data_item, size, scale):
-        dist_thresh = self.cfg['pos_dist_thresh'] * scale
+        dist_thresh = self.cfg["pos_dist_thresh"] * scale
         dist_thresh_sq = dist_thresh ** 2
-        num_joints = self.cfg['num_joints']
+        num_joints = self.cfg["num_joints"]
 
         scmap = np.zeros(cat([size, arr([num_joints])]))
         locref_size = cat([size, arr([num_joints * 2])])
@@ -299,7 +301,7 @@ def compute_target_part_scoremap(self, joint_id, coords, data_item, size, scale):
         return scmap, weights, locref_map, locref_mask
 
     def compute_scmap_weights(self, scmap_shape, joint_id, data_item):
-        if self.cfg['weigh_only_present_joints']:
+        if self.cfg["weigh_only_present_joints"]:
             weights = np.zeros(scmap_shape)
             for person_joint_id in joint_id:
                 for j_id in person_joint_id:
diff --git a/deeplabcut/pose_estimation_tensorflow/dataset/pose_dataset_tensorpack.py b/deeplabcut/pose_estimation_tensorflow/dataset/pose_dataset_tensorpack.py
index 4100d6be5..0b6d3566e 100644
--- a/deeplabcut/pose_estimation_tensorflow/dataset/pose_dataset_tensorpack.py
+++ b/deeplabcut/pose_estimation_tensorflow/dataset/pose_dataset_tensorpack.py
@@ -99,7 +99,7 @@ def __init__(self, cfg, shuffle=True, dir=None):
 
     def load_dataset(self):
         cfg = self.cfg
-        file_name = os.path.join(self.cfg['project_path'], cfg['dataset'])
+        file_name = os.path.join(self.cfg["project_path"], cfg["dataset"])
         # Load Matlab file dataset annotation
         mlab = sio.loadmat(file_name)
         self.raw_data = mlab
@@ -125,7 +125,7 @@ def load_dataset(self):
             joint_id = joints[:, 0]
             # make sure joint ids are 0-indexed
             if joint_id.size != 0:
-                assert (joint_id < cfg['num_joints']).any()
+                assert (joint_id < cfg["num_joints"]).any()
             joints[:, 0] = joint_id
             coords = [joint[1:] for joint in joints]
             coords = arr(coords)
@@ -276,7 +276,7 @@ def __init__(self, cfg):
         ]
         self.has_gt = True
 
-        self.set_shuffle(cfg['shuffle'])
+        self.set_shuffle(cfg["shuffle"])
         p = Pose(cfg=self.cfg, shuffle=self.shuffle)
         self.data = p.load_dataset()
         self.num_images = len(self.data)
@@ -335,9 +335,9 @@ def compute_target_part_scoremap(self, components):
         img_size = components[4]
         scale = components[5]
 
-        stride = self.cfg['stride']
-        dist_thresh = self.cfg['pos_dist_thresh'] * scale
-        num_joints = self.cfg['num_joints']
+        stride = self.cfg["stride"]
+        dist_thresh = self.cfg["pos_dist_thresh"] * scale
+        num_joints = self.cfg["num_joints"]
         half_stride = stride / 2
         size = np.ceil(arr(img_size) / (stride * 2)).astype(int) * 2
         scmap = np.zeros(np.append(size, num_joints))
@@ -345,7 +345,7 @@ def compute_target_part_scoremap(self, components):
         locref_mask = np.zeros(locref_size)
         locref_map = np.zeros(locref_size)
 
-        locref_scale = 1.0 / self.cfg['locref_stdev']
+        locref_scale = 1.0 / self.cfg["locref_stdev"]
         dist_thresh_sq = dist_thresh ** 2
 
         width = size[1]
@@ -393,12 +393,12 @@ def set_test_mode(self, test_mode):
     def set_shuffle(self, shuffle):
         self.shuffle = shuffle
         if not shuffle:
-            assert not self.cfg['mirror']
+            assert not self.cfg["mirror"]
             self.image_indices = np.arange(self.num_images)
 
     def shuffle_images(self):
         num_images = self.num_images
-        if self.cfg['mirror']:
+        if self.cfg["mirror"]:
             image_indices = np.random.permutation(num_images * 2)
             self.mirrored = image_indices >= num_images
             image_indices[self.mirrored] = image_indices[self.mirrored] - num_images
@@ -408,9 +408,9 @@ def shuffle_images(self):
 
     def get_scale(self):
         cfg = self.cfg
-        scale = cfg['global_scale']
+        scale = cfg["global_scale"]
         if hasattr(cfg, "scale_jitter_lo") and hasattr(cfg, "scale_jitter_up"):
-            scale_jitter = rand.uniform(cfg['scale_jitter_lo'], cfg['scale_jitter_up'])
+            scale_jitter = rand.uniform(cfg["scale_jitter_lo"], cfg["scale_jitter_up"])
             scale *= scale_jitter
         return scale
 
@@ -423,11 +423,11 @@ def is_valid_size(self, image_size, scale):
         input_width = image_size[2] * scale
         input_height = image_size[1] * scale
         if (
-            input_height < self.cfg['min_input_size']
-            or input_width < self.cfg['min_input_size']
+            input_height < self.cfg["min_input_size"]
+            or input_width < self.cfg["min_input_size"]
         ):
             return False
-        if input_height * input_width > self.cfg['max_input_size'] ** 2:
+        if input_height * input_width > self.cfg["max_input_size"] ** 2:
             return False
 
         return True
@@ -467,7 +467,7 @@ def make_batch(self, components):
 
     def compute_scmap_weights(self, scmap_shape, joint_id):
         cfg = self.cfg
-        if cfg['weigh_only_present_joints']:
+        if cfg["weigh_only_present_joints"]:
             weights = np.zeros(scmap_shape)
             for person_joint_id in joint_id:
                 for j_id in person_joint_id:
diff --git a/deeplabcut/pose_estimation_tensorflow/dataset/pose_multianimal_imgaug.py b/deeplabcut/pose_estimation_tensorflow/dataset/pose_multianimal_imgaug.py
index 6cb6b3ca8..6cd28591d 100644
--- a/deeplabcut/pose_estimation_tensorflow/dataset/pose_multianimal_imgaug.py
+++ b/deeplabcut/pose_estimation_tensorflow/dataset/pose_multianimal_imgaug.py
@@ -33,13 +33,13 @@ def __init__(self, cfg):
         self.cfg = cfg
         self.data = self.load_dataset()
         self.num_images = len(self.data)
-        self.batch_size = cfg['batch_size']
+        self.batch_size = cfg["batch_size"]
         print("Batch Size is %d" % self.batch_size)
 
     def load_dataset(self):
         cfg = self.cfg
-        file_name = os.path.join(self.cfg['project_path'], cfg['dataset'])
-        with open(os.path.join(self.cfg['project_path'], file_name), "rb") as f:
+        file_name = os.path.join(self.cfg["project_path"], cfg["dataset"])
+        with open(os.path.join(self.cfg["project_path"], file_name), "rb") as f:
             # Pickle the 'data' dictionary using the highest protocol available.
             pickledata = pickle.load(f)
@@ -127,7 +127,8 @@ def build_augmentation_pipeline(self, height=None, width=None, apply_prob=0.5):
         if height is not None and width is not None:
             pipeline.add(
                 iaa.Sometimes(
-                    cfg['cropratio'], iaa.CropAndPad(percent=(-0.3, 0.1), keep_size=False)
+                    cfg["cropratio"],
+                    iaa.CropAndPad(percent=(-0.3, 0.1), keep_size=False),
                 )
             )
             pipeline.add(iaa.Resize({"height": height, "width": width}))
@@ -150,7 +151,7 @@ def get_batch(self):
             if self.is_valid_size(target_size):
                 break
 
-        stride = self.cfg['stride']
+        stride = self.cfg["stride"]
         for i in range(self.batch_size):
             data_item = self.data[img_idx[i]]
@@ -158,7 +159,7 @@ def get_batch(self):
             im_file = data_item.im_path
             logging.debug("image %s", im_file)
-            image = imread(os.path.join(self.cfg['project_path'], im_file), mode="RGB")
+            image = imread(os.path.join(self.cfg["project_path"], im_file), mode="RGB")
             if self.has_gt:
                 Joints = data_item.joints
                 joint_id = [
@@ -262,7 +263,9 @@ def next_batch(self, plotting=False):
                     )
                     im = kps.draw_on_image(batch_images[i])
                     # imageio.imwrite(data_items[i].im_path.split('/')[-1],im)
-                    imageio.imwrite(os.path.join(self.cfg['project_path'], str(i) + ".png"), im)
+                    imageio.imwrite(
+                        os.path.join(self.cfg["project_path"], str(i) + ".png"), im
+                    )
 
             image_shape = arr(batch_images).shape[1:3]
             batch = {Batch.inputs: arr(batch_images).astype(np.float64)}
@@ -284,15 +287,15 @@ def set_test_mode(self, test_mode):
 
     def num_training_samples(self):
         num = self.num_images
-        if self.cfg['mirror']:
+        if self.cfg["mirror"]:
             num *= 2
         return num
 
     def get_scale(self):
         cfg = self.cfg
-        scale = cfg['global_scale']
+        scale = cfg["global_scale"]
         if hasattr(cfg, "scale_jitter_lo") and hasattr(cfg, "scale_jitter_up"):
-            scale_jitter = rand.uniform(cfg['scale_jitter_lo'], cfg['scale_jitter_up'])
+            scale_jitter = rand.uniform(cfg["scale_jitter_lo"], cfg["scale_jitter_up"])
             scale *= scale_jitter
         return scale
 
@@ -303,14 +306,14 @@ def is_valid_size(self, target_size):
         if im_height < min_input_size or im_width < min_input_size:
             return False
         if hasattr(self.cfg, "max_input_size"):
-            max_input_size = self.cfg['max_input_size']
+            max_input_size = self.cfg["max_input_size"]
             if im_width * im_height > max_input_size * max_input_size:
                 return False
         return True
 
     def compute_scmap_weights(self, scmap_shape, joint_id, data_item):
         cfg = self.cfg
-        if cfg['weigh_only_present_joints']:
+        if cfg["weigh_only_present_joints"]:
             weights = np.zeros(scmap_shape)
             for k, j_id in enumerate(
                 np.concatenate(joint_id)
@@ -323,23 +326,23 @@ def compute_scmap_weights(self, scmap_shape, joint_id, data_item):
     def compute_target_part_scoremap_numpy(
         self, joint_id, coords, data_item, size, scale
     ):
-        stride = self.cfg['stride']
-        dist_thresh = float(self.cfg['pos_dist_thresh'] * scale)
+        stride = self.cfg["stride"]
+        dist_thresh = float(self.cfg["pos_dist_thresh"] * scale)
         num_idchannel = self.cfg.get("num_idchannel", 0)
-        num_joints = self.cfg['num_joints']
+        num_joints = self.cfg["num_joints"]
         half_stride = stride / 2
         scmap = np.zeros(cat([size, arr([num_joints + num_idchannel])]))
         locref_size = cat([size, arr([num_joints * 2])])
         locref_map = np.zeros(locref_size)
 
-        locref_scale = 1.0 / self.cfg['locref_stdev']
+        locref_scale = 1.0 / self.cfg["locref_stdev"]
         dist_thresh_sq = dist_thresh ** 2
 
-        partaffinityfield_shape = cat([size, arr([self.cfg['num_limbs'] * 2])])
+        partaffinityfield_shape = cat([size, arr([self.cfg["num_limbs"] * 2])])
         partaffinityfield_map = np.zeros(partaffinityfield_shape)
-        if self.cfg['weigh_only_present_joints']:
+        if self.cfg["weigh_only_present_joints"]:
             partaffinityfield_mask = np.zeros(partaffinityfield_shape)
             locref_mask = np.zeros(locref_size)
         else:
@@ -373,7 +376,7 @@ def compute_target_part_scoremap_numpy(
                     mask3 = (y >= min_y) & (y <= max_y)
                     mask = mask1 & mask2 & mask3
                     scmap[mask, j_id] = 1
-                    if self.cfg['weigh_only_present_joints']:
+                    if self.cfg["weigh_only_present_joints"]:
                         locref_mask[mask, j_id * 2 + 0] = 1.0
                         locref_mask[mask, j_id * 2 + 1] = 1.0
                     locref_map[mask, j_id * 2 + 0] = (dx * locref_scale)[mask]
@@ -420,8 +423,8 @@ def compute_target_part_scoremap_numpy(
             joint_ids = joint_id[person_id].copy()
             if len(joint_ids) > 1:  # otherwise there cannot be a joint!
                 # CONSIDER SMARTER SEARCHES here... (i.e. calculate the bpts beforehand?)
-                for l in range(self.cfg['num_limbs']):
-                    bp1, bp2 = self.cfg['partaffinityfield_graph'][l]
+                for l in range(self.cfg["num_limbs"]):
+                    bp1, bp2 = self.cfg["partaffinityfield_graph"][l]
                     I1 = np.where(np.array(joint_ids) == bp1)[0]
                     I2 = np.where(np.array(joint_ids) == bp2)[0]
                     if (len(I1) > 0) * (len(I2) > 0):
@@ -460,7 +463,7 @@ def compute_target_part_scoremap_numpy(
                                     - d2mid
                                 )
                                 * 1.0
-                                / self.cfg['pafwidth']
+                                / self.cfg["pafwidth"]
                                 * scale
                             )
@@ -482,7 +485,7 @@ def compute_target_part_scoremap_numpy(
                             # mask3 = ((x >= 0) & (x <= width-1))
                             # mask4 = ((y >= 0) & (y <= height-1))
                             mask = mask1 & mask2  # &mask3 &mask4
-                            if self.cfg['weigh_only_present_joints']:
+                            if self.cfg["weigh_only_present_joints"]:
                                 partaffinityfield_mask[mask, l * 2 + 0] = 1.0
                                 partaffinityfield_mask[mask, l * 2 + 1] = 1.0
@@ -553,23 +556,23 @@ def compute_target_part_scoremap_numpy(
 
     def gaussian_scmap(self, joint_id, coords, data_item, size, scale):
         # WIP!
-        stride = self.cfg['stride']
-        dist_thresh = float(self.cfg['pos_dist_thresh'] * scale)
+        stride = self.cfg["stride"]
+        dist_thresh = float(self.cfg["pos_dist_thresh"] * scale)
         num_idchannel = self.cfg.get("num_idchannel", 0)
-        num_joints = self.cfg['num_joints']
+        num_joints = self.cfg["num_joints"]
         half_stride = stride / 2
         scmap = np.zeros(cat([size, arr([num_joints])]))
         locref_size = cat([size, arr([num_joints * 2])])
         locref_mask = np.zeros(locref_size)
         locref_map = np.zeros(locref_size)
-        locref_scale = 1.0 / self.cfg['locref_stdev']
+        locref_scale = 1.0 / self.cfg["locref_stdev"]
         dist_thresh_sq = dist_thresh ** 2
-        partaffinityfield_shape = cat([size, arr([self.cfg['num_limbs'] * 2])])
+        partaffinityfield_shape = cat([size, arr([self.cfg["num_limbs"] * 2])])
         partaffinityfield_map = np.zeros(partaffinityfield_shape)
-        if self.cfg['weigh_only_present_joints']:
+        if self.cfg["weigh_only_present_joints"]:
             partaffinityfield_mask = np.zeros(partaffinityfield_shape)
             locref_mask = np.zeros(locref_size)
         else:
@@ -614,8 +617,8 @@ def gaussian_scmap(self, joint_id, coords, data_item, size, scale):
             joint_ids = joint_id[person_id].copy()
             if len(joint_ids) > 1:  # otherwise there cannot be a joint!
                 # CONSIDER SMARTER SEARCHES here... (i.e. calculate the bpts beforehand?)
-                for l in range(self.cfg['num_limbs']):
-                    bp1, bp2 = self.cfg['partaffinityfield_graph'][l]
+                for l in range(self.cfg["num_limbs"]):
+                    bp1, bp2 = self.cfg["partaffinityfield_graph"][l]
                     I1 = np.where(np.array(joint_ids) == bp1)[0]
                     I2 = np.where(np.array(joint_ids) == bp2)[0]
                     if (len(I1) > 0) * (len(I2) > 0):
@@ -654,7 +657,7 @@ def gaussian_scmap(self, joint_id, coords, data_item, size, scale):
                                     - d2mid
                                 )
                                 * 1.0
-                                / self.cfg['pafwidth']
+                                / self.cfg["pafwidth"]
                                 * scale
                             )
                             mask1 = (distance_along >= d1lowerboundary) & (
@@ -664,7 +667,7 @@ def gaussian_scmap(self, joint_id, coords, data_item, size, scale):
                             # mask3 = ((x >= 0) & (x <= width-1))
                             # mask4 = ((y >= 0) & (y <= height-1))
                             mask = mask1 & mask2  # &mask3 &mask4
-                            if self.cfg['weigh_only_present_joints']:
+                            if self.cfg["weigh_only_present_joints"]:
                                 partaffinityfield_mask[mask, l * 2 + 0] = 1.0
                                 partaffinityfield_mask[mask, l * 2 + 1] = 1.0
diff --git a/deeplabcut/pose_estimation_tensorflow/default_config.py b/deeplabcut/pose_estimation_tensorflow/default_config.py
index b8e973d54..129e0b2c5 100644
--- a/deeplabcut/pose_estimation_tensorflow/default_config.py
+++ b/deeplabcut/pose_estimation_tensorflow/default_config.py
@@ -13,39 +13,39 @@
 cfg = dict()
-cfg['stride'] = 8.0
-cfg['weigh_part_predictions'] = False
-cfg['weigh_negatives'] = False
-cfg['fg_fraction'] = 0.25
+cfg["stride"] = 8.0
+cfg["weigh_part_predictions"] = False
+cfg["weigh_negatives"] = False
+cfg["fg_fraction"] = 0.25
 # imagenet mean for resnet pretraining:
-cfg['mean_pixel'] = [123.68, 116.779, 103.939]
-cfg['shuffle'] = True
-cfg['snapshot_prefix'] = "./snapshot"
-cfg['log_dir'] = "log"
-cfg['global_scale'] = 1.0
-cfg['location_refinement'] = False
-cfg['locref_stdev'] = 7.2801
-cfg['locref_loss_weight'] = 1.0
-cfg['locref_huber_loss'] = True
-cfg['optimizer'] = "sgd"
-cfg['intermediate_supervision'] = False
-cfg['intermediate_supervision_layer'] = 12
-cfg['regularize'] = False
-cfg['weight_decay'] = 0.0001
-cfg['crop_pad'] = 0
-cfg['scoremap_dir'] = "test"
-
-cfg['batch_size'] = 1
+cfg["mean_pixel"] = [123.68, 116.779, 103.939]
+cfg["shuffle"] = True
+cfg["snapshot_prefix"] = "./snapshot"
+cfg["log_dir"] = "log"
+cfg["global_scale"] = 1.0
+cfg["location_refinement"] = False
+cfg["locref_stdev"] = 7.2801
+cfg["locref_loss_weight"] = 1.0
+cfg["locref_huber_loss"] = True
+cfg["optimizer"] = "sgd"
+cfg["intermediate_supervision"] = False
+cfg["intermediate_supervision_layer"] = 12
+cfg["regularize"] = False
+cfg["weight_decay"] = 0.0001
+cfg["crop_pad"] = 0
+cfg["scoremap_dir"] = "test"
+
+cfg["batch_size"] = 1
 # types of datasets, see factory: deeplabcut/pose_estimation_tensorflow/dataset/factory.py
-cfg['dataset_type'] = "imgaug"  # >> imagaug default as of 2.2
+cfg["dataset_type"] = "imgaug"  # >> imgaug default as of 2.2
 # you can also set this to deterministic, see https://github.com/AlexEMG/DeepLabCut/pull/324
-cfg['deterministic'] = False
-cfg['mirror'] = False
+cfg["deterministic"] = False
+cfg["mirror"] = False
 # for DLC 2.2. (here all set False to not use PAFs/pairwise fields)
-cfg['pairwise_huber_loss'] = True
-cfg['weigh_only_present_joints'] = False
-cfg['partaffinityfield_predict'] = False
-cfg['pairwise_predict'] = False
+cfg["pairwise_huber_loss"] = True
+cfg["weigh_only_present_joints"] = False
+cfg["partaffinityfield_predict"] = False
+cfg["pairwise_predict"] = False
diff --git a/deeplabcut/pose_estimation_tensorflow/evaluate.py b/deeplabcut/pose_estimation_tensorflow/evaluate.py
index d3b8fbb87..8560a1b9f 100644
--- a/deeplabcut/pose_estimation_tensorflow/evaluate.py
+++ b/deeplabcut/pose_estimation_tensorflow/evaluate.py
@@ -114,9 +114,9 @@ def calculatepafdistancebounds(
             dlc_cfg = load_config(str(path_test_config))
 
             # get the graph!
-            partaffinityfield_graph = dlc_cfg['partaffinityfield_graph']
+            partaffinityfield_graph = dlc_cfg["partaffinityfield_graph"]
             jointnames = [
-                dlc_cfg['all_joints_names'][i] for i in range(len(dlc_cfg['all_joints']))
+                dlc_cfg["all_joints_names"][i] for i in range(len(dlc_cfg["all_joints"]))
             ]
             path_inferencebounds_config = (
                 Path(modelfolder) / "test" / "inferencebounds.yaml"
             )
@@ -134,10 +134,13 @@ def calculatepafdistancebounds(
                         j2,
                         "y",
                     ) in Data.keys():
-                        distances = np.sqrt(
-                            (Data[ind, j1, "x"] - Data[ind2, j2, "x"]) ** 2
-                            + (Data[ind, j1, "y"] - Data[ind2, j2, "y"]) ** 2
-                        ) / dlc_cfg["stride"]
+                        distances = (
+                            np.sqrt(
+                                (Data[ind, j1, "x"] - Data[ind2, j2, "x"]) ** 2
+                                + (Data[ind, j1, "y"] - Data[ind2, j2, "y"]) ** 2
+                            )
+                            / dlc_cfg["stride"]
+                        )
                     else:
                         distances = None
 
@@ -312,7 +315,7 @@ def return_evaluate_network_data(
                 cfg["project_path"],
                 str(trainingsetfolder),
                 "CollectedData_" + cfg["scorer"] + ".h5",
-            ),
+            )
         )
         * scale
     )
@@ -323,7 +326,7 @@ def return_evaluate_network_data(
             cfg["project_path"],
             str(trainingsetfolder),
            "CollectedData_" + cfg["scorer"] + ".h5",
-        ),
+        )
     )
 
     evaluationfolder = os.path.join(
@@ -614,7 +617,7 @@ def evaluate_network(
             cfg["project_path"],
             str(trainingsetfolder),
             "CollectedData_" + cfg["scorer"] + ".h5",
-        ),
+        )
     )
 
     # Get list of body parts to evaluate network for
@@ -719,7 +722,7 @@ def evaluate_network(
                         cfg["project_path"],
                         str(trainingsetfolder),
                         "CollectedData_" + cfg["scorer"] + ".h5",
-                    ),
+                    )
                 )
                 * scale
            )
@@ -789,7 +792,7 @@ def evaluate_network(
                         # Extract maximum scoring location from the heatmap, assume 1 person
                         pose = predict.argmax_pose_predict(
-                            scmap, locref, dlc_cfg['stride']
+                            scmap, locref, dlc_cfg["stride"]
                         )
                         PredicteData[
                             imageindex, :
                         ]
@@ -972,15 +975,18 @@ def make_results_file(final_result, evaluationfolder, DLCscorer):
     df.to_csv(output_path)
 
     ## Also storing one "large" table with results:
-    #note: evaluationfolder.parents[0] to get common folder above all shuffle evaluations.
+    # note: evaluationfolder.parents[0] to get common folder above all shuffle evaluations.
     df = pd.DataFrame(final_result, columns=col_names)
-    output_path = os.path.join(str(Path(evaluationfolder).parents[0]), "CombinedEvaluation-results.csv")
+    output_path = os.path.join(
+        str(Path(evaluationfolder).parents[0]), "CombinedEvaluation-results.csv"
+    )
     if os.path.exists(output_path):
         temp = pd.read_csv(output_path, index_col=0)
         df = pd.concat((df, temp)).reset_index(drop=True)
     df.to_csv(output_path)
 
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("config")
diff --git a/deeplabcut/pose_estimation_tensorflow/evaluate_multianimal.py b/deeplabcut/pose_estimation_tensorflow/evaluate_multianimal.py
index 71ce49ce4..78e5fd136 100755
--- a/deeplabcut/pose_estimation_tensorflow/evaluate_multianimal.py
+++ b/deeplabcut/pose_estimation_tensorflow/evaluate_multianimal.py
@@ -68,12 +68,12 @@ def _find_closest_neighbors(xy_true, xy_pred, k=5):
 
 def _calc_prediction_error(data):
-    _ = data.pop('metadata', None)
+    _ = data.pop("metadata", None)
     dists = []
     for n, dict_ in enumerate(tqdm(data.values())):
-        gt = np.concatenate(dict_['groundtruth'][1])
-        xy = np.concatenate(dict_['prediction']['coordinates'][0])
-        p = np.concatenate(dict_['prediction']['confidence'])
+        gt = np.concatenate(dict_["groundtruth"][1])
+        xy = np.concatenate(dict_["prediction"]["coordinates"][0])
+        p = np.concatenate(dict_["prediction"]["confidence"])
         neighbors = _find_closest_neighbors(gt, xy)
         found = neighbors != -1
         gt2 = gt[found]
@@ -83,7 +83,7 @@ def _calc_prediction_error(data):
 
 def _calc_train_test_error(data, metadata, pcutoff=0.3):
-    train_inds = set(metadata['data']['trainIndices'])
+    train_inds = set(metadata["data"]["trainIndices"])
     dists = _calc_prediction_error(data)
     dists_train, dists_test = [], []
     for n, dist in enumerate(dists):
@@ -145,7 +145,7 @@ def evaluate_multianimal_full(
             cfg["project_path"],
             str(trainingsetfolder),
             "CollectedData_" + cfg["scorer"] + ".h5",
-        ),
+        )
     )
     # Handle data previously annotated on a different platform
     sep = "/" if "/" in Data.index[0] else "\\"
@@ -315,14 +315,14 @@ def evaluate_multianimal_full(
                         frame = img_as_ubyte(image)
 
                         GT = Data.iloc[imageindex]
-                        df = GT.unstack("coords").reindex(joints, level='bodyparts')
+                        df = GT.unstack("coords").reindex(joints, level="bodyparts")
 
                         # Evaluate PAF edge lengths to calibrate `distnorm`
                         temp_xy = GT.unstack("bodyparts")[joints]
-                        xy = temp_xy.values.reshape((-1, 2, temp_xy.shape[1])).swapaxes(
-                            1, 2
-                        )
-                        if dlc_cfg['partaffinityfield_predict']:
+                        xy = temp_xy.values.reshape(
+                            (-1, 2, temp_xy.shape[1])
+                        ).swapaxes(1, 2)
+                        if dlc_cfg["partaffinityfield_predict"]:
                             edges = xy[:, dlc_cfg["partaffinityfield_graph"]]
                             lengths = np.sum(
                                 (edges[:, :, 0] - edges[:, :, 1]) ** 2, axis=2
                             )
@@ -354,8 +354,8 @@ def evaluate_multianimal_full(
                             inputs,
                             outputs,
                             outall=False,
-                            nms_radius=dlc_cfg['nmsradius'],
-                            det_min_score=dlc_cfg['minconfidence'],
+                            nms_radius=dlc_cfg["nmsradius"],
+                            det_min_score=dlc_cfg["minconfidence"],
                             c_engine=c_engine,
                         )
                         PredicteData[imagename]["prediction"] = pred
@@ -385,9 +385,9 @@ def evaluate_multianimal_full(
                                 conf[sl] = probs_pred[n_joint][cols].squeeze()
 
                         if plotting:
-                            gt = (temp_xy.values
-                                  .reshape((-1, 2, temp_xy.shape[1]))
-                                  .T.swapaxes(1, 2))
+                            gt = temp_xy.values.reshape(
+                                (-1, 2, temp_xy.shape[1])
+                            ).T.swapaxes(1, 2)
                             fig = visualization.make_multianimal_labeled_image(
                                 frame,
                                 gt,
@@ -415,7 +415,7 @@ def evaluate_multianimal_full(
                        [df_dist, df_conf],
                        keys=["rmse", "conf"],
                        names=["metrics"],
-                        axis=1
+                        axis=1,
                    )
                    df_joint = df_joint.reorder_levels(
                        list(np.roll(df_joint.columns.names, -1)), axis=1
                    )
@@ -424,14 +424,19 @@ def evaluate_multianimal_full(
                    df_joint.sort_index(
                        axis=1,
                        level=["individuals", "bodyparts"],
                        ascending=[True, True],
-                        inplace=True
+                        inplace=True,
+                    )
+                    write_path = os.path.join(
+                        evaluationfolder, f"dist_{trainingsiterations}.csv"
                    )
-                    write_path = os.path.join(evaluationfolder, f"dist_{trainingsiterations}.csv")
                    df_joint.to_csv(write_path)
 
                    # Calculate overall prediction error
                    error = df_joint.xs("rmse", level="metrics", axis=1)
-                    mask = df_joint.xs("conf", level="metrics", axis=1) >= cfg["pcutoff"]
+                    mask = (
+                        df_joint.xs("conf", level="metrics", axis=1)
+                        >= cfg["pcutoff"]
+                    )
                    error_masked = error[mask]
                    error_train = np.nanmean(error.iloc[trainIndices])
                    error_train_cut = np.nanmean(error_masked.iloc[trainIndices])
@@ -455,26 +460,44 @@ def evaluate_multianimal_full(
                    sd.to_csv(write_path.replace("dist.csv", "sd.csv"))
 
                    if show_errors:
-                        string = "Results for {} training iterations: {}, shuffle {}:\n" \
-                                 "Train error: {} pixels. Test error: {} pixels.\n" \
-                                 "With pcutoff of {}:\n" \
-                                 "Train error: {} pixels. Test error: {} pixels."
+                        string = (
+                            "Results for {} training iterations: {}, shuffle {}:\n"
+                            "Train error: {} pixels. Test error: {} pixels.\n"
+                            "With pcutoff of {}:\n"
+                            "Train error: {} pixels. Test error: {} pixels."
+                        )
                        print(string.format(*results))
                        print("##########################################")
-                        print("Average Euclidean distance to GT per individual (in pixels)")
-                        print(error_masked.groupby('individuals', axis=1).mean().mean().to_string())
-                        print("Average Euclidean distance to GT per bodypart (in pixels)")
-                        print(error_masked.groupby('bodyparts', axis=1).mean().mean().to_string())
+                        print(
+                            "Average Euclidean distance to GT per individual (in pixels)"
+                        )
+                        print(
+                            error_masked.groupby("individuals", axis=1)
+                            .mean()
+                            .mean()
+                            .to_string()
+                        )
+                        print(
+                            "Average Euclidean distance to GT per bodypart (in pixels)"
+                        )
+                        print(
+                            error_masked.groupby("bodyparts", axis=1)
+                            .mean()
+                            .mean()
+                            .to_string()
+                        )
 
                    PredicteData["metadata"] = {
-                        "nms radius": dlc_cfg['nmsradius'],
-                        "minimal confidence": dlc_cfg['minconfidence'],
-                        "PAFgraph": dlc_cfg['partaffinityfield_graph'],
-                        "all_joints": [[i] for i in range(len(dlc_cfg['all_joints']))],
+                        "nms radius": dlc_cfg["nmsradius"],
+                        "minimal confidence": dlc_cfg["minconfidence"],
+                        "PAFgraph": dlc_cfg["partaffinityfield_graph"],
+                        "all_joints": [
+                            [i] for i in range(len(dlc_cfg["all_joints"]))
+                        ],
                        "all_joints_names": [
-                            dlc_cfg['all_joints_names'][i]
-                            for i in range(len(dlc_cfg['all_joints']))
+                            dlc_cfg["all_joints_names"][i]
+                            for i in range(len(dlc_cfg["all_joints"]))
                        ],
                        "stride": dlc_cfg.get("stride", 8),
                    }
@@ -604,7 +627,7 @@ def evaluate_multianimal_crossvalidate(
            cfg["project_path"],
            str(trainingsetfolder),
            "CollectedData_" + cfg["scorer"] + ".h5",
-        ),
+        )
    )
    comparisonbodyparts = auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(
        cfg, "all"
    )
@@ -698,7 +721,7 @@ def evaluate_multianimal_crossvalidate(
    stats_file = os.path.join(evaluationfolder, "sd.csv")
    if os.path.isfile(stats_file):
        stats = pd.read_csv(stats_file, header=None, index_col=0)
-        inferencecfg['distnormalization'] = np.round(
+        inferencecfg["distnormalization"] = np.round(
            stats.loc["distnorm", 1], 2
        ).item()
        stats = stats.drop("distnorm")
        )  # Taken as 2*SD error between predictions and ground truth
    else:
        dcorr = 10
-    inferencecfg['topktoretain'] = np.inf
+    inferencecfg["topktoretain"] = np.inf
    inferencecfg, opt = crossvalutils.bayesian_search(
        config,
        inferencecfg,
@@ -728,7 +751,7 @@ def evaluate_multianimal_crossvalidate(
    )
 
    # update number of individuals to retain.
-    inferencecfg['topktoretain'] = len(cfg["individuals"]) + 1 * (
+    inferencecfg["topktoretain"] = len(cfg["individuals"]) + 1 * (
        len(cfg["uniquebodyparts"]) > 0
    )
diff --git a/deeplabcut/pose_estimation_tensorflow/lib/inferenceutils.py b/deeplabcut/pose_estimation_tensorflow/lib/inferenceutils.py
index 6183dd08d..ffc1d01aa 100644
--- a/deeplabcut/pose_estimation_tensorflow/lib/inferenceutils.py
+++ b/deeplabcut/pose_estimation_tensorflow/lib/inferenceutils.py
@@ -28,9 +28,11 @@ def individual2boundingbox(cfg, animals, X1=0):
 
     for id, individual in enumerate(animals):
         boundingboxes[id, 0:4:2] = minmax(
-            individual[::3] + X1, slack=cfg['boundingboxslack']
+            individual[::3] + X1, slack=cfg["boundingboxslack"]
+        )
+        boundingboxes[id, 1:4:2] = minmax(
+            individual[1::3], slack=cfg["boundingboxslack"]
         )
-        boundingboxes[id, 1:4:2] = minmax(individual[1::3], slack=cfg['boundingboxslack'])
         boundingboxes[id, 4] = np.nanmean(
             individual[2::3]
         )  # average likelihood of all bpts
@@ -139,7 +141,7 @@ def extractstrongconnections(
             for j in range(n_b):
                 if evaluation:
                     score_with_dist_prior = abs(
-                        dataimage["prediction"]["costs"][PAF[edge]][cfg['method']][
+                        dataimage["prediction"]["costs"][PAF[edge]][cfg["method"]][
                             i, j
                         ]
                     )
@@ -148,7 +150,7 @@ def extractstrongconnections(
                 else:
                     score_with_dist_prior = abs(
-                        dataimage["costs"][PAF[edge]][cfg['method']][i, j]
+                        dataimage["costs"][PAF[edge]][cfg["method"]][i, j]
                     )
                     d = dataimage["costs"][PAF[edge]]["distance"][i, j]
 
@@ -157,9 +159,11 @@ def extractstrongconnections(
                 # filtering with global distance bounds
                 if lowerbound is None and upperbound is None:
                     if (
-                        score_with_dist_prior > cfg['pafthreshold']
-                        and cfg['distnormalizationLOWER'] <= d < cfg['distnormalization']
-                        and si * sj > cfg['detectionthresholdsquare']
+                        score_with_dist_prior > cfg["pafthreshold"]
+                        and cfg["distnormalizationLOWER"]
+                        <= d
+                        < cfg["distnormalization"]
+                        and si * sj > cfg["detectionthresholdsquare"]
                     ):
                         connection_candidate.append(
@@ -168,15 +172,15 @@ def extractstrongconnections(
                                 i,
                                 j,
                                 score_with_dist_prior,
                                 score_with_dist_prior
-                                + np.sqrt(si * sj) * cfg['addlikelihoods'],
+                                + np.sqrt(si * sj) * cfg["addlikelihoods"],
                             ]
                         )
                 else:  # filtering with edgewise distance bounds
                     if (
-                        score_with_dist_prior > cfg['pafthreshold']
+                        score_with_dist_prior > cfg["pafthreshold"]
                         and lowerbound[edge] <= d < upperbound[edge]
-                        and si * sj > cfg['detectionthresholdsquare']
+                        and si * sj > cfg["detectionthresholdsquare"]
                     ):
                         connection_candidate.append(
                             [
                                 i,
                                 j,
                                 score_with_dist_prior,
                                 score_with_dist_prior
-                                + np.sqrt(si * sj) * cfg['addlikelihoods'],
+                                + np.sqrt(si * sj) * cfg["addlikelihoods"],
                             ]
                         )
@@ -302,8 +306,8 @@ def linkjoints2individuals(
             len(subset)
         ):  # delete animal proposals with too few edges or too low average score
             if (
-                subset[i][-1] < cfg['minimalnumberofconnections']
-                or subset[i][-2] / subset[i][-1] < cfg['averagescore']
+                subset[i][-1] < cfg["minimalnumberofconnections"]
+                or subset[i][-2] / subset[i][-1] < cfg["averagescore"]
             ):
                 deleteIdx.append(i)
@@ -328,7 +332,7 @@ def assemble_individuals(
 
     # filter detections according to inferencecfg parameters
     all_detections = convertdetectiondict2listoflist(
-        data, BPTS, withid=inference_cfg['withid'], evaluation=evaluation
+        data, BPTS, withid=inference_cfg["withid"], evaluation=evaluation
     )
 
     # filter connections according to inferencecfg parameters
@@ -359,8 +363,8 @@ def assemble_individuals(
             print(subset)
         sortedindividuals = np.argsort(-subset[:, -2])  # sort by top score!
-        if len(sortedindividuals) > inference_cfg['topktoretain']:
-            sortedindividuals = sortedindividuals[: inference_cfg['topktoretain']]
+        if len(sortedindividuals) > inference_cfg["topktoretain"]:
+            sortedindividuals = sortedindividuals[: inference_cfg["topktoretain"]]
         animals = []
         for n in sortedindividuals:  # number of individuals
diff --git a/deeplabcut/pose_estimation_tensorflow/lib/trackingutils.py b/deeplabcut/pose_estimation_tensorflow/lib/trackingutils.py
index ce37b632c..cc3d6a280 100644
--- a/deeplabcut/pose_estimation_tensorflow/lib/trackingutils.py
+++ b/deeplabcut/pose_estimation_tensorflow/lib/trackingutils.py
@@ -42,7 +42,7 @@ class SkeletonTracker developed for DLC 2.2.
 from shapely.geometry import Polygon
 
-warnings.simplefilter('ignore', category=NumbaPerformanceWarning)
+warnings.simplefilter("ignore", category=NumbaPerformanceWarning)
 
 @jit
@@ -225,7 +225,9 @@ def calc_similarity_with(self, other_ellipse):
         max_dist = max(
             self.height, self.width, other_ellipse.height, other_ellipse.width
         )
-        dist = math.sqrt((self.x - other_ellipse.x) ** 2 + (self.y - other_ellipse.y) ** 2)
+        dist = math.sqrt(
+            (self.x - other_ellipse.x) ** 2 + (self.y - other_ellipse.y) ** 2
+        )
         cost1 = 1 - min(dist / max_dist, 1)
         cost2 = abs(math.cos(self.theta - other_ellipse.theta))
         return 0.8 * cost1 + 0.2 * cost2 * cost1
@@ -235,8 +237,10 @@ def contains_points(self, xy, tol=0.1):
         sa = math.sin(self.theta)
         x_demean = xy[:, 0] - self.x
         y_demean = xy[:, 1] - self.y
-        return (((ca * x_demean + sa * y_demean) ** 2 / (0.5 * self.width) ** 2)
-                + ((sa * x_demean - ca * y_demean) ** 2 / (0.5 * self.height) ** 2)) <= 1 + tol
+        return (
+            ((ca * x_demean + sa * y_demean) ** 2 / (0.5 * self.width) ** 2)
+            + ((sa * x_demean - ca * y_demean) ** 2 / (0.5 * self.height) ** 2)
+        ) <= 1 + tol
 
     def draw(self, show_axes=True, ax=None, **kwargs):
         import matplotlib.pyplot as plt
@@ -244,15 +248,21 @@ def draw(self, show_axes=True, ax=None, **kwargs):
         from matplotlib.transforms import Affine2D
 
         if ax is None:
-            ax = plt.subplot(111, aspect='equal')
-        el = patches.Ellipse(xy=(self.x, self.y), width=self.width, height=self.height,
-                             angle=np.rad2deg(self.theta), **kwargs)
+            ax = plt.subplot(111, aspect="equal")
+        el = patches.Ellipse(
+            xy=(self.x, self.y),
+            width=self.width,
+            height=self.height,
+            angle=np.rad2deg(self.theta),
+            **kwargs,
+        )
         ax.add_patch(el)
         if show_axes:
             major = Line2D([-self.width / 2, self.width / 2], [0, 0], lw=3, zorder=3)
             minor = Line2D([0, 0], [-self.height / 2, self.height / 2], lw=3, zorder=3)
-            trans = (Affine2D().rotate(self.theta).translate(self.x, self.y)
-                     + ax.transData)
+            trans = (
+                Affine2D().rotate(self.theta).translate(self.x, self.y) + ax.transData
+            )
             major.set_transform(trans)
             minor.set_transform(trans)
             ax.add_artist(major)
@@ -363,13 +373,13 @@ def calc_parameters(coeffs):
     f *= 0.5
 
     # Ellipse center coordinates
-    x0 = (c*d - b*f) / (b*b - a*c)
-    y0 = (a*f - b*d) / (b*b - a*c)
+    x0 = (c * d - b * f) / (b * b - a * c)
+    y0 = (a * f - b * d) / (b * b - a * c)
 
     # Semi-axes lengths
-    num = 2 * (a*f*f + c*d*d + g*b*b - 2*b*d*f - a*c*g)
-    den1 = (b*b - a*c) * (np.sqrt((a - c)**2 + 4*b*b) - (a + c))
-    den2 = (b*b - a*c) * (-np.sqrt((a - c)**2 + 4*b*b) - (a + c))
+    num = 2 * (a * f * f + c * d * d + g * b * b - 2 * b * d * f - a * c * g)
+    den1 = (b * b - a * c) * (np.sqrt((a - c) ** 2 + 4 * b * b) - (a + c))
+    den2 = (b * b - a * c) * (-np.sqrt((a - c) ** 2 + 4 * b * b) - (a + c))
     major = np.sqrt(num / den1)
     minor = np.sqrt(num / den2)
 
@@ -378,12 +388,12 @@ def calc_parameters(coeffs):
         if a < c:
             phi = 0
         else:
-            phi = np.pi/2
+            phi = np.pi / 2
     else:
         if a < c:
-            phi = np.arctan(2*b / (a-c)) / 2
+            phi = np.arctan(2 * b / (a - c)) / 2
         else:
-            phi = np.pi/2 + np.arctan(2*b / (a-c)) / 2
+            phi = np.pi / 2 + np.arctan(2 * b / (a - c)) / 2
 
     return [x0, y0, 2 * major, 2 * minor, phi]
 
@@ -394,7 +404,9 @@ class EllipseTracker:
     def __init__(self, params):
         self.kf = kinematic_kf(5, order=1, dim_z=5, order_by_dim=False)
         self.kf.R[2:, 2:] *= 10.0
-        self.kf.P[5:, 5:] *= 1000.0  # High uncertainty to the unobservable initial velocities
+        self.kf.P[
+            5:, 5:
+        ] *= 1000.0  # High uncertainty to the unobservable initial velocities
         self.kf.P *= 10.0
         self.kf.Q[5:, 5:] *= 0.01
         self.state = params
@@ -473,8 +485,12 @@ def track(self, poses, identities=None):
                 cost *= match
                 cost_matrix[i, j] = cost
         row_indices, col_indices = linear_sum_assignment(cost_matrix, maximize=True)
-        unmatched_detections = [i for i, _ in enumerate(ellipses) if i not in row_indices]
-        unmatched_trackers = [j for j, _ in enumerate(trackers) if j not in col_indices]
+        unmatched_detections = [
+            i for i, _ in enumerate(ellipses) if i not in row_indices
+        ]
+        unmatched_trackers = [
+            j for j, _ in enumerate(trackers) if j not in col_indices
+        ]
         matches = []
         for row, col in zip(row_indices, col_indices):
             val = cost_matrix[row, col]
@@ -518,7 +534,7 @@ def track(self, poses, identities=None):
         for trk in reversed(self.trackers):
             d = trk.state
             if (trk.time_since_update < 1) and (
-                trk.hit_streak >= self.min_hits or self.n_frames <= self.min_hits
+                trk.hit_streak >= self.min_hits or self.n_frames <= self.min_hits
             ):
                 ret.append(
                     np.concatenate((d, [trk.id, int(animalindex[i - 1])])).reshape(
@@ -880,37 +896,24 @@ def reconstruct_all_ellipses(data, sd):
 
 def _track_individuals(
-    individuals,
-    min_hits=1,
-    max_age=5,
-    similarity_threshold=0.6,
-    track_method='ellipse',
+    individuals, min_hits=1, max_age=5, similarity_threshold=0.6, track_method="ellipse"
 ):
-    if track_method not in ('box', 'skeleton', 'ellipse'):
-        raise ValueError(f'Unknown {track_method} tracker.')
-
-    if track_method == 'ellipse':
-        tracker = SORTEllipse(
-            max_age,
-            min_hits,
-            similarity_threshold
-        )
-    elif track_method == 'box':
+    if track_method not in ("box", "skeleton", "ellipse"):
+        raise ValueError(f"Unknown {track_method} tracker.")
+
+    if track_method == "ellipse":
+        tracker = SORTEllipse(max_age, min_hits, similarity_threshold)
+    elif track_method == "box":
         tracker = Sort(
             {
-                'max_age': max_age,
-                'min_hits': min_hits,
-                'iou_threshold': similarity_threshold
+                "max_age": max_age,
+                "min_hits": min_hits,
+                "iou_threshold": similarity_threshold,
             }
         )
     else:
         n_bodyparts = individuals[0][0].shape[0]
-        tracker = SORT(
-            n_bodyparts,
-            max_age,
-            min_hits,
-            similarity_threshold,
-        )
+        tracker = SORT(n_bodyparts, max_age, min_hits, similarity_threshold)
     tracklets = defaultdict(dict)
     all_hyps = dict()
diff --git a/deeplabcut/pose_estimation_tensorflow/nnet/efficientnet_builder.py b/deeplabcut/pose_estimation_tensorflow/nnet/efficientnet_builder.py
index 3180f46fe..01ebb19f2 100644
--- a/deeplabcut/pose_estimation_tensorflow/nnet/efficientnet_builder.py
+++ b/deeplabcut/pose_estimation_tensorflow/nnet/efficientnet_builder.py
@@ -31,94 +31,95 @@
 def efficientnet_params(model_name):
-  """Get efficientnet params based on model name."""
-  params_dict = {
-      # (width_coefficient, depth_coefficient, resolution,
dropout_rate) - 'efficientnet-b0': (1.0, 1.0, 224, 0.2), - 'efficientnet-b1': (1.0, 1.1, 240, 0.2), - 'efficientnet-b2': (1.1, 1.2, 260, 0.3), - 'efficientnet-b3': (1.2, 1.4, 300, 0.3), - 'efficientnet-b4': (1.4, 1.8, 380, 0.4), - 'efficientnet-b5': (1.6, 2.2, 456, 0.4), - 'efficientnet-b6': (1.8, 2.6, 528, 0.5), - 'efficientnet-b7': (2.0, 3.1, 600, 0.5), - } - return params_dict[model_name] + """Get efficientnet params based on model name.""" + params_dict = { + # (width_coefficient, depth_coefficient, resolution, dropout_rate) + "efficientnet-b0": (1.0, 1.0, 224, 0.2), + "efficientnet-b1": (1.0, 1.1, 240, 0.2), + "efficientnet-b2": (1.1, 1.2, 260, 0.3), + "efficientnet-b3": (1.2, 1.4, 300, 0.3), + "efficientnet-b4": (1.4, 1.8, 380, 0.4), + "efficientnet-b5": (1.6, 2.2, 456, 0.4), + "efficientnet-b6": (1.8, 2.6, 528, 0.5), + "efficientnet-b7": (2.0, 3.1, 600, 0.5), + } + return params_dict[model_name] class BlockDecoder(object): - """Block Decoder for readability.""" + """Block Decoder for readability.""" - def _decode_block_string(self, block_string): - """Gets a block through a string notation of arguments.""" - assert isinstance(block_string, str) - ops = block_string.split('_') - options = {} - for op in ops: - splits = re.split(r'(\d.*)', op) - if len(splits) >= 2: - key, value = splits[:2] - options[key] = value + def _decode_block_string(self, block_string): + """Gets a block through a string notation of arguments.""" + assert isinstance(block_string, str) + ops = block_string.split("_") + options = {} + for op in ops: + splits = re.split(r"(\d.*)", op) + if len(splits) >= 2: + key, value = splits[:2] + options[key] = value - if 's' not in options or len(options['s']) != 2: - raise ValueError('Strides options should be a pair of integers.') + if "s" not in options or len(options["s"]) != 2: + raise ValueError("Strides options should be a pair of integers.") - return efficientnet_model.BlockArgs( - kernel_size=int(options['k']), - num_repeat=int(options['r']), - input_filters=int(options['i']), - output_filters=int(options['o']), - expand_ratio=int(options['e']), - id_skip=('noskip' not in block_string), - se_ratio=float(options['se']) if 'se' in options else None, - strides=[int(options['s'][0]), int(options['s'][1])], - conv_type=int(options['c']) if 'c' in options else 0) + return efficientnet_model.BlockArgs( + kernel_size=int(options["k"]), + num_repeat=int(options["r"]), + input_filters=int(options["i"]), + output_filters=int(options["o"]), + expand_ratio=int(options["e"]), + id_skip=("noskip" not in block_string), + se_ratio=float(options["se"]) if "se" in options else None, + strides=[int(options["s"][0]), int(options["s"][1])], + conv_type=int(options["c"]) if "c" in options else 0, + ) - def _encode_block_string(self, block): - """Encodes a block to a string.""" - args = [ - 'r%d' % block.num_repeat, - 'k%d' % block.kernel_size, - 's%d%d' % (block.strides[0], block.strides[1]), - 'e%s' % block.expand_ratio, - 'i%d' % block.input_filters, - 'o%d' % block.output_filters, - 'c%d' % block.conv_type, - ] - if block.se_ratio > 0 and block.se_ratio <= 1: - args.append('se%s' % block.se_ratio) - if block.id_skip is False: - args.append('noskip') - return '_'.join(args) + def _encode_block_string(self, block): + """Encodes a block to a string.""" + args = [ + "r%d" % block.num_repeat, + "k%d" % block.kernel_size, + "s%d%d" % (block.strides[0], block.strides[1]), + "e%s" % block.expand_ratio, + "i%d" % block.input_filters, + "o%d" % block.output_filters, + "c%d" % block.conv_type, + 
] + if block.se_ratio > 0 and block.se_ratio <= 1: + args.append("se%s" % block.se_ratio) + if block.id_skip is False: + args.append("noskip") + return "_".join(args) - def decode(self, string_list): - """Decodes a list of string notations to specify blocks inside the network. + def decode(self, string_list): + """Decodes a list of string notations to specify blocks inside the network. Args: string_list: a list of strings, each string is a notation of block. Returns: A list of namedtuples to represent blocks arguments. """ - assert isinstance(string_list, list) - blocks_args = [] - for block_string in string_list: - blocks_args.append(self._decode_block_string(block_string)) - return blocks_args + assert isinstance(string_list, list) + blocks_args = [] + for block_string in string_list: + blocks_args.append(self._decode_block_string(block_string)) + return blocks_args - def encode(self, blocks_args): - """Encodes a list of Blocks to a list of strings. + def encode(self, blocks_args): + """Encodes a list of Blocks to a list of strings. Args: blocks_args: A list of namedtuples to represent blocks arguments. Returns: a list of strings, each string is a notation of block. """ - block_strings = [] - for block in blocks_args: - block_strings.append(self._encode_block_string(block)) - return block_strings + block_strings = [] + for block in blocks_args: + block_strings.append(self._encode_block_string(block)) + return block_strings def swish(features, use_native=True): - """Computes the Swish activation function. + """Computes the Swish activation function. The tf.nn.swish operation uses a custom gradient to reduce memory usage. Since saving custom gradients in SavedModel is currently not supported, and one would not be able to use an exported TF-Hub module for fine-tuning, we @@ -133,79 +134,89 @@ def swish(features, use_native=True): Returns: The activation value. """ - if use_native: - return tf.nn.swish(features) - else: - features = tf.convert_to_tensor(features, name='features') - return features * tf.nn.sigmoid(features) + if use_native: + return tf.nn.swish(features) + else: + features = tf.convert_to_tensor(features, name="features") + return features * tf.nn.sigmoid(features) -def efficientnet(width_coefficient=None, - depth_coefficient=None, - dropout_rate=0.2, - drop_connect_rate=0.2): - """Creates a efficientnet model.""" - blocks_args = [ - 'r1_k3_s11_e1_i32_o16_se0.25', 'r2_k3_s22_e6_i16_o24_se0.25', - 'r2_k5_s22_e6_i24_o40_se0.25', 'r3_k3_s22_e6_i40_o80_se0.25', - 'r3_k5_s11_e6_i80_o112_se0.25', 'r4_k5_s11_e6_i112_o192_se0.25', - 'r1_k3_s11_e6_i192_o320_se0.25', - ] - # blocks_args = [ - # 'r1_k3_s11_e1_i32_o16_se0.25', 'r2_k3_s22_e6_i16_o24_se0.25', - # 'r2_k5_s22_e6_i24_o40_se0.25', 'r3_k3_s22_e6_i40_o80_se0.25', - # 'r3_k5_s11_e6_i80_o112_se0.25', 'r4_k5_s22_e6_i112_o192_se0.25', - # 'r1_k3_s11_e6_i192_o320_se0.25', - # ] - global_params = efficientnet_model.GlobalParams( - batch_norm_momentum=0.99, - batch_norm_epsilon=1e-3, - dropout_rate=dropout_rate, - drop_connect_rate=drop_connect_rate, - data_format='channels_last', - num_classes=1000, - width_coefficient=width_coefficient, - depth_coefficient=depth_coefficient, - depth_divisor=8, - min_depth=None, - relu_fn=tf.nn.swish, - # The default is TPU-specific batch norm. - # The alternative is tf.layers.BatchNormalization. - # batch_norm=utils.TpuBatchNormalization, # TPU-specific requirement. 
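# --- Editor's note: illustrative sketch, not part of the patch. ---
# The blocks_args strings above use the compact notation that BlockDecoder
# parses: "r1_k3_s11_e1_i32_o16_se0.25" means repeat 1x, kernel 3, strides
# (1, 1), expand ratio 1, 32 input filters, 16 output filters, SE ratio 0.25.
# A minimal stand-alone decoder following the same "<key><value>" convention
# (hypothetical helper that mirrors _decode_block_string above):
import re

def decode_block_string(block_string):
    """Split e.g. "r1_k3_s11_e1_i32_o16_se0.25" into a key/value dict."""
    options = {}
    for op in block_string.split("_"):
        splits = re.split(r"(\d.*)", op)
        if len(splits) >= 2:
            key, value = splits[:2]
            options[key] = value
    return options

print(decode_block_string("r1_k3_s11_e1_i32_o16_se0.25"))
# {'r': '1', 'k': '3', 's': '11', 'e': '1', 'i': '32', 'o': '16', 'se': '0.25'}
# --- End editor's note. ---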
- batch_norm=utils.BatchNormalization, - use_se=True) - decoder = BlockDecoder() - return decoder.decode(blocks_args), global_params +def efficientnet( + width_coefficient=None, + depth_coefficient=None, + dropout_rate=0.2, + drop_connect_rate=0.2, +): + """Creates an EfficientNet model.""" + blocks_args = [ + "r1_k3_s11_e1_i32_o16_se0.25", + "r2_k3_s22_e6_i16_o24_se0.25", + "r2_k5_s22_e6_i24_o40_se0.25", + "r3_k3_s22_e6_i40_o80_se0.25", + "r3_k5_s11_e6_i80_o112_se0.25", + "r4_k5_s11_e6_i112_o192_se0.25", + "r1_k3_s11_e6_i192_o320_se0.25", + ] + # blocks_args = [ + # 'r1_k3_s11_e1_i32_o16_se0.25', 'r2_k3_s22_e6_i16_o24_se0.25', + # 'r2_k5_s22_e6_i24_o40_se0.25', 'r3_k3_s22_e6_i40_o80_se0.25', + # 'r3_k5_s11_e6_i80_o112_se0.25', 'r4_k5_s22_e6_i112_o192_se0.25', + # 'r1_k3_s11_e6_i192_o320_se0.25', + # ] + global_params = efficientnet_model.GlobalParams( + batch_norm_momentum=0.99, + batch_norm_epsilon=1e-3, + dropout_rate=dropout_rate, + drop_connect_rate=drop_connect_rate, + data_format="channels_last", + num_classes=1000, + width_coefficient=width_coefficient, + depth_coefficient=depth_coefficient, + depth_divisor=8, + min_depth=None, + relu_fn=tf.nn.swish, + # The default is TPU-specific batch norm. + # The alternative is tf.layers.BatchNormalization. + # batch_norm=utils.TpuBatchNormalization, # TPU-specific requirement. + batch_norm=utils.BatchNormalization, + use_se=True, + ) + decoder = BlockDecoder() + return decoder.decode(blocks_args), global_params def get_model_params(model_name, override_params): - """Get the block args and global params for a given model.""" - if model_name.startswith('efficientnet'): - width_coefficient, depth_coefficient, _, dropout_rate = ( - efficientnet_params(model_name)) - blocks_args, global_params = efficientnet( - width_coefficient, depth_coefficient, dropout_rate) - else: - raise NotImplementedError('model name is not pre-defined: %s' % model_name) + """Get the block args and global params for a given model.""" + if model_name.startswith("efficientnet"): + width_coefficient, depth_coefficient, _, dropout_rate = efficientnet_params( + model_name + ) + blocks_args, global_params = efficientnet( + width_coefficient, depth_coefficient, dropout_rate + ) + else: + raise NotImplementedError("model name is not pre-defined: %s" % model_name) - if override_params: - # ValueError will be raised here if override_params has fields not included - # in global_params. - global_params = global_params._replace(**override_params) + if override_params: + # ValueError will be raised here if override_params has fields not included - # in global_params. + global_params = global_params._replace(**override_params) - tf.logging.info('global_params= %s', global_params) - tf.logging.info('blocks_args= %s', blocks_args) - return blocks_args, global_params + tf.logging.info("global_params= %s", global_params) + tf.logging.info("blocks_args= %s", blocks_args) + return blocks_args, global_params -def build_model(images, - model_name, - training, - override_params=None, - model_dir=None, - fine_tuning=False, - features_only=False): - """A helper functiion to creates a model and returns predicted logits. +def build_model( + images, + model_name, + training, + override_params=None, + model_dir=None, + fine_tuning=False, + features_only=False, +): + """A helper function to create a model and return predicted logits. Args: images: input images tensor. model_name: string, the predefined model name.
@@ -222,34 +233,36 @@ def build_model(images, When model_name specified an undefined model, raises NotImplementedError. When override_params has invalid fields, raises ValueError. """ - assert isinstance(images, tf.Tensor) - if not training or fine_tuning: - if not override_params: - override_params = {} - override_params['batch_norm'] = utils.BatchNormalization - override_params['relu_fn'] = functools.partial(swish, use_native=False) - blocks_args, global_params = get_model_params(model_name, override_params) + assert isinstance(images, tf.Tensor) + if not training or fine_tuning: + if not override_params: + override_params = {} + override_params["batch_norm"] = utils.BatchNormalization + override_params["relu_fn"] = functools.partial(swish, use_native=False) + blocks_args, global_params = get_model_params(model_name, override_params) - if model_dir: - param_file = os.path.join(model_dir, 'model_params.txt') - if not tf.gfile.Exists(param_file): - if not tf.gfile.Exists(model_dir): - tf.gfile.MakeDirs(model_dir) - with tf.gfile.GFile(param_file, 'w') as f: - tf.logging.info('writing to %s' % param_file) - f.write('model_name= %s\n\n' % model_name) - f.write('global_params= %s\n\n' % str(global_params)) - f.write('blocks_args= %s\n\n' % str(blocks_args)) + if model_dir: + param_file = os.path.join(model_dir, "model_params.txt") + if not tf.gfile.Exists(param_file): + if not tf.gfile.Exists(model_dir): + tf.gfile.MakeDirs(model_dir) + with tf.gfile.GFile(param_file, "w") as f: + tf.logging.info("writing to %s" % param_file) + f.write("model_name= %s\n\n" % model_name) + f.write("global_params= %s\n\n" % str(global_params)) + f.write("blocks_args= %s\n\n" % str(blocks_args)) - with tf.variable_scope(model_name): - model = efficientnet_model.Model(blocks_args, global_params) - outputs = model(images, training=training, features_only=features_only) - outputs = tf.identity(outputs, 'features' if features_only else 'logits') - return outputs, model.endpoints + with tf.variable_scope(model_name): + model = efficientnet_model.Model(blocks_args, global_params) + outputs = model(images, training=training, features_only=features_only) + outputs = tf.identity(outputs, "features" if features_only else "logits") + return outputs, model.endpoints -def build_model_base(images, model_name, use_batch_norm=False, drop_out=False, override_params=None): - """A helper functiion to create a base model and return global_pool. +def build_model_base( + images, model_name, use_batch_norm=False, drop_out=False, override_params=None +): + """A helper function to create a base model and return global_pool. Args: images: input images tensor. model_name: string, the predefined model name. @@ -263,12 +276,14 @@ def build_model_base(images, model_name, use_batch_norm=False, drop_out=False, o When model_name specified an undefined model, raises NotImplementedError. When override_params has invalid fields, raises ValueError.
""" - assert isinstance(images, tf.Tensor) - blocks_args, global_params = get_model_params(model_name, override_params) + assert isinstance(images, tf.Tensor) + blocks_args, global_params = get_model_params(model_name, override_params) - with tf.variable_scope(model_name): - model = efficientnet_model.Model(blocks_args, global_params) - features = model(images, use_batch_norm=use_batch_norm, drop_out=drop_out, features_only=True) + with tf.variable_scope(model_name): + model = efficientnet_model.Model(blocks_args, global_params) + features = model( + images, use_batch_norm=use_batch_norm, drop_out=drop_out, features_only=True + ) - features = tf.identity(features, 'features') - return features, model.endpoints + features = tf.identity(features, "features") + return features, model.endpoints diff --git a/deeplabcut/pose_estimation_tensorflow/nnet/efficientnet_model.py b/deeplabcut/pose_estimation_tensorflow/nnet/efficientnet_model.py index c4a854272..677ddf25b 100644 --- a/deeplabcut/pose_estimation_tensorflow/nnet/efficientnet_model.py +++ b/deeplabcut/pose_estimation_tensorflow/nnet/efficientnet_model.py @@ -31,25 +31,47 @@ import deeplabcut.pose_estimation_tensorflow.nnet.utils as utils -GlobalParams = collections.namedtuple('GlobalParams', [ - 'batch_norm_momentum', 'batch_norm_epsilon', 'dropout_rate', 'data_format', - 'num_classes', 'width_coefficient', 'depth_coefficient', - 'depth_divisor', 'min_depth', 'drop_connect_rate', 'relu_fn', - 'batch_norm', 'use_se', -]) +GlobalParams = collections.namedtuple( + "GlobalParams", + [ + "batch_norm_momentum", + "batch_norm_epsilon", + "dropout_rate", + "data_format", + "num_classes", + "width_coefficient", + "depth_coefficient", + "depth_divisor", + "min_depth", + "drop_connect_rate", + "relu_fn", + "batch_norm", + "use_se", + ], +) GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields) -BlockArgs = collections.namedtuple('BlockArgs', [ - 'kernel_size', 'num_repeat', 'input_filters', 'output_filters', - 'expand_ratio', 'id_skip', 'strides', 'se_ratio', 'conv_type', -]) +BlockArgs = collections.namedtuple( + "BlockArgs", + [ + "kernel_size", + "num_repeat", + "input_filters", + "output_filters", + "expand_ratio", + "id_skip", + "strides", + "se_ratio", + "conv_type", + ], +) # defaults will be a public argument for namedtuple in Python 3.7 # https://docs.python.org/3/library/collections.html#collections.namedtuple BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields) def conv_kernel_initializer(shape, dtype=None, partition_info=None): - """Initialization for convolutional kernels. + """Initialization for convolutional kernels. The main difference with tf.variance_scaling_initializer is that tf.variance_scaling_initializer uses a truncated normal with an uncorrected standard deviation, whereas here we use a normal distribution. 
Similarly, @@ -62,15 +84,14 @@ def conv_kernel_initializer(shape, dtype=None, partition_info=None): Returns: an initialization for the variable """ - del partition_info - kernel_height, kernel_width, _, out_filters = shape - fan_out = int(kernel_height * kernel_width * out_filters) - return tf.random_normal( - shape, mean=0.0, stddev=np.sqrt(2.0 / fan_out), dtype=dtype) + del partition_info + kernel_height, kernel_width, _, out_filters = shape + fan_out = int(kernel_height * kernel_width * out_filters) + return tf.random_normal(shape, mean=0.0, stddev=np.sqrt(2.0 / fan_out), dtype=dtype) def dense_kernel_initializer(shape, dtype=None, partition_info=None): - """Initialization for dense kernels. + """Initialization for dense kernels. This initialization is equal to tf.variance_scaling_initializer(scale=1.0/3.0, mode='fan_out', distribution='uniform'). @@ -82,159 +103,173 @@ def dense_kernel_initializer(shape, dtype=None, partition_info=None): Returns: an initialization for the variable """ - del partition_info - init_range = 1.0 / np.sqrt(shape[1]) - return tf.random_uniform(shape, -init_range, init_range, dtype=dtype) + del partition_info + init_range = 1.0 / np.sqrt(shape[1]) + return tf.random_uniform(shape, -init_range, init_range, dtype=dtype) def round_filters(filters, global_params): - """Round number of filters based on depth multiplier.""" - orig_f = filters - multiplier = global_params.width_coefficient - divisor = global_params.depth_divisor - min_depth = global_params.min_depth - if not multiplier: - return filters - - filters *= multiplier - min_depth = min_depth or divisor - new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) - # Make sure that round down does not go down by more than 10%. - if new_filters < 0.9 * filters: - new_filters += divisor - tf.logging.info('round_filter input={} output={}'.format(orig_f, new_filters)) - return int(new_filters) + """Round number of filters based on depth multiplier.""" + orig_f = filters + multiplier = global_params.width_coefficient + divisor = global_params.depth_divisor + min_depth = global_params.min_depth + if not multiplier: + return filters + + filters *= multiplier + min_depth = min_depth or divisor + new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_filters < 0.9 * filters: + new_filters += divisor + tf.logging.info("round_filter input={} output={}".format(orig_f, new_filters)) + return int(new_filters) def round_repeats(repeats, global_params): - """Round number of filters based on depth multiplier.""" - multiplier = global_params.depth_coefficient - if not multiplier: - return repeats - return int(math.ceil(multiplier * repeats)) + """Round number of filters based on depth multiplier.""" + multiplier = global_params.depth_coefficient + if not multiplier: + return repeats + return int(math.ceil(multiplier * repeats)) class MBConvBlock(tf.keras.layers.Layer): - """A class of MBConv: Mobile Inverted Residual Bottleneck. + """A class of MBConv: Mobile Inverted Residual Bottleneck. Attributes: endpoints: dict. A list of internal tensors. """ - def __init__(self, block_args, global_params): - """Initializes a MBConv block. + def __init__(self, block_args, global_params): + """Initializes a MBConv block. Args: block_args: BlockArgs, arguments to create a Block. global_params: GlobalParams, a set of global parameters. 
""" - super(MBConvBlock, self).__init__() - self._block_args = block_args - self._batch_norm_momentum = global_params.batch_norm_momentum - self._batch_norm_epsilon = global_params.batch_norm_epsilon - self._batch_norm = global_params.batch_norm - self._data_format = global_params.data_format - if self._data_format == 'channels_first': - self._channel_axis = 1 - self._spatial_dims = [2, 3] - else: - self._channel_axis = -1 - self._spatial_dims = [1, 2] - - self._relu_fn = global_params.relu_fn or tf.nn.swish - self._has_se = ( - global_params.use_se and self._block_args.se_ratio is not None and - 0 < self._block_args.se_ratio <= 1) - - self.endpoints = None - - # Builds the block accordings to arguments. - self._build() - - def block_args(self): - return self._block_args - - def _build(self): - """Builds block according to the arguments.""" - filters = self._block_args.input_filters * self._block_args.expand_ratio - if self._block_args.expand_ratio != 1: - # Expansion phase: - self._expand_conv = tf.layers.Conv2D( - filters, - kernel_size=[1, 1], - strides=[1, 1], - kernel_initializer=conv_kernel_initializer, - padding='same', - data_format=self._data_format, - use_bias=False) - self._bn0 = self._batch_norm( - axis=self._channel_axis, - momentum=self._batch_norm_momentum, - epsilon=self._batch_norm_epsilon) - - kernel_size = self._block_args.kernel_size - # Depth-wise convolution phase: - self._depthwise_conv = utils.DepthwiseConv2D( - [kernel_size, kernel_size], - strides=self._block_args.strides, - depthwise_initializer=conv_kernel_initializer, - padding='same', - data_format=self._data_format, - use_bias=False) - self._bn1 = self._batch_norm( - axis=self._channel_axis, - momentum=self._batch_norm_momentum, - epsilon=self._batch_norm_epsilon) - - if self._has_se: - num_reduced_filters = max( - 1, int(self._block_args.input_filters * self._block_args.se_ratio)) - # Squeeze and Excitation layer. - self._se_reduce = tf.layers.Conv2D( - num_reduced_filters, - kernel_size=[1, 1], - strides=[1, 1], - kernel_initializer=conv_kernel_initializer, - padding='same', - data_format=self._data_format, - use_bias=True) - self._se_expand = tf.layers.Conv2D( - filters, - kernel_size=[1, 1], - strides=[1, 1], - kernel_initializer=conv_kernel_initializer, - padding='same', - data_format=self._data_format, - use_bias=True) - - # Output phase: - filters = self._block_args.output_filters - self._project_conv = tf.layers.Conv2D( - filters, - kernel_size=[1, 1], - strides=[1, 1], - kernel_initializer=conv_kernel_initializer, - padding='same', - data_format=self._data_format, - use_bias=False) - self._bn2 = self._batch_norm( - axis=self._channel_axis, - momentum=self._batch_norm_momentum, - epsilon=self._batch_norm_epsilon) - - def _call_se(self, input_tensor): - """Call Squeeze and Excitation layer. + super(MBConvBlock, self).__init__() + self._block_args = block_args + self._batch_norm_momentum = global_params.batch_norm_momentum + self._batch_norm_epsilon = global_params.batch_norm_epsilon + self._batch_norm = global_params.batch_norm + self._data_format = global_params.data_format + if self._data_format == "channels_first": + self._channel_axis = 1 + self._spatial_dims = [2, 3] + else: + self._channel_axis = -1 + self._spatial_dims = [1, 2] + + self._relu_fn = global_params.relu_fn or tf.nn.swish + self._has_se = ( + global_params.use_se + and self._block_args.se_ratio is not None + and 0 < self._block_args.se_ratio <= 1 + ) + + self.endpoints = None + + # Builds the block accordings to arguments. 
+ self._build() + + def block_args(self): + return self._block_args + + def _build(self): + """Builds block according to the arguments.""" + filters = self._block_args.input_filters * self._block_args.expand_ratio + if self._block_args.expand_ratio != 1: + # Expansion phase: + self._expand_conv = tf.layers.Conv2D( + filters, + kernel_size=[1, 1], + strides=[1, 1], + kernel_initializer=conv_kernel_initializer, + padding="same", + data_format=self._data_format, + use_bias=False, + ) + self._bn0 = self._batch_norm( + axis=self._channel_axis, + momentum=self._batch_norm_momentum, + epsilon=self._batch_norm_epsilon, + ) + + kernel_size = self._block_args.kernel_size + # Depth-wise convolution phase: + self._depthwise_conv = utils.DepthwiseConv2D( + [kernel_size, kernel_size], + strides=self._block_args.strides, + depthwise_initializer=conv_kernel_initializer, + padding="same", + data_format=self._data_format, + use_bias=False, + ) + self._bn1 = self._batch_norm( + axis=self._channel_axis, + momentum=self._batch_norm_momentum, + epsilon=self._batch_norm_epsilon, + ) + + if self._has_se: + num_reduced_filters = max( + 1, int(self._block_args.input_filters * self._block_args.se_ratio) + ) + # Squeeze and Excitation layer. + self._se_reduce = tf.layers.Conv2D( + num_reduced_filters, + kernel_size=[1, 1], + strides=[1, 1], + kernel_initializer=conv_kernel_initializer, + padding="same", + data_format=self._data_format, + use_bias=True, + ) + self._se_expand = tf.layers.Conv2D( + filters, + kernel_size=[1, 1], + strides=[1, 1], + kernel_initializer=conv_kernel_initializer, + padding="same", + data_format=self._data_format, + use_bias=True, + ) + + # Output phase: + filters = self._block_args.output_filters + self._project_conv = tf.layers.Conv2D( + filters, + kernel_size=[1, 1], + strides=[1, 1], + kernel_initializer=conv_kernel_initializer, + padding="same", + data_format=self._data_format, + use_bias=False, + ) + self._bn2 = self._batch_norm( + axis=self._channel_axis, + momentum=self._batch_norm_momentum, + epsilon=self._batch_norm_epsilon, + ) + + def _call_se(self, input_tensor): + """Call Squeeze and Excitation layer. Args: input_tensor: Tensor, a single input tensor for Squeeze/Excitation layer. Returns: A output tensor, which should have the same shape as input. """ - se_tensor = tf.reduce_mean(input_tensor, self._spatial_dims, keepdims=True) - se_tensor = self._se_expand(self._relu_fn(self._se_reduce(se_tensor))) - tf.logging.info('Built Squeeze and Excitation with tensor shape: %s' % - (se_tensor.shape)) - return tf.sigmoid(se_tensor) * input_tensor - - def call(self, inputs, use_batch_norm=False, drop_out=False, drop_connect_rate=None): - """Implementation of call(). + se_tensor = tf.reduce_mean(input_tensor, self._spatial_dims, keepdims=True) + se_tensor = self._se_expand(self._relu_fn(self._se_reduce(se_tensor))) + tf.logging.info( + "Built Squeeze and Excitation with tensor shape: %s" % (se_tensor.shape) + ) + return tf.sigmoid(se_tensor) * input_tensor + + def call( + self, inputs, use_batch_norm=False, drop_out=False, drop_connect_rate=None + ): + """Implementation of call(). Args: inputs: the inputs tensor. training: boolean, whether the model is constructed for training. @@ -242,71 +277,80 @@ def call(self, inputs, use_batch_norm=False, drop_out=False, drop_connect_rate=N Returns: A output tensor. 
""" - tf.logging.info('Block input: %s shape: %s' % (inputs.name, inputs.shape)) - if self._block_args.expand_ratio != 1: - x = self._relu_fn(self._bn0(self._expand_conv(inputs), training=use_batch_norm)) - else: - x = inputs - tf.logging.info('Expand: %s shape: %s' % (x.name, x.shape)) - - x = self._relu_fn(self._bn1(self._depthwise_conv(x), training=use_batch_norm)) - tf.logging.info('DWConv: %s shape: %s' % (x.name, x.shape)) - - if self._has_se: - with tf.variable_scope('se'): - x = self._call_se(x) - - self.endpoints = {'expansion_output': x} - - x = self._bn2(self._project_conv(x), training=use_batch_norm) - if self._block_args.id_skip: - if all( - s == 1 for s in self._block_args.strides - ) and self._block_args.input_filters == self._block_args.output_filters: - # only apply drop_connect if skip presents. - if drop_connect_rate: - x = utils.drop_connect(x, drop_out, drop_connect_rate) - x = tf.add(x, inputs) - tf.logging.info('Project: %s shape: %s' % (x.name, x.shape)) - return x + tf.logging.info("Block input: %s shape: %s" % (inputs.name, inputs.shape)) + if self._block_args.expand_ratio != 1: + x = self._relu_fn( + self._bn0(self._expand_conv(inputs), training=use_batch_norm) + ) + else: + x = inputs + tf.logging.info("Expand: %s shape: %s" % (x.name, x.shape)) + + x = self._relu_fn(self._bn1(self._depthwise_conv(x), training=use_batch_norm)) + tf.logging.info("DWConv: %s shape: %s" % (x.name, x.shape)) + + if self._has_se: + with tf.variable_scope("se"): + x = self._call_se(x) + + self.endpoints = {"expansion_output": x} + + x = self._bn2(self._project_conv(x), training=use_batch_norm) + if self._block_args.id_skip: + if ( + all(s == 1 for s in self._block_args.strides) + and self._block_args.input_filters == self._block_args.output_filters + ): + # only apply drop_connect if skip presents. + if drop_connect_rate: + x = utils.drop_connect(x, drop_out, drop_connect_rate) + x = tf.add(x, inputs) + tf.logging.info("Project: %s shape: %s" % (x.name, x.shape)) + return x class MBConvBlockWithoutDepthwise(MBConvBlock): - """MBConv-like block without depthwise convolution and squeeze-and-excite.""" - - def _build(self): - """Builds block according to the arguments.""" - filters = self._block_args.input_filters * self._block_args.expand_ratio - if self._block_args.expand_ratio != 1: - # Expansion phase: - self._expand_conv = tf.layers.Conv2D( - filters, - kernel_size=[3, 3], - strides=[1, 1], - kernel_initializer=conv_kernel_initializer, - padding='same', - use_bias=False) - self._bn0 = self._batch_norm( - axis=self._channel_axis, - momentum=self._batch_norm_momentum, - epsilon=self._batch_norm_epsilon) - - # Output phase: - filters = self._block_args.output_filters - self._project_conv = tf.layers.Conv2D( - filters, - kernel_size=[1, 1], - strides=self._block_args.strides, - kernel_initializer=conv_kernel_initializer, - padding='same', - use_bias=False) - self._bn1 = self._batch_norm( - axis=self._channel_axis, - momentum=self._batch_norm_momentum, - epsilon=self._batch_norm_epsilon) - - def call(self, inputs, use_batch_norm=False, drop_out=False, drop_connect_rate=None): - """Implementation of call(). 
+ """MBConv-like block without depthwise convolution and squeeze-and-excite.""" + + def _build(self): + """Builds block according to the arguments.""" + filters = self._block_args.input_filters * self._block_args.expand_ratio + if self._block_args.expand_ratio != 1: + # Expansion phase: + self._expand_conv = tf.layers.Conv2D( + filters, + kernel_size=[3, 3], + strides=[1, 1], + kernel_initializer=conv_kernel_initializer, + padding="same", + use_bias=False, + ) + self._bn0 = self._batch_norm( + axis=self._channel_axis, + momentum=self._batch_norm_momentum, + epsilon=self._batch_norm_epsilon, + ) + + # Output phase: + filters = self._block_args.output_filters + self._project_conv = tf.layers.Conv2D( + filters, + kernel_size=[1, 1], + strides=self._block_args.strides, + kernel_initializer=conv_kernel_initializer, + padding="same", + use_bias=False, + ) + self._bn1 = self._batch_norm( + axis=self._channel_axis, + momentum=self._batch_norm_momentum, + epsilon=self._batch_norm_epsilon, + ) + + def call( + self, inputs, use_batch_norm=False, drop_out=False, drop_connect_rate=None + ): + """Implementation of call(). Args: inputs: the inputs tensor. training: boolean, whether the model is constructed for training. @@ -314,132 +358,141 @@ def call(self, inputs, use_batch_norm=False, drop_out=False, drop_connect_rate=N Returns: A output tensor. """ - tf.logging.info('Block input: %s shape: %s' % (inputs.name, inputs.shape)) - if self._block_args.expand_ratio != 1: - x = self._relu_fn(self._bn0(self._expand_conv(inputs), training=use_batch_norm)) - else: - x = inputs - tf.logging.info('Expand: %s shape: %s' % (x.name, x.shape)) - - self.endpoints = {'expansion_output': x} - - x = self._bn1(self._project_conv(x), training=use_batch_norm) - if self._block_args.id_skip: - if all( - s == 1 for s in self._block_args.strides - ) and self._block_args.input_filters == self._block_args.output_filters: - # only apply drop_connect if skip presents. - if drop_connect_rate: - x = utils.drop_connect(x, drop_out, drop_connect_rate) - x = tf.add(x, inputs) - tf.logging.info('Project: %s shape: %s' % (x.name, x.shape)) - return x + tf.logging.info("Block input: %s shape: %s" % (inputs.name, inputs.shape)) + if self._block_args.expand_ratio != 1: + x = self._relu_fn( + self._bn0(self._expand_conv(inputs), training=use_batch_norm) + ) + else: + x = inputs + tf.logging.info("Expand: %s shape: %s" % (x.name, x.shape)) + + self.endpoints = {"expansion_output": x} + + x = self._bn1(self._project_conv(x), training=use_batch_norm) + if self._block_args.id_skip: + if ( + all(s == 1 for s in self._block_args.strides) + and self._block_args.input_filters == self._block_args.output_filters + ): + # only apply drop_connect if skip presents. + if drop_connect_rate: + x = utils.drop_connect(x, drop_out, drop_connect_rate) + x = tf.add(x, inputs) + tf.logging.info("Project: %s shape: %s" % (x.name, x.shape)) + return x class Model(tf.keras.Model): - """A class implements tf.keras.Model for MNAS-like model. + """A class implements tf.keras.Model for MNAS-like model. Reference: https://arxiv.org/abs/1807.11626 """ - def __init__(self, blocks_args=None, global_params=None): - """Initializes an `Model` instance. + def __init__(self, blocks_args=None, global_params=None): + """Initializes an `Model` instance. Args: blocks_args: A list of BlockArgs to construct block modules. global_params: GlobalParams, a set of global parameters. Raises: ValueError: when blocks_args is not specified as a list. 
""" - super(Model, self).__init__() - if not isinstance(blocks_args, list): - raise ValueError('blocks_args should be a list.') - self._global_params = global_params - self._blocks_args = blocks_args - self._relu_fn = global_params.relu_fn or tf.nn.swish - self._batch_norm = global_params.batch_norm - - self.endpoints = None - - self._build() - - def _get_conv_block(self, conv_type): - conv_block_map = {0: MBConvBlock, 1: MBConvBlockWithoutDepthwise} - return conv_block_map[conv_type] - - def _build(self): - """Builds a model.""" - self._blocks = [] - # Builds blocks. - for block_args in self._blocks_args: - assert block_args.num_repeat > 0 - # Update block input and output filters based on depth multiplier. - block_args = block_args._replace( - input_filters=round_filters(block_args.input_filters, - self._global_params), - output_filters=round_filters(block_args.output_filters, - self._global_params), - num_repeat=round_repeats(block_args.num_repeat, self._global_params)) - - # The first block needs to take care of stride and filter size increase. - conv_block = self._get_conv_block(block_args.conv_type) - self._blocks.append(conv_block(block_args, self._global_params)) - if block_args.num_repeat > 1: - # pylint: disable=protected-access - block_args = block_args._replace( - input_filters=block_args.output_filters, strides=[1, 1]) - # pylint: enable=protected-access - for _ in xrange(block_args.num_repeat - 1): - self._blocks.append(conv_block(block_args, self._global_params)) - - batch_norm_momentum = self._global_params.batch_norm_momentum - batch_norm_epsilon = self._global_params.batch_norm_epsilon - if self._global_params.data_format == 'channels_first': - channel_axis = 1 - else: - channel_axis = -1 - - # Stem part. - self._conv_stem = tf.layers.Conv2D( - filters=round_filters(32, self._global_params), - kernel_size=[3, 3], - strides=[2, 2], - kernel_initializer=conv_kernel_initializer, - padding='same', - data_format=self._global_params.data_format, - use_bias=False) - self._bn0 = self._batch_norm( - axis=channel_axis, - momentum=batch_norm_momentum, - epsilon=batch_norm_epsilon) - - # Head part. - self._conv_head = tf.layers.Conv2D( - filters=round_filters(1280, self._global_params), - kernel_size=[1, 1], - strides=[1, 1], - kernel_initializer=conv_kernel_initializer, - padding='same', - use_bias=False) - self._bn1 = self._batch_norm( - axis=channel_axis, - momentum=batch_norm_momentum, - epsilon=batch_norm_epsilon) - - self._avg_pooling = tf.keras.layers.GlobalAveragePooling2D( - data_format=self._global_params.data_format) - if self._global_params.num_classes: - self._fc = tf.layers.Dense( - self._global_params.num_classes, - kernel_initializer=dense_kernel_initializer) - else: - self._fc = None - - if self._global_params.dropout_rate > 0: - self._dropout = tf.keras.layers.Dropout(self._global_params.dropout_rate) - else: - self._dropout = None - - def call(self, inputs, use_batch_norm=False, drop_out=False, features_only=None): - """Implementation of call(). 
+ super(Model, self).__init__() + if not isinstance(blocks_args, list): + raise ValueError("blocks_args should be a list.") + self._global_params = global_params + self._blocks_args = blocks_args + self._relu_fn = global_params.relu_fn or tf.nn.swish + self._batch_norm = global_params.batch_norm + + self.endpoints = None + + self._build() + + def _get_conv_block(self, conv_type): + conv_block_map = {0: MBConvBlock, 1: MBConvBlockWithoutDepthwise} + return conv_block_map[conv_type] + + def _build(self): + """Builds a model.""" + self._blocks = [] + # Builds blocks. + for block_args in self._blocks_args: + assert block_args.num_repeat > 0 + # Update block input and output filters based on depth multiplier. + block_args = block_args._replace( + input_filters=round_filters( + block_args.input_filters, self._global_params + ), + output_filters=round_filters( + block_args.output_filters, self._global_params + ), + num_repeat=round_repeats(block_args.num_repeat, self._global_params), + ) + + # The first block needs to take care of stride and filter size increase. + conv_block = self._get_conv_block(block_args.conv_type) + self._blocks.append(conv_block(block_args, self._global_params)) + if block_args.num_repeat > 1: + # pylint: disable=protected-access + block_args = block_args._replace( + input_filters=block_args.output_filters, strides=[1, 1] + ) + # pylint: enable=protected-access + for _ in xrange(block_args.num_repeat - 1): + self._blocks.append(conv_block(block_args, self._global_params)) + + batch_norm_momentum = self._global_params.batch_norm_momentum + batch_norm_epsilon = self._global_params.batch_norm_epsilon + if self._global_params.data_format == "channels_first": + channel_axis = 1 + else: + channel_axis = -1 + + # Stem part. + self._conv_stem = tf.layers.Conv2D( + filters=round_filters(32, self._global_params), + kernel_size=[3, 3], + strides=[2, 2], + kernel_initializer=conv_kernel_initializer, + padding="same", + data_format=self._global_params.data_format, + use_bias=False, + ) + self._bn0 = self._batch_norm( + axis=channel_axis, momentum=batch_norm_momentum, epsilon=batch_norm_epsilon + ) + + # Head part. + self._conv_head = tf.layers.Conv2D( + filters=round_filters(1280, self._global_params), + kernel_size=[1, 1], + strides=[1, 1], + kernel_initializer=conv_kernel_initializer, + padding="same", + use_bias=False, + ) + self._bn1 = self._batch_norm( + axis=channel_axis, momentum=batch_norm_momentum, epsilon=batch_norm_epsilon + ) + + self._avg_pooling = tf.keras.layers.GlobalAveragePooling2D( + data_format=self._global_params.data_format + ) + if self._global_params.num_classes: + self._fc = tf.layers.Dense( + self._global_params.num_classes, + kernel_initializer=dense_kernel_initializer, + ) + else: + self._fc = None + + if self._global_params.dropout_rate > 0: + self._dropout = tf.keras.layers.Dropout(self._global_params.dropout_rate) + else: + self._dropout = None + + def call(self, inputs, use_batch_norm=False, drop_out=False, features_only=None): + """Implementation of call(). Args: inputs: input tensors. training: boolean, whether the model is constructed for training. @@ -447,51 +500,58 @@ def call(self, inputs, use_batch_norm=False, drop_out=False, features_only=None) Returns: output tensors. 
""" - outputs = None - self.endpoints = {} - # Calls Stem layers - with tf.variable_scope('stem'): - outputs = self._relu_fn( - self._bn0(self._conv_stem(inputs), training=use_batch_norm)) - tf.logging.info('Built stem layers with output shape: %s' % outputs.shape) - self.endpoints['stem'] = outputs - - # Calls blocks. - reduction_idx = 0 - for idx, block in enumerate(self._blocks): - is_reduction = False - if ((idx == len(self._blocks) - 1) or - self._blocks[idx + 1].block_args().strides[0] > 1): - is_reduction = True - reduction_idx += 1 - - with tf.variable_scope('blocks_%s' % idx): - drop_rate = self._global_params.drop_connect_rate - if drop_rate: - drop_rate *= float(idx) / len(self._blocks) - tf.logging.info('block_%s drop_connect_rate: %s' % (idx, drop_rate)) - outputs = block.call( - outputs, use_batch_norm=use_batch_norm, drop_out=drop_out, drop_connect_rate=drop_rate) - self.endpoints['block_%s' % idx] = outputs - if is_reduction: - self.endpoints['reduction_%s' % reduction_idx] = outputs - if block.endpoints: - for k, v in six.iteritems(block.endpoints): - self.endpoints['block_%s/%s' % (idx, k)] = v - if is_reduction: - self.endpoints['reduction_%s/%s' % (reduction_idx, k)] = v - self.endpoints['features'] = outputs - - if not features_only: - # Calls final layers and returns logits. - with tf.variable_scope('head'): - outputs = self._relu_fn( - self._bn1(self._conv_head(outputs), training=use_batch_norm)) - outputs = self._avg_pooling(outputs) - if self._dropout: - outputs = self._dropout(outputs, training=drop_out) - self.endpoints['global_pool'] = outputs - if self._fc: - outputs = self._fc(outputs) - self.endpoints['head'] = outputs - return outputs + outputs = None + self.endpoints = {} + # Calls Stem layers + with tf.variable_scope("stem"): + outputs = self._relu_fn( + self._bn0(self._conv_stem(inputs), training=use_batch_norm) + ) + tf.logging.info("Built stem layers with output shape: %s" % outputs.shape) + self.endpoints["stem"] = outputs + + # Calls blocks. + reduction_idx = 0 + for idx, block in enumerate(self._blocks): + is_reduction = False + if (idx == len(self._blocks) - 1) or self._blocks[ + idx + 1 + ].block_args().strides[0] > 1: + is_reduction = True + reduction_idx += 1 + + with tf.variable_scope("blocks_%s" % idx): + drop_rate = self._global_params.drop_connect_rate + if drop_rate: + drop_rate *= float(idx) / len(self._blocks) + tf.logging.info("block_%s drop_connect_rate: %s" % (idx, drop_rate)) + outputs = block.call( + outputs, + use_batch_norm=use_batch_norm, + drop_out=drop_out, + drop_connect_rate=drop_rate, + ) + self.endpoints["block_%s" % idx] = outputs + if is_reduction: + self.endpoints["reduction_%s" % reduction_idx] = outputs + if block.endpoints: + for k, v in six.iteritems(block.endpoints): + self.endpoints["block_%s/%s" % (idx, k)] = v + if is_reduction: + self.endpoints["reduction_%s/%s" % (reduction_idx, k)] = v + self.endpoints["features"] = outputs + + if not features_only: + # Calls final layers and returns logits. 
+ with tf.variable_scope("head"): + outputs = self._relu_fn( + self._bn1(self._conv_head(outputs), training=use_batch_norm) + ) + outputs = self._avg_pooling(outputs) + if self._dropout: + outputs = self._dropout(outputs, training=drop_out) + self.endpoints["global_pool"] = outputs + if self._fc: + outputs = self._fc(outputs) + self.endpoints["head"] = outputs + return outputs diff --git a/deeplabcut/pose_estimation_tensorflow/nnet/net_factory.py b/deeplabcut/pose_estimation_tensorflow/nnet/net_factory.py index bb7248dfe..1b50711e9 100755 --- a/deeplabcut/pose_estimation_tensorflow/nnet/net_factory.py +++ b/deeplabcut/pose_estimation_tensorflow/nnet/net_factory.py @@ -10,7 +10,7 @@ def pose_net(cfg): - net_type = cfg['net_type'] + net_type = cfg["net_type"] if "mobilenet" in net_type: # multi currently not supported if ( cfg.get("stride", 8) < 8 @@ -23,6 +23,7 @@ def pose_net(cfg): from deeplabcut.pose_estimation_tensorflow.nnet.pose_net_mobilenet import ( PoseNet, ) + cls = PoseNet elif "resnet" in net_type: @@ -40,7 +41,7 @@ def pose_net(cfg): from deeplabcut.pose_estimation_tensorflow.nnet.pose_net import PoseNet cls = PoseNet - elif 'efficientnet' in net_type: + elif "efficientnet" in net_type: if ( cfg.get("stride", 8) < 8 ): # this supports multianimal (with PAFs) or pairwise prediction @@ -49,7 +50,10 @@ def pose_net(cfg): cls = PoseNet else: print("Initializing Efficientnet") - from deeplabcut.pose_estimation_tensorflow.nnet.pose_net_efficientnet import PoseNet + from deeplabcut.pose_estimation_tensorflow.nnet.pose_net_efficientnet import ( + PoseNet, + ) + cls = PoseNet else: raise Exception('Unsupported class of network: "{}"'.format(net_type)) diff --git a/deeplabcut/pose_estimation_tensorflow/nnet/pose_net.py b/deeplabcut/pose_estimation_tensorflow/nnet/pose_net.py index 596822242..839f40482 100644 --- a/deeplabcut/pose_estimation_tensorflow/nnet/pose_net.py +++ b/deeplabcut/pose_estimation_tensorflow/nnet/pose_net.py @@ -39,7 +39,7 @@ def prediction_layer(cfg, input, name, num_outputs): padding="SAME", activation_fn=None, normalizer_fn=None, - weights_regularizer=slim.l2_regularizer(cfg['weight_decay']), + weights_regularizer=slim.l2_regularizer(cfg["weight_decay"]), ): with tf.variable_scope(name): pred = slim.conv2d_transpose( @@ -49,9 +49,9 @@ def prediction_layer(cfg, input, name, num_outputs): def get_batch_spec(cfg): - num_joints = cfg['num_joints'] - num_limbs = cfg['num_limbs'] - batch_size = cfg['batch_size'] + num_joints = cfg["num_joints"] + num_limbs = cfg["num_limbs"] + batch_size = cfg["batch_size"] batch_spec = { Batch.inputs: [batch_size, None, None, 3], Batch.part_score_targets: [ @@ -67,13 +67,13 @@ def get_batch_spec(cfg): num_joints + cfg.get("num_idchannel", 0), ], } - if cfg['location_refinement']: + if cfg["location_refinement"]: batch_spec[Batch.locref_targets] = [batch_size, None, None, num_joints * 2] batch_spec[Batch.locref_mask] = [batch_size, None, None, num_joints * 2] - if cfg['pairwise_predict']: - print("Getting specs", cfg['dataset_type'], num_limbs, num_joints) + if cfg["pairwise_predict"]: + print("Getting specs", cfg["dataset_type"], num_limbs, num_joints) if ( - "multi-animal" not in cfg['dataset_type'] + "multi-animal" not in cfg["dataset_type"] ): # this can be used for pairwise conditional batch_spec[Batch.pairwise_targets] = [ batch_size, @@ -88,18 +88,8 @@ def get_batch_spec(cfg): num_joints * (num_joints - 1) * 2, ] else: # train partaffinity fields - batch_spec[Batch.pairwise_targets] = [ - batch_size, - None, - None, - num_limbs * 
2, - ] - batch_spec[Batch.pairwise_mask] = [ - batch_size, - None, - None, - num_limbs * 2, - ] + batch_spec[Batch.pairwise_targets] = [batch_size, None, None, num_limbs * 2] + batch_spec[Batch.pairwise_mask] = [batch_size, None, None, num_limbs * 2] return batch_spec @@ -108,10 +98,13 @@ def __init__(self, cfg): self.cfg = cfg def extract_features(self, inputs): - net_fun = net_funcs[self.cfg['net_type']] + net_fun = net_funcs[self.cfg["net_type"]] mean = tf.constant( - self.cfg['mean_pixel'], dtype=tf.float32, shape=[1, 1, 1, 3], name="img_mean" + self.cfg["mean_pixel"], + dtype=tf.float32, + shape=[1, 1, 1, 3], + name="img_mean", ) im_centered = inputs - mean @@ -137,8 +130,8 @@ def prediction_layers( self, features, end_points, reuse=None, no_interm=False, scope="pose" ): cfg = self.cfg - n_joints = cfg['num_joints'] - num_layers = re.findall("resnet_([0-9]*)", cfg['net_type'])[0] + n_joints = cfg["num_joints"] + num_layers = re.findall("resnet_([0-9]*)", cfg["net_type"])[0] layer_name = ( "resnet_v1_{}".format(num_layers) + "/block{}/unit_{}/bottleneck_v1" ) @@ -148,23 +141,25 @@ def prediction_layers( out["part_pred"] = prediction_layer( cfg, features, "part_pred", n_joints + cfg.get("num_idchannel", 0) ) - if cfg['location_refinement']: + if cfg["location_refinement"]: out["locref"] = prediction_layer( cfg, features, "locref_pred", n_joints * 2 ) - if cfg['pairwise_predict'] and "multi-animal" not in cfg['dataset_type']: + if cfg["pairwise_predict"] and "multi-animal" not in cfg["dataset_type"]: out["pairwise_pred"] = prediction_layer( - cfg, - features, - "pairwise_pred", - n_joints * (n_joints - 1) * 2, + cfg, features, "pairwise_pred", n_joints * (n_joints - 1) * 2 ) - if cfg['partaffinityfield_predict'] and "multi-animal" in cfg['dataset_type']: + if ( + cfg["partaffinityfield_predict"] + and "multi-animal" in cfg["dataset_type"] + ): out["pairwise_pred"] = prediction_layer( - cfg, features, "pairwise_pred", cfg['num_limbs'] * 2 + cfg, features, "pairwise_pred", cfg["num_limbs"] * 2 + ) + if cfg["intermediate_supervision"] and not no_interm: + interm_name = layer_name.format( + 3, cfg["intermediate_supervision_layer"] ) - if cfg['intermediate_supervision'] and not no_interm: - interm_name = layer_name.format(3, cfg['intermediate_supervision_layer']) block_interm_out = end_points[interm_name] out["part_pred_interm"] = prediction_layer( cfg, @@ -191,7 +186,7 @@ def inference(self, inputs): locref = heads["locref"] probs = tf.sigmoid(heads["part_pred"]) - if self.cfg['batch_size'] == 1: + if self.cfg["batch_size"] == 1: # assuming batchsize 1 here! 
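# --- Editor's note: illustrative sketch, not part of the patch. ---
# The inference code in the hunks that follow converts score-map cell
# coordinates back to image pixels as
#   pose = stride * loc + stride / 2 + offset * locref_stdev.
# The same arithmetic in NumPy; locref_stdev = 7.2801 is the value DLC
# pose configs typically use, and the other numbers are made up:
import numpy as np

stride, locref_stdev = 8.0, 7.2801
loc = np.array([12.0, 34.0])    # argmax cell (x, y) on the score map
offset = np.array([0.3, -1.1])  # predicted sub-cell refinement
pose_xy = stride * loc + stride * 0.5 + offset * locref_stdev
# pose_xy ~= [102.18, 267.99]: the cell center in pixels plus the refinement
# --- End editor's note. ---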
probs = tf.squeeze(probs, axis=0) locref = tf.squeeze(locref, axis=0) @@ -216,9 +211,9 @@ def inference(self, inputs): likelihood = tf.reshape(tf.gather_nd(probs, indices), (-1, 1)) pose = ( - self.cfg['stride'] * tf.cast(tf.transpose(loc), dtype=tf.float32) - + self.cfg['stride'] * 0.5 - + offset * self.cfg['locref_stdev'] + self.cfg["stride"] * tf.cast(tf.transpose(loc), dtype=tf.float32) + + self.cfg["stride"] * 0.5 + + offset * self.cfg["locref_stdev"] ) pose = tf.concat([pose, likelihood], axis=1) @@ -257,9 +252,9 @@ def inference(self, inputs): likelihood = tf.reshape(tf.gather_nd(probs, indices), (-1, 1)) pose = ( - self.cfg['stride'] * tf.cast(tf.transpose(loc), dtype=tf.float32) - + self.cfg['stride'] * 0.5 - + offset * self.cfg['locref_stdev'] + self.cfg["stride"] * tf.cast(tf.transpose(loc), dtype=tf.float32) + + self.cfg["stride"] * 0.5 + + offset * self.cfg["locref_stdev"] ) pose = tf.concat([pose, likelihood], axis=1) return {"pose": pose} @@ -268,9 +263,9 @@ def add_inference_layers(self, heads): """ initialized during inference """ prob = tf.sigmoid(heads["part_pred"]) outputs = {"part_prob": prob} - if self.cfg['location_refinement']: + if self.cfg["location_refinement"]: outputs["locref"] = heads["locref"] - if self.cfg['pairwise_predict'] or self.cfg['partaffinityfield_predict']: + if self.cfg["pairwise_predict"] or self.cfg["partaffinityfield_predict"]: outputs["pairwise_pred"] = heads["pairwise_pred"] return outputs @@ -278,7 +273,7 @@ def train(self, batch): cfg = self.cfg heads = self.get_net(batch[Batch.inputs]) - weigh_part_predictions = cfg['weigh_part_predictions'] + weigh_part_predictions = cfg["weigh_part_predictions"] part_score_weights = ( batch[Batch.part_score_weights] if weigh_part_predictions else 1.0 ) @@ -291,25 +286,25 @@ def add_part_loss(pred_layer): loss = {} loss["part_loss"] = add_part_loss("part_pred") total_loss = loss["part_loss"] - if cfg['intermediate_supervision']: + if cfg["intermediate_supervision"]: loss["part_loss_interm"] = add_part_loss("part_pred_interm") total_loss = total_loss + loss["part_loss_interm"] - if cfg['location_refinement']: + if cfg["location_refinement"]: locref_pred = heads["locref"] locref_targets = batch[Batch.locref_targets] locref_weights = batch[Batch.locref_mask] loss_func = ( losses.huber_loss - if cfg['locref_huber_loss'] + if cfg["locref_huber_loss"] else tf.losses.mean_squared_error ) - loss["locref_loss"] = cfg['locref_loss_weight'] * loss_func( + loss["locref_loss"] = cfg["locref_loss_weight"] * loss_func( locref_targets, locref_pred, locref_weights ) total_loss = total_loss + loss["locref_loss"] - if cfg['pairwise_predict'] or cfg['partaffinityfield_predict']: + if cfg["pairwise_predict"] or cfg["partaffinityfield_predict"]: # setting pairwise bodypart loss pairwise_pred = heads["pairwise_pred"] pairwise_targets = batch[Batch.pairwise_targets] @@ -317,10 +312,10 @@ def add_part_loss(pred_layer): loss_func = ( losses.huber_loss - if cfg['pairwise_huber_loss'] + if cfg["pairwise_huber_loss"] else tf.losses.mean_squared_error ) - loss["pairwise_loss"] = cfg['pairwise_loss_weight'] * loss_func( + loss["pairwise_loss"] = cfg["pairwise_loss_weight"] * loss_func( pairwise_targets, pairwise_pred, pairwise_weights ) total_loss = total_loss + loss["pairwise_loss"] diff --git a/deeplabcut/pose_estimation_tensorflow/nnet/pose_net_efficientnet.py b/deeplabcut/pose_estimation_tensorflow/nnet/pose_net_efficientnet.py index eba5b5121..e065c81b8 100644 --- 
a/deeplabcut/pose_estimation_tensorflow/nnet/pose_net_efficientnet.py +++ b/deeplabcut/pose_estimation_tensorflow/nnet/pose_net_efficientnet.py @@ -1,4 +1,4 @@ -''' +""" DeepLabCut 2.1.9 Toolbox (deeplabcut.org) © A. & M. Mathis Labs https://github.com/AlexEMG/DeepLabCut @@ -14,7 +14,7 @@ Efficient Nets added by T. Biasi & AM See https://openaccess.thecvf.com/content/WACV2021/html/Mathis_Pretraining_Boosts_Out-of-Domain_Robustness_for_Pose_Estimation_WACV_2021_paper.html -''' +""" import re import tensorflow as tf @@ -25,19 +25,23 @@ def prediction_layer(cfg, input, name, num_outputs): - with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], padding='SAME', - activation_fn=None, normalizer_fn=None, - weights_regularizer=slim.l2_regularizer(cfg['weight_decay'])): + with slim.arg_scope( + [slim.conv2d, slim.conv2d_transpose], + padding="SAME", + activation_fn=None, + normalizer_fn=None, + weights_regularizer=slim.l2_regularizer(cfg["weight_decay"]), + ): with tf.variable_scope(name): - pred = slim.conv2d_transpose(input, num_outputs, - kernel_size=[3, 3], stride=2, - scope='block4') + pred = slim.conv2d_transpose( + input, num_outputs, kernel_size=[3, 3], stride=2, scope="block4" + ) return pred def get_batch_spec(cfg): - num_joints = cfg['num_joints'] - batch_size = cfg['batch_size'] + num_joints = cfg["num_joints"] + batch_size = cfg["batch_size"] batch_spec = { Batch.inputs: [batch_size, None, None, 3], Batch.part_score_targets: [ @@ -53,13 +57,13 @@ def get_batch_spec(cfg): num_joints + cfg.get("num_idchannel", 0), ], } - if cfg['location_refinement']: + if cfg["location_refinement"]: batch_spec[Batch.locref_targets] = [batch_size, None, None, num_joints * 2] batch_spec[Batch.locref_mask] = [batch_size, None, None, num_joints * 2] - if cfg['pairwise_predict']: - print("Getting specs", cfg['dataset_type'], cfg['num_limbs'], cfg['num_joints']) + if cfg["pairwise_predict"]: + print("Getting specs", cfg["dataset_type"], cfg["num_limbs"], cfg["num_joints"]) if ( - "multi-animal" not in cfg['dataset_type'] + "multi-animal" not in cfg["dataset_type"] ): # this can be used for pairwise conditional batch_spec[Batch.pairwise_targets] = [ batch_size, @@ -78,13 +82,13 @@ def get_batch_spec(cfg): batch_size, None, None, - cfg['num_limbs'] * 2, + cfg["num_limbs"] * 2, ] batch_spec[Batch.pairwise_mask] = [ batch_size, None, None, - cfg['num_limbs'] * 2, + cfg["num_limbs"] * 2, ] return batch_spec @@ -92,45 +96,63 @@ def get_batch_spec(cfg): class PoseNet: def __init__(self, cfg): self.cfg = cfg - if 'use_batch_norm' not in self.cfg.keys(): - self.cfg['use_batch_norm'] = False - if 'use_drop_out' not in self.cfg.keys(): - self.cfg['use_drop_out'] = False + if "use_batch_norm" not in self.cfg.keys(): + self.cfg["use_batch_norm"] = False + if "use_drop_out" not in self.cfg.keys(): + self.cfg["use_drop_out"] = False def extract_features(self, inputs, use_batch_norm=False, use_drop_out=False): - mean = tf.constant(self.cfg['mean_pixel'], - dtype=tf.float32, shape=[1, 1, 1, 3], name='img_mean') + mean = tf.constant( + self.cfg["mean_pixel"], + dtype=tf.float32, + shape=[1, 1, 1, 3], + name="img_mean", + ) im_centered = inputs - mean - im_centered /= tf.constant( - eff.STDDEV_RGB, shape=[1, 1, 3]) + im_centered /= tf.constant(eff.STDDEV_RGB, shape=[1, 1, 3]) with tf.variable_scope("efficientnet"): - eff_net_type = self.cfg['net_type'].replace('_','-') - net, end_points = eff.build_model_base(im_centered, eff_net_type, use_batch_norm=use_batch_norm, drop_out=use_drop_out) + eff_net_type = 
self.cfg["net_type"].replace("_", "-") + net, end_points = eff.build_model_base( + im_centered, + eff_net_type, + use_batch_norm=use_batch_norm, + drop_out=use_drop_out, + ) return net, end_points def prediction_layers(self, features, end_points, reuse=None): cfg = self.cfg out = {} - with tf.variable_scope('pose', reuse=reuse): - out['part_pred'] = prediction_layer(cfg, features, 'part_pred', - cfg['num_joints'] + cfg.get("num_idchannel", 0)) - if cfg['location_refinement']: - out['locref'] = prediction_layer(cfg, features, 'locref_pred', - cfg['num_joints'] * 2) - if cfg['pairwise_predict'] and "multi-animal" not in cfg['dataset_type']: + with tf.variable_scope("pose", reuse=reuse): + out["part_pred"] = prediction_layer( + cfg, + features, + "part_pred", + cfg["num_joints"] + cfg.get("num_idchannel", 0), + ) + if cfg["location_refinement"]: + out["locref"] = prediction_layer( + cfg, features, "locref_pred", cfg["num_joints"] * 2 + ) + if cfg["pairwise_predict"] and "multi-animal" not in cfg["dataset_type"]: out["pairwise_pred"] = prediction_layer( cfg, features, "pairwise_pred", - cfg['num_joints'] * (cfg['num_joints'] - 1) * 2, + cfg["num_joints"] * (cfg["num_joints"] - 1) * 2, ) - if cfg['partaffinityfield_predict'] and "multi-animal" in cfg['dataset_type']: + if ( + cfg["partaffinityfield_predict"] + and "multi-animal" in cfg["dataset_type"] + ): out["pairwise_pred"] = prediction_layer( - cfg, features, "pairwise_pred", cfg['num_limbs'] * 2 + cfg, features, "pairwise_pred", cfg["num_limbs"] * 2 + ) + if cfg["intermediate_supervision"]: + raise NotImplementedError( + "Intermediate supervision is currently disabled." ) - if cfg['intermediate_supervision']: - raise NotImplementedError("Intermediate supervision is currently disabled.") return out @@ -139,7 +161,9 @@ def get_net(self, inputs, use_batch_norm, use_drop_out): return self.prediction_layers(net, end_points) def test(self, inputs): - heads = self.get_net(inputs, self.cfg['use_batch_norm'], self.cfg['use_drop_out']) + heads = self.get_net( + inputs, self.cfg["use_batch_norm"], self.cfg["use_drop_out"] + ) return self.add_inference_layers(heads) def inference(self, inputs): @@ -150,7 +174,7 @@ def inference(self, inputs): locref = heads["locref"] probs = tf.sigmoid(heads["part_pred"]) - if self.cfg['batch_size'] == 1: + if self.cfg["batch_size"] == 1: # assuming batchsize 1 here! 
probs = tf.squeeze(probs, axis=0) locref = tf.squeeze(locref, axis=0) @@ -175,9 +199,9 @@ def inference(self, inputs): likelihood = tf.reshape(tf.gather_nd(probs, indices), (-1, 1)) pose = ( - self.cfg['stride'] * tf.cast(tf.transpose(loc), dtype=tf.float32) - + self.cfg['stride'] * 0.5 - + offset * self.cfg['locref_stdev'] + self.cfg["stride"] * tf.cast(tf.transpose(loc), dtype=tf.float32) + + self.cfg["stride"] * 0.5 + + offset * self.cfg["locref_stdev"] ) pose = tf.concat([pose, likelihood], axis=1) @@ -216,9 +240,9 @@ def inference(self, inputs): likelihood = tf.reshape(tf.gather_nd(probs, indices), (-1, 1)) pose = ( - self.cfg['stride'] * tf.cast(tf.transpose(loc), dtype=tf.float32) - + self.cfg['stride'] * 0.5 - + offset * self.cfg['locref_stdev'] + self.cfg["stride"] * tf.cast(tf.transpose(loc), dtype=tf.float32) + + self.cfg["stride"] * 0.5 + + offset * self.cfg["locref_stdev"] ) pose = tf.concat([pose, likelihood], axis=1) return {"pose": pose} @@ -227,41 +251,51 @@ def add_inference_layers(self, heads): """ initialized during inference """ prob = tf.sigmoid(heads["part_pred"]) outputs = {"part_prob": prob} - if self.cfg['location_refinement']: + if self.cfg["location_refinement"]: outputs["locref"] = heads["locref"] - if self.cfg['pairwise_predict'] or self.cfg['partaffinityfield_predict']: + if self.cfg["pairwise_predict"] or self.cfg["partaffinityfield_predict"]: outputs["pairwise_pred"] = heads["pairwise_pred"] return outputs def train(self, batch): cfg = self.cfg - heads = self.get_net(batch[Batch.inputs], self.cfg['use_batch_norm'], self.cfg['use_drop_out']) + heads = self.get_net( + batch[Batch.inputs], self.cfg["use_batch_norm"], self.cfg["use_drop_out"] + ) - weigh_part_predictions = cfg['weigh_part_predictions'] - part_score_weights = batch[Batch.part_score_weights] if weigh_part_predictions else 1.0 + weigh_part_predictions = cfg["weigh_part_predictions"] + part_score_weights = ( + batch[Batch.part_score_weights] if weigh_part_predictions else 1.0 + ) def add_part_loss(pred_layer): - return tf.losses.sigmoid_cross_entropy(batch[Batch.part_score_targets], - heads[pred_layer], - part_score_weights) + return tf.losses.sigmoid_cross_entropy( + batch[Batch.part_score_targets], heads[pred_layer], part_score_weights + ) loss = {} - loss['part_loss'] = add_part_loss('part_pred') - total_loss = loss['part_loss'] - if cfg['intermediate_supervision']: + loss["part_loss"] = add_part_loss("part_pred") + total_loss = loss["part_loss"] + if cfg["intermediate_supervision"]: raise NotImplementedError("Intermediate supervision is currently disabled.") - if cfg['location_refinement']: - locref_pred = heads['locref'] + if cfg["location_refinement"]: + locref_pred = heads["locref"] locref_targets = batch[Batch.locref_targets] locref_weights = batch[Batch.locref_mask] - loss_func = losses.huber_loss if cfg['locref_huber_loss'] else tf.losses.mean_squared_error - loss['locref_loss'] = cfg['locref_loss_weight'] * loss_func(locref_targets, locref_pred, locref_weights) - total_loss = total_loss + loss['locref_loss'] + loss_func = ( + losses.huber_loss + if cfg["locref_huber_loss"] + else tf.losses.mean_squared_error + ) + loss["locref_loss"] = cfg["locref_loss_weight"] * loss_func( + locref_targets, locref_pred, locref_weights + ) + total_loss = total_loss + loss["locref_loss"] - if cfg['pairwise_predict'] or cfg['partaffinityfield_predict']: + if cfg["pairwise_predict"] or cfg["partaffinityfield_predict"]: # setting pairwise bodypart loss pairwise_pred = heads["pairwise_pred"] pairwise_targets 
= batch[Batch.pairwise_targets] @@ -269,14 +303,14 @@ def add_part_loss(pred_layer): loss_func = ( losses.huber_loss - if cfg['pairwise_huber_loss'] + if cfg["pairwise_huber_loss"] else tf.losses.mean_squared_error ) - loss["pairwise_loss"] = cfg['pairwise_loss_weight'] * loss_func( + loss["pairwise_loss"] = cfg["pairwise_loss_weight"] * loss_func( pairwise_targets, pairwise_pred, pairwise_weights ) total_loss = total_loss + loss["pairwise_loss"] # loss['total_loss'] = slim.losses.get_total_loss(add_regularization_losses=params.regularize) - loss['total_loss'] = total_loss + loss["total_loss"] = total_loss return loss diff --git a/deeplabcut/pose_estimation_tensorflow/nnet/pose_net_mobilenet.py b/deeplabcut/pose_estimation_tensorflow/nnet/pose_net_mobilenet.py index 951b7a50b..0ee47fbb3 100755 --- a/deeplabcut/pose_estimation_tensorflow/nnet/pose_net_mobilenet.py +++ b/deeplabcut/pose_estimation_tensorflow/nnet/pose_net_mobilenet.py @@ -67,7 +67,7 @@ def prediction_layer(cfg, input, name, num_outputs): padding="SAME", activation_fn=None, normalizer_fn=None, - weights_regularizer=slim.l2_regularizer(cfg['weight_decay']), + weights_regularizer=slim.l2_regularizer(cfg["weight_decay"]), ): with tf.variable_scope(name): pred = slim.conv2d_transpose( @@ -75,10 +75,11 @@ def prediction_layer(cfg, input, name, num_outputs): ) return pred + def get_batch_spec(cfg): - num_joints = cfg['num_joints'] - num_limbs = cfg['num_limbs'] - batch_size = cfg['batch_size'] + num_joints = cfg["num_joints"] + num_limbs = cfg["num_limbs"] + batch_size = cfg["batch_size"] batch_spec = { Batch.inputs: [batch_size, None, None, 3], Batch.part_score_targets: [ @@ -94,13 +95,13 @@ def get_batch_spec(cfg): num_joints + cfg.get("num_idchannel", 0), ], } - if cfg['location_refinement']: + if cfg["location_refinement"]: batch_spec[Batch.locref_targets] = [batch_size, None, None, num_joints * 2] batch_spec[Batch.locref_mask] = [batch_size, None, None, num_joints * 2] - if cfg['pairwise_predict']: - print("Getting specs", cfg['dataset_type'], num_limbs, num_joints) + if cfg["pairwise_predict"]: + print("Getting specs", cfg["dataset_type"], num_limbs, num_joints) if ( - "multi-animal" not in cfg['dataset_type'] + "multi-animal" not in cfg["dataset_type"] ): # this can be used for pairwise conditional batch_spec[Batch.pairwise_targets] = [ batch_size, @@ -115,28 +116,22 @@ def get_batch_spec(cfg): num_joints * (num_joints - 1) * 2, ] else: # train partaffinity fields - batch_spec[Batch.pairwise_targets] = [ - batch_size, - None, - None, - num_limbs * 2, - ] - batch_spec[Batch.pairwise_mask] = [ - batch_size, - None, - None, - num_limbs * 2, - ] + batch_spec[Batch.pairwise_targets] = [batch_size, None, None, num_limbs * 2] + batch_spec[Batch.pairwise_mask] = [batch_size, None, None, num_limbs * 2] return batch_spec + class PoseNet: def __init__(self, cfg): self.cfg = cfg def extract_features(self, inputs): - net_fun, net_arg_scope = networks[self.cfg['net_type']] + net_fun, net_arg_scope = networks[self.cfg["net_type"]] mean = tf.constant( - self.cfg['mean_pixel'], dtype=tf.float32, shape=[1, 1, 1, 3], name="img_mean" + self.cfg["mean_pixel"], + dtype=tf.float32, + shape=[1, 1, 1, 3], + name="img_mean", ) im_centered = inputs - mean with slim.arg_scope(net_arg_scope()): @@ -146,29 +141,27 @@ def extract_features(self, inputs): def prediction_layers(self, features, end_points, reuse=None): cfg = self.cfg - num_joints = cfg['num_joints'] + num_joints = cfg["num_joints"] out = {} with tf.variable_scope("pose", reuse=reuse): - 
out["part_pred"] = prediction_layer( - cfg, features, "part_pred", num_joints - ) - if cfg['location_refinement']: + out["part_pred"] = prediction_layer(cfg, features, "part_pred", num_joints) + if cfg["location_refinement"]: out["locref"] = prediction_layer( cfg, features, "locref_pred", num_joints * 2 ) - if cfg['pairwise_predict'] and "multi-animal" not in cfg['dataset_type']: + if cfg["pairwise_predict"] and "multi-animal" not in cfg["dataset_type"]: out["pairwise_pred"] = prediction_layer( - cfg, - features, - "pairwise_pred", - num_joints * (num_joints - 1) * 2, + cfg, features, "pairwise_pred", num_joints * (num_joints - 1) * 2 ) - if cfg['partaffinityfield_predict'] and "multi-animal" in cfg['dataset_type']: + if ( + cfg["partaffinityfield_predict"] + and "multi-animal" in cfg["dataset_type"] + ): out["pairwise_pred"] = prediction_layer( - cfg, features, "pairwise_pred", cfg['num_limbs'] * 2 + cfg, features, "pairwise_pred", cfg["num_limbs"] * 2 ) - if cfg['intermediate_supervision']: + if cfg["intermediate_supervision"]: # print(end_points.keys()) >> to see what else is available. out["part_pred_interm"] = prediction_layer( cfg, @@ -194,7 +187,7 @@ def inference(self, inputs): locref = heads["locref"] probs = tf.sigmoid(heads["part_pred"]) - if cfg['batch_size'] == 1: + if cfg["batch_size"] == 1: probs = tf.squeeze(probs, axis=0) locref = tf.squeeze(locref, axis=0) l_shape = tf.shape(probs) @@ -218,9 +211,9 @@ def inference(self, inputs): likelihood = tf.reshape(tf.gather_nd(probs, indices), (-1, 1)) pose = ( - self.cfg['stride'] * tf.cast(tf.transpose(loc), dtype=tf.float32) - + self.cfg['stride'] * 0.5 - + offset * cfg['locref_stdev'] + self.cfg["stride"] * tf.cast(tf.transpose(loc), dtype=tf.float32) + + self.cfg["stride"] * 0.5 + + offset * cfg["locref_stdev"] ) pose = tf.concat([pose, likelihood], axis=1) @@ -259,9 +252,9 @@ def inference(self, inputs): likelihood = tf.reshape(tf.gather_nd(probs, indices), (-1, 1)) pose = ( - self.cfg['stride'] * tf.cast(tf.transpose(loc), dtype=tf.float32) - + self.cfg['stride'] * 0.5 - + offset * cfg['locref_stdev'] + self.cfg["stride"] * tf.cast(tf.transpose(loc), dtype=tf.float32) + + self.cfg["stride"] * 0.5 + + offset * cfg["locref_stdev"] ) pose = tf.concat([pose, likelihood], axis=1) return {"pose": pose} @@ -270,9 +263,9 @@ def add_inference_layers(self, heads): """ initialized during inference """ prob = tf.sigmoid(heads["part_pred"]) outputs = {"part_prob": prob} - if self.cfg['location_refinement']: + if self.cfg["location_refinement"]: outputs["locref"] = heads["locref"] - if self.cfg['pairwise_predict'] or self.cfg['partaffinityfield_predict']: + if self.cfg["pairwise_predict"] or self.cfg["partaffinityfield_predict"]: outputs["pairwise_pred"] = heads["pairwise_pred"] return outputs @@ -281,7 +274,7 @@ def train(self, batch): heads = self.get_net(batch[Batch.inputs]) - weigh_part_predictions = cfg['weigh_part_predictions'] + weigh_part_predictions = cfg["weigh_part_predictions"] part_score_weights = ( batch[Batch.part_score_weights] if weigh_part_predictions else 1.0 ) @@ -294,26 +287,26 @@ def add_part_loss(pred_layer): loss = {} loss["part_loss"] = add_part_loss("part_pred") total_loss = loss["part_loss"] - if cfg['intermediate_supervision']: + if cfg["intermediate_supervision"]: loss["part_loss_interm"] = add_part_loss("part_pred_interm") total_loss = total_loss + loss["part_loss_interm"] - if cfg['location_refinement']: + if cfg["location_refinement"]: locref_pred = heads["locref"] locref_targets = 
batch[Batch.locref_targets] locref_weights = batch[Batch.locref_mask] loss_func = ( losses.huber_loss - if cfg['locref_huber_loss'] + if cfg["locref_huber_loss"] else tf.losses.mean_squared_error ) - loss["locref_loss"] = cfg['locref_loss_weight'] * loss_func( + loss["locref_loss"] = cfg["locref_loss_weight"] * loss_func( locref_targets, locref_pred, locref_weights ) total_loss = total_loss + loss["locref_loss"] - if cfg['pairwise_predict'] or cfg['partaffinityfield_predict']: + if cfg["pairwise_predict"] or cfg["partaffinityfield_predict"]: # setting pairwise bodypart loss pairwise_pred = heads["pairwise_pred"] pairwise_targets = batch[Batch.pairwise_targets] @@ -321,10 +314,10 @@ def add_part_loss(pred_layer): loss_func = ( losses.huber_loss - if cfg['pairwise_huber_loss'] + if cfg["pairwise_huber_loss"] else tf.losses.mean_squared_error ) - loss["pairwise_loss"] = cfg['pairwise_loss_weight'] * loss_func( + loss["pairwise_loss"] = cfg["pairwise_loss_weight"] * loss_func( pairwise_targets, pairwise_pred, pairwise_weights ) total_loss = total_loss + loss["pairwise_loss"] diff --git a/deeplabcut/pose_estimation_tensorflow/nnet/pose_netmulti.py b/deeplabcut/pose_estimation_tensorflow/nnet/pose_netmulti.py index fd50bf059..8b52e1e19 100755 --- a/deeplabcut/pose_estimation_tensorflow/nnet/pose_netmulti.py +++ b/deeplabcut/pose_estimation_tensorflow/nnet/pose_netmulti.py @@ -20,7 +20,11 @@ from deeplabcut.pose_estimation_tensorflow.dataset.pose_dataset import Batch import deeplabcut.pose_estimation_tensorflow.nnet.efficientnet_builder as eff -from deeplabcut.pose_estimation_tensorflow.nnet import mobilenet_v2, mobilenet, conv_blocks +from deeplabcut.pose_estimation_tensorflow.nnet import ( + mobilenet_v2, + mobilenet, + conv_blocks, +) from deeplabcut.pose_estimation_tensorflow.nnet import losses vers = (tf.__version__).split(".") @@ -30,44 +34,61 @@ TF = tf # Change the stride from 2 to 1 to get 16x downscaling instead of 32x. 
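# MobileNetV2 otherwise halves the resolution five times (32x overall);
# keeping spec layer 14 at stride 1 leaves the backbone output at 1/16 of the
# input, the resolution the decoder and the stride-based decoding assume.
# For intuition (hypothetical input size): a 640 x 480 frame then yields
# ceil(480 / 16) x ceil(640 / 16) = 30 x 40 score maps rather than 15 x 20.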
-mobilenet_v2.V2_DEF["spec"][14] = mobilenet.op(conv_blocks.expanded_conv, stride=1, num_outputs=160) +mobilenet_v2.V2_DEF["spec"][14] = mobilenet.op( + conv_blocks.expanded_conv, stride=1, num_outputs=160 +) + + def wrapped_partial(func, *args, **kwargs): partial_func = functools.partial(func, *args, **kwargs) functools.update_wrapper(partial_func, func) return partial_func + net_funcs = { "resnet_50": resnet_v1.resnet_v1_50, "resnet_101": resnet_v1.resnet_v1_101, "resnet_152": resnet_v1.resnet_v1_152, - 'mobilenet_v2_1.0': mobilenet_v2.mobilenet_base, - 'mobilenet_v2_0.75': wrapped_partial(mobilenet_v2.mobilenet_base, - depth_multiplier=0.75, - final_endpoint="layer_19", - finegrain_classification_mode=True), - 'mobilenet_v2_0.5': wrapped_partial(mobilenet_v2.mobilenet_base, - depth_multiplier=0.5, - final_endpoint="layer_19", - finegrain_classification_mode=True), - 'mobilenet_v2_0.35': wrapped_partial(mobilenet_v2.mobilenet_base, - depth_multiplier=0.35, - final_endpoint="layer_19", - finegrain_classification_mode=True), - 'mobilenet_v2_0.1': wrapped_partial(mobilenet_v2.mobilenet_base, - depth_multiplier=0.1, - final_endpoint="layer_19", - finegrain_classification_mode=True), - 'mobilenet_v2_0.35_10': wrapped_partial(mobilenet_v2.mobilenet_base, - depth_multiplier=0.35, - final_endpoint="layer_10", - finegrain_classification_mode=True), - 'mobilenet_v2_0.1_10': wrapped_partial(mobilenet_v2.mobilenet_base, - depth_multiplier=0.1, - final_endpoint="layer_10", - finegrain_classification_mode=True) + "mobilenet_v2_1.0": mobilenet_v2.mobilenet_base, + "mobilenet_v2_0.75": wrapped_partial( + mobilenet_v2.mobilenet_base, + depth_multiplier=0.75, + final_endpoint="layer_19", + finegrain_classification_mode=True, + ), + "mobilenet_v2_0.5": wrapped_partial( + mobilenet_v2.mobilenet_base, + depth_multiplier=0.5, + final_endpoint="layer_19", + finegrain_classification_mode=True, + ), + "mobilenet_v2_0.35": wrapped_partial( + mobilenet_v2.mobilenet_base, + depth_multiplier=0.35, + final_endpoint="layer_19", + finegrain_classification_mode=True, + ), + "mobilenet_v2_0.1": wrapped_partial( + mobilenet_v2.mobilenet_base, + depth_multiplier=0.1, + final_endpoint="layer_19", + finegrain_classification_mode=True, + ), + "mobilenet_v2_0.35_10": wrapped_partial( + mobilenet_v2.mobilenet_base, + depth_multiplier=0.35, + final_endpoint="layer_10", + finegrain_classification_mode=True, + ), + "mobilenet_v2_0.1_10": wrapped_partial( + mobilenet_v2.mobilenet_base, + depth_multiplier=0.1, + final_endpoint="layer_10", + finegrain_classification_mode=True, + ), } -#https://towardsdatascience.com/complete-architectural-details-of-all-efficientnet-models-5fd5b736142 +# https://towardsdatascience.com/complete-architectural-details-of-all-efficientnet-models-5fd5b736142 parallel_layers = { "b0": "4", "b1": "7", @@ -76,8 +97,8 @@ def wrapped_partial(func, *args, **kwargs): "b4": "9", "b5": "12", "b6": "14", - "b7": "17" - } + "b7": "17", +} def prediction_layer(cfg, input, name, num_outputs): @@ -86,7 +107,7 @@ def prediction_layer(cfg, input, name, num_outputs): padding="SAME", activation_fn=None, normalizer_fn=None, - weights_regularizer=slim.l2_regularizer(cfg['weight_decay']), + weights_regularizer=slim.l2_regularizer(cfg["weight_decay"]), ): with tf.variable_scope(name): pred = slim.conv2d_transpose( @@ -96,8 +117,8 @@ def prediction_layer(cfg, input, name, num_outputs): def get_batch_spec(cfg): - num_joints = cfg['num_joints'] - batch_size = cfg['batch_size'] + num_joints = cfg["num_joints"] + batch_size = 
cfg["batch_size"] batch_spec = { Batch.inputs: [batch_size, None, None, 3], Batch.part_score_targets: [ @@ -113,13 +134,13 @@ def get_batch_spec(cfg): num_joints + cfg.get("num_idchannel", 0), ], } - if cfg['location_refinement']: + if cfg["location_refinement"]: batch_spec[Batch.locref_targets] = [batch_size, None, None, num_joints * 2] batch_spec[Batch.locref_mask] = [batch_size, None, None, num_joints * 2] - if cfg['pairwise_predict']: - print("Getting specs", cfg['dataset_type'], cfg['num_limbs'], cfg['num_joints']) + if cfg["pairwise_predict"]: + print("Getting specs", cfg["dataset_type"], cfg["num_limbs"], cfg["num_joints"]) if ( - "multi-animal" not in cfg['dataset_type'] + "multi-animal" not in cfg["dataset_type"] ): # this can be used for pairwise conditional batch_spec[Batch.pairwise_targets] = [ batch_size, @@ -138,13 +159,13 @@ def get_batch_spec(cfg): batch_size, None, None, - cfg['num_limbs'] * 2, + cfg["num_limbs"] * 2, ] batch_spec[Batch.pairwise_mask] = [ batch_size, None, None, - cfg['num_limbs'] * 2, + cfg["num_limbs"] * 2, ] return batch_spec @@ -152,26 +173,31 @@ def get_batch_spec(cfg): class PoseNet: def __init__(self, cfg): self.cfg = cfg - if 'use_batch_norm' not in self.cfg.keys(): - self.cfg['use_batch_norm'] = False - if 'use_drop_out' not in self.cfg.keys(): - self.cfg['use_drop_out'] = False + if "use_batch_norm" not in self.cfg.keys(): + self.cfg["use_batch_norm"] = False + if "use_drop_out" not in self.cfg.keys(): + self.cfg["use_drop_out"] = False def extract_features(self, inputs): mean = tf.constant( - self.cfg['mean_pixel'], dtype=tf.float32, shape=[1, 1, 1, 3], name="img_mean" + self.cfg["mean_pixel"], + dtype=tf.float32, + shape=[1, 1, 1, 3], + name="img_mean", ) im_centered = inputs - mean - if 'resnet' in self.cfg['net_type']: + if "resnet" in self.cfg["net_type"]: # The next part of the code depends upon which tensorflow version you have. vers = tf.__version__ vers = vers.split( "." ) # Updated based on https://github.com/AlexEMG/DeepLabCut/issues/44 - net_fun = net_funcs[self.cfg['net_type']] - if int(vers[0]) == 1 and int(vers[1]) < 4: # check if lower than version 1.4. + net_fun = net_funcs[self.cfg["net_type"]] + if ( + int(vers[0]) == 1 and int(vers[1]) < 4 + ): # check if lower than version 1.4. 
with slim.arg_scope(resnet_v1.resnet_arg_scope(False)): net, end_points = net_fun( im_centered, global_pool=False, output_stride=16 @@ -179,18 +205,23 @@ def extract_features(self, inputs): else: with slim.arg_scope(resnet_v1.resnet_arg_scope()): net, end_points = net_fun( - im_centered, global_pool=False, output_stride=16, is_training=False + im_centered, + global_pool=False, + output_stride=16, + is_training=False, ) - elif 'mobilenet' in self.cfg['net_type']: - net_fun = net_funcs[self.cfg['net_type']] + elif "mobilenet" in self.cfg["net_type"]: + net_fun = net_funcs[self.cfg["net_type"]] with slim.arg_scope(mobilenet_v2.training_scope()): net, end_points = net_fun(im_centered) - elif 'efficientnet' in self.cfg['net_type']: + elif "efficientnet" in self.cfg["net_type"]: im_centered /= tf.constant(eff.STDDEV_RGB, shape=[1, 1, 3]) - net, end_points = eff.build_model_base(im_centered, - self.cfg['net_type'], - use_batch_norm=self.cfg['use_batch_norm'], - drop_out=self.cfg['use_drop_out']) + net, end_points = eff.build_model_base( + im_centered, + self.cfg["net_type"], + use_batch_norm=self.cfg["use_batch_norm"], + drop_out=self.cfg["use_drop_out"], + ) return net, end_points def prediction_layers( @@ -203,20 +234,18 @@ def prediction_layers( scope="pose", ): cfg = self.cfg - if "resnet" in cfg['net_type']: - num_layers = re.findall("resnet_([0-9]*)", cfg['net_type'])[0] + if "resnet" in cfg["net_type"]: + num_layers = re.findall("resnet_([0-9]*)", cfg["net_type"])[0] layer_name = ( "resnet_v1_{}".format(num_layers) + "/block{}/unit_{}/bottleneck_v1" ) - mid_pt = layer_name.format(2,3) - elif "mobilenet" in cfg['net_type']: + mid_pt = layer_name.format(2, 3) + elif "mobilenet" in cfg["net_type"]: mid_pt = "layer_7" - elif "efficientnet" in cfg['net_type']: - mid_pt = "block_"+parallel_layers[cfg['net_type'].split('-')[1]] + elif "efficientnet" in cfg["net_type"]: + mid_pt = "block_" + parallel_layers[cfg["net_type"].split("-")[1]] - final_dims = tf.ceil( - tf.divide(input_shape[1:3], tf.convert_to_tensor(16)) - ) + final_dims = tf.ceil(tf.divide(input_shape[1:3], tf.convert_to_tensor(16))) interim_dims = tf.scalar_mul(2, final_dims) interim_dims = tf.cast(interim_dims, tf.int32) bank_3 = end_points[mid_pt] @@ -226,57 +255,72 @@ def prediction_layers( [slim.conv2d], padding="SAME", normalizer_fn=None, - weights_regularizer=slim.l2_regularizer(cfg['weight_decay']), + weights_regularizer=slim.l2_regularizer(cfg["weight_decay"]), ): with tf.variable_scope("decoder_filters"): - bank_3 = slim.conv2d(bank_3, cfg['bank3'], 1, scope="decoder_parallel_1") + bank_3 = slim.conv2d( + bank_3, cfg["bank3"], 1, scope="decoder_parallel_1" + ) with slim.arg_scope( [slim.conv2d_transpose], padding="SAME", normalizer_fn=None, - weights_regularizer=slim.l2_regularizer(cfg['weight_decay']), + weights_regularizer=slim.l2_regularizer(cfg["weight_decay"]), ): with tf.variable_scope("upsampled_features"): upsampled_features = slim.conv2d_transpose( - features, cfg['bank5'], kernel_size=[3, 3], stride=2, scope="block4" + features, cfg["bank5"], kernel_size=[3, 3], stride=2, scope="block4" ) net = tf.concat([bank_3, upsampled_features], 3) out = {} with tf.variable_scope(scope, reuse=reuse): out["part_pred"] = prediction_layer( - cfg, net, "part_pred", cfg['num_joints'] + cfg.get("num_idchannel", 0) + cfg, net, "part_pred", cfg["num_joints"] + cfg.get("num_idchannel", 0) ) - if cfg['location_refinement']: + if cfg["location_refinement"]: out["locref"] = prediction_layer( - cfg, net, "locref_pred", cfg['num_joints'] * 
2 + cfg, net, "locref_pred", cfg["num_joints"] * 2 ) - if cfg['pairwise_predict'] and "multi-animal" not in cfg['dataset_type']: + if cfg["pairwise_predict"] and "multi-animal" not in cfg["dataset_type"]: out["pairwise_pred"] = prediction_layer( - cfg, net, "pairwise_pred", cfg['num_joints'] * (cfg['num_joints'] - 1) * 2 + cfg, + net, + "pairwise_pred", + cfg["num_joints"] * (cfg["num_joints"] - 1) * 2, ) - if cfg['partaffinityfield_predict'] and "multi-animal" in cfg['dataset_type']: + if ( + cfg["partaffinityfield_predict"] + and "multi-animal" in cfg["dataset_type"] + ): out["pairwise_pred"] = prediction_layer( - cfg, net, "pairwise_pred", cfg['num_limbs'] * 2 + cfg, net, "pairwise_pred", cfg["num_limbs"] * 2 ) - if cfg['intermediate_supervision'] and "efficientnet" not in cfg['net_type']: - if "mobilenet" in cfg['net_type']: + if ( + cfg["intermediate_supervision"] + and "efficientnet" not in cfg["net_type"] + ): + if "mobilenet" in cfg["net_type"]: out["part_pred_interm"] = prediction_layer( cfg, - end_points["layer_" + str(cfg["intermediate_supervision_layer"])], + end_points[ + "layer_" + str(cfg["intermediate_supervision_layer"]) + ], "intermediate_supervision", - cfg['num_joints'], + cfg["num_joints"], + ) + elif "resnet" in cfg["net_type"]: + interm_name = layer_name.format( + 3, cfg["intermediate_supervision_layer"] ) - elif "resnet" in cfg['net_type']: - interm_name = layer_name.format(3, cfg['intermediate_supervision_layer']) block_interm_out = end_points[interm_name] out["part_pred_interm"] = prediction_layer( cfg, block_interm_out, "intermediate_supervision", - cfg['num_joints'] + cfg.get("num_idchannel", 0), + cfg["num_joints"] + cfg.get("num_idchannel", 0), ) return out @@ -294,9 +338,9 @@ def add_inference_layers(self, heads): """ initialized during inference """ prob = tf.sigmoid(heads["part_pred"]) outputs = {"part_prob": prob} - if self.cfg['location_refinement']: + if self.cfg["location_refinement"]: outputs["locref"] = heads["locref"] - if self.cfg['pairwise_predict'] or self.cfg['partaffinityfield_predict']: + if self.cfg["pairwise_predict"] or self.cfg["partaffinityfield_predict"]: outputs["pairwise_pred"] = heads["pairwise_pred"] return outputs @@ -304,7 +348,7 @@ def train(self, batch): cfg = self.cfg heads = self.get_net(batch[Batch.inputs]) - weigh_part_predictions = cfg['weigh_part_predictions'] + weigh_part_predictions = cfg["weigh_part_predictions"] part_score_weights = ( batch[Batch.part_score_weights] if weigh_part_predictions else 1.0 ) @@ -317,25 +361,25 @@ def add_part_loss(pred_layer): loss = {} loss["part_loss"] = add_part_loss("part_pred") total_loss = loss["part_loss"] - if cfg['intermediate_supervision'] and "efficientnet" not in cfg['net_type']: + if cfg["intermediate_supervision"] and "efficientnet" not in cfg["net_type"]: loss["part_loss_interm"] = add_part_loss("part_pred_interm") total_loss = total_loss + loss["part_loss_interm"] - if cfg['location_refinement']: + if cfg["location_refinement"]: locref_pred = heads["locref"] locref_targets = batch[Batch.locref_targets] locref_weights = batch[Batch.locref_mask] loss_func = ( losses.huber_loss - if cfg['locref_huber_loss'] + if cfg["locref_huber_loss"] else tf.losses.mean_squared_error ) - loss["locref_loss"] = cfg['locref_loss_weight'] * loss_func( + loss["locref_loss"] = cfg["locref_loss_weight"] * loss_func( locref_targets, locref_pred, locref_weights ) total_loss = total_loss + loss["locref_loss"] - if cfg['pairwise_predict'] or cfg['partaffinityfield_predict']: + if cfg["pairwise_predict"] 
or cfg["partaffinityfield_predict"]: "setting pw bodypart loss..." pairwise_pred = heads["pairwise_pred"] pairwise_targets = batch[Batch.pairwise_targets] @@ -343,10 +387,10 @@ def add_part_loss(pred_layer): loss_func = ( losses.huber_loss - if cfg['pairwise_huber_loss'] + if cfg["pairwise_huber_loss"] else tf.losses.mean_squared_error ) - loss["pairwise_loss"] = cfg['pairwise_loss_weight'] * loss_func( + loss["pairwise_loss"] = cfg["pairwise_loss_weight"] * loss_func( pairwise_targets, pairwise_pred, pairwise_weights ) total_loss = total_loss + loss["pairwise_loss"] diff --git a/deeplabcut/pose_estimation_tensorflow/nnet/predict.py b/deeplabcut/pose_estimation_tensorflow/nnet/predict.py index 75ea8789e..b4884f582 100644 --- a/deeplabcut/pose_estimation_tensorflow/nnet/predict.py +++ b/deeplabcut/pose_estimation_tensorflow/nnet/predict.py @@ -35,13 +35,13 @@ def setup_pose_prediction(cfg): TF.reset_default_graph() - inputs = TF.placeholder(tf.float32, shape=[cfg['batch_size'], None, None, 3]) + inputs = TF.placeholder(tf.float32, shape=[cfg["batch_size"], None, None, 3]) net_heads = pose_net(cfg).test(inputs) outputs = [net_heads["part_prob"]] - if cfg['location_refinement']: + if cfg["location_refinement"]: outputs.append(net_heads["locref"]) - if ("multi-animal" in cfg['dataset_type']) and cfg['partaffinityfield_predict']: + if ("multi-animal" in cfg["dataset_type"]) and cfg["partaffinityfield_predict"]: print("Activating extracting of PAFs") outputs.append(net_heads["pairwise_pred"]) @@ -51,7 +51,7 @@ def setup_pose_prediction(cfg): sess.run(TF.local_variables_initializer()) # Restore variables from disk. - restorer.restore(sess, cfg['init_weights']) + restorer.restore(sess, cfg["init_weights"]) return sess, inputs, outputs @@ -61,11 +61,11 @@ def extract_cnn_output(outputs_np, cfg): scmap = outputs_np[0] scmap = np.squeeze(scmap) locref = None - if cfg['location_refinement']: + if cfg["location_refinement"]: locref = np.squeeze(outputs_np[1]) shape = locref.shape locref = np.reshape(locref, (shape[0], shape[1], -1, 2)) - locref *= cfg['locref_stdev'] + locref *= cfg["locref_stdev"] if len(scmap.shape) == 2: # for single body part! scmap = np.expand_dims(scmap, axis=2) return scmap, locref @@ -116,9 +116,9 @@ def getpose(image, cfg, sess, inputs, outputs, outall=False): scmap, locref = extract_cnn_output(outputs_np, cfg) num_outputs = cfg.get("num_outputs", 1) if num_outputs > 1: - pose = multi_pose_predict(scmap, locref, cfg['stride'], num_outputs) + pose = multi_pose_predict(scmap, locref, cfg["stride"], num_outputs) else: - pose = argmax_pose_predict(scmap, locref, cfg['stride']) + pose = argmax_pose_predict(scmap, locref, cfg["stride"]) if outall: return scmap, locref, pose else: @@ -131,11 +131,11 @@ def extract_cnn_outputmulti(outputs_np, cfg): Dimensions: image batch x imagedim1 x imagedim2 x bodypart""" scmap = outputs_np[0] locref = None - if cfg['location_refinement']: + if cfg["location_refinement"]: locref = outputs_np[1] shape = locref.shape locref = np.reshape(locref, (shape[0], shape[1], shape[2], -1, 2)) - locref *= cfg['locref_stdev'] + locref *= cfg["locref_stdev"] if len(scmap.shape) == 2: # for single body part! 
scmap = np.expand_dims(scmap, axis=2) return scmap, locref @@ -180,8 +180,8 @@ def getposeNP(image, cfg, sess, inputs, outputs, outall=False): DZ[m, l, k, :2] = locref[l, y, x, k, :] DZ[m, l, k, 2] = scmap[l, y, x, k] - X = X.astype("float32") * cfg['stride'] + 0.5 * cfg['stride'] + DZ[:, :, :, 0] - Y = Y.astype("float32") * cfg['stride'] + 0.5 * cfg['stride'] + DZ[:, :, :, 1] + X = X.astype("float32") * cfg["stride"] + 0.5 * cfg["stride"] + DZ[:, :, :, 0] + Y = Y.astype("float32") * cfg["stride"] + 0.5 * cfg["stride"] + DZ[:, :, :, 1] P = DZ[:, :, :, 2] Xs = X.swapaxes(0, 2).swapaxes(0, 1) @@ -204,7 +204,7 @@ def getposeNP(image, cfg, sess, inputs, outputs, outall=False): ### Code for TF inference on GPU def setup_GPUpose_prediction(cfg): tf.reset_default_graph() - inputs = tf.placeholder(tf.float32, shape=[cfg['batch_size'], None, None, 3]) + inputs = tf.placeholder(tf.float32, shape=[cfg["batch_size"], None, None, 3]) net_heads = pose_net(cfg).inference(inputs) outputs = [net_heads["pose"]] @@ -215,7 +215,7 @@ def setup_GPUpose_prediction(cfg): sess.run(tf.local_variables_initializer()) # Restore variables from disk. - restorer.restore(sess, cfg['init_weights']) + restorer.restore(sess, cfg["init_weights"]) return sess, inputs, outputs diff --git a/deeplabcut/pose_estimation_tensorflow/nnet/predict_multianimal.py b/deeplabcut/pose_estimation_tensorflow/nnet/predict_multianimal.py index 3307ad7ff..74b7206d3 100755 --- a/deeplabcut/pose_estimation_tensorflow/nnet/predict_multianimal.py +++ b/deeplabcut/pose_estimation_tensorflow/nnet/predict_multianimal.py @@ -28,14 +28,14 @@ def extract_cnn_output(outputs_np, cfg): """ extract locref, scmap and partaffinityfield from network """ scmap = outputs_np[0] scmap = np.squeeze(scmap) - if cfg['location_refinement']: + if cfg["location_refinement"]: locref = np.squeeze(outputs_np[1]) shape = locref.shape locref = np.reshape(locref, (shape[0], shape[1], -1, 2)) - locref *= cfg['locref_stdev'] + locref *= cfg["locref_stdev"] else: locref = None - if cfg['partaffinityfield_predict'] and ("multi-animal" in cfg['dataset_type']): + if cfg["partaffinityfield_predict"] and ("multi-animal" in cfg["dataset_type"]): paf = np.squeeze(outputs_np[2]) else: paf = None @@ -51,8 +51,8 @@ def AssociationCosts( """ Association costs for detections based on PAFs """ Distances = {} ny, nx, nlimbs = np.shape(partaffinitymaps) - for l in range(cfg['num_limbs']): - bp1, bp2 = cfg['partaffinityfield_graph'][l] # [(0,1),(1,2) + for l in range(cfg["num_limbs"]): + bp1, bp2 = cfg["partaffinityfield_graph"][l] # [(0,1),(1,2) # get coordinates for bp1 and bp2 C1 = coordinates[bp1] C2 = coordinates[bp2] @@ -135,9 +135,9 @@ def extract_detections(cfg, scmap, locref, pafs, nms_radius, det_min_score): from nms_grid import nms_grid # this needs to be installed (C-code) Detections = {} - stride = cfg['stride'] + stride = cfg["stride"] halfstride = stride * 0.5 - num_joints = cfg['num_joints'] + num_joints = cfg["num_joints"] dist_grid = make_nms_grid(nms_radius) unProb = [None] * num_joints unPos = [None] * num_joints @@ -174,10 +174,7 @@ def extract_detections(cfg, scmap, locref, pafs, nms_radius, det_min_score): def find_local_maxima(scmap, radius, threshold): peak_idx = peak_local_max( - scmap, - min_distance=radius, - threshold_abs=threshold, - exclude_border=False, + scmap, min_distance=radius, threshold_abs=threshold, exclude_border=False ) grid = np.zeros_like(scmap, dtype=bool) grid[tuple(peak_idx.T)] = True @@ -188,9 +185,9 @@ def find_local_maxima(scmap, radius, 
threshold): def extract_detections_python(cfg, scmap, locref, pafs, radius, threshold): Detections = {} - stride = cfg['stride'] + stride = cfg["stride"] halfstride = stride * 0.5 - num_joints = cfg['num_joints'] + num_joints = cfg["num_joints"] unProb = [None] * num_joints unPos = [None] * num_joints @@ -253,9 +250,9 @@ def extract_detection_withgroundtruth( Detections = {} num_idchannel = cfg.get("num_idchannel", 0) - stride = cfg['stride'] + stride = cfg["stride"] halfstride = stride * 0.5 - num_joints = cfg['num_joints'] + num_joints = cfg["num_joints"] # get dist_grid dist_grid = make_nms_grid(nms_radius) unProb = [None] * num_joints @@ -308,9 +305,9 @@ def extract_detection_withgroundtruth_python( cfg, groundtruthcoordinates, scmap, locref, pafs, radius, threshold ): Detections = {} - stride = cfg['stride'] + stride = cfg["stride"] halfstride = stride * 0.5 - num_joints = cfg['num_joints'] + num_joints = cfg["num_joints"] num_idchannel = cfg.get("num_idchannel", 0) unProb = [None] * num_joints unPos = [None] * num_joints @@ -391,14 +388,14 @@ def extract_cnn_outputmulti(outputs_np, cfg): """ extract locref + scmap from network Dimensions: image batch x imagedim1 x imagedim2 x bodypart""" scmap = outputs_np[0] - if cfg['location_refinement']: + if cfg["location_refinement"]: locref = outputs_np[1] shape = locref.shape locref = np.reshape(locref, (shape[0], shape[1], shape[2], -1, 2)) - locref *= cfg['locref_stdev'] + locref *= cfg["locref_stdev"] else: locref = None - if cfg['partaffinityfield_predict'] and ("multi-animal" in cfg['dataset_type']): + if cfg["partaffinityfield_predict"] and ("multi-animal" in cfg["dataset_type"]): paf = outputs_np[2] else: paf = None @@ -467,9 +464,9 @@ def extract_batchdetections( def extract_batchdetections_python(cfg, scmap, locref, pafs, radius, threshold): Detections = {} - stride = cfg['stride'] + stride = cfg["stride"] halfstride = stride * 0.5 - num_joints = cfg['num_joints'] + num_joints = cfg["num_joints"] num_idchannel = cfg.get("num_idchannel", 0) unProb = [None] * num_joints unPos = [None] * num_joints diff --git a/deeplabcut/pose_estimation_tensorflow/nnet/utils.py b/deeplabcut/pose_estimation_tensorflow/nnet/utils.py index 85d7bbeb9..a8d89b50f 100644 --- a/deeplabcut/pose_estimation_tensorflow/nnet/utils.py +++ b/deeplabcut/pose_estimation_tensorflow/nnet/utils.py @@ -28,201 +28,212 @@ from tensorflow.contrib.tpu.python.tpu import tpu_function -def build_learning_rate(initial_lr, - global_step, - steps_per_epoch=None, - lr_decay_type='exponential', - decay_factor=0.97, - decay_epochs=2.4, - total_steps=None, - warmup_epochs=5): - """Build learning rate.""" - if lr_decay_type == 'exponential': - assert steps_per_epoch is not None - decay_steps = steps_per_epoch * decay_epochs - lr = tf.train.exponential_decay( - initial_lr, global_step, decay_steps, decay_factor, staircase=True) - elif lr_decay_type == 'cosine': - assert total_steps is not None - lr = 0.5 * initial_lr * ( - 1 + tf.cos(np.pi * tf.cast(global_step, tf.float32) / total_steps)) - elif lr_decay_type == 'constant': - lr = initial_lr - else: - assert False, 'Unknown lr_decay_type : %s' % lr_decay_type - - if warmup_epochs: - tf.logging.info('Learning rate warmup_epochs: %d' % warmup_epochs) - warmup_steps = int(warmup_epochs * steps_per_epoch) - warmup_lr = ( - initial_lr * tf.cast(global_step, tf.float32) / tf.cast( - warmup_steps, tf.float32)) - lr = tf.cond(global_step < warmup_steps, lambda: warmup_lr, lambda: lr) - - return lr - - -def build_optimizer(learning_rate, - 
optimizer_name='rmsprop', - decay=0.9, - epsilon=0.001, - momentum=0.9): - """Build optimizer.""" - if optimizer_name == 'sgd': - tf.logging.info('Using SGD optimizer') - optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate) - elif optimizer_name == 'momentum': - tf.logging.info('Using Momentum optimizer') - optimizer = tf.train.MomentumOptimizer( - learning_rate=learning_rate, momentum=momentum) - elif optimizer_name == 'rmsprop': - tf.logging.info('Using RMSProp optimizer') - optimizer = tf.train.RMSPropOptimizer(learning_rate, decay, momentum, - epsilon) - else: - tf.logging.fatal('Unknown optimizer:', optimizer_name) - - return optimizer +def build_learning_rate( + initial_lr, + global_step, + steps_per_epoch=None, + lr_decay_type="exponential", + decay_factor=0.97, + decay_epochs=2.4, + total_steps=None, + warmup_epochs=5, +): + """Build learning rate.""" + if lr_decay_type == "exponential": + assert steps_per_epoch is not None + decay_steps = steps_per_epoch * decay_epochs + lr = tf.train.exponential_decay( + initial_lr, global_step, decay_steps, decay_factor, staircase=True + ) + elif lr_decay_type == "cosine": + assert total_steps is not None + lr = ( + 0.5 + * initial_lr + * (1 + tf.cos(np.pi * tf.cast(global_step, tf.float32) / total_steps)) + ) + elif lr_decay_type == "constant": + lr = initial_lr + else: + assert False, "Unknown lr_decay_type : %s" % lr_decay_type + + if warmup_epochs: + tf.logging.info("Learning rate warmup_epochs: %d" % warmup_epochs) + warmup_steps = int(warmup_epochs * steps_per_epoch) + warmup_lr = ( + initial_lr + * tf.cast(global_step, tf.float32) + / tf.cast(warmup_steps, tf.float32) + ) + lr = tf.cond(global_step < warmup_steps, lambda: warmup_lr, lambda: lr) + + return lr + + +def build_optimizer( + learning_rate, optimizer_name="rmsprop", decay=0.9, epsilon=0.001, momentum=0.9 +): + """Build optimizer.""" + if optimizer_name == "sgd": + tf.logging.info("Using SGD optimizer") + optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate) + elif optimizer_name == "momentum": + tf.logging.info("Using Momentum optimizer") + optimizer = tf.train.MomentumOptimizer( + learning_rate=learning_rate, momentum=momentum + ) + elif optimizer_name == "rmsprop": + tf.logging.info("Using RMSProp optimizer") + optimizer = tf.train.RMSPropOptimizer(learning_rate, decay, momentum, epsilon) + else: + tf.logging.fatal("Unknown optimizer:", optimizer_name) + + return optimizer class TpuBatchNormalization(tf.layers.BatchNormalization): - # class TpuBatchNormalization(tf.layers.BatchNormalization): - """Cross replica batch normalization.""" - - def __init__(self, fused=False, **kwargs): - if fused in (True, None): - raise ValueError('TpuBatchNormalization does not support fused=True.') - super(TpuBatchNormalization, self).__init__(fused=fused, **kwargs) - - def _cross_replica_average(self, t, num_shards_per_group): - """Calculates the average value of input tensor across TPU replicas.""" - num_shards = tpu_function.get_tpu_context().number_of_shards - group_assignment = None - if num_shards_per_group > 1: - if num_shards % num_shards_per_group != 0: - raise ValueError('num_shards: %d mod shards_per_group: %d, should be 0' - % (num_shards, num_shards_per_group)) - num_groups = num_shards // num_shards_per_group - group_assignment = [[ - x for x in range(num_shards) if x // num_shards_per_group == y - ] for y in range(num_groups)] - return tpu_ops.cross_replica_sum(t, group_assignment) / tf.cast( - num_shards_per_group, t.dtype) - - def 
_moments(self, inputs, reduction_axes, keep_dims): - """Compute the mean and variance: it overrides the original _moments.""" - shard_mean, shard_variance = super(TpuBatchNormalization, self)._moments( - inputs, reduction_axes, keep_dims=keep_dims) - - num_shards = tpu_function.get_tpu_context().number_of_shards or 1 - if num_shards <= 8: # Skip cross_replica for 2x2 or smaller slices. - num_shards_per_group = 1 - else: - num_shards_per_group = max(8, num_shards // 8) - tf.logging.info('TpuBatchNormalization with num_shards_per_group %s', - num_shards_per_group) - if num_shards_per_group > 1: - # Compute variance using: Var[X]= E[X^2] - E[X]^2. - shard_square_of_mean = tf.math.square(shard_mean) - shard_mean_of_square = shard_variance + shard_square_of_mean - group_mean = self._cross_replica_average( - shard_mean, num_shards_per_group) - group_mean_of_square = self._cross_replica_average( - shard_mean_of_square, num_shards_per_group) - group_variance = group_mean_of_square - tf.math.square(group_mean) - return (group_mean, group_variance) - else: - return (shard_mean, shard_variance) + # class TpuBatchNormalization(tf.layers.BatchNormalization): + """Cross replica batch normalization.""" + + def __init__(self, fused=False, **kwargs): + if fused in (True, None): + raise ValueError("TpuBatchNormalization does not support fused=True.") + super(TpuBatchNormalization, self).__init__(fused=fused, **kwargs) + + def _cross_replica_average(self, t, num_shards_per_group): + """Calculates the average value of input tensor across TPU replicas.""" + num_shards = tpu_function.get_tpu_context().number_of_shards + group_assignment = None + if num_shards_per_group > 1: + if num_shards % num_shards_per_group != 0: + raise ValueError( + "num_shards: %d mod shards_per_group: %d, should be 0" + % (num_shards, num_shards_per_group) + ) + num_groups = num_shards // num_shards_per_group + group_assignment = [ + [x for x in range(num_shards) if x // num_shards_per_group == y] + for y in range(num_groups) + ] + return tpu_ops.cross_replica_sum(t, group_assignment) / tf.cast( + num_shards_per_group, t.dtype + ) + + def _moments(self, inputs, reduction_axes, keep_dims): + """Compute the mean and variance: it overrides the original _moments.""" + shard_mean, shard_variance = super(TpuBatchNormalization, self)._moments( + inputs, reduction_axes, keep_dims=keep_dims + ) + + num_shards = tpu_function.get_tpu_context().number_of_shards or 1 + if num_shards <= 8: # Skip cross_replica for 2x2 or smaller slices. + num_shards_per_group = 1 + else: + num_shards_per_group = max(8, num_shards // 8) + tf.logging.info( + "TpuBatchNormalization with num_shards_per_group %s", num_shards_per_group + ) + if num_shards_per_group > 1: + # Compute variance using: Var[X]= E[X^2] - E[X]^2. 
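# Each shard contributes E[X] and Var[X]. Since E[X^2] = Var[X] + E[X]^2,
# averaging E[X^2] across the replica group and subtracting the squared group
# mean recovers the exact group variance. Quick check: two shards with means
# 1 and 3 and zero within-shard variance give E[X^2] = (1 + 9) / 2 = 5 and
# group mean 2, hence variance 5 - 4 = 1, the variance of the pooled data.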
+ shard_square_of_mean = tf.math.square(shard_mean) + shard_mean_of_square = shard_variance + shard_square_of_mean + group_mean = self._cross_replica_average(shard_mean, num_shards_per_group) + group_mean_of_square = self._cross_replica_average( + shard_mean_of_square, num_shards_per_group + ) + group_variance = group_mean_of_square - tf.math.square(group_mean) + return (group_mean, group_variance) + else: + return (shard_mean, shard_variance) class BatchNormalization(tf.layers.BatchNormalization): - """Fixed default name of BatchNormalization to match TpuBatchNormalization.""" + """Fixed default name of BatchNormalization to match TpuBatchNormalization.""" - def __init__(self, name='tpu_batch_normalization', **kwargs): - super(BatchNormalization, self).__init__(name=name, **kwargs) + def __init__(self, name="tpu_batch_normalization", **kwargs): + super(BatchNormalization, self).__init__(name=name, **kwargs) def drop_connect(inputs, is_training, drop_connect_rate): - """Apply drop connect.""" - if not is_training: - return inputs + """Apply drop connect.""" + if not is_training: + return inputs - # Compute keep_prob - # TODO(tanmingxing): add support for training progress. - keep_prob = 1.0 - drop_connect_rate + # Compute keep_prob + # TODO(tanmingxing): add support for training progress. + keep_prob = 1.0 - drop_connect_rate - # Compute drop_connect tensor - batch_size = tf.shape(inputs)[0] - random_tensor = keep_prob - random_tensor += tf.random_uniform([batch_size, 1, 1, 1], dtype=inputs.dtype) - binary_tensor = tf.floor(random_tensor) - output = tf.div(inputs, keep_prob) * binary_tensor - return output + # Compute drop_connect tensor + batch_size = tf.shape(inputs)[0] + random_tensor = keep_prob + random_tensor += tf.random_uniform([batch_size, 1, 1, 1], dtype=inputs.dtype) + binary_tensor = tf.floor(random_tensor) + output = tf.div(inputs, keep_prob) * binary_tensor + return output def archive_ckpt(ckpt_eval, ckpt_objective, ckpt_path): - """Archive a checkpoint if the metric is better.""" - ckpt_dir, ckpt_name = os.path.split(ckpt_path) - - saved_objective_path = os.path.join(ckpt_dir, 'best_objective.txt') - saved_objective = float('-inf') - if tf.gfile.Exists(saved_objective_path): - with tf.gfile.GFile(saved_objective_path, 'r') as f: - saved_objective = float(f.read()) - if saved_objective > ckpt_objective: - tf.logging.info('Ckpt %s is worse than %s', ckpt_objective, saved_objective) - return False - - filenames = tf.gfile.Glob(ckpt_path + '.*') - if filenames is None: - tf.logging.info('No files to copy for checkpoint %s', ckpt_path) - return False - - # Clear the old folder. - dst_dir = os.path.join(ckpt_dir, 'archive') - if tf.gfile.Exists(dst_dir): - tf.gfile.DeleteRecursively(dst_dir) - tf.gfile.MakeDirs(dst_dir) - - # Write checkpoints. - for f in filenames: - dest = os.path.join(dst_dir, os.path.basename(f)) - tf.gfile.Copy(f, dest, overwrite=True) - ckpt_state = tf.train.generate_checkpoint_state_proto( - dst_dir, - model_checkpoint_path=ckpt_name, - all_model_checkpoint_paths=[ckpt_name]) - with tf.gfile.GFile(os.path.join(dst_dir, 'checkpoint'), 'w') as f: - f.write(str(ckpt_state)) - with tf.gfile.GFile(os.path.join(dst_dir, 'best_eval.txt'), 'w') as f: - f.write('%s' % ckpt_eval) - - # Update the best objective. 
- with tf.gfile.GFile(saved_objective_path, 'w') as f: - f.write('%f' % ckpt_objective) - - tf.logging.info('Copying checkpoint %s to %s', ckpt_path, dst_dir) - return True + """Archive a checkpoint if the metric is better.""" + ckpt_dir, ckpt_name = os.path.split(ckpt_path) + + saved_objective_path = os.path.join(ckpt_dir, "best_objective.txt") + saved_objective = float("-inf") + if tf.gfile.Exists(saved_objective_path): + with tf.gfile.GFile(saved_objective_path, "r") as f: + saved_objective = float(f.read()) + if saved_objective > ckpt_objective: + tf.logging.info("Ckpt %s is worse than %s", ckpt_objective, saved_objective) + return False + + filenames = tf.gfile.Glob(ckpt_path + ".*") + if filenames is None: + tf.logging.info("No files to copy for checkpoint %s", ckpt_path) + return False + + # Clear the old folder. + dst_dir = os.path.join(ckpt_dir, "archive") + if tf.gfile.Exists(dst_dir): + tf.gfile.DeleteRecursively(dst_dir) + tf.gfile.MakeDirs(dst_dir) + + # Write checkpoints. + for f in filenames: + dest = os.path.join(dst_dir, os.path.basename(f)) + tf.gfile.Copy(f, dest, overwrite=True) + ckpt_state = tf.train.generate_checkpoint_state_proto( + dst_dir, model_checkpoint_path=ckpt_name, all_model_checkpoint_paths=[ckpt_name] + ) + with tf.gfile.GFile(os.path.join(dst_dir, "checkpoint"), "w") as f: + f.write(str(ckpt_state)) + with tf.gfile.GFile(os.path.join(dst_dir, "best_eval.txt"), "w") as f: + f.write("%s" % ckpt_eval) + + # Update the best objective. + with tf.gfile.GFile(saved_objective_path, "w") as f: + f.write("%f" % ckpt_objective) + + tf.logging.info("Copying checkpoint %s to %s", ckpt_path, dst_dir) + return True def get_ema_vars(): - """Get all exponential moving average (ema) variables.""" - ema_vars = tf.trainable_variables() + tf.get_collection('moving_vars') - for v in tf.global_variables(): - # We maintain mva for batch norm moving mean and variance as well. - if 'moving_mean' in v.name or 'moving_variance' in v.name: - ema_vars.append(v) - return list(set(ema_vars)) + """Get all exponential moving average (ema) variables.""" + ema_vars = tf.trainable_variables() + tf.get_collection("moving_vars") + for v in tf.global_variables(): + # We maintain mva for batch norm moving mean and variance as well. + if "moving_mean" in v.name or "moving_variance" in v.name: + ema_vars.append(v) + return list(set(ema_vars)) class DepthwiseConv2D(tf.keras.layers.DepthwiseConv2D, tf.layers.Layer): - """Wrap keras DepthwiseConv2D to tf.layers.""" + """Wrap keras DepthwiseConv2D to tf.layers.""" - pass + pass class EvalCkptDriver(object): - """A driver for running eval inference. + """A driver for running eval inference. Attributes: model_name: str. Model name to eval. batch_size: int. Eval batch size. @@ -231,106 +242,103 @@ class EvalCkptDriver(object): include_background_label: whether to include extra background label. 
""" - def __init__(self, - model_name, - batch_size=1, - image_size=224, - num_classes=1000, - include_background_label=False): - """Initialize internal variables.""" - self.model_name = model_name - self.batch_size = batch_size - self.num_classes = num_classes - self.include_background_label = include_background_label - self.image_size = image_size - - def restore_model(self, sess, ckpt_dir, enable_ema=True, export_ckpt=None): - """Restore variables from checkpoint dir.""" - sess.run(tf.global_variables_initializer()) - checkpoint = tf.train.latest_checkpoint(ckpt_dir) - if enable_ema: - ema = tf.train.ExponentialMovingAverage(decay=0.0) - ema_vars = get_ema_vars() - var_dict = ema.variables_to_restore(ema_vars) - ema_assign_op = ema.apply(ema_vars) - else: - var_dict = get_ema_vars() - ema_assign_op = None - - tf.train.get_or_create_global_step() - sess.run(tf.global_variables_initializer()) - saver = tf.train.Saver(var_dict, max_to_keep=1) - saver.restore(sess, checkpoint) - - if export_ckpt: - if ema_assign_op is not None: - sess.run(ema_assign_op) - saver = tf.train.Saver(max_to_keep=1, save_relative_paths=True) - saver.save(sess, export_ckpt) - - def build_model(self, features, is_training): - """Build model with input features.""" - del features, is_training - raise ValueError('Must be implemented by subclasses.') - - def get_preprocess_fn(self): - raise ValueError('Must be implemented by subclsses.') - - def build_dataset(self, filenames, labels, is_training): - """Build input dataset.""" - filenames = tf.constant(filenames) - labels = tf.constant(labels) - dataset = tf.data.Dataset.from_tensor_slices((filenames, labels)) - - def _parse_function(filename, label): - image_string = tf.read_file(filename) - preprocess_fn = self.get_preprocess_fn() - image_decoded = preprocess_fn( - image_string, is_training, image_size=self.image_size) - image = tf.cast(image_decoded, tf.float32) - return image, label - - dataset = dataset.map(_parse_function) - dataset = dataset.batch(self.batch_size) - - iterator = dataset.make_one_shot_iterator() - images, labels = iterator.get_next() - return images, labels - - def run_inference(self, - ckpt_dir, - image_files, - labels, - enable_ema=True, - export_ckpt=None): - """Build and run inference on the target images and labels.""" - label_offset = 1 if self.include_background_label else 0 - with tf.Graph().as_default(), tf.Session() as sess: - images, labels = self.build_dataset(image_files, labels, False) - probs = self.build_model(images, is_training=False) - if isinstance(probs, tuple): - probs = probs[0] - - self.restore_model(sess, ckpt_dir, enable_ema, export_ckpt) - - prediction_idx = [] - prediction_prob = [] - for _ in range(len(image_files) // self.batch_size): - out_probs = sess.run(probs) - idx = np.argsort(out_probs)[::-1] - prediction_idx.append(idx[:5] - label_offset) - prediction_prob.append([out_probs[pid] for pid in idx[:5]]) - - # Return the top 5 predictions (idx and prob) for each image. - return prediction_idx, prediction_prob - - def eval_example_images(self, - ckpt_dir, - image_files, - labels_map_file, - enable_ema=True, - export_ckpt=None): - """Eval a list of example images. 
+    def __init__(
+        self,
+        model_name,
+        batch_size=1,
+        image_size=224,
+        num_classes=1000,
+        include_background_label=False,
+    ):
+        """Initialize internal variables."""
+        self.model_name = model_name
+        self.batch_size = batch_size
+        self.num_classes = num_classes
+        self.include_background_label = include_background_label
+        self.image_size = image_size
+
+    def restore_model(self, sess, ckpt_dir, enable_ema=True, export_ckpt=None):
+        """Restore variables from checkpoint dir."""
+        sess.run(tf.global_variables_initializer())
+        checkpoint = tf.train.latest_checkpoint(ckpt_dir)
+        if enable_ema:
+            ema = tf.train.ExponentialMovingAverage(decay=0.0)
+            ema_vars = get_ema_vars()
+            var_dict = ema.variables_to_restore(ema_vars)
+            ema_assign_op = ema.apply(ema_vars)
+        else:
+            var_dict = get_ema_vars()
+            ema_assign_op = None
+
+        tf.train.get_or_create_global_step()
+        sess.run(tf.global_variables_initializer())
+        saver = tf.train.Saver(var_dict, max_to_keep=1)
+        saver.restore(sess, checkpoint)
+
+        if export_ckpt:
+            if ema_assign_op is not None:
+                sess.run(ema_assign_op)
+            saver = tf.train.Saver(max_to_keep=1, save_relative_paths=True)
+            saver.save(sess, export_ckpt)
+
+    def build_model(self, features, is_training):
+        """Build model with input features."""
+        del features, is_training
+        raise ValueError("Must be implemented by subclasses.")
+
+    def get_preprocess_fn(self):
+        raise ValueError("Must be implemented by subclasses.")
+
+    def build_dataset(self, filenames, labels, is_training):
+        """Build input dataset."""
+        filenames = tf.constant(filenames)
+        labels = tf.constant(labels)
+        dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
+
+        def _parse_function(filename, label):
+            image_string = tf.read_file(filename)
+            preprocess_fn = self.get_preprocess_fn()
+            image_decoded = preprocess_fn(
+                image_string, is_training, image_size=self.image_size
+            )
+            image = tf.cast(image_decoded, tf.float32)
+            return image, label
+
+        dataset = dataset.map(_parse_function)
+        dataset = dataset.batch(self.batch_size)
+
+        iterator = dataset.make_one_shot_iterator()
+        images, labels = iterator.get_next()
+        return images, labels
+
+    def run_inference(
+        self, ckpt_dir, image_files, labels, enable_ema=True, export_ckpt=None
+    ):
+        """Build and run inference on the target images and labels."""
+        label_offset = 1 if self.include_background_label else 0
+        with tf.Graph().as_default(), tf.Session() as sess:
+            images, labels = self.build_dataset(image_files, labels, False)
+            probs = self.build_model(images, is_training=False)
+            if isinstance(probs, tuple):
+                probs = probs[0]
+
+            self.restore_model(sess, ckpt_dir, enable_ema, export_ckpt)
+
+            prediction_idx = []
+            prediction_prob = []
+            for _ in range(len(image_files) // self.batch_size):
+                out_probs = sess.run(probs)
+                idx = np.argsort(out_probs)[::-1]
+                prediction_idx.append(idx[:5] - label_offset)
+                prediction_prob.append([out_probs[pid] for pid in idx[:5]])
+
+            # Return the top 5 predictions (idx and prob) for each image.
+            return prediction_idx, prediction_prob
+
+    def eval_example_images(
+        self, ckpt_dir, image_files, labels_map_file, enable_ema=True, export_ckpt=None
+    ):
+        """Eval a list of example images.

        Args:
          ckpt_dir: str. Checkpoint directory path.
          image_files: List[str]. A list of image file paths.
@@ -341,19 +349,30 @@ def eval_example_images(self,
          A tuple (pred_idx, and pred_prob), where pred_idx is the top 5
          prediction index and pred_prob is the top 5 prediction probability.
""" - classes = json.loads(tf.gfile.Open(labels_map_file).read()) - pred_idx, pred_prob = self.run_inference( - ckpt_dir, image_files, [0] * len(image_files), enable_ema, export_ckpt) - for i in range(len(image_files)): - print('predicted class for image {}: '.format(image_files[i])) - for j, idx in enumerate(pred_idx[i]): - print(' -> top_{} ({:4.2f}%): {} '.format(j, pred_prob[i][j] * 100, - classes[str(idx)])) - return pred_idx, pred_prob - - def eval_imagenet(self, ckpt_dir, imagenet_eval_glob, - imagenet_eval_label, num_images, enable_ema, export_ckpt): - """Eval ImageNet images and report top1/top5 accuracy. + classes = json.loads(tf.gfile.Open(labels_map_file).read()) + pred_idx, pred_prob = self.run_inference( + ckpt_dir, image_files, [0] * len(image_files), enable_ema, export_ckpt + ) + for i in range(len(image_files)): + print("predicted class for image {}: ".format(image_files[i])) + for j, idx in enumerate(pred_idx[i]): + print( + " -> top_{} ({:4.2f}%): {} ".format( + j, pred_prob[i][j] * 100, classes[str(idx)] + ) + ) + return pred_idx, pred_prob + + def eval_imagenet( + self, + ckpt_dir, + imagenet_eval_glob, + imagenet_eval_label, + num_images, + enable_ema, + export_ckpt, + ): + """Eval ImageNet images and report top1/top5 accuracy. Args: ckpt_dir: str. Checkpoint directory path. imagenet_eval_glob: str. File path glob for all eval images. @@ -365,23 +384,27 @@ def eval_imagenet(self, ckpt_dir, imagenet_eval_glob, Returns: A tuple (top1, top5) for top1 and top5 accuracy. """ - imagenet_val_labels = [int(i) for i in tf.gfile.GFile(imagenet_eval_label)] - imagenet_filenames = sorted(tf.gfile.Glob(imagenet_eval_glob)) - if num_images < 0: - num_images = len(imagenet_filenames) - image_files = imagenet_filenames[:num_images] - labels = imagenet_val_labels[:num_images] - - pred_idx, _ = self.run_inference( - ckpt_dir, image_files, labels, enable_ema, export_ckpt) - top1_cnt, top5_cnt = 0.0, 0.0 - for i, label in enumerate(labels): - top1_cnt += label in pred_idx[i][:1] - top5_cnt += label in pred_idx[i][:5] - if i % 100 == 0: - print('Step {}: top1_acc = {:4.2f}% top5_acc = {:4.2f}%'.format( - i, 100 * top1_cnt / (i + 1), 100 * top5_cnt / (i + 1))) - sys.stdout.flush() - top1, top5 = 100 * top1_cnt / num_images, 100 * top5_cnt / num_images - print('Final: top1_acc = {:4.2f}% top5_acc = {:4.2f}%'.format(top1, top5)) - return top1, top5 + imagenet_val_labels = [int(i) for i in tf.gfile.GFile(imagenet_eval_label)] + imagenet_filenames = sorted(tf.gfile.Glob(imagenet_eval_glob)) + if num_images < 0: + num_images = len(imagenet_filenames) + image_files = imagenet_filenames[:num_images] + labels = imagenet_val_labels[:num_images] + + pred_idx, _ = self.run_inference( + ckpt_dir, image_files, labels, enable_ema, export_ckpt + ) + top1_cnt, top5_cnt = 0.0, 0.0 + for i, label in enumerate(labels): + top1_cnt += label in pred_idx[i][:1] + top5_cnt += label in pred_idx[i][:5] + if i % 100 == 0: + print( + "Step {}: top1_acc = {:4.2f}% top5_acc = {:4.2f}%".format( + i, 100 * top1_cnt / (i + 1), 100 * top5_cnt / (i + 1) + ) + ) + sys.stdout.flush() + top1, top5 = 100 * top1_cnt / num_images, 100 * top5_cnt / num_images + print("Final: top1_acc = {:4.2f}% top5_acc = {:4.2f}%".format(top1, top5)) + return top1, top5 diff --git a/deeplabcut/pose_estimation_tensorflow/predict_multianimal.py b/deeplabcut/pose_estimation_tensorflow/predict_multianimal.py index 4bf24629e..e7f710514 100644 --- a/deeplabcut/pose_estimation_tensorflow/predict_multianimal.py +++ 
b/deeplabcut/pose_estimation_tensorflow/predict_multianimal.py @@ -144,11 +144,11 @@ def GetPoseandCostsF( PredicteData = {} # initializing constants - dist_grid = predict.make_nms_grid(dlc_cfg['nmsradius']) - stride = dlc_cfg['stride'] + dist_grid = predict.make_nms_grid(dlc_cfg["nmsradius"]) + stride = dlc_cfg["stride"] halfstride = stride * 0.5 - num_joints = dlc_cfg['num_joints'] - det_min_score = dlc_cfg['minconfidence'] + num_joints = dlc_cfg["num_joints"] + det_min_score = dlc_cfg["minconfidence"] num_idchannel = dlc_cfg.get("num_idchannel", 0) while cap.video.isOpened(): @@ -208,12 +208,12 @@ def GetPoseandCostsF( cap.close() pbar.close() PredicteData["metadata"] = { - "nms radius": dlc_cfg['nmsradius'], - "minimal confidence": dlc_cfg['minconfidence'], - "PAFgraph": dlc_cfg['partaffinityfield_graph'], - "all_joints": [[i] for i in range(len(dlc_cfg['all_joints']))], + "nms radius": dlc_cfg["nmsradius"], + "minimal confidence": dlc_cfg["minconfidence"], + "PAFgraph": dlc_cfg["partaffinityfield_graph"], + "all_joints": [[i] for i in range(len(dlc_cfg["all_joints"]))], "all_joints_names": [ - dlc_cfg['all_joints_names'][i] for i in range(len(dlc_cfg['all_joints'])) + dlc_cfg["all_joints_names"][i] for i in range(len(dlc_cfg["all_joints"])) ], "nframes": nframes, "c_engine": c_engine, @@ -246,8 +246,8 @@ def GetPoseandCostsS(cfg, dlc_cfg, sess, inputs, outputs, cap, nframes, c_engine inputs, outputs, outall=False, - nms_radius=dlc_cfg['nmsradius'], - det_min_score=dlc_cfg['minconfidence'], + nms_radius=dlc_cfg["nmsradius"], + det_min_score=dlc_cfg["minconfidence"], c_engine=c_engine, ) elif counter >= nframes: @@ -256,12 +256,12 @@ def GetPoseandCostsS(cfg, dlc_cfg, sess, inputs, outputs, cap, nframes, c_engine pbar.close() PredicteData["metadata"] = { - "nms radius": dlc_cfg['nmsradius'], - "minimal confidence": dlc_cfg['minconfidence'], - "PAFgraph": dlc_cfg['partaffinityfield_graph'], - "all_joints": [[i] for i in range(len(dlc_cfg['all_joints']))], + "nms radius": dlc_cfg["nmsradius"], + "minimal confidence": dlc_cfg["minconfidence"], + "PAFgraph": dlc_cfg["partaffinityfield_graph"], + "all_joints": [[i] for i in range(len(dlc_cfg["all_joints"]))], "all_joints_names": [ - dlc_cfg['all_joints_names'][i] for i in range(len(dlc_cfg['all_joints'])) + dlc_cfg["all_joints_names"][i] for i in range(len(dlc_cfg["all_joints"])) ], "nframes": nframes, } diff --git a/deeplabcut/pose_estimation_tensorflow/predict_videos.py b/deeplabcut/pose_estimation_tensorflow/predict_videos.py index 53f2ae480..7f5a9b67b 100755 --- a/deeplabcut/pose_estimation_tensorflow/predict_videos.py +++ b/deeplabcut/pose_estimation_tensorflow/predict_videos.py @@ -335,7 +335,9 @@ def analyze_videos( print( "If the tracking is not satisfactory for some videos, consider expanding the training set. You can use the function 'extract_outlier_frames' to extract a few representative outlier frames." ) - return DLCscorer # note: this is either DLCscorer or DLCscorerlegacy depending on what was used! + return ( + DLCscorer + ) # note: this is either DLCscorer or DLCscorerlegacy depending on what was used! else: print("No video(s) were found. 
Please check your paths and/or 'video_type'.") return DLCscorer @@ -1421,7 +1423,7 @@ def convert_detections2tracklets( mot_tracker = trackingutils.SORTEllipse( inferencecfg.get("max_age", 1), inferencecfg.get("min_hits", 5), - inferencecfg.get("iou_threshold", 0.6) + inferencecfg.get("iou_threshold", 0.6), ) tracklets = {} if cfg[ @@ -1465,10 +1467,8 @@ def convert_detections2tracklets( for a in animals ): single = np.full((numjoints, 3), np.nan) - single_dets = ( - inferenceutils.convertdetectiondict2listoflist( - data[imname], inds_unique - ) + single_dets = inferenceutils.convertdetectiondict2listoflist( + data[imname], inds_unique ) for ind, dets in zip(inds_unique, single_dets): if len(dets) == 1: diff --git a/deeplabcut/pose_estimation_tensorflow/test.py b/deeplabcut/pose_estimation_tensorflow/test.py index 75a4acb7e..918cee274 100644 --- a/deeplabcut/pose_estimation_tensorflow/test.py +++ b/deeplabcut/pose_estimation_tensorflow/test.py @@ -35,7 +35,7 @@ def test_net(visualise, cache_scoremaps): sess, inputs, outputs = setup_pose_prediction(cfg) if cache_scoremaps: - out_dir = cfg['scoremap_dir'] + out_dir = cfg["scoremap_dir"] if not os.path.exists(out_dir): os.makedirs(out_dir) @@ -51,10 +51,10 @@ def test_net(visualise, cache_scoremaps): scmap, locref = extract_cnn_output(outputs_np, cfg) - pose = argmax_pose_predict(scmap, locref, cfg['stride']) + pose = argmax_pose_predict(scmap, locref, cfg["stride"]) pose_refscale = np.copy(pose) - pose_refscale[:, 0:2] /= cfg['global_scale'] + pose_refscale[:, 0:2] /= cfg["global_scale"] predictions[k] = pose_refscale if visualise: @@ -69,7 +69,7 @@ def test_net(visualise, cache_scoremaps): scipy.io.savemat(out_fn, mdict={"scoremaps": scmap.astype("float32")}) out_fn = os.path.join(out_dir, raw_name + "_locreg" + ".mat") - if cfg['location_refinement']: + if cfg["location_refinement"]: scipy.io.savemat( out_fn, mdict={"locreg_pred": locref.astype("float32")} ) diff --git a/deeplabcut/pose_estimation_tensorflow/train.py b/deeplabcut/pose_estimation_tensorflow/train.py index 0d087d7e6..21e9f3435 100755 --- a/deeplabcut/pose_estimation_tensorflow/train.py +++ b/deeplabcut/pose_estimation_tensorflow/train.py @@ -37,7 +37,7 @@ class LearningRate(object): def __init__(self, cfg): - self.steps = cfg['multi_step'] + self.steps = cfg["multi_step"] self.current_step = 0 def get_lr(self, iteration): @@ -49,8 +49,8 @@ def get_lr(self, iteration): def get_batch_spec(cfg): - num_joints = cfg['num_joints'] - batch_size = cfg['batch_size'] + num_joints = cfg["num_joints"] + batch_size = cfg["batch_size"] return { Batch.inputs: [batch_size, None, None, 3], Batch.part_score_targets: [batch_size, None, None, num_joints], @@ -103,25 +103,24 @@ def start_preloading(sess, enqueue_op, dataset, placeholders): def get_optimizer(loss_op, cfg): - tstep = tf.placeholder(tf.int32,shape=[],name='tstep') - if 'efficientnet' in cfg['net_type']: + tstep = tf.placeholder(tf.int32, shape=[], name="tstep") + if "efficientnet" in cfg["net_type"]: print("Switching to cosine decay schedule with adam!") - cfg['optimizer'] = "adam" - learning_rate = tf.train.cosine_decay(cfg['lr_init'], - tstep, - cfg['decay_steps'], - alpha=cfg['alpha_r']) + cfg["optimizer"] = "adam" + learning_rate = tf.train.cosine_decay( + cfg["lr_init"], tstep, cfg["decay_steps"], alpha=cfg["alpha_r"] + ) else: learning_rate = tf.placeholder(tf.float32, shape=[]) - if cfg['optimizer'] == "sgd": + if cfg["optimizer"] == "sgd": optimizer = TF.train.MomentumOptimizer( learning_rate=learning_rate, momentum=0.9 ) - 
elif cfg['optimizer'] == "adam": + elif cfg["optimizer"] == "adam": optimizer = TF.train.AdamOptimizer(learning_rate) else: - raise ValueError("unknown optimizer {}".format(cfg['optimizer'])) + raise ValueError("unknown optimizer {}".format(cfg["optimizer"])) train_op = slim.learning.create_train_op(loss_op, optimizer) return learning_rate, train_op, tstep @@ -130,14 +129,14 @@ def get_optimizer(loss_op, cfg): def get_optimizer_with_freeze(loss_op, cfg): learning_rate = TF.placeholder(tf.float32, shape=[]) - if cfg['optimizer'] == "sgd": + if cfg["optimizer"] == "sgd": optimizer = TF.train.MomentumOptimizer( learning_rate=learning_rate, momentum=0.9 ) - elif cfg['optimizer'] == "adam": + elif cfg["optimizer"] == "adam": optimizer = TF.train.AdamOptimizer(learning_rate) else: - raise ValueError("unknown optimizer {}".format(cfg['optimizer'])) + raise ValueError("unknown optimizer {}".format(cfg["optimizer"])) train_unfrozen_op = slim.learning.create_train_op(loss_op, optimizer) variables_unfrozen = TF.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "pose") @@ -165,8 +164,8 @@ def train( setup_logging() cfg = load_config(config_yaml) - net_type = cfg['net_type'] - if cfg['dataset_type'] in ("scalecrop", "tensorpack", "deterministic"): + net_type = cfg["net_type"] + if cfg["dataset_type"] in ("scalecrop", "tensorpack", "deterministic"): print( "Switching batchsize to 1, as tensorpack/scalecrop/deterministic loaders do not support batches >1. Use imgaug/default loader." ) @@ -183,7 +182,7 @@ def train( TF.summary.scalar(k, t) merged_summaries = TF.summary.merge_all() - if "snapshot" in Path(cfg['init_weights']).stem and keepdeconvweights: + if "snapshot" in Path(cfg["init_weights"]).stem and keepdeconvweights: print("Loading already trained DLC with backbone:", net_type) variables_to_restore = slim.get_variables_to_restore() else: @@ -195,11 +194,15 @@ def train( variables_to_restore = slim.get_variables_to_restore( include=["MobilenetV2"] ) - elif 'efficientnet' in net_type: - variables_to_restore = slim.get_variables_to_restore(include=["efficientnet"]) + elif "efficientnet" in net_type: + variables_to_restore = slim.get_variables_to_restore( + include=["efficientnet"] + ) variables_to_restore = { - var.op.name.replace("efficientnet/", "") - + '/ExponentialMovingAverage':var for var in variables_to_restore} + var.op.name.replace("efficientnet/", "") + + "/ExponentialMovingAverage": var + for var in variables_to_restore + } else: print("Wait for DLC 2.3.") @@ -216,10 +219,10 @@ def train( sess = TF.Session() coord, thread = start_preloading(sess, enqueue_op, dataset, placeholders) - train_writer = TF.summary.FileWriter(cfg['log_dir'], sess.graph) + train_writer = TF.summary.FileWriter(cfg["log_dir"], sess.graph) if cfg.get("freezeencoder", False): - if 'efficientnet' in net_type: + if "efficientnet" in net_type: print("Freezing ONLY supported MobileNet/ResNet currently!!") learning_rate, train_op, tstep = get_optimizer(total_loss, cfg) @@ -232,22 +235,22 @@ def train( sess.run(TF.local_variables_initializer()) # Restore variables from disk. 
-    restorer.restore(sess, cfg['init_weights'])
+    restorer.restore(sess, cfg["init_weights"])

    if maxiters == None:
-        max_iter = int(cfg['multi_step'][-1][1])
+        max_iter = int(cfg["multi_step"][-1][1])
    else:
-        max_iter = min(int(cfg['multi_step'][-1][1]), int(maxiters))
+        max_iter = min(int(cfg["multi_step"][-1][1]), int(maxiters))
        # display_iters = max(1,int(displayiters))
        print("Max_iters overwritten as", max_iter)

    if displayiters == None:
-        display_iters = max(1, int(cfg['display_iters']))
+        display_iters = max(1, int(cfg["display_iters"]))
    else:
        display_iters = max(1, int(displayiters))
        print("Display_iters overwritten as", display_iters)

    if saveiters == None:
-        save_iters = max(1, int(cfg['save_iters']))
+        save_iters = max(1, int(cfg["save_iters"]))
    else:
        save_iters = max(1, int(saveiters))

@@ -263,16 +266,15 @@ def train(
    print(cfg)
    print("Starting training....")
    for it in range(max_iter + 1):
-        if 'efficientnet' in net_type:
-            dict={tstep: it}
-            current_lr = sess.run(learning_rate,feed_dict=dict)
+        if "efficientnet" in net_type:
+            dict = {tstep: it}
+            current_lr = sess.run(learning_rate, feed_dict=dict)
        else:
            current_lr = lr_gen.get_lr(it)
-            dict={learning_rate: current_lr}
+            dict = {learning_rate: current_lr}

        [_, loss_val, summary] = sess.run(
-            [train_op, total_loss, merged_summaries],
-            feed_dict=dict,
+            [train_op, total_loss, merged_summaries], feed_dict=dict
        )
        cum_loss += loss_val
        train_writer.add_summary(summary, it)
@@ -290,7 +292,7 @@ def train(

        # Save snapshot
        if (it % save_iters == 0 and it != 0) or it == max_iter:
-            model_name = cfg['snapshot_prefix']
+            model_name = cfg["snapshot_prefix"]
            saver.save(sess, model_name, global_step=it)

    lrf.close()
diff --git a/deeplabcut/pose_estimation_tensorflow/train_multianimal.py b/deeplabcut/pose_estimation_tensorflow/train_multianimal.py
index 2cfc53365..7ddd65165 100755
--- a/deeplabcut/pose_estimation_tensorflow/train_multianimal.py
+++ b/deeplabcut/pose_estimation_tensorflow/train_multianimal.py
@@ -36,7 +36,7 @@ class LearningRate(object):

    def __init__(self, cfg):
-        self.steps = cfg['multi_step']
+        self.steps = cfg["multi_step"]
        self.current_step = 0

    def get_lr(self, iteration):
@@ -89,25 +89,24 @@ def start_preloading(sess, enqueue_op, dataset, placeholders):

 def get_optimizer(loss_op, cfg):
-    tstep = tf.placeholder(tf.int32,shape=[],name='tstep')
-    if 'efficientnet' in cfg['net_type']:
+    tstep = tf.placeholder(tf.int32, shape=[], name="tstep")
+    if "efficientnet" in cfg["net_type"]:
        print("Switching to cosine decay schedule with adam!")
-        cfg['optimizer'] == "adam"
-        learning_rate = tf.train.cosine_decay(cfg['lr_init'],
-                                              tstep,
-                                              cfg['decay_steps'],
-                                              alpha=cfg['alpha_r'])
+        cfg["optimizer"] = "adam"
+        learning_rate = tf.train.cosine_decay(
+            cfg["lr_init"], tstep, cfg["decay_steps"], alpha=cfg["alpha_r"]
+        )
    else:
        learning_rate = tf.placeholder(tf.float32, shape=[])

-    if cfg['optimizer'] == "sgd":
+    if cfg["optimizer"] == "sgd":
        optimizer = TF.train.MomentumOptimizer(
            learning_rate=learning_rate, momentum=0.9
        )
-    elif cfg['optimizer'] == "adam":
+    elif cfg["optimizer"] == "adam":
        optimizer = TF.train.AdamOptimizer(learning_rate)
    else:
-        raise ValueError("unknown optimizer {}".format(cfg['optimizer']))
+        raise ValueError("unknown optimizer {}".format(cfg["optimizer"]))
    train_op = slim.learning.create_train_op(loss_op, optimizer)

    return learning_rate, train_op, tstep
@@ -153,9 +152,9 @@ def train(
    for k, t in losses.items():
        TF.summary.scalar(k, t)
    merged_summaries = TF.summary.merge_all()
-    net_type = cfg['net_type']
+    net_type = cfg["net_type"]

-    if "snapshot" in Path(cfg['init_weights']).stem and keepdeconvweights:
+    if "snapshot" in Path(cfg["init_weights"]).stem and keepdeconvweights:
        print("Loading already trained DLC with backbone:", net_type)
        variables_to_restore = slim.get_variables_to_restore()
    else:
@@ -167,11 +166,15 @@ def train(
            variables_to_restore = slim.get_variables_to_restore(
                include=["MobilenetV2"]
            )
-        elif 'efficientnet' in net_type:
-            variables_to_restore = slim.get_variables_to_restore(include=["efficientnet"])
+        elif "efficientnet" in net_type:
+            variables_to_restore = slim.get_variables_to_restore(
+                include=["efficientnet"]
+            )
            variables_to_restore = {
-                var.op.name.replace("efficientnet/", "")
-                + '/ExponentialMovingAverage':var for var in variables_to_restore}
+                var.op.name.replace("efficientnet/", "")
+                + "/ExponentialMovingAverage": var
+                for var in variables_to_restore
+            }
        else:
            print("Wait for DLC 2.3.")
@@ -188,34 +191,34 @@ def train(
    sess = TF.Session()

    coord, thread = start_preloading(sess, enqueue_op, dataset, placeholders)
-    train_writer = TF.summary.FileWriter(cfg['log_dir'], sess.graph)
+    train_writer = TF.summary.FileWriter(cfg["log_dir"], sess.graph)
    learning_rate, train_op, tstep = get_optimizer(total_loss, cfg)

    sess.run(TF.global_variables_initializer())
    sess.run(TF.local_variables_initializer())

    # Restore variables from disk.
-    if 'efficientnet' in net_type:
-        init_weights = os.path.join(cfg['init_weights'],"model.ckpt")
+    if "efficientnet" in net_type:
+        init_weights = os.path.join(cfg["init_weights"], "model.ckpt")
    else:
-        init_weights = cfg['init_weights']
+        init_weights = cfg["init_weights"]
    restorer.restore(sess, init_weights)

    if maxiters == None:
-        max_iter = int(cfg['multi_step'][-1][1])
+        max_iter = int(cfg["multi_step"][-1][1])
    else:
-        max_iter = min(int(cfg['multi_step'][-1][1]), int(maxiters))
+        max_iter = min(int(cfg["multi_step"][-1][1]), int(maxiters))
        # display_iters = max(1,int(displayiters))
        print("Max_iters overwritten as", max_iter)

    if displayiters == None:
-        display_iters = max(1, int(cfg['display_iters']))
+        display_iters = max(1, int(cfg["display_iters"]))
    else:
        display_iters = max(1, int(displayiters))
        print("Display_iters overwritten as", display_iters)

    if saveiters == None:
-        save_iters = max(1, int(cfg['save_iters']))
+        save_iters = max(1, int(cfg["save_iters"]))
    else:
        save_iters = max(1, int(saveiters))

@@ -230,23 +233,22 @@ def train(
    print(cfg)
    print("Starting multi-animal training....")
    for it in range(max_iter + 1):
-        if 'efficientnet' in net_type:
-            dict={tstep: it}
-            current_lr = sess.run(learning_rate,feed_dict=dict)
+        if "efficientnet" in net_type:
+            dict = {tstep: it}
+            current_lr = sess.run(learning_rate, feed_dict=dict)
        else:
            current_lr = lr_gen.get_lr(it)
-            dict={learning_rate: current_lr}
+            dict = {learning_rate: current_lr}

        # [_, loss_val, summary] = sess.run([train_op, total_loss, merged_summaries],feed_dict={learning_rate: current_lr})
        [_, alllosses, loss_val, summary] = sess.run(
-            [train_op, losses, total_loss, merged_summaries],
-            feed_dict=dict,
+            [train_op, losses, total_loss, merged_summaries], feed_dict=dict
        )

        partloss += alllosses["part_loss"]  # scoremap loss
-        if cfg['location_refinement']:
+        if cfg["location_refinement"]:
            locrefloss += alllosses["locref_loss"]
-        if cfg['pairwise_predict']:  # paf loss
+        if cfg["pairwise_predict"]:  # paf loss
            pwloss += alllosses["pairwise_loss"]
        cumloss += loss_val

@@ -280,7 +282,7 @@ def train(

        # Save snapshot
        if (it % save_iters == 0 and it != 0) or it == max_iter:
-            model_name = cfg['snapshot_prefix']
+            model_name = cfg["snapshot_prefix"]
            saver.save(sess, model_name, global_step=it)

    lrf.close()
diff --git a/deeplabcut/pose_estimation_tensorflow/util/visualize.py b/deeplabcut/pose_estimation_tensorflow/util/visualize.py
index 19a1ddfcd..e660ac2eb 100644
--- a/deeplabcut/pose_estimation_tensorflow/util/visualize.py
+++ b/deeplabcut/pose_estimation_tensorflow/util/visualize.py
@@ -69,8 +69,8 @@ def visualize_joints(image, pose):

 def show_heatmaps(cfg, img, scmap, pose, cmap="jet"):
    interp = "bilinear"
-    all_joints = cfg['all_joints']
-    all_joints_names = cfg['all_joints_names']
+    all_joints = cfg["all_joints"]
+    all_joints_names = cfg["all_joints_names"]
    subplot_width = 3
    subplot_height = math.ceil((len(all_joints) + 1) / subplot_width)
    f, axarr = plt.subplots(subplot_height, subplot_width)
diff --git a/deeplabcut/pose_estimation_tensorflow/vis_dataset.py b/deeplabcut/pose_estimation_tensorflow/vis_dataset.py
index 7b9421f6e..5655ecdcb 100644
--- a/deeplabcut/pose_estimation_tensorflow/vis_dataset.py
+++ b/deeplabcut/pose_estimation_tensorflow/vis_dataset.py
@@ -51,7 +51,7 @@ def display_dataset():
            curr_plot = axarr[plot_j, plot_i]
            curr_plot.axis("off")

-            if j >= cfg['num_joints']:
+            if j >= cfg["num_joints"]:
                continue

            scmap_part = scmap[:, :, j]
diff --git a/deeplabcut/pose_estimation_tensorflow/visualizemaps.py b/deeplabcut/pose_estimation_tensorflow/visualizemaps.py
index 8214470f8..9d9b2cc68 100644
--- a/deeplabcut/pose_estimation_tensorflow/visualizemaps.py
+++ b/deeplabcut/pose_estimation_tensorflow/visualizemaps.py
@@ -467,11 +467,11 @@ def extract_save_all_maps(
                list_of_inds.append(
                    [(2 * n, 2 * n + 1), (bptnames[edge[0]], bptnames[edge[1]])]
                )
-            if len(to_plot)>1:
+            if len(to_plot) > 1:
                map_ = scmap[:, :, to_plot].sum(axis=2)
                locref_x_ = locref_x[:, :, to_plot].sum(axis=2)
                locref_y_ = locref_y[:, :, to_plot].sum(axis=2)
-            elif len(to_plot)==1 and len(bptnames)>1:
+            elif len(to_plot) == 1 and len(bptnames) > 1:
                map_ = scmap[:, :, to_plot]
                locref_x_ = locref_x[:, :, to_plot]
                locref_y_ = locref_y[:, :, to_plot]
diff --git a/deeplabcut/refine_training_dataset/stitch.py b/deeplabcut/refine_training_dataset/stitch.py
index d3b580caa..e0a265bb0 100644
--- a/deeplabcut/refine_training_dataset/stitch.py
+++ b/deeplabcut/refine_training_dataset/stitch.py
@@ -28,16 +28,18 @@ def __init__(self, data, inds):
            Corresponding time frame indices.
        """
        if data.ndim != 3 or data.shape[-1] not in (3, 4):
-            raise ValueError('Data must of shape (nframes, nbodyparts, 3 or 4)')
+            raise ValueError("Data must be of shape (nframes, nbodyparts, 3 or 4)")

        if data.shape[0] != len(inds):
-            raise ValueError('Data and corresponding indices must have the same length.')
+            raise ValueError(
+                "Data and corresponding indices must have the same length."
+ ) self.data = data.astype(np.float64) self.inds = np.array(inds) monotonically_increasing = all(a < b for a, b in zip(inds, inds[1:])) if not monotonically_increasing: - idx = np.argsort(inds, kind='mergesort') # For stable sort with duplicates + idx = np.argsort(inds, kind="mergesort") # For stable sort with duplicates self.inds = self.inds[idx] self.data = self.data[idx] self._centroid = None @@ -75,8 +77,10 @@ def __contains__(self, other_tracklet): return np.isin(self.inds, other_tracklet.inds, assume_unique=True).any() def __repr__(self): - return f'Tracklet of length {len(self)} from {self.start} to {self.end} ' \ - f'with reliability {self.likelihood:.3f}' + return ( + f"Tracklet of length {len(self)} from {self.start} to {self.end} " + f"with reliability {self.likelihood:.3f}" + ) @property def xy(self): @@ -108,7 +112,7 @@ def likelihood(self): def identity(self): """Return the average predicted identity of all Tracklet detections.""" try: - return mode(self.data[..., 3], axis=None, nan_policy='omit')[0][0] + return mode(self.data[..., 3], axis=None, nan_policy="omit")[0][0] except IndexError: return -1 @@ -140,7 +144,7 @@ def del_data_at(self, ind): def interpolate(self, max_gap=1): if max_gap < 1: - raise ValueError('Gap should be a strictly positive integer.') + raise ValueError("Gap should be a strictly positive integer.") gaps = np.diff(self.inds) - 1 valid_gaps = (0 < gaps) & (gaps <= max_gap) @@ -168,21 +172,25 @@ def contains_duplicates(self, return_indices=False): return has_duplicates return has_duplicates, np.flatnonzero(np.diff(self.inds) == 0) - def calc_velocity(self, where='head', norm=True): + def calc_velocity(self, where="head", norm=True): """ Calculate the linear velocity of either the `head` or `tail` of the Tracklet, computed over the last or first three frames, respectively. If `norm`, return the absolute speed rather than a 2D vector. """ - if where == 'tail': - vel = (np.diff(self.centroid[:3], axis=0) - / np.diff(self.inds[:3])[:, np.newaxis]) - elif where == 'head': - vel = (np.diff(self.centroid[-3:], axis=0) - / np.diff(self.inds[-3:])[:, np.newaxis]) + if where == "tail": + vel = ( + np.diff(self.centroid[:3], axis=0) + / np.diff(self.inds[:3])[:, np.newaxis] + ) + elif where == "head": + vel = ( + np.diff(self.centroid[-3:], axis=0) + / np.diff(self.inds[-3:])[:, np.newaxis] + ) else: - raise ValueError(f'Unknown where={where}') + raise ValueError(f"Unknown where={where}") if norm: return np.sqrt(np.sum(vel ** 2, axis=1)).mean() return vel.mean(axis=0) @@ -192,13 +200,13 @@ def maximal_velocity(self): vel = np.diff(self.centroid, axis=0) / np.diff(self.inds)[:, np.newaxis] return np.sqrt(np.max(np.sum(vel ** 2, axis=1))) - def calc_rate_of_turn(self, where='head'): + def calc_rate_of_turn(self, where="head"): """ Calculate the rate of turn (or angular velocity) of either the `head` or `tail` of the Tracklet, computed over the last or first three frames, respectively. """ - if where == 'tail': + if where == "tail": v = np.diff(self.centroid[:3], axis=0) else: v = np.diff(self.centroid[-3:], axis=0) @@ -225,13 +233,19 @@ def distance_to(self, other_tracklet): of one to the tail/head of the other. 
""" if self in other_tracklet: - dist = (self.centroid[np.isin(self.inds, other_tracklet.inds)] - - other_tracklet.centroid[np.isin(other_tracklet.inds, self.inds)]) + dist = ( + self.centroid[np.isin(self.inds, other_tracklet.inds)] + - other_tracklet.centroid[np.isin(other_tracklet.inds, self.inds)] + ) return np.sqrt(np.sum(dist ** 2, axis=1)).mean() elif self < other_tracklet: - return np.sqrt(np.sum((self.centroid[-1] - other_tracklet.centroid[0]) ** 2)) + return np.sqrt( + np.sum((self.centroid[-1] - other_tracklet.centroid[0]) ** 2) + ) else: - return np.sqrt(np.sum((self.centroid[0] - other_tracklet.centroid[-1]) ** 2)) + return np.sqrt( + np.sum((self.centroid[0] - other_tracklet.centroid[-1]) ** 2) + ) def motion_affinity_with(self, other_tracklet): """ @@ -244,12 +258,16 @@ def motion_affinity_with(self, other_tracklet): if time_gap > 0: if self < other_tracklet: d1 = self.centroid[-1] + time_gap * self.calc_velocity(norm=False) - d2 = other_tracklet.centroid[0] - time_gap * other_tracklet.calc_velocity('tail', False) + d2 = other_tracklet.centroid[ + 0 + ] - time_gap * other_tracklet.calc_velocity("tail", False) delta1 = other_tracklet.centroid[0] - d1 delta2 = self.centroid[-1] - d2 else: - d1 = other_tracklet.centroid[-1] + time_gap * other_tracklet.calc_velocity(norm=False) - d2 = self.centroid[0] - time_gap * self.calc_velocity('tail', False) + d1 = other_tracklet.centroid[ + -1 + ] + time_gap * other_tracklet.calc_velocity(norm=False) + d2 = self.centroid[0] - time_gap * self.calc_velocity("tail", False) delta1 = self.centroid[0] - d1 delta2 = other_tracklet.centroid[-1] - d2 return (np.sqrt(np.sum(delta1 ** 2)) + np.sqrt(np.sum(delta2 ** 2))) / 2 @@ -291,8 +309,7 @@ def box_overlap_with(self, other_tracklet): @staticmethod def undirected_hausdorff(u, v): - return max(directed_hausdorff(u, v)[0], - directed_hausdorff(v, u)[0]) + return max(directed_hausdorff(u, v)[0], directed_hausdorff(v, u)[0]) @staticmethod def iou(bbox1, bbox2): @@ -303,9 +320,11 @@ def iou(bbox1, bbox2): w = max(0, x2 - x1) h = max(0, y2 - y1) wh = w * h - return wh / ((bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1]) - + (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1]) - - wh) + return wh / ( + (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1]) + + (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1]) + - wh + ) def calc_bbox(self, ind): xy = self.xy[ind] @@ -414,13 +433,13 @@ def __init__( raise IOError("Tracklets are empty.") if n_tracks < 2: - raise ValueError('There must at least be two tracks to reconstruct.') + raise ValueError("There must at least be two tracks to reconstruct.") if min_length < 3: - raise ValueError('A tracklet must have a minimal length of 3.') + raise ValueError("A tracklet must have a minimal length of 3.") self.min_length = min_length - self.filename = '' + self.filename = "" self.header = None self.single = None self.n_tracks = n_tracks @@ -452,15 +471,18 @@ def __init__( # Note that if tracklets are very short, some may actually be part of the same track # and thus incorrectly reflect separate track endpoints... - self._first_tracklets = sorted(self, key=lambda t: t.start)[:self.n_tracks] - self._last_tracklets = sorted(self, key=lambda t: t.end)[-self.n_tracks:] + self._first_tracklets = sorted(self, key=lambda t: t.start)[: self.n_tracks] + self._last_tracklets = sorted(self, key=lambda t: t.end)[-self.n_tracks :] # Map each Tracklet to an entry and output nodes and vice versa, # which is convenient once the tracklets are stitched. 
- self._mapping = {tracklet: {'in': f'{i}in', 'out': f'{i}out'} - for i, tracklet in enumerate(self)} - self._mapping_inv = {label: k for k, v in self._mapping.items() - for label in v.values()} + self._mapping = { + tracklet: {"in": f"{i}in", "out": f"{i}out"} + for i, tracklet in enumerate(self) + } + self._mapping_inv = { + label: k for k, v in self._mapping.items() for label in v.values() + } def __getitem__(self, item): return self.tracklets[item] @@ -477,7 +499,7 @@ def from_pickle( split_tracklets=True, prestitch_residuals=True, ): - with open(pickle_file, 'rb') as file: + with open(pickle_file, "rb") as file: tracklets = pickle.load(file) class_ = cls.from_dict_of_dict( tracklets, n_tracks, min_length, split_tracklets, prestitch_residuals @@ -495,7 +517,7 @@ def from_dict_of_dict( prestitch_residuals=True, ): tracklets = [] - header = dict_of_dict.pop('header', None) + header = dict_of_dict.pop("header", None) single = None for k, dict_ in dict_of_dict.items(): inds, data = zip(*[(cls.get_frame_ind(k), v) for k, v in dict_.items()]) @@ -507,16 +529,12 @@ def from_dict_of_dict( except ValueError: pass tracklet = Tracklet(data, inds) - if k == 'single': + if k == "single": single = tracklet else: tracklets.append(Tracklet(data, inds)) class_ = cls( - tracklets, - n_tracks, - min_length, - split_tracklets, - prestitch_residuals, + tracklets, n_tracks, min_length, split_tracklets, prestitch_residuals ) class_.header = header class_.single = single @@ -565,14 +583,14 @@ def build_graph(self, max_gap=None, weight_func=None): max_gap = int(1.5 * self.compute_max_gap()) self.G = nx.DiGraph() - self.G.add_node('source', demand=-self.n_tracks) - self.G.add_node('sink', demand=self.n_tracks) + self.G.add_node("source", demand=-self.n_tracks) + self.G.add_node("sink", demand=self.n_tracks) nodes_in, nodes_out = zip(*[v.values() for v in self._mapping.values()]) self.G.add_nodes_from(nodes_in, demand=1) self.G.add_nodes_from(nodes_out, demand=-1) self.G.add_edges_from(zip(nodes_in, nodes_out), capacity=1) - self.G.add_edges_from(zip(['source'] * len(self), nodes_in), capacity=1) - self.G.add_edges_from(zip(nodes_out, ['sink'] * len(self)), capacity=1) + self.G.add_edges_from(zip(["source"] * len(self), nodes_in), capacity=1) + self.G.add_edges_from(zip(nodes_out, ["sink"] * len(self)), capacity=1) if weight_func is None: weight_func = self.calculate_edge_weight for i in trange(len(self)): @@ -585,54 +603,69 @@ def build_graph(self, max_gap=None, weight_func=None): elif gap > 0: # The algorithm works better with integer weights w = int(100 * weight_func(self[i], self[j])) - self.G.add_edge(self._mapping[self[i]]['out'], - self._mapping[self[j]]['in'], - weight=w, capacity=1) + self.G.add_edge( + self._mapping[self[i]]["out"], + self._mapping[self[j]]["in"], + weight=w, + capacity=1, + ) def _update_edge_weights(self, weight_func): if self.G is None: - raise ValueError('Inexistent graph. Call `build_graph` first') + raise ValueError("Inexistent graph. Call `build_graph` first") - for node1, node2, weight in self.G.edges.data('weight'): + for node1, node2, weight in self.G.edges.data("weight"): if weight is not None: w = weight_func(self._mapping_inv[node1], self._mapping_inv[node2]) - self.G.edges[(node1, node2)]['weight'] = w + self.G.edges[(node1, node2)]["weight"] = w def stitch(self, add_back_residuals=True): if self.G is None: - raise ValueError('Inexistent graph. Call `build_graph` first') + raise ValueError("Inexistent graph. 
Call `build_graph` first") try: _, self.flow = nx.capacity_scaling(self.G) self.paths = self.reconstruct_paths() except nx.exception.NetworkXUnfeasible: - print('No optimal solution found. Employing black magic...') + print("No optimal solution found. Employing black magic...") # Let us prune the graph by removing all source and sink edges # but those connecting the `n_tracks` first and last tracklets. - in_to_keep = [self._mapping[first_tracklet]['in'] - for first_tracklet in self._first_tracklets] - out_to_keep = [self._mapping[last_tracklet]['out'] - for last_tracklet in self._last_tracklets] - in_to_remove = (set(node for _, node in self.G.out_edges('source')) - .difference(in_to_keep)) - out_to_remove = (set(node for node, _ in self.G.in_edges('sink')) - .difference(out_to_keep)) - self.G.remove_edges_from(zip(['source'] * len(in_to_remove), in_to_remove)) - self.G.remove_edges_from(zip(out_to_remove, ['sink'] * len(out_to_remove))) + in_to_keep = [ + self._mapping[first_tracklet]["in"] + for first_tracklet in self._first_tracklets + ] + out_to_keep = [ + self._mapping[last_tracklet]["out"] + for last_tracklet in self._last_tracklets + ] + in_to_remove = set( + node for _, node in self.G.out_edges("source") + ).difference(in_to_keep) + out_to_remove = set(node for node, _ in self.G.in_edges("sink")).difference( + out_to_keep + ) + self.G.remove_edges_from(zip(["source"] * len(in_to_remove), in_to_remove)) + self.G.remove_edges_from(zip(out_to_remove, ["sink"] * len(out_to_remove))) # Preflow push seems to work slightly better than shortest # augmentation path..., and is more computationally efficient. paths = [] - for path in nx.node_disjoint_paths(self.G, 'source', 'sink', - preflow_push, self.n_tracks): + for path in nx.node_disjoint_paths( + self.G, "source", "sink", preflow_push, self.n_tracks + ): temp = set() for node in path[1:-1]: self.G.remove_node(node) temp.add(self._mapping_inv[node]) paths.append(list(temp)) incomplete_tracks = self.n_tracks - len(paths) - if incomplete_tracks == 1: # All remaining nodes ought to belong to the same track - nodes = set(self._mapping_inv[node] for node in self.G - if node not in ('source', 'sink')) + if ( + incomplete_tracks == 1 + ): # All remaining nodes ought to belong to the same track + nodes = set( + self._mapping_inv[node] + for node in self.G + if node not in ("source", "sink") + ) # Verify whether there are overlapping tracklets for t1, t2 in combinations(nodes, 2): if t1 in t2: @@ -672,7 +705,9 @@ def _finalize_tracks(self): n_max = len(residuals) while n_attemps < n_max: for res in tqdm(residuals[::-1]): - easy_fit = [i for i, track in enumerate(self.tracks) if res not in track] + easy_fit = [ + i for i, track in enumerate(self.tracks) if res not in track + ] if not easy_fit: residuals.remove(res) continue @@ -704,8 +739,9 @@ def _finalize_tracks(self): elif right_gap <= 3: dist = np.linalg.norm(track.centroid[e] - c1[1]) else: - dist = (np.linalg.norm(track.centroid[s] - c1[0]) - + np.linalg.norm(track.centroid[e] - c1[1])) + dist = np.linalg.norm(track.centroid[s] - c1[0]) + np.linalg.norm( + track.centroid[e] - c1[1] + ) dists.append((n, dist)) if not dists: continue @@ -745,7 +781,7 @@ def _prestitch_residuals(self, max_gap=5): def concatenate_data(self): if self.tracks is None: - raise ValueError('No tracks were found. Call `stitch` first') + raise ValueError("No tracks were found. 
Call `stitch` first") # Refresh temporal bounds self._first_frame = min(self.tracks, key=lambda t: t.start).start @@ -760,36 +796,38 @@ def concatenate_data(self): def format_df(self): data = self.concatenate_data() - individuals = [f'ind{i}' for i in range(1, self.n_tracks + 1)] - coords = ['x', 'y', 'likelihood'] + individuals = [f"ind{i}" for i in range(1, self.n_tracks + 1)] + coords = ["x", "y", "likelihood"] if self.header is not None: - scorer = self.header.get_level_values('scorer').unique().to_list() - bpts = self.header.get_level_values('bodyparts').unique().to_list() + scorer = self.header.get_level_values("scorer").unique().to_list() + bpts = self.header.get_level_values("bodyparts").unique().to_list() else: - scorer = ['scorer'] + scorer = ["scorer"] n_bpts = data.shape[1] // (len(individuals) * len(coords)) - bpts = [f'bpt{i}' for i in range(1, n_bpts + 1)] + bpts = [f"bpt{i}" for i in range(1, n_bpts + 1)] columns = pd.MultiIndex.from_product( [scorer, individuals, bpts, coords], - names=['scorer', 'individuals', 'bodyparts', 'coords'] + names=["scorer", "individuals", "bodyparts", "coords"], ) inds = range(self._first_frame, self._last_frame + 1) df = pd.DataFrame(data, columns=columns, index=inds) if self.single is not None: n_dets = self.single.data.shape[1] columns = pd.MultiIndex.from_product( - [scorer, ['single'], [f'bpt{i}' for i in range(1, n_dets + 1)], coords], - names=['scorer', 'individuals', 'bodyparts', 'coords'] + [scorer, ["single"], [f"bpt{i}" for i in range(1, n_dets + 1)], coords], + names=["scorer", "individuals", "bodyparts", "coords"], + ) + df2 = pd.DataFrame( + self.single.flat_data, columns=columns, index=self.single.inds ) - df2 = pd.DataFrame(self.single.flat_data, columns=columns, index=self.single.inds) - df = df.join(df2, how='outer') + df = df.join(df2, how="outer") return df - def write_tracks(self, output_name=''): + def write_tracks(self, output_name=""): df = self.format_df() if not output_name: - output_name = self.filename.replace('pickle', 'h5') - df.to_hdf(output_name, 'tracks', format='table', mode='w') + output_name = self.filename.replace("pickle", "h5") + df.to_hdf(output_name, "tracks", format="table", mode="w") @staticmethod def calculate_edge_weight(tracklet1, tracklet2): @@ -799,27 +837,27 @@ def calculate_edge_weight(tracklet1, tracklet2): @property def weights(self): if self.G is None: - raise ValueError('Inexistent graph. Call `build_graph` first') + raise ValueError("Inexistent graph. Call `build_graph` first") - return nx.get_edge_attributes(self.G, 'weight') + return nx.get_edge_attributes(self.G, "weight") def draw_graph(self, with_weights=False): if self.G is None: - raise ValueError('Inexistent graph. Call `build_graph` first') + raise ValueError("Inexistent graph. Call `build_graph` first") pos = nx.spring_layout(self.G) nx.draw_networkx(self.G, pos) if with_weights: nx.draw_networkx_edge_labels(self.G, pos, edge_labels=self.weights) - def plot_paths(self, colormap='Set2'): + def plot_paths(self, colormap="Set2"): if self.paths is None: - raise ValueError('No paths were found. Call `stitch` first') + raise ValueError("No paths were found. 
Call `stitch` first") fig, ax = plt.subplots() ax.set_yticks([]) for loc, spine in ax.spines.items(): - if loc != 'bottom': + if loc != "bottom": spine.set_visible(False) for path in self.paths: length = len(path) @@ -827,26 +865,26 @@ def plot_paths(self, colormap='Set2'): for tracklet, color in zip(path, colors): tracklet.plot(color=color, ax=ax) - def plot_tracks(self, colormap='viridis'): + def plot_tracks(self, colormap="viridis"): if self.tracks is None: - raise ValueError('No tracks were found. Call `stitch` first') + raise ValueError("No tracks were found. Call `stitch` first") fig, ax = plt.subplots() ax.set_yticks([]) for loc, spine in ax.spines.items(): - if loc != 'bottom': + if loc != "bottom": spine.set_visible(False) colors = plt.get_cmap(colormap, self.n_tracks)(range(self.n_tracks)) for track, color in zip(self.tracks, colors): track.plot(color=color, ax=ax) - def plot_tracklets(self, colormap='Paired'): + def plot_tracklets(self, colormap="Paired"): fig, axes = plt.subplots(ncols=2, figsize=(14, 4)) axes[0].set_yticks([]) for loc, spine in axes[0].spines.items(): - if loc != 'bottom': + if loc != "bottom": spine.set_visible(False) - axes[1].axis('off') + axes[1].axis("off") cmap = plt.get_cmap(colormap) colors = cycle(cmap.colors) @@ -864,9 +902,9 @@ def plot_tracklets(self, colormap='Paired'): def reconstruct_paths(self): paths = [] - for node, flow in self.flow['source'].items(): + for node, flow in self.flow["source"].items(): if flow == 1: - path = self.reconstruct_path(node.replace('in', 'out')) + path = self.reconstruct_path(node.replace("in", "out")) paths.append([self._mapping_inv[tracklet] for tracklet in path]) return paths @@ -874,9 +912,9 @@ def reconstruct_path(self, source): path = [source] for node, flow in self.flow[source].items(): if flow == 1: - if node != 'sink': + if node != "sink": self.flow[source][node] -= 1 - path.extend(self.reconstruct_path(node.replace('in', 'out'))) + path.extend(self.reconstruct_path(node.replace("in", "out"))) return path @@ -887,7 +925,7 @@ def stitch_tracklets( split_tracklets=True, prestitch_residuals=True, weight_func=None, - output_name='', + output_name="", ): """ Stitch sparse tracklets into full tracks via a graph-based, diff --git a/deeplabcut/utils/auxfun_models.py b/deeplabcut/utils/auxfun_models.py index d495a6389..5f168317a 100755 --- a/deeplabcut/utils/auxfun_models.py +++ b/deeplabcut/utils/auxfun_models.py @@ -40,23 +40,26 @@ def Check4weights(modeltype, parent_path, num_shuffles): + "_224.ckpt", ) ) - elif 'efficientnet' in modeltype: + elif "efficientnet" in modeltype: model_path = Path( - os.path.join(parent_path, - 'pose_estimation_tensorflow/models/pretrained/' - + modeltype.replace('_','-'))) + os.path.join( + parent_path, + "pose_estimation_tensorflow/models/pretrained/" + + modeltype.replace("_", "-"), + ) + ) else: print( - "Currently ResNet (50, 101, 152), MobilenetV2 (1, 0.75, 0.5 and 0.35) and EfficientNet (b0-b6) are supported, please change 'resnet' entry in config.yaml!" + "Currently ResNet (50, 101, 152), MobilenetV2 (1, 0.75, 0.5 and 0.35) and EfficientNet (b0-b6) are supported, please change 'resnet' entry in config.yaml!" ) num_shuffles = -1 # thus the loop below is empty... 
model_path = parent_path if num_shuffles > 0: - if 'efficientnet' in modeltype: + if "efficientnet" in modeltype: if not os.path.isdir(model_path): - Downloadweights(modeltype,model_path) - model_path = os.path.join(model_path, 'model.ckpt') + Downloadweights(modeltype, model_path) + model_path = os.path.join(model_path, "model.ckpt") else: if not model_path.is_file(): Downloadweights(modeltype, model_path) @@ -77,9 +80,9 @@ def Downloadweights(modeltype, model_path): target_dir / "pretrained_model_urls.yaml" ) try: - if 'efficientnet' in modeltype: - url = neturls['efficientnet'] - url = url + modeltype.replace('_','-') + '.tar.gz' + if "efficientnet" in modeltype: + url = neturls["efficientnet"] + url = url + modeltype.replace("_", "-") + ".tar.gz" else: url = neturls[modeltype] print("Downloading a ImageNet-pretrained model from {}....".format(url)) diff --git a/deeplabcut/utils/auxfun_multianimal.py b/deeplabcut/utils/auxfun_multianimal.py index c19e4ae8f..544db9938 100644 --- a/deeplabcut/utils/auxfun_multianimal.py +++ b/deeplabcut/utils/auxfun_multianimal.py @@ -68,7 +68,7 @@ def getpafgraph(cfg, printnames=True): print("Attention, parts do not exist!", link) unconnected = set(range(len(multianimalbodyparts))).difference(connected) - if unconnected and len(multianimalbodyparts)>1: #for single bpt not important! + if unconnected and len(multianimalbodyparts) > 1: # for single bpt not important! raise ValueError( f'Unconnected {", ".join(multianimalbodyparts[i] for i in unconnected)}. ' f"For multi-animal projects, all multianimalbodyparts should be connected. " diff --git a/deeplabcut/utils/auxfun_videos.py b/deeplabcut/utils/auxfun_videos.py index 328d3a9c3..9cd91d2e0 100644 --- a/deeplabcut/utils/auxfun_videos.py +++ b/deeplabcut/utils/auxfun_videos.py @@ -294,22 +294,28 @@ def crop(self, suffix="crop", dest_folder=None): return output_path def rescale( - self, width, height=-1, rotateccw="No", angle=0.0, suffix="rescale", dest_folder=None + self, + width, + height=-1, + rotateccw="No", + angle=0.0, + suffix="rescale", + dest_folder=None, ): output_path = self.make_output_path(suffix, dest_folder) command = ( - f'ffmpeg -n -i {self.video_path} -filter:v ' + f"ffmpeg -n -i {self.video_path} -filter:v " f'"scale={width}:{height}{{}}" -c:a copy {output_path}' ) # Rotate, see: https://stackoverflow.com/questions/3937387/rotating-videos-with-ffmpeg # interesting option to just update metadata. if rotateccw == "Arbitrary": angle = np.deg2rad(angle) - command = (command.format(f', rotate={angle}')) + command = command.format(f", rotate={angle}") elif rotateccw == "Yes": - command = (command.format(f', transpose=1')) + command = command.format(f", transpose=1") else: - command = (command.format('')) + command = command.format("") subprocess.call(command, shell=True) return output_path @@ -451,7 +457,13 @@ def CropVideo( def DownSampleVideo( - vname, width=-1, height=200, outsuffix="downsampled", outpath=None, rotateccw="No", angle=0.0 + vname, + width=-1, + height=200, + outsuffix="downsampled", + outpath=None, + rotateccw="No", + angle=0.0, ): """ Auxiliary function to downsample a video and output it to the same folder with "outsuffix" appended in its name. 
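To sanity-check what the reformatted `rescale` method above actually executes, here is a minimal standalone sketch of the same ffmpeg command assembly. It only mirrors the template visible in this diff; the helper name `build_rescale_command` and the sample file names are illustrative stand-ins, not part of the DeepLabCut API.

    import numpy as np

    def build_rescale_command(
        video_path, output_path, width, height=-1, rotateccw="No", angle=0.0
    ):
        # Same template as in the diff: a scale filter, with an optional
        # rotation spliced into the {} placeholder via str.format.
        command = (
            f"ffmpeg -n -i {video_path} -filter:v "
            f'"scale={width}:{height}{{}}" -c:a copy {output_path}'
        )
        if rotateccw == "Arbitrary":
            # ffmpeg's rotate filter expects radians, hence np.deg2rad.
            return command.format(f", rotate={np.deg2rad(angle)}")
        elif rotateccw == "Yes":
            return command.format(", transpose=1")  # 90-degree transpose
        return command.format("")

    # Downsample to a height of 200 px, preserving aspect ratio (width=-1):
    print(build_rescale_command("in.mp4", "out.mp4", width=-1, height=200))
    # -> ffmpeg -n -i in.mp4 -filter:v "scale=-1:200" -c:a copy out.mp4
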
diff --git a/deeplabcut/utils/auxiliaryfunctions.py b/deeplabcut/utils/auxiliaryfunctions.py index 67a1147a8..e5b4bb8af 100755 --- a/deeplabcut/utils/auxiliaryfunctions.py +++ b/deeplabcut/utils/auxiliaryfunctions.py @@ -470,10 +470,10 @@ def GetEvaluationFolder(trainFraction, shuffle, cfg, modelprefix=""): Task = cfg["Task"] date = cfg["date"] iterate = "iteration-" + str(cfg["iteration"]) - if 'eval_prefix' in cfg: - eval_prefix = cfg['eval_prefix']+'/' + if "eval_prefix" in cfg: + eval_prefix = cfg["eval_prefix"] + "/" else: - eval_prefix = 'evaluation-results'+'/' + eval_prefix = "evaluation-results" + "/" return Path( modelprefix, eval_prefix @@ -708,7 +708,7 @@ def find_analyzed_data(folder, videoname, scorer, filtered=False, track_method=" tracker = "_sk" elif track_method == "box": tracker = "_bx" - elif track_method == 'ellipse': + elif track_method == "ellipse": tracker = "_el" else: tracker = "" @@ -764,8 +764,8 @@ def load_detection_data(video, scorer, track_method): tracker = "sk" elif track_method == "box": tracker = "bx" - elif track_method == 'ellipse': - tracker = 'el' + elif track_method == "ellipse": + tracker = "el" else: raise ValueError(f"Unrecognized track_method={track_method}") diff --git a/deeplabcut/utils/conversioncode.py b/deeplabcut/utils/conversioncode.py index d0e0c00d4..4aadd18fa 100644 --- a/deeplabcut/utils/conversioncode.py +++ b/deeplabcut/utils/conversioncode.py @@ -170,8 +170,12 @@ def analyze_videos_converth5_to_csv(video_folder, videotype=".mp4"): deeplabcut.analyze_videos_converth5_to_csv('/media/alex/experimentaldata/cheetahvideos','.mp4') """ - h5_files = list(auxiliaryfunctions.grab_files_in_folder(video_folder, "h5", relative=False)) - videos = auxiliaryfunctions.grab_files_in_folder(video_folder, videotype, relative=False) + h5_files = list( + auxiliaryfunctions.grab_files_in_folder(video_folder, "h5", relative=False) + ) + videos = auxiliaryfunctions.grab_files_in_folder( + video_folder, videotype, relative=False + ) for video in videos: if "_labeled" in video: continue diff --git a/deeplabcut/utils/skeleton.py b/deeplabcut/utils/skeleton.py index add136355..4c4b48d66 100644 --- a/deeplabcut/utils/skeleton.py +++ b/deeplabcut/utils/skeleton.py @@ -62,7 +62,7 @@ def __init__(self, config_path): found = True break if self.df is None: - raise IOError('No labeled data were found.') + raise IOError("No labeled data were found.") self.bpts = self.df.columns.get_level_values("bodyparts").unique() if not found: diff --git a/setup.py b/setup.py index 8f04c5f43..b523ef88e 100644 --- a/setup.py +++ b/setup.py @@ -56,11 +56,9 @@ "tensorpack==0.9.8", "tqdm", "moviepy<=1.0.1", - "bayesian-optimization" + "bayesian-optimization", ], - extras_require={ - "gui": ["wxpython<4.1"] - }, + extras_require={"gui": ["wxpython<4.1"]}, scripts=["deeplabcut/pose_estimation_tensorflow/models/pretrained/download.sh"], packages=setuptools.find_packages(), data_files=[ diff --git a/tests/test_stitcher.py b/tests/test_stitcher.py index b22d9a8b1..58fc46029 100644 --- a/tests/test_stitcher.py +++ b/tests/test_stitcher.py @@ -30,22 +30,17 @@ def fake_stitcher(): track = Tracklet(data, inds) idx = np.linspace(0, inds.size, N_TRACKLETS + 1, dtype=int) tracklets = TrackletStitcher.split_tracklet(track, idx[1:-1]) - return TrackletStitcher( - tracklets, - n_tracks=2, - ) + return TrackletStitcher(tracklets, n_tracks=2) def test_tracklet_wrong_inputs(fake_tracklet): with pytest.raises(ValueError): _ = Tracklet(fake_tracklet.data[..., :2], fake_tracklet.inds) - _ = 
Tracklet(fake_tracklet.data[:TRACKLET_LEN - 2], fake_tracklet.inds) + _ = Tracklet(fake_tracklet.data[: TRACKLET_LEN - 2], fake_tracklet.inds) def test_tracklet_monotonic_indices(fake_tracklet): - tracklet_inv = Tracklet( - fake_tracklet.data[::-1], fake_tracklet.inds[::-1] - ) + tracklet_inv = Tracklet(fake_tracklet.data[::-1], fake_tracklet.inds[::-1]) np.testing.assert_equal(fake_tracklet.inds, tracklet_inv.inds) np.testing.assert_equal(fake_tracklet.xy, tracklet_inv.xy) @@ -57,13 +52,9 @@ def test_tracklet(fake_tracklet): assert fake_tracklet.start == TRACKLET_START assert fake_tracklet.end == TRACKLET_START + TRACKLET_LEN - 1 np.testing.assert_equal( - fake_tracklet.centroid, - np.full((TRACKLET_LEN, 2), np.arange(N_DETS).mean()) - ) - fake_tracklet2 = Tracklet( - fake_tracklet.data, - fake_tracklet.inds + TRACKLET_LEN + fake_tracklet.centroid, np.full((TRACKLET_LEN, 2), np.arange(N_DETS).mean()) ) + fake_tracklet2 = Tracklet(fake_tracklet.data, fake_tracklet.inds + TRACKLET_LEN) assert fake_tracklet not in fake_tracklet2 tracklet = fake_tracklet + fake_tracklet2 tracklet -= fake_tracklet @@ -83,27 +74,19 @@ def test_tracklet_data_access(fake_tracklet): fake_tracklet.get_data_at(TRACKLET_START), fake_tracklet.data[0] ) fake_tracklet.set_data_at(TRACKLET_START + 1, fake_tracklet.data[0] * 2) - np.testing.assert_equal( - fake_tracklet.data[1], fake_tracklet.data[0] * 2 - ) + np.testing.assert_equal(fake_tracklet.data[1], fake_tracklet.data[0] * 2) fake_tracklet.del_data_at(TRACKLET_START + 1) assert not fake_tracklet.is_continuous assert TRACKLET_START + 1 not in fake_tracklet.inds -@pytest.mark.parametrize( - "where, norm", - [("head", False), ("tail", True)] -) +@pytest.mark.parametrize("where, norm", [("head", False), ("tail", True)]) def test_tracklet_calc_velocity(fake_tracklet, where, norm): _ = fake_tracklet.calc_velocity(where, norm) def test_tracklet_affinities(fake_tracklet): - other_tracklet = Tracklet( - fake_tracklet.data, - fake_tracklet.inds + TRACKLET_LEN - ) + other_tracklet = Tracklet(fake_tracklet.data, fake_tracklet.inds + TRACKLET_LEN) _ = fake_tracklet.dynamic_similarity_with(other_tracklet) _ = fake_tracklet.dynamic_dissimilarity_with(other_tracklet) _ = fake_tracklet.shape_dissimilarity_with(other_tracklet) @@ -134,5 +117,5 @@ def test_stitcher(tmpdir_factory, fake_stitcher): assert fake_stitcher.compute_max_gap() == 1 fake_stitcher.build_graph(max_gap=1) fake_stitcher.stitch(add_back_residuals=True) - output_name = tmpdir_factory.mktemp('data').join('fake.h5') + output_name = tmpdir_factory.mktemp("data").join("fake.h5") fake_stitcher.write_tracks(output_name) diff --git a/tests/test_trackingutils.py b/tests/test_trackingutils.py index 5a7dc6e35..66b18fa16 100644 --- a/tests/test_trackingutils.py +++ b/tests/test_trackingutils.py @@ -8,13 +8,7 @@ @pytest.fixture() def ellipse(): - params = { - 'x': 0, - 'y': 0, - 'width': 2, - 'height': 4, - 'theta': np.pi / 2, - } + params = {"x": 0, "y": 0, "width": 2, "height": 4, "theta": np.pi / 2} return trackingutils.Ellipse(**params) @@ -22,8 +16,7 @@ def test_ellipse(ellipse): assert ellipse.aspect_ratio == 2 assert ellipse.geometry is not None np.testing.assert_equal( - ellipse.contains_points(np.asarray([[0, 0], [10, 10]])), - [True, False], + ellipse.contains_points(np.asarray([[0, 0], [10, 10]])), [True, False] ) @@ -61,22 +54,15 @@ def test_sort_ellipse(): poses = np.random.rand(2, 10, 3) trackers = mot.track(poses[..., :2]) assert trackers.shape == (2, 7) - trackingutils.fill_tracklets( - tracklets, - trackers, - 
poses, - imname=0 - ) + trackingutils.fill_tracklets(tracklets, trackers, poses, imname=0) assert all(id_ in tracklets for id_ in trackers[:, -2]) def test_calc_bboxes_from_keypoints(): xy = np.asarray([[[0, 0, 1]]]) np.testing.assert_equal( - trackingutils.calc_bboxes_from_keypoints(xy, 10), - [[-10, -10, 10, 10, 1]], + trackingutils.calc_bboxes_from_keypoints(xy, 10), [[-10, -10, 10, 10, 1]] ) np.testing.assert_equal( - trackingutils.calc_bboxes_from_keypoints(xy, 20, 10), - [[-10, -20, 30, 20, 1]], + trackingutils.calc_bboxes_from_keypoints(xy, 20, 10), [[-10, -20, 30, 20, 1]] ) diff --git a/testscript_cli.py b/testscript_cli.py index 5a015f966..319344625 100644 --- a/testscript_cli.py +++ b/testscript_cli.py @@ -9,15 +9,17 @@ It produces nothing of interest scientifically. """ -task='Testcore' # Enter the name of your experiment Task -scorer='Mackenzie' # Enter the name of the experimenter/labeler +task = "Testcore" # Enter the name of your experiment Task +scorer = "Mackenzie" # Enter the name of the experimenter/labeler import os, subprocess, sys + def install(package): subprocess.check_call([sys.executable, "-m", "pip", "install", package]) -install('tensorflow==1.13.1') + +install("tensorflow==1.13.1") import deeplabcut as dlc @@ -32,86 +34,127 @@ def install(package): videoname = "reachingvideo1" video = [ os.path.join( - basepath, "examples", "Reaching-Mackenzie-2018-08-30", "videos", videoname + ".avi" + basepath, + "examples", + "Reaching-Mackenzie-2018-08-30", + "videos", + videoname + ".avi", ) ] # For testing a color video: -#videoname='baby4hin2min' -#video=[os.path.join('/home/alex/Desktop/Data',videoname+'.mp4')] -#to test destination folder: -#dfolder=basepath +# videoname='baby4hin2min' +# video=[os.path.join('/home/alex/Desktop/Data',videoname+'.mp4')] +# to test destination folder: +# dfolder=basepath print(video) -dfolder=None -net_type='resnet_50' #'mobilenet_v2_0.35' #'resnet_50' -augmenter_type='default' -augmenter_type2='imgaug' +dfolder = None +net_type = "resnet_50" #'mobilenet_v2_0.35' #'resnet_50' +augmenter_type = "default" +augmenter_type2 = "imgaug" -if platform.system() == 'Darwin' or platform.system()=='Windows': +if platform.system() == "Darwin" or platform.system() == "Windows": print("On Windows/OSX tensorpack is not tested by default.") - augmenter_type3='imgaug' + augmenter_type3 = "imgaug" else: - augmenter_type3='tensorpack' #Does not work on WINDOWS + augmenter_type3 = "tensorpack" # Does not work on WINDOWS -numiter=3 +numiter = 3 print("CREATING PROJECT") -path_config_file=dlc.create_new_project(task,scorer,video, copy_videos=True) +path_config_file = dlc.create_new_project(task, scorer, video, copy_videos=True) -cfg=dlc.auxiliaryfunctions.read_config(path_config_file) -cfg['numframes2pick']=5 -cfg['pcutoff']=0.01 -cfg['TrainingFraction']=[.8] -cfg['skeleton']=[['bodypart1','bodypart2'],['bodypart1','bodypart3']] +cfg = dlc.auxiliaryfunctions.read_config(path_config_file) +cfg["numframes2pick"] = 5 +cfg["pcutoff"] = 0.01 +cfg["TrainingFraction"] = [0.8] +cfg["skeleton"] = [["bodypart1", "bodypart2"], ["bodypart1", "bodypart3"]] -dlc.auxiliaryfunctions.write_config(path_config_file,cfg) +dlc.auxiliaryfunctions.write_config(path_config_file, cfg) print("EXTRACTING FRAMES") -dlc.extract_frames(path_config_file,mode='automatic',userfeedback=False) +dlc.extract_frames(path_config_file, mode="automatic", userfeedback=False) print("CREATING SOME LABELS FOR THE FRAMES") -frames=os.listdir(os.path.join(cfg['project_path'],'labeled-data',videoname)) -#As 
this next step is manual, we update the labels by putting them on the diagonal (fixed for all frames) -for index,bodypart in enumerate(cfg['bodyparts']): - columnindex = pd.MultiIndex.from_product([[scorer], [bodypart], ['x', 'y']],names=['scorer', 'bodyparts', 'coords']) - frame = pd.DataFrame(100+np.ones((len(frames),2))*50*index, columns = columnindex, index = [os.path.join('labeled-data',videoname,fn) for fn in frames]) - if index==0: - dataFrame=frame - else: - dataFrame = pd.concat([dataFrame, frame],axis=1) - -dataFrame.to_csv(os.path.join(cfg['project_path'],'labeled-data',videoname,"CollectedData_" + scorer + ".csv")) -dataFrame.to_hdf(os.path.join(cfg['project_path'],'labeled-data',videoname,"CollectedData_" + scorer + '.h5'),'df_with_missing',format='table', mode='w') +frames = os.listdir(os.path.join(cfg["project_path"], "labeled-data", videoname)) +# As this next step is manual, we update the labels by putting them on the diagonal (fixed for all frames) +for index, bodypart in enumerate(cfg["bodyparts"]): + columnindex = pd.MultiIndex.from_product( + [[scorer], [bodypart], ["x", "y"]], names=["scorer", "bodyparts", "coords"] + ) + frame = pd.DataFrame( + 100 + np.ones((len(frames), 2)) * 50 * index, + columns=columnindex, + index=[os.path.join("labeled-data", videoname, fn) for fn in frames], + ) + if index == 0: + dataFrame = frame + else: + dataFrame = pd.concat([dataFrame, frame], axis=1) + +dataFrame.to_csv( + os.path.join( + cfg["project_path"], + "labeled-data", + videoname, + "CollectedData_" + scorer + ".csv", + ) +) +dataFrame.to_hdf( + os.path.join( + cfg["project_path"], + "labeled-data", + videoname, + "CollectedData_" + scorer + ".h5", + ), + "df_with_missing", + format="table", + mode="w", +) print("Plot labels...") dlc.check_labels(path_config_file) print("CREATING TRAININGSET") -dlc.create_training_dataset(path_config_file,net_type=net_type,augmenter_type=augmenter_type) - -posefile=os.path.join(cfg['project_path'],'dlc-models/iteration-'+str(cfg['iteration'])+'/'+ cfg['Task'] + cfg['date'] + '-trainset' + str(int(cfg['TrainingFraction'][0] * 100)) + 'shuffle' + str(1),'train/pose_cfg.yaml') - -DLC_config=dlc.auxiliaryfunctions.read_plainconfig(posefile) -DLC_config['save_iters']=numiter -DLC_config['display_iters']=2 -DLC_config['multi_step']=[[0.001,numiter]] +dlc.create_training_dataset( + path_config_file, net_type=net_type, augmenter_type=augmenter_type +) + +posefile = os.path.join( + cfg["project_path"], + "dlc-models/iteration-" + + str(cfg["iteration"]) + + "/" + + cfg["Task"] + + cfg["date"] + + "-trainset" + + str(int(cfg["TrainingFraction"][0] * 100)) + + "shuffle" + + str(1), + "train/pose_cfg.yaml", +) + +DLC_config = dlc.auxiliaryfunctions.read_plainconfig(posefile) +DLC_config["save_iters"] = numiter +DLC_config["display_iters"] = 2 +DLC_config["multi_step"] = [[0.001, numiter]] print("CHANGING training parameters to end quickly!") -dlc.auxiliaryfunctions.write_plainconfig(posefile,DLC_config) +dlc.auxiliaryfunctions.write_plainconfig(posefile, DLC_config) print("TRAIN") dlc.train_network(path_config_file) print("EVALUATE") -dlc.evaluate_network(path_config_file,plotting=True) +dlc.evaluate_network(path_config_file, plotting=True) -videotest = os.path.join(cfg['project_path'],'videos',videoname + ".avi") +videotest = os.path.join(cfg["project_path"], "videos", videoname + ".avi") print(videotest) # quicker variant -''' +""" print("VIDEO ANALYSIS") dlc.analyze_videos(path_config_file, [videotest], save_as_csv=True) @@ -143,9 +186,11 @@ def 
install(package):
 print("ANALYZING some individual frames")
 dlc.analyze_time_lapse_frames(path_config_file,os.path.join(cfg['project_path'],'labeled-data/reachingvideo1/'))
-'''
+"""
 
 print("Export model...")
-dlc.export_model(path_config_file,shuffle=1,make_tar=False)
+dlc.export_model(path_config_file, shuffle=1, make_tar=False)
 
-print("ALL DONE!!! - default/imgaug cases of DLCcore training and evaluation are functional (no extract outlier or refinement tested).")
+print(
+    "ALL DONE!!! - default/imgaug cases of DLCcore training and evaluation are functional (outlier extraction and refinement not tested)."
+)
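
The label-faking loop in testscript_cli.py grows dataFrame through repeated pd.concat calls inside the loop; the same result can be had by collecting one column block per bodypart and concatenating once at the end. A minimal standalone sketch of that idiom follows — the scorer, bodypart, and frame names are placeholders, not values read from a real project config:

import os
import numpy as np
import pandas as pd

scorer = "Mackenzie"  # placeholder, as in the script above
videoname = "reachingvideo1"
bodyparts = ["bodypart1", "bodypart2", "bodypart3"]  # placeholder bodypart list
frames = ["img000.png", "img001.png"]  # placeholder frame file names

blocks = []
for index, bodypart in enumerate(bodyparts):
    # One (scorer, bodypart, x/y) column block per bodypart.
    columnindex = pd.MultiIndex.from_product(
        [[scorer], [bodypart], ["x", "y"]], names=["scorer", "bodyparts", "coords"]
    )
    blocks.append(
        pd.DataFrame(
            100 + np.ones((len(frames), 2)) * 50 * index,  # labels on a diagonal
            columns=columnindex,
            index=[os.path.join("labeled-data", videoname, fn) for fn in frames],
        )
    )
dataFrame = pd.concat(blocks, axis=1)  # one concat instead of one per bodypart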
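
For reference, the expectations in test_calc_bboxes_from_keypoints can be reproduced with a few lines of NumPy. The function below is a hypothetical re-derivation inferred from those expected values only (the real trackingutils implementation may differ in detail): the box is the keypoints' extent padded by a slack margin, optionally shifted along x, with the mean keypoint confidence appended as a fifth column.

import numpy as np

def bboxes_from_keypoints(xy, slack=0, offset=0):
    # xy: (n_animals, n_keypoints, 3) array of x, y, confidence.
    x1 = xy[..., 0].min(axis=1) - slack + offset
    y1 = xy[..., 1].min(axis=1) - slack
    x2 = xy[..., 0].max(axis=1) + slack + offset
    y2 = xy[..., 1].max(axis=1) + slack
    conf = xy[..., 2].mean(axis=1)  # mean keypoint confidence
    return np.stack([x1, y1, x2, y2, conf], axis=1)

xy = np.asarray([[[0, 0, 1]]])
print(bboxes_from_keypoints(xy, 10))      # expected: [[-10, -10, 10, 10, 1]]
print(bboxes_from_keypoints(xy, 20, 10))  # expected: [[-10, -20, 30, 20, 1]]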