diff --git a/deeplabcut/__init__.py b/deeplabcut/__init__.py
index 128239c19..09b650606 100644
--- a/deeplabcut/__init__.py
+++ b/deeplabcut/__init__.py
@@ -28,6 +28,7 @@
 try:
     import wx
+    mpl.use("WxAgg")
     from deeplabcut import generate_training_dataset
     from deeplabcut import refine_training_dataset
diff --git a/deeplabcut/__main__.py b/deeplabcut/__main__.py
index 89609024f..e11ec3826 100644
--- a/deeplabcut/__main__.py
+++ b/deeplabcut/__main__.py
@@ -9,6 +9,7 @@
 """
 try:
     import wx
+
     lite = False
 except ModuleNotFoundError:
     lite = True
@@ -20,4 +21,6 @@
     deeplabcut.launch_dlc()
 else:
-    print("You installed DLC lite, thus GUI's cannot be used. If you need GUI support please: pip install deeplabcut[gui]")
+    print(
+        "You installed DLC lite, thus GUIs cannot be used. If you need GUI support please: pip install deeplabcut[gui]"
+    )
diff --git a/deeplabcut/generate_training_dataset/trainingsetmanipulation.py b/deeplabcut/generate_training_dataset/trainingsetmanipulation.py
index 1d71fde10..311d2e73d 100755
--- a/deeplabcut/generate_training_dataset/trainingsetmanipulation.py
+++ b/deeplabcut/generate_training_dataset/trainingsetmanipulation.py
@@ -397,7 +397,9 @@ def cropimagesandlabels(
         # moving old entry to _original, dropping it from video_set and update crop parameters
         video_orig = sep.join((vidpath, vidname + videotype))
         if video_orig not in cfg["video_sets_original"]:
-            cfg["video_sets_original"][video_orig] = cfg["video_sets"][video_orig]
+            cfg["video_sets_original"][video_orig] = cfg["video_sets"][
+                video_orig
+            ]
         cfg["video_sets"].pop(video_orig)
         cfg["video_sets"][sep.join((vidpath, new_vidname + videotype))] = {
             "crop": ", ".join(map(str, [0, temp_size[1], 0, temp_size[0]]))
@@ -461,7 +463,7 @@ def check_labels(
     for folder in folders:
         try:
             DataCombined = pd.read_hdf(
-                os.path.join(str(folder), "CollectedData_" + cfg["scorer"] + ".h5"),
+                os.path.join(str(folder), "CollectedData_" + cfg["scorer"] + ".h5")
             )
             if cfg.get("multianimalproject", False):
                 color_by = "individual" if visualizeindividuals else "bodypart"
@@ -887,7 +889,11 @@ def create_training_dataset(
     if net_type is None:  # loading & linking pretrained models
         net_type = cfg.get("default_net_type", "resnet_50")
     else:
-        if "resnet" in net_type or "mobilenet" in net_type or "efficientnet" in net_type:
+        if (
+            "resnet" in net_type
+            or "mobilenet" in net_type
+            or "efficientnet" in net_type
+        ):
             pass
         else:
             raise ValueError("Invalid network type:", net_type)
diff --git a/deeplabcut/gui/auxfun_drag.py b/deeplabcut/gui/auxfun_drag.py
index cefa6477b..f80827643 100644
--- a/deeplabcut/gui/auxfun_drag.py
+++ b/deeplabcut/gui/auxfun_drag.py
@@ -89,11 +89,7 @@ def on_press(self, event):
             message = f"Do you want to remove the label {self.bodyParts}?"
             if self.likelihood is not None:
                 message += " You cannot undo this step!"
-            msg = wx.MessageBox(
-                message,
-                "Remove!",
-                wx.YES_NO | wx.ICON_WARNING,
-            )
+            msg = wx.MessageBox(message, "Remove!", wx.YES_NO | wx.ICON_WARNING)
             if msg == 2:
                 self.delete_data()
diff --git a/deeplabcut/gui/create_training_dataset.py b/deeplabcut/gui/create_training_dataset.py
index 153d64904..2285094d6 100644
--- a/deeplabcut/gui/create_training_dataset.py
+++ b/deeplabcut/gui/create_training_dataset.py
@@ -146,7 +146,9 @@ def __init__(self, parent, gui_size, cfg):
         )
         self.cropandlabel.Bind(wx.EVT_RADIOBOX, self.input_crop_size)
         self.cropandlabel.SetSelection(0)
-        self.crop_text = wx.StaticBox(self, label="Crop settings (set to smaller than your input images)")
+        self.crop_text = wx.StaticBox(
+            self, label="Crop settings (set to smaller than your input images)"
+        )
         self.crop_sizer = wx.StaticBoxSizer(self.crop_text, wx.VERTICAL)
         self.crop_widgets = []
         for name, val in [
@@ -171,7 +173,9 @@ def __init__(self, parent, gui_size, cfg):
         self.hbox3.Add(self.userfeedback, 10, wx.EXPAND | wx.TOP | wx.BOTTOM, 5)
         if config_file.get("multianimalproject", False):
-            print("more networks are available soon for maDLC, but currenlty this uses DLC-ResNet50 only")
+            print(
+                "more networks are available soon for maDLC, but currently this uses DLC-ResNet50 only"
+            )
             self.model_comparison_choice = "No"
         else:
             self.model_comparison_choice = wx.RadioBox(
diff --git a/deeplabcut/gui/frame_extraction_toolbox.py b/deeplabcut/gui/frame_extraction_toolbox.py
index ff8263132..9fc739d9d 100644
--- a/deeplabcut/gui/frame_extraction_toolbox.py
+++ b/deeplabcut/gui/frame_extraction_toolbox.py
@@ -45,7 +45,7 @@ def getColorIndices(self, img, bodyparts):
 class MainFrame(BaseFrame):
     def __init__(self, parent, config, slider_width=25):
         super(MainFrame, self).__init__(
-            "DeepLabCut2.0 - Manual Frame Extraction", parent,
+            "DeepLabCut2.0 - Manual Frame Extraction", parent
         )
 ###################################################################################################################################################
@@ -158,8 +158,9 @@ def __init__(self, parent, config, slider_width=25):
         self.date = self.cfg["date"]
         self.trainFraction = self.cfg["TrainingFraction"]
         self.trainFraction = self.trainFraction[0]
-        self.videos = list(self.cfg.get("video_sets_original")
-                           or self.cfg["video_sets"])
+        self.videos = list(
+            self.cfg.get("video_sets_original") or self.cfg["video_sets"]
+        )
         self.bodyparts = self.cfg["bodyparts"]
         self.colormap = plt.get_cmap(self.cfg["colormap"])
         self.colormap = self.colormap.reversed()
diff --git a/deeplabcut/gui/label_frames.py b/deeplabcut/gui/label_frames.py
index 1d26bd824..9399280d2 100644
--- a/deeplabcut/gui/label_frames.py
+++ b/deeplabcut/gui/label_frames.py
@@ -71,9 +71,7 @@ def label_frames(
     os.chdir(str(wd))
     cfg = auxiliaryfunctions.read_config(config)
     if cfg.get("multianimalproject", False) or multiple_individualsGUI:
-        from deeplabcut.gui import (
-            multiple_individuals_labeling_toolbox,
-        )
+        from deeplabcut.gui import multiple_individuals_labeling_toolbox
 
         multiple_individuals_labeling_toolbox.show(config, config3d, sourceCam)
     else:
diff --git a/deeplabcut/gui/labeling_toolbox.py b/deeplabcut/gui/labeling_toolbox.py
index 3b81ec0a9..221a84dfb 100755
--- a/deeplabcut/gui/labeling_toolbox.py
+++ b/deeplabcut/gui/labeling_toolbox.py
@@ -58,7 +58,9 @@ def retrieveData_and_computeEpLines(self, img, imNum):
         if self.sourceCam is None:
             sourceCam = [
                 otherCam for otherCam in cams if cam not in otherCam
-            ][0] #WHY?
+            ][
+                0
+            ]  # WHY?
         else:
             sourceCam = self.sourceCam
@@ -80,11 +82,13 @@ def retrieveData_and_computeEpLines(self, img, imNum):
         try:
             dataFrame = pd.read_hdf(
-                os.path.join(sourceCam_path, "CollectedData_" + scorer + ".h5"),
+                os.path.join(sourceCam_path, "CollectedData_" + scorer + ".h5")
             )
             dataFrame.sort_index(inplace=True)
         except IOError:
-            print("source camera images have not yet been labeled, or you have opened this folder in the wrong mode!")
+            print(
+                "source camera images have not yet been labeled, or you have opened this folder in the wrong mode!"
+            )
             return None, None, None
 
         # Find offset terms for drawing epipolar Lines
@@ -241,7 +245,7 @@ def clearBoxer(self):
 class MainFrame(BaseFrame):
     def __init__(self, parent, config, imtypes, config3d, sourceCam):
         super(MainFrame, self).__init__(
-            "DeepLabCut2.0 - Labeling ToolBox", parent, imtypes,
+            "DeepLabCut2.0 - Labeling ToolBox", parent, imtypes
         )
 
         self.statusbar.SetStatusText(
@@ -370,9 +374,7 @@ def OnKeyPressed(self, event=None):
             inv = self.axes.transData.inverted()
             pos_rel = list(inv.transform(pos_abs))
             y1, y2 = self.axes.get_ylim()
-            pos_rel[1] = (
-                y1 - pos_rel[1] + y2
-            )  # Recall y-axis is inverted
+            pos_rel[1] = y1 - pos_rel[1] + y2  # Recall y-axis is inverted
             i = np.nanargmin(
                 [self.calc_distance(*dp.point.center, *pos_rel) for dp in self.drs]
             )
@@ -619,7 +621,7 @@ def browseDir(self, event):
         # Reading the existing dataset,if already present
         try:
             self.dataFrame = pd.read_hdf(
-                os.path.join(self.dir, "CollectedData_" + self.scorer + ".h5"),
+                os.path.join(self.dir, "CollectedData_" + self.scorer + ".h5")
             )
             self.dataFrame.sort_index(inplace=True)
             self.prev.Enable(True)
@@ -892,9 +894,7 @@ def plot(self, img):
                     )
                 ]
                 self.axes.add_patch(circle[0])
-                self.dr = auxfun_drag.DraggablePoint(
-                    circle[0], self.bodyparts[bpindex]
-                )
+                self.dr = auxfun_drag.DraggablePoint(circle[0], self.bodyparts[bpindex])
                 self.dr.connect()
                 self.dr.coords = MainFrame.getLabels(self, self.iter)[bpindex]
                 self.drs.append(self.dr)
@@ -952,7 +952,6 @@ def onChecked(self, event):
             self.slider.Enable(False)
 
-
 def show(config, config3d, sourceCam, imtypes=["*.png"]):
     app = wx.App()
     frame = MainFrame(None, config, imtypes, config3d, sourceCam).Show()
diff --git a/deeplabcut/gui/multiple_individuals_labeling_toolbox.py b/deeplabcut/gui/multiple_individuals_labeling_toolbox.py
index 18e196ea9..3df4058ea 100755
--- a/deeplabcut/gui/multiple_individuals_labeling_toolbox.py
+++ b/deeplabcut/gui/multiple_individuals_labeling_toolbox.py
@@ -86,11 +86,13 @@ def retrieveData_and_computeEpLines(self, img, imNum):
         try:
             dataFrame = pd.read_hdf(
-                os.path.join(sourceCam_path, "CollectedData_" + scorer + ".h5"),
+                os.path.join(sourceCam_path, "CollectedData_" + scorer + ".h5")
             )
             dataFrame.sort_index(inplace=True)
         except IOError:
-            print("source camera images have not yet been labeled, or you have opened this folder in the wrong mode!")
+            print(
+                "source camera images have not yet been labeled, or you have opened this folder in the wrong mode!"
+            )
             return None, None, None
 
         # Find offset terms for drawing epipolar Lines
@@ -283,7 +285,7 @@ def clearBoxer(self):
 class MainFrame(BaseFrame):
     def __init__(self, parent, config, config3d, sourceCam):
         super(MainFrame, self).__init__(
-            "DeepLabCut2.0 - Multiple Individuals Labeling ToolBox", parent,
+            "DeepLabCut2.0 - Multiple Individuals Labeling ToolBox", parent
         )
 
         self.statusbar.SetStatusText(
@@ -415,9 +417,7 @@ def OnKeyPressed(self, event=None):
             inv = self.axes.transData.inverted()
             pos_rel = list(inv.transform(pos_abs))
             y1, y2 = self.axes.get_ylim()
-            pos_rel[1] = (
-                y1 - pos_rel[1] + y2
-            )  # Recall y-axis is inverted
+            pos_rel[1] = y1 - pos_rel[1] + y2  # Recall y-axis is inverted
             i = np.nanargmin(
                 [self.calc_distance(*dp.point.center, *pos_rel) for dp in self.drs]
             )
@@ -789,7 +789,7 @@ def browseDir(self, event):
         # Reading the existing dataset,if already present
         try:
             self.dataFrame = pd.read_hdf(
-                os.path.join(self.dir, "CollectedData_" + self.scorer + ".h5"),
+                os.path.join(self.dir, "CollectedData_" + self.scorer + ".h5")
             )
             # Handle data previously labeled on a different platform
             sep = "/" if "/" in self.dataFrame.index[0] else "\\"
@@ -1205,9 +1205,7 @@ def plot(self, img):
                     )
                     self.axes.add_patch(circle)
                     self.dr = auxfun_drag.DraggablePoint(
-                        circle,
-                        self.uniquebodyparts[c],
-                        individual_names=ind,
+                        circle, self.uniquebodyparts[c], individual_names=ind
                     )
                     self.dr.connect()
                     self.dr.coords = image_points
@@ -1243,9 +1241,7 @@ def plot(self, img):
                     )
                     self.axes.add_patch(circle)
                     self.dr = auxfun_drag.DraggablePoint(
-                        circle,
-                        self.multibodyparts[c],
-                        individual_names=ind,
+                        circle, self.multibodyparts[c], individual_names=ind
                     )
                     self.dr.connect()
                     self.dr.coords = image_points
diff --git a/deeplabcut/gui/multiple_individuals_refinement_toolbox.py b/deeplabcut/gui/multiple_individuals_refinement_toolbox.py
index 5c4ec2111..225385672 100644
--- a/deeplabcut/gui/multiple_individuals_refinement_toolbox.py
+++ b/deeplabcut/gui/multiple_individuals_refinement_toolbox.py
@@ -75,7 +75,7 @@ def drawplot(
         self.axes.set_xlim(xlim)
         self.axes.set_ylim(ylim)
         self.figure.canvas.draw()
-        if not hasattr(self, 'toolbar'):
+        if not hasattr(self, "toolbar"):
             self.toolbar = NavigationToolbar(self.canvas)
         return (self.figure, self.axes, self.canvas, self.toolbar, self.ax)
 
@@ -137,9 +137,7 @@ def clearBoxer(self):
 class MainFrame(BaseFrame):
     def __init__(self, parent, config):
-        super(MainFrame, self).__init__(
-            "DeepLabCut - Refinement ToolBox", parent,
-        )
+        super(MainFrame, self).__init__("DeepLabCut - Refinement ToolBox", parent)
         self.Bind(wx.EVT_CHAR_HOOK, self.OnKeyPressed)
 ###################################################################################################################################################
@@ -281,9 +279,7 @@ def OnKeyPressed(self, event=None):
             inv = self.axes.transData.inverted()
             pos_rel = list(inv.transform(pos_abs))
             y1, y2 = self.axes.get_ylim()
-            pos_rel[1] = (
-                y1 - pos_rel[1] + y2
-            )  # Recall y-axis is inverted
+            pos_rel[1] = y1 - pos_rel[1] + y2  # Recall y-axis is inverted
             i = np.nanargmin(
                 [self.calc_distance(*dp.point.center, *pos_rel) for dp in self.drs]
             )
@@ -810,7 +806,7 @@ def saveDataSet(self, event):
                 "A training dataset file is already found for this video. The refined machine labels are merged to this data!"
             )
             DataU1 = pd.read_hdf(
-                os.path.join(self.dir, "CollectedData_" + self.humanscorer + ".h5"),
+                os.path.join(self.dir, "CollectedData_" + self.humanscorer + ".h5")
             )
             # combine datasets Original Col. + corrected machinefiles:
             DataCombined = pd.concat([self.Dataframe, DataU1])
diff --git a/deeplabcut/gui/outlier_frame_extraction_toolbox.py b/deeplabcut/gui/outlier_frame_extraction_toolbox.py
index 7c0d8e5de..61753b011 100644
--- a/deeplabcut/gui/outlier_frame_extraction_toolbox.py
+++ b/deeplabcut/gui/outlier_frame_extraction_toolbox.py
@@ -80,7 +80,7 @@ def __init__(
         self, parent, config, video, shuffle, Dataframe, savelabeled, multianimal
     ):
         super(MainFrame, self).__init__(
-            "DeepLabCut2.0 - Manual Outlier Frame Extraction", parent,
+            "DeepLabCut2.0 - Manual Outlier Frame Extraction", parent
         )
 ###################################################################################################################################################
diff --git a/deeplabcut/gui/refine_labels.py b/deeplabcut/gui/refine_labels.py
index bd27bafe5..54a815997 100644
--- a/deeplabcut/gui/refine_labels.py
+++ b/deeplabcut/gui/refine_labels.py
@@ -54,9 +54,7 @@ def refine_labels(config, multianimal=False):
         refinement.show(config)
     else:
         # loading multianimal labeling GUI
-        from deeplabcut.gui import (
-            multiple_individuals_refinement_toolbox,
-        )
+        from deeplabcut.gui import multiple_individuals_refinement_toolbox
 
         multiple_individuals_refinement_toolbox.show(config)
diff --git a/deeplabcut/gui/refine_tracklets.py b/deeplabcut/gui/refine_tracklets.py
index ab2690a25..f9311ddce 100644
--- a/deeplabcut/gui/refine_tracklets.py
+++ b/deeplabcut/gui/refine_tracklets.py
@@ -185,7 +185,6 @@ def __init__(self, parent, gui_size, cfg):
             hbox2, pos=(7, 0), flag=wx.EXPAND | wx.TOP | wx.LEFT | wx.RIGHT, border=10
         )
 
-
         self.inf_cfg_text = wx.Button(self, label="Edit inference_config.yaml")
         sizer.Add(self.inf_cfg_text, pos=(10, 1), flag=wx.BOTTOM | wx.RIGHT, border=10)
         self.inf_cfg_text.Bind(wx.EVT_BUTTON, self.edit_inf_config)
@@ -202,7 +201,9 @@ def __init__(self, parent, gui_size, cfg):
         sizer.Add(self.reset, pos=(8, 1), flag=wx.BOTTOM | wx.RIGHT, border=10)
         self.reset.Bind(wx.EVT_BUTTON, self.reset_refine_tracklets)
 
-        self.filter = wx.Button(self, label=" Step2: Filter Tracks (then you also get a CSV file!)")
+        self.filter = wx.Button(
+            self, label=" Step2: Filter Tracks (then you also get a CSV file!)"
+        )
         sizer.Add(self.filter, pos=(8, 3), flag=wx.BOTTOM | wx.RIGHT, border=10)
         self.filter.Bind(wx.EVT_BUTTON, self.filter_after_refinement)
diff --git a/deeplabcut/gui/refinement.py b/deeplabcut/gui/refinement.py
index b72a27d32..304c81bac 100644
--- a/deeplabcut/gui/refinement.py
+++ b/deeplabcut/gui/refinement.py
@@ -93,7 +93,7 @@ def drawplot(
         self.axes.set_xlim(xlim)
         self.axes.set_ylim(ylim)
         self.figure.canvas.draw()
-        if not hasattr(self, 'toolbar'):
+        if not hasattr(self, "toolbar"):
             self.toolbar = NavigationToolbar(self.canvas)
         return (self.figure, self.axes, self.canvas, self.toolbar)
 
@@ -145,9 +145,7 @@ def clearBoxer(self):
 class MainFrame(BaseFrame):
     def __init__(self, parent, config):
-        super(MainFrame, self).__init__(
-            "DeepLabCut2.0 - Refinement ToolBox", parent,
-        )
+        super(MainFrame, self).__init__("DeepLabCut2.0 - Refinement ToolBox", parent)
         self.Bind(wx.EVT_CHAR_HOOK, self.OnKeyPressed)
 ###################################################################################################################################################
@@ -279,9 +277,7 @@ def OnKeyPressed(self, event=None):
             inv = self.axes.transData.inverted()
             pos_rel = list(inv.transform(pos_abs))
             y1, y2 = self.axes.get_ylim()
-            pos_rel[1] = (
-                y1 - pos_rel[1] + y2
-            )  # Recall y-axis is inverted
+            pos_rel[1] = y1 - pos_rel[1] + y2  # Recall y-axis is inverted
             i = np.nanargmin(
                 [self.calc_distance(*dp.point.center, *pos_rel) for dp in self.drs]
             )
@@ -739,7 +735,7 @@ def saveDataSet(self, event):
                 "A training dataset file is already found for this video. The refined machine labels are merged to this data!"
             )
             DataU1 = pd.read_hdf(
-                os.path.join(self.dir, "CollectedData_" + self.humanscorer + ".h5"),
+                os.path.join(self.dir, "CollectedData_" + self.humanscorer + ".h5")
            )
             # combine datasets Original Col. + corrected machinefiles:
             DataCombined = pd.concat([self.Dataframe, DataU1])
@@ -886,9 +882,7 @@ def plot(self, im):
                 self.axes.add_patch(circle[0])
                 self.dr = auxfun_drag.DraggablePoint(
-                    circle[0],
-                    bp,
-                    likelihood=self.likelihood
+                    circle[0], bp, likelihood=self.likelihood
                 )
                 self.dr.connect()
                 self.dr.coords = MainFrame.getLabels(self, self.iter)[bpindex]
diff --git a/deeplabcut/gui/select_crop_parameters.py b/deeplabcut/gui/select_crop_parameters.py
index 463379e2d..1c7d23b4c 100644
--- a/deeplabcut/gui/select_crop_parameters.py
+++ b/deeplabcut/gui/select_crop_parameters.py
@@ -19,7 +19,7 @@ class MainFrame(BaseFrame):
     def __init__(self, parent, config, image):
         super(MainFrame, self).__init__(
-            "DeepLabCut2.0 - Select Crop Parameters", parent,
+            "DeepLabCut2.0 - Select Crop Parameters", parent
         )
 ###################################################################################################################################################
diff --git a/deeplabcut/gui/tracklet_toolbox.py b/deeplabcut/gui/tracklet_toolbox.py
index dc5e82260..8b67360cc 100644
--- a/deeplabcut/gui/tracklet_toolbox.py
+++ b/deeplabcut/gui/tracklet_toolbox.py
@@ -307,9 +307,7 @@ def toggle_draggable_points(self, *args):
     def add_point(self, center, animal, bodypart, **kwargs):
         circle = patches.Circle(center, **kwargs)
         self.ax1.add_patch(circle)
-        dp = auxfun_drag.DraggablePoint(
-            circle, bodypart, animal,
-        )
+        dp = auxfun_drag.DraggablePoint(circle, bodypart, animal)
         dp.connect()
         self.dps.append(dp)
diff --git a/deeplabcut/gui/video_editing.py b/deeplabcut/gui/video_editing.py
index 614d629eb..22f7ba29b 100644
--- a/deeplabcut/gui/video_editing.py
+++ b/deeplabcut/gui/video_editing.py
@@ -103,7 +103,7 @@ def __init__(self, parent, gui_size, cfg):
             self,
             label="Downsample: rotate video?",
             choices=["Yes", "No", "Arbitrary"],
-            #majorDimension=0,
+            # majorDimension=0,
             style=wx.RA_SPECIFY_COLS,
         )
         self.rotate.SetSelection(1)
@@ -120,7 +120,9 @@ def __init__(self, parent, gui_size, cfg):
         )
         angle = wx.StaticBox(self, label="Angle for arbitrary rotation (deg)")
         vangle_boxsizer = wx.StaticBoxSizer(angle, wx.VERTICAL)
-        self.vangle = FS.FloatSpin(self, value="0.0", min_val=-360.0, max_val=360.0, digits=2)
+        self.vangle = FS.FloatSpin(
+            self, value="0.0", min_val=-360.0, max_val=360.0, digits=2
+        )
         vangle_boxsizer.Add(self.vangle, 1, wx.EXPAND | wx.TOP | wx.BOTTOM, 10)
 
         video_start = wx.StaticBox(self, label="Shorten: start time (sec)")
@@ -155,8 +157,7 @@ def __init__(self, parent, gui_size, cfg):
         self.ok.Bind(wx.EVT_BUTTON, self.crop_video)
 
         self.reset = wx.Button(self, label="Reset")
-        self.sizer.Add(
-            self.reset, pos=(6, 0), flag=wx.LEFT, border=10)
+        self.sizer.Add(self.reset, pos=(6, 0), flag=wx.LEFT, border=10)
         self.reset.Bind(wx.EVT_BUTTON, self.reset_edit_videos)
 
         self.sizer.AddGrowableCol(3)
diff --git a/deeplabcut/gui/welcome.py b/deeplabcut/gui/welcome.py
index 4ce62dfbc..30c8699eb 100644
--- a/deeplabcut/gui/welcome.py
+++ b/deeplabcut/gui/welcome.py
@@ -26,7 +26,9 @@ def __init__(self, parent, gui_size):
         ## design the panel
         sizer = wx.GridBagSizer(10, 7)
         # Add image of DLC
-        icon = wx.StaticBitmap(self, bitmap=wx.Bitmap(os.path.join(MEDIA_PATH, "dlc_1-01.png")))
+        icon = wx.StaticBitmap(
+            self, bitmap=wx.Bitmap(os.path.join(MEDIA_PATH, "dlc_1-01.png"))
+        )
         sizer.Add(icon, pos=(0, 0), span=(0, 8), flag=wx.EXPAND | wx.BOTTOM, border=10)
         line = wx.StaticLine(self)
         sizer.Add(line, pos=(1, 0), span=(1, 8), flag=wx.EXPAND | wx.BOTTOM, border=10)
diff --git a/deeplabcut/pose_estimation_3d/triangulation.py b/deeplabcut/pose_estimation_3d/triangulation.py
index b80bb68a7..7e18c6dec 100644
--- a/deeplabcut/pose_estimation_3d/triangulation.py
+++ b/deeplabcut/pose_estimation_3d/triangulation.py
@@ -203,7 +203,9 @@ def triangulate(
                 )
                 stereo_file = auxiliaryfunctions.read_pickle(path_stereo_file)
                 cam_pair = str(cam_names[0] + "-" + cam_names[1])
-                if_video_analyzed = False  # variable to keep track if the video was already analyzed
+                if_video_analyzed = (
+                    False
+                )  # variable to keep track if the video was already analyzed
                 # Check for the camera matrix
                 for k in metadata_["stereo_matrix"].keys():
                     if np.all(
diff --git a/deeplabcut/pose_estimation_tensorflow/dataset/factory.py b/deeplabcut/pose_estimation_tensorflow/dataset/factory.py
index f13d9b4d0..b9a61f478 100644
--- a/deeplabcut/pose_estimation_tensorflow/dataset/factory.py
+++ b/deeplabcut/pose_estimation_tensorflow/dataset/factory.py
@@ -15,7 +15,7 @@
 def create(cfg):
-    dataset_type = cfg['dataset_type']
+    dataset_type = cfg["dataset_type"]
     if dataset_type == "scalecrop":
         print("Starting with scalecrop pose-dataset loader.")
         from deeplabcut.pose_estimation_tensorflow.dataset.pose_dataset_scalecrop import (
diff --git a/deeplabcut/pose_estimation_tensorflow/dataset/pose_dataset_deterministic.py b/deeplabcut/pose_estimation_tensorflow/dataset/pose_dataset_deterministic.py
index 3b3d60387..ee36213d3 100644
--- a/deeplabcut/pose_estimation_tensorflow/dataset/pose_dataset_deterministic.py
+++ b/deeplabcut/pose_estimation_tensorflow/dataset/pose_dataset_deterministic.py
@@ -38,14 +38,16 @@ def __init__(self, cfg):
         self.cfg = cfg
         self.data = self.load_dataset()
         self.num_images = len(self.data)
-        if self.cfg['mirror']:
-            self.symmetric_joints = mirror_joints_map(cfg['all_joints'], cfg['num_joints'])
+        if self.cfg["mirror"]:
+            self.symmetric_joints = mirror_joints_map(
+                cfg["all_joints"], cfg["num_joints"]
+            )
         self.curr_img = 0
-        self.set_shuffle(cfg['shuffle'])
+        self.set_shuffle(cfg["shuffle"])
 
     def load_dataset(self):
         cfg = self.cfg
-        file_name = os.path.join(self.cfg['project_path'], cfg['dataset'])
+        file_name = os.path.join(self.cfg["project_path"], cfg["dataset"])
         # Load Matlab file dataset annotation
         mlab = sio.loadmat(file_name)
         self.raw_data = mlab
@@ -69,7 +71,7 @@ def load_dataset(self):
             joint_id = joints[:, 0]
             # make sure joint ids are 0-indexed
             if joint_id.size != 0:
-                assert (joint_id < cfg['num_joints']).any()
+                assert (joint_id < cfg["num_joints"]).any()
             joints[:, 0] = joint_id
             item.joints = [joints]
         else:
@@ -88,7 +90,7 @@ def set_test_mode(self, test_mode):
     def set_shuffle(self, shuffle):
         self.shuffle = shuffle
         if not shuffle:
-            assert not self.cfg['mirror']
+            assert not self.cfg["mirror"]
             self.image_indices = np.arange(self.num_images)
 
     def mirror_joint_coords(self, joints, image_width):
@@ -106,10 +108,10 @@ def mirror_joints(self, joints, symmetric_joints, image_width):
         return res
 
     def shuffle_images(self):
-        if self.cfg['deterministic']:
+        if self.cfg["deterministic"]:
             np.random.seed(42)
         num_images = self.num_images
-        if self.cfg['mirror']:
+        if self.cfg["mirror"]:
             image_indices = np.random.permutation(num_images * 2)
             self.mirrored = image_indices >= num_images
             image_indices[self.mirrored] = image_indices[self.mirrored] - num_images
@@ -119,7 +121,7 @@ def shuffle_images(self):
     def num_training_samples(self):
         num = self.num_images
-        if self.cfg['mirror']:
+        if self.cfg["mirror"]:
             num *= 2
         return num
 
@@ -131,7 +133,7 @@ def next_training_sample(self):
         self.curr_img = (self.curr_img + 1) % self.num_training_samples()
 
         imidx = self.image_indices[curr_img]
-        mirror = self.cfg['mirror'] and self.mirrored[curr_img]
+        mirror = self.cfg["mirror"] and self.mirrored[curr_img]
 
         return imidx, mirror
 
@@ -140,11 +142,11 @@ def get_training_sample(self, imidx):
 
     def get_scale(self):
         cfg = self.cfg
-        if cfg['deterministic']:
+        if cfg["deterministic"]:
             rand.seed(42)
-        scale = cfg['global_scale']
+        scale = cfg["global_scale"]
         if hasattr(cfg, "scale_jitter_lo") and hasattr(cfg, "scale_jitter_up"):
-            scale_jitter = rand.uniform(cfg['scale_jitter_lo'], cfg['scale_jitter_up'])
+            scale_jitter = rand.uniform(cfg["scale_jitter_lo"], cfg["scale_jitter_up"])
             scale *= scale_jitter
         return scale
 
@@ -164,11 +166,11 @@ def is_valid_size(self, image_size, scale):
         input_width = image_size[2] * scale
         input_height = image_size[1] * scale
         if (
-            input_height < self.cfg['min_input_size']
-            or input_width < self.cfg['min_input_size']
+            input_height < self.cfg["min_input_size"]
+            or input_width < self.cfg["min_input_size"]
         ):
             return False
-        if input_height * input_width > self.cfg['max_input_size'] ** 2:
+        if input_height * input_width > self.cfg["max_input_size"] ** 2:
             return False
 
         return True
@@ -181,13 +183,13 @@ def make_batch(self, data_item, scale, mirror):
 
         # print(im_file, os.getcwd())
         # print(self.cfg.project_path)
-        image = imread(os.path.join(self.cfg['project_path'], im_file), mode="RGB")
+        image = imread(os.path.join(self.cfg["project_path"], im_file), mode="RGB")
 
         if self.has_gt:
             joints = np.copy(data_item.joints)
 
-        if self.cfg['crop']:  # adapted cropping for DLC
-            if np.random.rand() < self.cfg['cropratio']:
+        if self.cfg["crop"]:  # adapted cropping for DLC
+            if np.random.rand() < self.cfg["cropratio"]:
                 # 1. get center of joints
                 j = np.random.randint(np.shape(joints)[1])  # pick a random joint
                 # draw random crop dimensions & subtract joint points
@@ -219,7 +221,7 @@ def make_batch(self, data_item, scale, mirror):
         batch = {Batch.inputs: img}
 
         if self.has_gt:
-            stride = self.cfg['stride']
+            stride = self.cfg["stride"]
 
             if mirror:
                 joints = [
@@ -259,16 +261,16 @@ def make_batch(self, data_item, scale, mirror):
         return batch
 
     def compute_target_part_scoremap(self, joint_id, coords, data_item, size, scale):
-        stride = self.cfg['stride']
-        dist_thresh = self.cfg['pos_dist_thresh'] * scale
-        num_joints = self.cfg['num_joints']
+        stride = self.cfg["stride"]
+        dist_thresh = self.cfg["pos_dist_thresh"] * scale
+        num_joints = self.cfg["num_joints"]
         half_stride = stride / 2
         scmap = np.zeros(cat([size, arr([num_joints])]))
         locref_size = cat([size, arr([num_joints * 2])])
         locref_mask = np.zeros(locref_size)
         locref_map = np.zeros(locref_size)
 
-        locref_scale = 1.0 / self.cfg['locref_stdev']
+        locref_scale = 1.0 / self.cfg["locref_stdev"]
         dist_thresh_sq = dist_thresh ** 2
 
         width = size[1]
@@ -312,7 +314,7 @@ def compute_target_part_scoremap(self, joint_id, coords, data_item, size, scale):
 
     def compute_scmap_weights(self, scmap_shape, joint_id, data_item):
         cfg = self.cfg
-        if cfg['weigh_only_present_joints']:
+        if cfg["weigh_only_present_joints"]:
             weights = np.zeros(scmap_shape)
             for person_joint_id in joint_id:
                 for j_id in person_joint_id:
diff --git a/deeplabcut/pose_estimation_tensorflow/dataset/pose_dataset_scalecrop.py b/deeplabcut/pose_estimation_tensorflow/dataset/pose_dataset_scalecrop.py
index c189127ec..78c9e1808 100755
--- a/deeplabcut/pose_estimation_tensorflow/dataset/pose_dataset_scalecrop.py
+++ b/deeplabcut/pose_estimation_tensorflow/dataset/pose_dataset_scalecrop.py
@@ -36,31 +36,33 @@ def __init__(self, cfg):
         self.num_images = len(self.data)
         self.max_input_sizesquare = cfg.get("max_input_size", 1500) ** 2
         self.min_input_sizesquare = cfg.get("min_input_size", 64) ** 2
-        self.locref_scale = 1.0 / cfg['locref_stdev']
-        self.stride = cfg['stride']
+        self.locref_scale = 1.0 / cfg["locref_stdev"]
+        self.stride = cfg["stride"]
         self.half_stride = self.stride / 2
-        self.scale = cfg['global_scale']
+        self.scale = cfg["global_scale"]
         self.scale_jitter_lo = cfg.get("scale_jitter_lo", 0.75)
         self.scale_jitter_up = cfg.get("scale_jitter_up", 1.25)
-        self.cfg['crop'] = cfg.get("crop", True)
-        self.cfg['cropratio'] = cfg.get("cropratio", 0.4)
+        self.cfg["crop"] = cfg.get("crop", True)
+        self.cfg["cropratio"] = cfg.get("cropratio", 0.4)
         # what is the minimal frames size for cropping plus/minus ie.. [-100,100]^2 for an arb. joint
-        self.cfg['minsize'] = cfg.get("minsize", 100)
-        self.cfg['leftwidth'] = cfg.get("leftwidth", 400)
-        self.cfg['rightwidth'] = cfg.get("rightwidth", 400)
-        self.cfg['topheight'] = cfg.get("topheight", 400)
-        self.cfg['bottomheight'] = cfg.get("bottomheight", 400)
-
-        if self.cfg['mirror']:
-            self.symmetric_joints = mirror_joints_map(cfg['all_joints'], cfg['num_joints'])
+        self.cfg["minsize"] = cfg.get("minsize", 100)
+        self.cfg["leftwidth"] = cfg.get("leftwidth", 400)
+        self.cfg["rightwidth"] = cfg.get("rightwidth", 400)
+        self.cfg["topheight"] = cfg.get("topheight", 400)
+        self.cfg["bottomheight"] = cfg.get("bottomheight", 400)
+
+        if self.cfg["mirror"]:
+            self.symmetric_joints = mirror_joints_map(
+                cfg["all_joints"], cfg["num_joints"]
+            )
         self.curr_img = 0
-        self.set_shuffle(cfg['shuffle'])
+        self.set_shuffle(cfg["shuffle"])
 
     def load_dataset(self):
         cfg = self.cfg
-        file_name = os.path.join(self.cfg['project_path'], cfg['dataset'])
+        file_name = os.path.join(self.cfg["project_path"], cfg["dataset"])
         # Load Matlab file dataset annotation
         mlab = sio.loadmat(file_name)
         self.raw_data = mlab
@@ -84,7 +86,7 @@ def load_dataset(self):
             joint_id = joints[:, 0]
             # make sure joint ids are 0-indexed
             if joint_id.size != 0:
-                assert (joint_id < cfg['num_joints']).any()
+                assert (joint_id < cfg["num_joints"]).any()
             joints[:, 0] = joint_id
             item.joints = [joints]
         else:
@@ -103,7 +105,7 @@ def set_test_mode(self, test_mode):
     def set_shuffle(self, shuffle):
         self.shuffle = shuffle
         if not shuffle:
-            assert not self.cfg['mirror']
+            assert not self.cfg["mirror"]
             self.image_indices = np.arange(self.num_images)
 
     def mirror_joint_coords(self, joints, image_width):
@@ -122,7 +124,7 @@ def mirror_joints(self, joints, symmetric_joints, image_width):
 
     def shuffle_images(self):
         num_images = self.num_images
-        if self.cfg['mirror']:
+        if self.cfg["mirror"]:
             image_indices = np.random.permutation(num_images * 2)
             self.mirrored = image_indices >= num_images
             image_indices[self.mirrored] = image_indices[self.mirrored] - num_images
@@ -132,7 +134,7 @@ def num_training_samples(self):
         num = self.num_images
-        if self.cfg['mirror']:
+        if self.cfg["mirror"]:
             num *= 2
         return num
 
@@ -144,7 +146,7 @@ def next_training_sample(self):
         self.curr_img = (self.curr_img + 1) % self.num_training_samples()
 
         imidx = self.image_indices[curr_img]
-        mirror = self.cfg['mirror'] and self.mirrored[curr_img]
+        mirror = self.cfg["mirror"] and self.mirrored[curr_img]
 
         return imidx, mirror
 
@@ -181,13 +183,13 @@ def make_batch(self, data_item, scale, mirror):
         im_file = data_item.im_path
         logging.debug("image %s", im_file)
         logging.debug("mirror %r", mirror)
-        image = imread(os.path.join(self.cfg['project_path'], im_file), mode="RGB")
+        image = imread(os.path.join(self.cfg["project_path"], im_file), mode="RGB")
 
         if self.has_gt:
             joints = np.copy(data_item.joints)
 
-        if self.cfg['crop']:  # adapted cropping for DLC
-            if np.random.rand() < self.cfg['cropratio']:
+        if self.cfg["crop"]:  # adapted cropping for DLC
+            if np.random.rand() < self.cfg["cropratio"]:
                 j = np.random.randint(np.shape(joints)[1])  # pick a random joint
                 joints, image = CropImage(
                     joints, image, joints[0, j, 1], joints[0, j, 2], self.cfg
                 )
@@ -211,7 +213,7 @@ def make_batch(self, data_item, scale, mirror):
         batch = {Batch.inputs: img}
 
         if self.has_gt:
-            stride = self.cfg['stride']
+            stride = self.cfg["stride"]
 
             if mirror:
                 joints = [
@@ -251,9 +253,9 @@ def make_batch(self, data_item, scale, mirror):
         return batch
 
     def compute_target_part_scoremap(self, joint_id, coords, data_item, size, scale):
-        dist_thresh = self.cfg['pos_dist_thresh'] * scale
+        dist_thresh = self.cfg["pos_dist_thresh"] * scale
         dist_thresh_sq = dist_thresh ** 2
-        num_joints = self.cfg['num_joints']
+        num_joints = self.cfg["num_joints"]
 
         scmap = np.zeros(cat([size, arr([num_joints])]))
         locref_size = cat([size, arr([num_joints * 2])])
@@ -299,7 +301,7 @@ def compute_target_part_scoremap(self, joint_id, coords, data_item, size, scale):
         return scmap, weights, locref_map, locref_mask
 
     def compute_scmap_weights(self, scmap_shape, joint_id, data_item):
-        if self.cfg['weigh_only_present_joints']:
+        if self.cfg["weigh_only_present_joints"]:
             weights = np.zeros(scmap_shape)
             for person_joint_id in joint_id:
                 for j_id in person_joint_id:
diff --git a/deeplabcut/pose_estimation_tensorflow/dataset/pose_dataset_tensorpack.py b/deeplabcut/pose_estimation_tensorflow/dataset/pose_dataset_tensorpack.py
index 4100d6be5..0b6d3566e 100644
--- a/deeplabcut/pose_estimation_tensorflow/dataset/pose_dataset_tensorpack.py
+++ b/deeplabcut/pose_estimation_tensorflow/dataset/pose_dataset_tensorpack.py
@@ -99,7 +99,7 @@ def __init__(self, cfg, shuffle=True, dir=None):
 
     def load_dataset(self):
         cfg = self.cfg
-        file_name = os.path.join(self.cfg['project_path'], cfg['dataset'])
+        file_name = os.path.join(self.cfg["project_path"], cfg["dataset"])
         # Load Matlab file dataset annotation
         mlab = sio.loadmat(file_name)
         self.raw_data = mlab
@@ -125,7 +125,7 @@ def load_dataset(self):
             joint_id = joints[:, 0]
             # make sure joint ids are 0-indexed
             if joint_id.size != 0:
-                assert (joint_id < cfg['num_joints']).any()
+                assert (joint_id < cfg["num_joints"]).any()
             joints[:, 0] = joint_id
             coords = [joint[1:] for joint in joints]
             coords = arr(coords)
@@ -276,7 +276,7 @@ def __init__(self, cfg):
         ]
         self.has_gt = True
 
-        self.set_shuffle(cfg['shuffle'])
+        self.set_shuffle(cfg["shuffle"])
         p = Pose(cfg=self.cfg, shuffle=self.shuffle)
         self.data = p.load_dataset()
         self.num_images = len(self.data)
@@ -335,9 +335,9 @@ def compute_target_part_scoremap(self, components):
         img_size = components[4]
         scale = components[5]
 
-        stride = self.cfg['stride']
-        dist_thresh = self.cfg['pos_dist_thresh'] * scale
-        num_joints = self.cfg['num_joints']
+        stride = self.cfg["stride"]
+        dist_thresh = self.cfg["pos_dist_thresh"] * scale
+        num_joints = self.cfg["num_joints"]
         half_stride = stride / 2
         size = np.ceil(arr(img_size) / (stride * 2)).astype(int) * 2
         scmap = np.zeros(np.append(size, num_joints))
@@ -345,7 +345,7 @@ def compute_target_part_scoremap(self, components):
         locref_mask = np.zeros(locref_size)
         locref_map = np.zeros(locref_size)
 
-        locref_scale = 1.0 / self.cfg['locref_stdev']
+        locref_scale = 1.0 / self.cfg["locref_stdev"]
         dist_thresh_sq = dist_thresh ** 2
 
         width = size[1]
@@ -393,12 +393,12 @@ def set_test_mode(self, test_mode):
     def set_shuffle(self, shuffle):
         self.shuffle = shuffle
         if not shuffle:
-            assert not self.cfg['mirror']
+            assert not self.cfg["mirror"]
             self.image_indices = np.arange(self.num_images)
 
     def shuffle_images(self):
         num_images = self.num_images
-        if self.cfg['mirror']:
+        if self.cfg["mirror"]:
             image_indices = np.random.permutation(num_images * 2)
             self.mirrored = image_indices >= num_images
             image_indices[self.mirrored] = image_indices[self.mirrored] - num_images
@@ -408,9 +408,9 @@ def shuffle_images(self):
 
     def get_scale(self):
         cfg = self.cfg
-        scale = cfg['global_scale']
+        scale = cfg["global_scale"]
         if hasattr(cfg, "scale_jitter_lo") and hasattr(cfg, "scale_jitter_up"):
-            scale_jitter = rand.uniform(cfg['scale_jitter_lo'], cfg['scale_jitter_up'])
+            scale_jitter = rand.uniform(cfg["scale_jitter_lo"], cfg["scale_jitter_up"])
             scale *= scale_jitter
         return scale
 
@@ -423,11 +423,11 @@ def is_valid_size(self, image_size, scale):
         input_width = image_size[2] * scale
         input_height = image_size[1] * scale
         if (
-            input_height < self.cfg['min_input_size']
-            or input_width < self.cfg['min_input_size']
+            input_height < self.cfg["min_input_size"]
+            or input_width < self.cfg["min_input_size"]
         ):
             return False
-        if input_height * input_width > self.cfg['max_input_size'] ** 2:
+        if input_height * input_width > self.cfg["max_input_size"] ** 2:
             return False
 
         return True
@@ -467,7 +467,7 @@ def make_batch(self, components):
 
     def compute_scmap_weights(self, scmap_shape, joint_id):
         cfg = self.cfg
-        if cfg['weigh_only_present_joints']:
+        if cfg["weigh_only_present_joints"]:
             weights = np.zeros(scmap_shape)
             for person_joint_id in joint_id:
                 for j_id in person_joint_id:
diff --git a/deeplabcut/pose_estimation_tensorflow/dataset/pose_multianimal_imgaug.py b/deeplabcut/pose_estimation_tensorflow/dataset/pose_multianimal_imgaug.py
index 6cb6b3ca8..6cd28591d 100644
--- a/deeplabcut/pose_estimation_tensorflow/dataset/pose_multianimal_imgaug.py
+++ b/deeplabcut/pose_estimation_tensorflow/dataset/pose_multianimal_imgaug.py
@@ -33,13 +33,13 @@ def __init__(self, cfg):
         self.cfg = cfg
         self.data = self.load_dataset()
         self.num_images = len(self.data)
-        self.batch_size = cfg['batch_size']
+        self.batch_size = cfg["batch_size"]
         print("Batch Size is %d" % self.batch_size)
 
     def load_dataset(self):
         cfg = self.cfg
-        file_name = os.path.join(self.cfg['project_path'], cfg['dataset'])
-        with open(os.path.join(self.cfg['project_path'], file_name), "rb") as f:
+        file_name = os.path.join(self.cfg["project_path"], cfg["dataset"])
+        with open(os.path.join(self.cfg["project_path"], file_name), "rb") as f:
             # Pickle the 'data' dictionary using the highest protocol available.
             pickledata = pickle.load(f)
@@ -127,7 +127,8 @@ def build_augmentation_pipeline(self, height=None, width=None, apply_prob=0.5):
         if height is not None and width is not None:
             pipeline.add(
                 iaa.Sometimes(
-                    cfg['cropratio'], iaa.CropAndPad(percent=(-0.3, 0.1), keep_size=False)
+                    cfg["cropratio"],
+                    iaa.CropAndPad(percent=(-0.3, 0.1), keep_size=False),
                 )
             )
             pipeline.add(iaa.Resize({"height": height, "width": width}))
@@ -150,7 +151,7 @@ def get_batch(self):
             if self.is_valid_size(target_size):
                 break
 
-        stride = self.cfg['stride']
+        stride = self.cfg["stride"]
         for i in range(self.batch_size):
             data_item = self.data[img_idx[i]]
@@ -158,7 +159,7 @@ def get_batch(self):
             im_file = data_item.im_path
             logging.debug("image %s", im_file)
-            image = imread(os.path.join(self.cfg['project_path'], im_file), mode="RGB")
+            image = imread(os.path.join(self.cfg["project_path"], im_file), mode="RGB")
             if self.has_gt:
                 Joints = data_item.joints
                 joint_id = [
@@ -262,7 +263,9 @@ def next_batch(self, plotting=False):
                     )
                     im = kps.draw_on_image(batch_images[i])
                     # imageio.imwrite(data_items[i].im_path.split('/')[-1],im)
-                    imageio.imwrite(os.path.join(self.cfg['project_path'], str(i) + ".png"), im)
+                    imageio.imwrite(
+                        os.path.join(self.cfg["project_path"], str(i) + ".png"), im
+                    )
 
             image_shape = arr(batch_images).shape[1:3]
             batch = {Batch.inputs: arr(batch_images).astype(np.float64)}
@@ -284,15 +287,15 @@ def set_test_mode(self, test_mode):
 
     def num_training_samples(self):
         num = self.num_images
-        if self.cfg['mirror']:
+        if self.cfg["mirror"]:
             num *= 2
         return num
 
     def get_scale(self):
         cfg = self.cfg
-        scale = cfg['global_scale']
+        scale = cfg["global_scale"]
         if hasattr(cfg, "scale_jitter_lo") and hasattr(cfg, "scale_jitter_up"):
-            scale_jitter = rand.uniform(cfg['scale_jitter_lo'], cfg['scale_jitter_up'])
+            scale_jitter = rand.uniform(cfg["scale_jitter_lo"], cfg["scale_jitter_up"])
             scale *= scale_jitter
         return scale
 
@@ -303,14 +306,14 @@ def is_valid_size(self, target_size):
         if im_height < min_input_size or im_width < min_input_size:
             return False
         if hasattr(self.cfg, "max_input_size"):
-            max_input_size = self.cfg['max_input_size']
+            max_input_size = self.cfg["max_input_size"]
             if im_width * im_height > max_input_size * max_input_size:
                 return False
         return True
 
     def compute_scmap_weights(self, scmap_shape, joint_id, data_item):
         cfg = self.cfg
-        if cfg['weigh_only_present_joints']:
+        if cfg["weigh_only_present_joints"]:
             weights = np.zeros(scmap_shape)
             for k, j_id in enumerate(
                 np.concatenate(joint_id)
@@ -323,23 +326,23 @@ def compute_scmap_weights(self, scmap_shape, joint_id, data_item):
     def compute_target_part_scoremap_numpy(
         self, joint_id, coords, data_item, size, scale
     ):
-        stride = self.cfg['stride']
-        dist_thresh = float(self.cfg['pos_dist_thresh'] * scale)
+        stride = self.cfg["stride"]
+        dist_thresh = float(self.cfg["pos_dist_thresh"] * scale)
         num_idchannel = self.cfg.get("num_idchannel", 0)
-        num_joints = self.cfg['num_joints']
+        num_joints = self.cfg["num_joints"]
         half_stride = stride / 2
         scmap = np.zeros(cat([size, arr([num_joints + num_idchannel])]))
         locref_size = cat([size, arr([num_joints * 2])])
         locref_map = np.zeros(locref_size)
 
-        locref_scale = 1.0 / self.cfg['locref_stdev']
+        locref_scale = 1.0 / self.cfg["locref_stdev"]
         dist_thresh_sq = dist_thresh ** 2
 
-        partaffinityfield_shape = cat([size, arr([self.cfg['num_limbs'] * 2])])
+        partaffinityfield_shape = cat([size, arr([self.cfg["num_limbs"] * 2])])
         partaffinityfield_map = np.zeros(partaffinityfield_shape)
-        if self.cfg['weigh_only_present_joints']:
+        if self.cfg["weigh_only_present_joints"]:
             partaffinityfield_mask = np.zeros(partaffinityfield_shape)
             locref_mask = np.zeros(locref_size)
         else:
@@ -373,7 +376,7 @@ def compute_target_part_scoremap_numpy(
                     mask3 = (y >= min_y) & (y <= max_y)
                     mask = mask1 & mask2 & mask3
                     scmap[mask, j_id] = 1
-                    if self.cfg['weigh_only_present_joints']:
+                    if self.cfg["weigh_only_present_joints"]:
                         locref_mask[mask, j_id * 2 + 0] = 1.0
                         locref_mask[mask, j_id * 2 + 1] = 1.0
                     locref_map[mask, j_id * 2 + 0] = (dx * locref_scale)[mask]
@@ -420,8 +423,8 @@ def compute_target_part_scoremap_numpy(
             joint_ids = joint_id[person_id].copy()
             if len(joint_ids) > 1:  # otherwise there cannot be a joint!
                 # CONSIDER SMARTER SEARCHES here... (i.e. calculate the bpts beforehand?)
-                for l in range(self.cfg['num_limbs']):
-                    bp1, bp2 = self.cfg['partaffinityfield_graph'][l]
+                for l in range(self.cfg["num_limbs"]):
+                    bp1, bp2 = self.cfg["partaffinityfield_graph"][l]
                     I1 = np.where(np.array(joint_ids) == bp1)[0]
                     I2 = np.where(np.array(joint_ids) == bp2)[0]
                     if (len(I1) > 0) * (len(I2) > 0):
@@ -460,7 +463,7 @@ def compute_target_part_scoremap_numpy(
                                     - d2mid
                                 )
                                 * 1.0
-                                / self.cfg['pafwidth']
+                                / self.cfg["pafwidth"]
                                 * scale
                             )
@@ -482,7 +485,7 @@ def compute_target_part_scoremap_numpy(
                             # mask3 = ((x >= 0) & (x <= width-1))
                             # mask4 = ((y >= 0) & (y <= height-1))
                             mask = mask1 & mask2  # &mask3 &mask4
-                            if self.cfg['weigh_only_present_joints']:
+                            if self.cfg["weigh_only_present_joints"]:
                                 partaffinityfield_mask[mask, l * 2 + 0] = 1.0
                                 partaffinityfield_mask[mask, l * 2 + 1] = 1.0
@@ -553,23 +556,23 @@ def compute_target_part_scoremap_numpy(
 
     def gaussian_scmap(self, joint_id, coords, data_item, size, scale):
         # WIP!
-        stride = self.cfg['stride']
-        dist_thresh = float(self.cfg['pos_dist_thresh'] * scale)
+        stride = self.cfg["stride"]
+        dist_thresh = float(self.cfg["pos_dist_thresh"] * scale)
         num_idchannel = self.cfg.get("num_idchannel", 0)
-        num_joints = self.cfg['num_joints']
+        num_joints = self.cfg["num_joints"]
         half_stride = stride / 2
         scmap = np.zeros(cat([size, arr([num_joints])]))
         locref_size = cat([size, arr([num_joints * 2])])
         locref_mask = np.zeros(locref_size)
         locref_map = np.zeros(locref_size)
-        locref_scale = 1.0 / self.cfg['locref_stdev']
+        locref_scale = 1.0 / self.cfg["locref_stdev"]
         dist_thresh_sq = dist_thresh ** 2
-        partaffinityfield_shape = cat([size, arr([self.cfg['num_limbs'] * 2])])
+        partaffinityfield_shape = cat([size, arr([self.cfg["num_limbs"] * 2])])
         partaffinityfield_map = np.zeros(partaffinityfield_shape)
-        if self.cfg['weigh_only_present_joints']:
+        if self.cfg["weigh_only_present_joints"]:
             partaffinityfield_mask = np.zeros(partaffinityfield_shape)
             locref_mask = np.zeros(locref_size)
         else:
@@ -614,8 +617,8 @@ def gaussian_scmap(self, joint_id, coords, data_item, size, scale):
             joint_ids = joint_id[person_id].copy()
             if len(joint_ids) > 1:  # otherwise there cannot be a joint!
                 # CONSIDER SMARTER SEARCHES here... (i.e. calculate the bpts beforehand?)
-                for l in range(self.cfg['num_limbs']):
-                    bp1, bp2 = self.cfg['partaffinityfield_graph'][l]
+                for l in range(self.cfg["num_limbs"]):
+                    bp1, bp2 = self.cfg["partaffinityfield_graph"][l]
                     I1 = np.where(np.array(joint_ids) == bp1)[0]
                     I2 = np.where(np.array(joint_ids) == bp2)[0]
                     if (len(I1) > 0) * (len(I2) > 0):
@@ -654,7 +657,7 @@ def gaussian_scmap(self, joint_id, coords, data_item, size, scale):
                                     - d2mid
                                 )
                                 * 1.0
-                                / self.cfg['pafwidth']
+                                / self.cfg["pafwidth"]
                                 * scale
                             )
                             mask1 = (distance_along >= d1lowerboundary) & (
@@ -664,7 +667,7 @@ def gaussian_scmap(self, joint_id, coords, data_item, size, scale):
                             # mask3 = ((x >= 0) & (x <= width-1))
                             # mask4 = ((y >= 0) & (y <= height-1))
                             mask = mask1 & mask2  # &mask3 &mask4
-                            if self.cfg['weigh_only_present_joints']:
+                            if self.cfg["weigh_only_present_joints"]:
                                 partaffinityfield_mask[mask, l * 2 + 0] = 1.0
                                 partaffinityfield_mask[mask, l * 2 + 1] = 1.0
diff --git a/deeplabcut/pose_estimation_tensorflow/default_config.py b/deeplabcut/pose_estimation_tensorflow/default_config.py
index b8e973d54..129e0b2c5 100644
--- a/deeplabcut/pose_estimation_tensorflow/default_config.py
+++ b/deeplabcut/pose_estimation_tensorflow/default_config.py
@@ -13,39 +13,39 @@
 cfg = dict()
-cfg['stride'] = 8.0
-cfg['weigh_part_predictions'] = False
-cfg['weigh_negatives'] = False
-cfg['fg_fraction'] = 0.25
+cfg["stride"] = 8.0
+cfg["weigh_part_predictions"] = False
+cfg["weigh_negatives"] = False
+cfg["fg_fraction"] = 0.25
 # imagenet mean for resnet pretraining:
-cfg['mean_pixel'] = [123.68, 116.779, 103.939]
-cfg['shuffle'] = True
-cfg['snapshot_prefix'] = "./snapshot"
-cfg['log_dir'] = "log"
-cfg['global_scale'] = 1.0
-cfg['location_refinement'] = False
-cfg['locref_stdev'] = 7.2801
-cfg['locref_loss_weight'] = 1.0
-cfg['locref_huber_loss'] = True
-cfg['optimizer'] = "sgd"
-cfg['intermediate_supervision'] = False
-cfg['intermediate_supervision_layer'] = 12
-cfg['regularize'] = False
-cfg['weight_decay'] = 0.0001
-cfg['crop_pad'] = 0
-cfg['scoremap_dir'] = "test"
-
-cfg['batch_size'] = 1
+cfg["mean_pixel"] = [123.68, 116.779, 103.939]
+cfg["shuffle"] = True
+cfg["snapshot_prefix"] = "./snapshot"
+cfg["log_dir"] = "log"
+cfg["global_scale"] = 1.0
+cfg["location_refinement"] = False
+cfg["locref_stdev"] = 7.2801
+cfg["locref_loss_weight"] = 1.0
+cfg["locref_huber_loss"] = True
+cfg["optimizer"] = "sgd"
+cfg["intermediate_supervision"] = False
+cfg["intermediate_supervision_layer"] = 12
+cfg["regularize"] = False
+cfg["weight_decay"] = 0.0001
+cfg["crop_pad"] = 0
+cfg["scoremap_dir"] = "test"
+
+cfg["batch_size"] = 1
 # types of datasets, see factory: deeplabcut/pose_estimation_tensorflow/dataset/factory.py
-cfg['dataset_type'] = "imgaug"  # >> imagaug default as of 2.2
+cfg["dataset_type"] = "imgaug"  # >> imgaug default as of 2.2
 # you can also set this to deterministic, see https://github.com/AlexEMG/DeepLabCut/pull/324
-cfg['deterministic'] = False
-cfg['mirror'] = False
+cfg["deterministic"] = False
+cfg["mirror"] = False
 # for DLC 2.2. (here all set False to not use PAFs/pairwise fields)
-cfg['pairwise_huber_loss'] = True
-cfg['weigh_only_present_joints'] = False
-cfg['partaffinityfield_predict'] = False
-cfg['pairwise_predict'] = False
+cfg["pairwise_huber_loss"] = True
+cfg["weigh_only_present_joints"] = False
+cfg["partaffinityfield_predict"] = False
+cfg["pairwise_predict"] = False
diff --git a/deeplabcut/pose_estimation_tensorflow/evaluate.py b/deeplabcut/pose_estimation_tensorflow/evaluate.py
index d3b8fbb87..8560a1b9f 100644
--- a/deeplabcut/pose_estimation_tensorflow/evaluate.py
+++ b/deeplabcut/pose_estimation_tensorflow/evaluate.py
@@ -114,9 +114,9 @@ def calculatepafdistancebounds(
             dlc_cfg = load_config(str(path_test_config))
 
             # get the graph!
-            partaffinityfield_graph = dlc_cfg['partaffinityfield_graph']
+            partaffinityfield_graph = dlc_cfg["partaffinityfield_graph"]
             jointnames = [
-                dlc_cfg['all_joints_names'][i] for i in range(len(dlc_cfg['all_joints']))
+                dlc_cfg["all_joints_names"][i] for i in range(len(dlc_cfg["all_joints"]))
             ]
             path_inferencebounds_config = (
                 Path(modelfolder) / "test" / "inferencebounds.yaml"
             )
@@ -134,10 +134,13 @@ def calculatepafdistancebounds(
                         j2,
                         "y",
                     ) in Data.keys():
-                        distances = np.sqrt(
-                            (Data[ind, j1, "x"] - Data[ind2, j2, "x"]) ** 2
-                            + (Data[ind, j1, "y"] - Data[ind2, j2, "y"]) ** 2
-                        ) / dlc_cfg["stride"]
+                        distances = (
+                            np.sqrt(
+                                (Data[ind, j1, "x"] - Data[ind2, j2, "x"]) ** 2
+                                + (Data[ind, j1, "y"] - Data[ind2, j2, "y"]) ** 2
+                            )
+                            / dlc_cfg["stride"]
+                        )
                     else:
                         distances = None
 
@@ -312,7 +315,7 @@ def return_evaluate_network_data(
                 cfg["project_path"],
                 str(trainingsetfolder),
                 "CollectedData_" + cfg["scorer"] + ".h5",
-            ),
+            )
         )
         * scale
     )
@@ -323,7 +326,7 @@ def return_evaluate_network_data(
             cfg["project_path"],
             str(trainingsetfolder),
            "CollectedData_" + cfg["scorer"] + ".h5",
-        ),
+        )
     )
 
     evaluationfolder = os.path.join(
@@ -614,7 +617,7 @@ def evaluate_network(
             cfg["project_path"],
             str(trainingsetfolder),
             "CollectedData_" + cfg["scorer"] + ".h5",
-        ),
+        )
     )
 
     # Get list of body parts to evaluate network for
@@ -719,7 +722,7 @@ def evaluate_network(
                         cfg["project_path"],
                         str(trainingsetfolder),
                         "CollectedData_" + cfg["scorer"] + ".h5",
-                    ),
+                    )
                 )
                 * scale
            )
@@ -789,7 +792,7 @@ def evaluate_network(
                         # Extract maximum scoring location from the heatmap, assume 1 person
                         pose = predict.argmax_pose_predict(
-                            scmap, locref, dlc_cfg['stride']
+                            scmap, locref, dlc_cfg["stride"]
                         )
                         PredicteData[
                             imageindex, :
                         ]
@@ -972,15 +975,18 @@ def make_results_file(final_result, evaluationfolder, DLCscorer):
     df.to_csv(output_path)
 
     ## Also storing one "large" table with results:
-    #note: evaluationfolder.parents[0] to get common folder above all shuffle evaluations.
+    # note: evaluationfolder.parents[0] to get common folder above all shuffle evaluations.
     df = pd.DataFrame(final_result, columns=col_names)
-    output_path = os.path.join(str(Path(evaluationfolder).parents[0]), "CombinedEvaluation-results.csv")
+    output_path = os.path.join(
+        str(Path(evaluationfolder).parents[0]), "CombinedEvaluation-results.csv"
+    )
     if os.path.exists(output_path):
         temp = pd.read_csv(output_path, index_col=0)
         df = pd.concat((df, temp)).reset_index(drop=True)
     df.to_csv(output_path)
 
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument("config")
diff --git a/deeplabcut/pose_estimation_tensorflow/evaluate_multianimal.py b/deeplabcut/pose_estimation_tensorflow/evaluate_multianimal.py
index 71ce49ce4..78e5fd136 100755
--- a/deeplabcut/pose_estimation_tensorflow/evaluate_multianimal.py
+++ b/deeplabcut/pose_estimation_tensorflow/evaluate_multianimal.py
@@ -68,12 +68,12 @@ def _find_closest_neighbors(xy_true, xy_pred, k=5):
 
 def _calc_prediction_error(data):
-    _ = data.pop('metadata', None)
+    _ = data.pop("metadata", None)
     dists = []
     for n, dict_ in enumerate(tqdm(data.values())):
-        gt = np.concatenate(dict_['groundtruth'][1])
-        xy = np.concatenate(dict_['prediction']['coordinates'][0])
-        p = np.concatenate(dict_['prediction']['confidence'])
+        gt = np.concatenate(dict_["groundtruth"][1])
+        xy = np.concatenate(dict_["prediction"]["coordinates"][0])
+        p = np.concatenate(dict_["prediction"]["confidence"])
         neighbors = _find_closest_neighbors(gt, xy)
         found = neighbors != -1
         gt2 = gt[found]
@@ -83,7 +83,7 @@ def _calc_prediction_error(data):
 
 def _calc_train_test_error(data, metadata, pcutoff=0.3):
-    train_inds = set(metadata['data']['trainIndices'])
+    train_inds = set(metadata["data"]["trainIndices"])
     dists = _calc_prediction_error(data)
     dists_train, dists_test = [], []
     for n, dist in enumerate(dists):
@@ -145,7 +145,7 @@ def evaluate_multianimal_full(
             cfg["project_path"],
             str(trainingsetfolder),
             "CollectedData_" + cfg["scorer"] + ".h5",
-        ),
+        )
     )
     # Handle data previously annotated on a different platform
     sep = "/" if "/" in Data.index[0] else "\\"
@@ -315,14 +315,14 @@ def evaluate_multianimal_full(
                         frame = img_as_ubyte(image)
 
                         GT = Data.iloc[imageindex]
-                        df = GT.unstack("coords").reindex(joints, level='bodyparts')
+                        df = GT.unstack("coords").reindex(joints, level="bodyparts")
 
                         # Evaluate PAF edge lengths to calibrate `distnorm`
                         temp_xy = GT.unstack("bodyparts")[joints]
-                        xy = temp_xy.values.reshape((-1, 2, temp_xy.shape[1])).swapaxes(
-                            1, 2
-                        )
-                        if dlc_cfg['partaffinityfield_predict']:
+                        xy = temp_xy.values.reshape(
+                            (-1, 2, temp_xy.shape[1])
+                        ).swapaxes(1, 2)
+                        if dlc_cfg["partaffinityfield_predict"]:
                             edges = xy[:, dlc_cfg["partaffinityfield_graph"]]
                             lengths = np.sum(
                                 (edges[:, :, 0] - edges[:, :, 1]) ** 2, axis=2
                             )
@@ -354,8 +354,8 @@ def evaluate_multianimal_full(
                             inputs,
                             outputs,
                             outall=False,
-                            nms_radius=dlc_cfg['nmsradius'],
-                            det_min_score=dlc_cfg['minconfidence'],
+                            nms_radius=dlc_cfg["nmsradius"],
+                            det_min_score=dlc_cfg["minconfidence"],
                             c_engine=c_engine,
                         )
                         PredicteData[imagename]["prediction"] = pred
@@ -385,9 +385,9 @@ def evaluate_multianimal_full(
                                 conf[sl] = probs_pred[n_joint][cols].squeeze()
 
                         if plotting:
-                            gt = (temp_xy.values
-                                  .reshape((-1, 2, temp_xy.shape[1]))
-                                  .T.swapaxes(1, 2))
+                            gt = temp_xy.values.reshape(
+                                (-1, 2, temp_xy.shape[1])
+                            ).T.swapaxes(1, 2)
                             fig = visualization.make_multianimal_labeled_image(
                                 frame,
                                 gt,
@@ -415,7 +415,7 @@ def evaluate_multianimal_full(
                        [df_dist, df_conf],
                        keys=["rmse", "conf"],
                        names=["metrics"],
-                        axis=1
+                        axis=1,
                    )
                    df_joint = df_joint.reorder_levels(
                        list(np.roll(df_joint.columns.names, -1)), axis=1
                    )
@@ -424,14 +424,19 @@ def evaluate_multianimal_full(
                    df_joint.sort_index(
                        axis=1,
                        level=["individuals", "bodyparts"],
                        ascending=[True, True],
-                        inplace=True
+                        inplace=True,
+                    )
+                    write_path = os.path.join(
+                        evaluationfolder, f"dist_{trainingsiterations}.csv"
                    )
-                    write_path = os.path.join(evaluationfolder, f"dist_{trainingsiterations}.csv")
                    df_joint.to_csv(write_path)
 
                    # Calculate overall prediction error
                    error = df_joint.xs("rmse", level="metrics", axis=1)
-                    mask = df_joint.xs("conf", level="metrics", axis=1) >= cfg["pcutoff"]
+                    mask = (
+                        df_joint.xs("conf", level="metrics", axis=1)
+                        >= cfg["pcutoff"]
+                    )
                    error_masked = error[mask]
                    error_train = np.nanmean(error.iloc[trainIndices])
                    error_train_cut = np.nanmean(error_masked.iloc[trainIndices])
@@ -455,26 +460,44 @@ def evaluate_multianimal_full(
                    sd.to_csv(write_path.replace("dist.csv", "sd.csv"))
 
                    if show_errors:
-                        string = "Results for {} training iterations: {}, shuffle {}:\n" \
-                                 "Train error: {} pixels. Test error: {} pixels.\n" \
-                                 "With pcutoff of {}:\n" \
-                                 "Train error: {} pixels. Test error: {} pixels."
+                        string = (
+                            "Results for {} training iterations: {}, shuffle {}:\n"
+                            "Train error: {} pixels. Test error: {} pixels.\n"
+                            "With pcutoff of {}:\n"
+                            "Train error: {} pixels. Test error: {} pixels."
+                        )
                        print(string.format(*results))
                        print("##########################################")
-                        print("Average Euclidean distance to GT per individual (in pixels)")
-                        print(error_masked.groupby('individuals', axis=1).mean().mean().to_string())
-                        print("Average Euclidean distance to GT per bodypart (in pixels)")
-                        print(error_masked.groupby('bodyparts', axis=1).mean().mean().to_string())
+                        print(
+                            "Average Euclidean distance to GT per individual (in pixels)"
+                        )
+                        print(
+                            error_masked.groupby("individuals", axis=1)
+                            .mean()
+                            .mean()
+                            .to_string()
+                        )
+                        print(
+                            "Average Euclidean distance to GT per bodypart (in pixels)"
+                        )
+                        print(
+                            error_masked.groupby("bodyparts", axis=1)
+                            .mean()
+                            .mean()
+                            .to_string()
+                        )
 
                    PredicteData["metadata"] = {
-                        "nms radius": dlc_cfg['nmsradius'],
-                        "minimal confidence": dlc_cfg['minconfidence'],
-                        "PAFgraph": dlc_cfg['partaffinityfield_graph'],
-                        "all_joints": [[i] for i in range(len(dlc_cfg['all_joints']))],
+                        "nms radius": dlc_cfg["nmsradius"],
+                        "minimal confidence": dlc_cfg["minconfidence"],
+                        "PAFgraph": dlc_cfg["partaffinityfield_graph"],
+                        "all_joints": [
+                            [i] for i in range(len(dlc_cfg["all_joints"]))
+                        ],
                        "all_joints_names": [
-                            dlc_cfg['all_joints_names'][i]
-                            for i in range(len(dlc_cfg['all_joints']))
+                            dlc_cfg["all_joints_names"][i]
+                            for i in range(len(dlc_cfg["all_joints"]))
                        ],
                        "stride": dlc_cfg.get("stride", 8),
                    }
@@ -604,7 +627,7 @@ def evaluate_multianimal_crossvalidate(
            cfg["project_path"],
            str(trainingsetfolder),
            "CollectedData_" + cfg["scorer"] + ".h5",
-        ),
+        )
    )
    comparisonbodyparts = auxiliaryfunctions.IntersectionofBodyPartsandOnesGivenbyUser(
        cfg, "all"
    )
@@ -698,7 +721,7 @@ def evaluate_multianimal_crossvalidate(
    stats_file = os.path.join(evaluationfolder, "sd.csv")
    if os.path.isfile(stats_file):
        stats = pd.read_csv(stats_file, header=None, index_col=0)
-        inferencecfg['distnormalization'] = np.round(
+        inferencecfg["distnormalization"] = np.round(
            stats.loc["distnorm", 1], 2
        ).item()
        stats = stats.drop("distnorm")
        )  # Taken as 2*SD error between predictions and ground truth
    else:
        dcorr = 10
-    inferencecfg['topktoretain'] = np.inf
+    inferencecfg["topktoretain"] = np.inf
    inferencecfg, opt = crossvalutils.bayesian_search(
        config,
        inferencecfg,
@@ -728,7 +751,7 @@ def evaluate_multianimal_crossvalidate(
    )
 
    # update number of individuals to retain.
-    inferencecfg['topktoretain'] = len(cfg["individuals"]) + 1 * (
+    inferencecfg["topktoretain"] = len(cfg["individuals"]) + 1 * (
        len(cfg["uniquebodyparts"]) > 0
    )
diff --git a/deeplabcut/pose_estimation_tensorflow/lib/inferenceutils.py b/deeplabcut/pose_estimation_tensorflow/lib/inferenceutils.py
index 6183dd08d..ffc1d01aa 100644
--- a/deeplabcut/pose_estimation_tensorflow/lib/inferenceutils.py
+++ b/deeplabcut/pose_estimation_tensorflow/lib/inferenceutils.py
@@ -28,9 +28,11 @@ def individual2boundingbox(cfg, animals, X1=0):
 
     for id, individual in enumerate(animals):
         boundingboxes[id, 0:4:2] = minmax(
-            individual[::3] + X1, slack=cfg['boundingboxslack']
+            individual[::3] + X1, slack=cfg["boundingboxslack"]
+        )
+        boundingboxes[id, 1:4:2] = minmax(
+            individual[1::3], slack=cfg["boundingboxslack"]
         )
-        boundingboxes[id, 1:4:2] = minmax(individual[1::3], slack=cfg['boundingboxslack'])
         boundingboxes[id, 4] = np.nanmean(
             individual[2::3]
         )  # average likelihood of all bpts
@@ -139,7 +141,7 @@ def extractstrongconnections(
             for j in range(n_b):
                 if evaluation:
                     score_with_dist_prior = abs(
-                        dataimage["prediction"]["costs"][PAF[edge]][cfg['method']][
+                        dataimage["prediction"]["costs"][PAF[edge]][cfg["method"]][
                             i, j
                         ]
                     )
@@ -148,7 +150,7 @@ def extractstrongconnections(
                 else:
                     score_with_dist_prior = abs(
-                        dataimage["costs"][PAF[edge]][cfg['method']][i, j]
+                        dataimage["costs"][PAF[edge]][cfg["method"]][i, j]
                     )
                     d = dataimage["costs"][PAF[edge]]["distance"][i, j]
 
@@ -157,9 +159,11 @@ def extractstrongconnections(
                 # filtering with global distance bounds
                 if lowerbound is None and upperbound is None:
                     if (
-                        score_with_dist_prior > cfg['pafthreshold']
-                        and cfg['distnormalizationLOWER'] <= d < cfg['distnormalization']
-                        and si * sj > cfg['detectionthresholdsquare']
+                        score_with_dist_prior > cfg["pafthreshold"]
+                        and cfg["distnormalizationLOWER"]
+                        <= d
+                        < cfg["distnormalization"]
+                        and si * sj > cfg["detectionthresholdsquare"]
                     ):
                         connection_candidate.append(
@@ -168,15 +172,15 @@ def extractstrongconnections(
                                 i,
                                 j,
                                 score_with_dist_prior,
                                 score_with_dist_prior
-                                + np.sqrt(si * sj) * cfg['addlikelihoods'],
+                                + np.sqrt(si * sj) * cfg["addlikelihoods"],
                             ]
                         )
                 else:  # filtering with edgewise distance bounds
                     if (
-                        score_with_dist_prior > cfg['pafthreshold']
+                        score_with_dist_prior > cfg["pafthreshold"]
                         and lowerbound[edge] <= d < upperbound[edge]
-                        and si * sj > cfg['detectionthresholdsquare']
+                        and si * sj > cfg["detectionthresholdsquare"]
                     ):
                         connection_candidate.append(
                             [
                                 i,
                                 j,
                                 score_with_dist_prior,
                                 score_with_dist_prior
-                                + np.sqrt(si * sj) * cfg['addlikelihoods'],
+                                + np.sqrt(si * sj) * cfg["addlikelihoods"],
                             ]
                         )
@@ -302,8 +306,8 @@ def linkjoints2individuals(
             len(subset)
         ):  # delete animal proposals with too few edges or too low average score
             if (
-                subset[i][-1] < cfg['minimalnumberofconnections']
-                or subset[i][-2] / subset[i][-1] < cfg['averagescore']
+                subset[i][-1] < cfg["minimalnumberofconnections"]
+                or subset[i][-2] / subset[i][-1] < cfg["averagescore"]
             ):
                 deleteIdx.append(i)
@@ -328,7 +332,7 @@ def assemble_individuals(
 
     # filter detections according to inferencecfg parameters
     all_detections = convertdetectiondict2listoflist(
-        data, BPTS, withid=inference_cfg['withid'], evaluation=evaluation
+        data, BPTS, withid=inference_cfg["withid"], evaluation=evaluation
     )
 
     # filter connections according to inferencecfg parameters
@@ -359,8 +363,8 @@ def assemble_individuals(
             print(subset)
         sortedindividuals = np.argsort(-subset[:, -2])  # sort by top score!
-        if len(sortedindividuals) > inference_cfg['topktoretain']:
-            sortedindividuals = sortedindividuals[: inference_cfg['topktoretain']]
+        if len(sortedindividuals) > inference_cfg["topktoretain"]:
+            sortedindividuals = sortedindividuals[: inference_cfg["topktoretain"]]
         animals = []
         for n in sortedindividuals:  # number of individuals
diff --git a/deeplabcut/pose_estimation_tensorflow/lib/trackingutils.py b/deeplabcut/pose_estimation_tensorflow/lib/trackingutils.py
index ce37b632c..cc3d6a280 100644
--- a/deeplabcut/pose_estimation_tensorflow/lib/trackingutils.py
+++ b/deeplabcut/pose_estimation_tensorflow/lib/trackingutils.py
@@ -42,7 +42,7 @@ class SkeletonTracker developed for DLC 2.2.
 from shapely.geometry import Polygon
 
-warnings.simplefilter('ignore', category=NumbaPerformanceWarning)
+warnings.simplefilter("ignore", category=NumbaPerformanceWarning)
 
 @jit
@@ -225,7 +225,9 @@ def calc_similarity_with(self, other_ellipse):
         max_dist = max(
             self.height, self.width, other_ellipse.height, other_ellipse.width
         )
-        dist = math.sqrt((self.x - other_ellipse.x) ** 2 + (self.y - other_ellipse.y) ** 2)
+        dist = math.sqrt(
+            (self.x - other_ellipse.x) ** 2 + (self.y - other_ellipse.y) ** 2
+        )
         cost1 = 1 - min(dist / max_dist, 1)
         cost2 = abs(math.cos(self.theta - other_ellipse.theta))
         return 0.8 * cost1 + 0.2 * cost2 * cost1
@@ -235,8 +237,10 @@ def contains_points(self, xy, tol=0.1):
         sa = math.sin(self.theta)
         x_demean = xy[:, 0] - self.x
         y_demean = xy[:, 1] - self.y
-        return (((ca * x_demean + sa * y_demean) ** 2 / (0.5 * self.width) ** 2)
-                + ((sa * x_demean - ca * y_demean) ** 2 / (0.5 * self.height) ** 2)) <= 1 + tol
+        return (
+            ((ca * x_demean + sa * y_demean) ** 2 / (0.5 * self.width) ** 2)
+            + ((sa * x_demean - ca * y_demean) ** 2 / (0.5 * self.height) ** 2)
+        ) <= 1 + tol
 
     def draw(self, show_axes=True, ax=None, **kwargs):
         import matplotlib.pyplot as plt
@@ -244,15 +248,21 @@ def draw(self, show_axes=True, ax=None, **kwargs):
         from matplotlib.transforms import Affine2D
 
         if ax is None:
-            ax = plt.subplot(111, aspect='equal')
-        el = patches.Ellipse(xy=(self.x, self.y), width=self.width, height=self.height,
-                             angle=np.rad2deg(self.theta), **kwargs)
+            ax = plt.subplot(111, aspect="equal")
+        el = patches.Ellipse(
+            xy=(self.x, self.y),
+            width=self.width,
+            height=self.height,
+            angle=np.rad2deg(self.theta),
+            **kwargs,
+        )
         ax.add_patch(el)
         if show_axes:
             major = Line2D([-self.width / 2, self.width / 2], [0, 0], lw=3, zorder=3)
             minor = Line2D([0, 0], [-self.height / 2, self.height / 2], lw=3, zorder=3)
-            trans = (Affine2D().rotate(self.theta).translate(self.x, self.y)
-                     + ax.transData)
+            trans = (
+                Affine2D().rotate(self.theta).translate(self.x, self.y) + ax.transData
+            )
             major.set_transform(trans)
             minor.set_transform(trans)
             ax.add_artist(major)
@@ -363,13 +373,13 @@ def calc_parameters(coeffs):
     f *= 0.5
 
     # Ellipse center coordinates
-    x0 = (c*d - b*f) / (b*b - a*c)
-    y0 = (a*f - b*d) / (b*b - a*c)
+    x0 = (c * d - b * f) / (b * b - a * c)
+    y0 = (a * f - b * d) / (b * b - a * c)
 
     # Semi-axes lengths
-    num = 2 * (a*f*f + c*d*d + g*b*b - 2*b*d*f - a*c*g)
-    den1 = (b*b - a*c) * (np.sqrt((a - c)**2 + 4*b*b) - (a + c))
-    den2 = (b*b - a*c) * (-np.sqrt((a - c)**2 + 4*b*b) - (a + c))
+    num = 2 * (a * f * f + c * d * d + g * b * b - 2 * b * d * f - a * c * g)
+    den1 = (b * b - a * c) * (np.sqrt((a - c) ** 2 + 4 * b * b) - (a + c))
+    den2 = (b * b - a * c) * (-np.sqrt((a - c) ** 2 + 4 * b * b) - (a + c))
     major = np.sqrt(num / den1)
     minor = np.sqrt(num / den2)
 
@@ -378,12 +388,12 @@ def calc_parameters(coeffs):
         if a < c:
             phi = 0
         else:
-            phi = np.pi/2
+            phi = np.pi / 2
     else:
         if a < c:
-            phi = np.arctan(2*b / (a-c)) / 2
+            phi = np.arctan(2 * b / (a - c)) / 2
         else:
-            phi = np.pi/2 + np.arctan(2*b / (a-c)) / 2
+            phi = np.pi / 2 + np.arctan(2 * b / (a - c)) / 2
 
     return [x0, y0, 2 * major, 2 * minor, phi]
 
@@ -394,7 +404,9 @@ class EllipseTracker:
     def __init__(self, params):
         self.kf = kinematic_kf(5, order=1, dim_z=5, order_by_dim=False)
         self.kf.R[2:, 2:] *= 10.0
-        self.kf.P[5:, 5:] *= 1000.0  # High uncertainty to the unobservable initial velocities
+        self.kf.P[
+            5:, 5:
+        ] *= 1000.0  # High uncertainty to the unobservable initial velocities
         self.kf.P *= 10.0
         self.kf.Q[5:, 5:] *= 0.01
         self.state = params
@@ -473,8 +485,12 @@ def track(self, poses, identities=None):
                 cost *= match
                 cost_matrix[i, j] = cost
         row_indices, col_indices = linear_sum_assignment(cost_matrix, maximize=True)
-        unmatched_detections = [i for i, _ in enumerate(ellipses) if i not in row_indices]
-        unmatched_trackers = [j for j, _ in enumerate(trackers) if j not in col_indices]
+        unmatched_detections = [
+            i for i, _ in enumerate(ellipses) if i not in row_indices
+        ]
+        unmatched_trackers = [
+            j for j, _ in enumerate(trackers) if j not in col_indices
+        ]
         matches = []
         for row, col in zip(row_indices, col_indices):
             val = cost_matrix[row, col]
@@ -518,7 +534,7 @@ def track(self, poses, identities=None):
         for trk in reversed(self.trackers):
             d = trk.state
             if (trk.time_since_update < 1) and (
-                trk.hit_streak >= self.min_hits or self.n_frames <= self.min_hits
+                trk.hit_streak >= self.min_hits or self.n_frames <= self.min_hits
             ):
                 ret.append(
                     np.concatenate((d, [trk.id, int(animalindex[i - 1])])).reshape(
@@ -880,37 +896,24 @@ def reconstruct_all_ellipses(data, sd):
 
 def _track_individuals(
-    individuals,
-    min_hits=1,
-    max_age=5,
-    similarity_threshold=0.6,
-    track_method='ellipse',
+    individuals, min_hits=1, max_age=5, similarity_threshold=0.6, track_method="ellipse"
 ):
-    if track_method not in ('box', 'skeleton', 'ellipse'):
-        raise ValueError(f'Unknown {track_method} tracker.')
-
-    if track_method == 'ellipse':
-        tracker = SORTEllipse(
-            max_age,
-            min_hits,
-            similarity_threshold
-        )
-    elif track_method == 'box':
+    if track_method not in ("box", "skeleton", "ellipse"):
+        raise ValueError(f"Unknown {track_method} tracker.")
+
+    if track_method == "ellipse":
+        tracker = SORTEllipse(max_age, min_hits, similarity_threshold)
+    elif track_method == "box":
         tracker = Sort(
             {
-                'max_age': max_age,
-                'min_hits': min_hits,
-                'iou_threshold': similarity_threshold
+                "max_age": max_age,
+                "min_hits": min_hits,
+                "iou_threshold": similarity_threshold,
             }
         )
     else:
         n_bodyparts = individuals[0][0].shape[0]
-        tracker = SORT(
-            n_bodyparts,
-            max_age,
-            min_hits,
-            similarity_threshold,
-        )
+        tracker = SORT(n_bodyparts, max_age, min_hits, similarity_threshold)
     tracklets = defaultdict(dict)
     all_hyps = dict()
diff --git a/deeplabcut/pose_estimation_tensorflow/nnet/efficientnet_builder.py b/deeplabcut/pose_estimation_tensorflow/nnet/efficientnet_builder.py
index 3180f46fe..01ebb19f2 100644
--- a/deeplabcut/pose_estimation_tensorflow/nnet/efficientnet_builder.py
+++ b/deeplabcut/pose_estimation_tensorflow/nnet/efficientnet_builder.py
@@ -31,94 +31,95 @@
 def efficientnet_params(model_name):
-  """Get efficientnet params based on model name."""
-  params_dict = {
-      # (width_coefficient, depth_coefficient, resolution,
dropout_rate) - 'efficientnet-b0': (1.0, 1.0, 224, 0.2), - 'efficientnet-b1': (1.0, 1.1, 240, 0.2), - 'efficientnet-b2': (1.1, 1.2, 260, 0.3), - 'efficientnet-b3': (1.2, 1.4, 300, 0.3), - 'efficientnet-b4': (1.4, 1.8, 380, 0.4), - 'efficientnet-b5': (1.6, 2.2, 456, 0.4), - 'efficientnet-b6': (1.8, 2.6, 528, 0.5), - 'efficientnet-b7': (2.0, 3.1, 600, 0.5), - } - return params_dict[model_name] + """Get efficientnet params based on model name.""" + params_dict = { + # (width_coefficient, depth_coefficient, resolution, dropout_rate) + "efficientnet-b0": (1.0, 1.0, 224, 0.2), + "efficientnet-b1": (1.0, 1.1, 240, 0.2), + "efficientnet-b2": (1.1, 1.2, 260, 0.3), + "efficientnet-b3": (1.2, 1.4, 300, 0.3), + "efficientnet-b4": (1.4, 1.8, 380, 0.4), + "efficientnet-b5": (1.6, 2.2, 456, 0.4), + "efficientnet-b6": (1.8, 2.6, 528, 0.5), + "efficientnet-b7": (2.0, 3.1, 600, 0.5), + } + return params_dict[model_name] class BlockDecoder(object): - """Block Decoder for readability.""" + """Block Decoder for readability.""" - def _decode_block_string(self, block_string): - """Gets a block through a string notation of arguments.""" - assert isinstance(block_string, str) - ops = block_string.split('_') - options = {} - for op in ops: - splits = re.split(r'(\d.*)', op) - if len(splits) >= 2: - key, value = splits[:2] - options[key] = value + def _decode_block_string(self, block_string): + """Gets a block through a string notation of arguments.""" + assert isinstance(block_string, str) + ops = block_string.split("_") + options = {} + for op in ops: + splits = re.split(r"(\d.*)", op) + if len(splits) >= 2: + key, value = splits[:2] + options[key] = value - if 's' not in options or len(options['s']) != 2: - raise ValueError('Strides options should be a pair of integers.') + if "s" not in options or len(options["s"]) != 2: + raise ValueError("Strides options should be a pair of integers.") - return efficientnet_model.BlockArgs( - kernel_size=int(options['k']), - num_repeat=int(options['r']), - input_filters=int(options['i']), - output_filters=int(options['o']), - expand_ratio=int(options['e']), - id_skip=('noskip' not in block_string), - se_ratio=float(options['se']) if 'se' in options else None, - strides=[int(options['s'][0]), int(options['s'][1])], - conv_type=int(options['c']) if 'c' in options else 0) + return efficientnet_model.BlockArgs( + kernel_size=int(options["k"]), + num_repeat=int(options["r"]), + input_filters=int(options["i"]), + output_filters=int(options["o"]), + expand_ratio=int(options["e"]), + id_skip=("noskip" not in block_string), + se_ratio=float(options["se"]) if "se" in options else None, + strides=[int(options["s"][0]), int(options["s"][1])], + conv_type=int(options["c"]) if "c" in options else 0, + ) - def _encode_block_string(self, block): - """Encodes a block to a string.""" - args = [ - 'r%d' % block.num_repeat, - 'k%d' % block.kernel_size, - 's%d%d' % (block.strides[0], block.strides[1]), - 'e%s' % block.expand_ratio, - 'i%d' % block.input_filters, - 'o%d' % block.output_filters, - 'c%d' % block.conv_type, - ] - if block.se_ratio > 0 and block.se_ratio <= 1: - args.append('se%s' % block.se_ratio) - if block.id_skip is False: - args.append('noskip') - return '_'.join(args) + def _encode_block_string(self, block): + """Encodes a block to a string.""" + args = [ + "r%d" % block.num_repeat, + "k%d" % block.kernel_size, + "s%d%d" % (block.strides[0], block.strides[1]), + "e%s" % block.expand_ratio, + "i%d" % block.input_filters, + "o%d" % block.output_filters, + "c%d" % block.conv_type, + 
] + if block.se_ratio > 0 and block.se_ratio <= 1: + args.append("se%s" % block.se_ratio) + if block.id_skip is False: + args.append("noskip") + return "_".join(args) - def decode(self, string_list): - """Decodes a list of string notations to specify blocks inside the network. + def decode(self, string_list): + """Decodes a list of string notations to specify blocks inside the network. Args: string_list: a list of strings, each string is a notation of block. Returns: A list of namedtuples to represent blocks arguments. """ - assert isinstance(string_list, list) - blocks_args = [] - for block_string in string_list: - blocks_args.append(self._decode_block_string(block_string)) - return blocks_args + assert isinstance(string_list, list) + blocks_args = [] + for block_string in string_list: + blocks_args.append(self._decode_block_string(block_string)) + return blocks_args - def encode(self, blocks_args): - """Encodes a list of Blocks to a list of strings. + def encode(self, blocks_args): + """Encodes a list of Blocks to a list of strings. Args: blocks_args: A list of namedtuples to represent blocks arguments. Returns: a list of strings, each string is a notation of block. """ - block_strings = [] - for block in blocks_args: - block_strings.append(self._encode_block_string(block)) - return block_strings + block_strings = [] + for block in blocks_args: + block_strings.append(self._encode_block_string(block)) + return block_strings def swish(features, use_native=True): - """Computes the Swish activation function. + """Computes the Swish activation function. The tf.nn.swish operation uses a custom gradient to reduce memory usage. Since saving custom gradients in SavedModel is currently not supported, and one would not be able to use an exported TF-Hub module for fine-tuning, we @@ -133,79 +134,89 @@ def swish(features, use_native=True): Returns: The activation value. """ - if use_native: - return tf.nn.swish(features) - else: - features = tf.convert_to_tensor(features, name='features') - return features * tf.nn.sigmoid(features) + if use_native: + return tf.nn.swish(features) + else: + features = tf.convert_to_tensor(features, name="features") + return features * tf.nn.sigmoid(features) -def efficientnet(width_coefficient=None, - depth_coefficient=None, - dropout_rate=0.2, - drop_connect_rate=0.2): - """Creates a efficientnet model.""" - blocks_args = [ - 'r1_k3_s11_e1_i32_o16_se0.25', 'r2_k3_s22_e6_i16_o24_se0.25', - 'r2_k5_s22_e6_i24_o40_se0.25', 'r3_k3_s22_e6_i40_o80_se0.25', - 'r3_k5_s11_e6_i80_o112_se0.25', 'r4_k5_s11_e6_i112_o192_se0.25', - 'r1_k3_s11_e6_i192_o320_se0.25', - ] - # blocks_args = [ - # 'r1_k3_s11_e1_i32_o16_se0.25', 'r2_k3_s22_e6_i16_o24_se0.25', - # 'r2_k5_s22_e6_i24_o40_se0.25', 'r3_k3_s22_e6_i40_o80_se0.25', - # 'r3_k5_s11_e6_i80_o112_se0.25', 'r4_k5_s22_e6_i112_o192_se0.25', - # 'r1_k3_s11_e6_i192_o320_se0.25', - # ] - global_params = efficientnet_model.GlobalParams( - batch_norm_momentum=0.99, - batch_norm_epsilon=1e-3, - dropout_rate=dropout_rate, - drop_connect_rate=drop_connect_rate, - data_format='channels_last', - num_classes=1000, - width_coefficient=width_coefficient, - depth_coefficient=depth_coefficient, - depth_divisor=8, - min_depth=None, - relu_fn=tf.nn.swish, - # The default is TPU-specific batch norm. - # The alternative is tf.layers.BatchNormalization. - # batch_norm=utils.TpuBatchNormalization, # TPU-specific requirement. 
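# --- Editor's note: illustrative sketch, not part of the patch. ---
# The blocks_args strings above use the compact notation that BlockDecoder
# parses: "r1_k3_s11_e1_i32_o16_se0.25" means repeat 1x, kernel 3, strides
# (1, 1), expand ratio 1, 32 input filters, 16 output filters, SE ratio 0.25.
# A minimal stand-alone decoder following the same "<key><value>" convention
# (hypothetical helper that mirrors _decode_block_string above):
import re

def decode_block_string(block_string):
    """Split e.g. "r1_k3_s11_e1_i32_o16_se0.25" into a key/value dict."""
    options = {}
    for op in block_string.split("_"):
        splits = re.split(r"(\d.*)", op)
        if len(splits) >= 2:
            key, value = splits[:2]
            options[key] = value
    return options

print(decode_block_string("r1_k3_s11_e1_i32_o16_se0.25"))
# {'r': '1', 'k': '3', 's': '11', 'e': '1', 'i': '32', 'o': '16', 'se': '0.25'}
# --- End editor's note. ---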
- batch_norm=utils.BatchNormalization, - use_se=True) - decoder = BlockDecoder() - return decoder.decode(blocks_args), global_params +def efficientnet( + width_coefficient=None, + depth_coefficient=None, + dropout_rate=0.2, + drop_connect_rate=0.2, +): + """Creates an EfficientNet model.""" + blocks_args = [ + "r1_k3_s11_e1_i32_o16_se0.25", + "r2_k3_s22_e6_i16_o24_se0.25", + "r2_k5_s22_e6_i24_o40_se0.25", + "r3_k3_s22_e6_i40_o80_se0.25", + "r3_k5_s11_e6_i80_o112_se0.25", + "r4_k5_s11_e6_i112_o192_se0.25", + "r1_k3_s11_e6_i192_o320_se0.25", + ] + # blocks_args = [ + # 'r1_k3_s11_e1_i32_o16_se0.25', 'r2_k3_s22_e6_i16_o24_se0.25', + # 'r2_k5_s22_e6_i24_o40_se0.25', 'r3_k3_s22_e6_i40_o80_se0.25', + # 'r3_k5_s11_e6_i80_o112_se0.25', 'r4_k5_s22_e6_i112_o192_se0.25', + # 'r1_k3_s11_e6_i192_o320_se0.25', + # ] + global_params = efficientnet_model.GlobalParams( + batch_norm_momentum=0.99, + batch_norm_epsilon=1e-3, + dropout_rate=dropout_rate, + drop_connect_rate=drop_connect_rate, + data_format="channels_last", + num_classes=1000, + width_coefficient=width_coefficient, + depth_coefficient=depth_coefficient, + depth_divisor=8, + min_depth=None, + relu_fn=tf.nn.swish, + # The default is TPU-specific batch norm. + # The alternative is tf.layers.BatchNormalization. + # batch_norm=utils.TpuBatchNormalization, # TPU-specific requirement. + batch_norm=utils.BatchNormalization, + use_se=True, + ) + decoder = BlockDecoder() + return decoder.decode(blocks_args), global_params def get_model_params(model_name, override_params): - """Get the block args and global params for a given model.""" - if model_name.startswith('efficientnet'): - width_coefficient, depth_coefficient, _, dropout_rate = ( - efficientnet_params(model_name)) - blocks_args, global_params = efficientnet( - width_coefficient, depth_coefficient, dropout_rate) - else: - raise NotImplementedError('model name is not pre-defined: %s' % model_name) + """Get the block args and global params for a given model.""" + if model_name.startswith("efficientnet"): + width_coefficient, depth_coefficient, _, dropout_rate = efficientnet_params( + model_name + ) + blocks_args, global_params = efficientnet( + width_coefficient, depth_coefficient, dropout_rate + ) + else: + raise NotImplementedError("model name is not pre-defined: %s" % model_name) - if override_params: - # ValueError will be raised here if override_params has fields not included - # in global_params. - global_params = global_params._replace(**override_params) + if override_params: + # ValueError will be raised here if override_params has fields not included - # in global_params. + global_params = global_params._replace(**override_params) - tf.logging.info('global_params= %s', global_params) - tf.logging.info('blocks_args= %s', blocks_args) - return blocks_args, global_params + tf.logging.info("global_params= %s", global_params) + tf.logging.info("blocks_args= %s", blocks_args) + return blocks_args, global_params -def build_model(images, - model_name, - training, - override_params=None, - model_dir=None, - fine_tuning=False, - features_only=False): - """A helper functiion to creates a model and returns predicted logits. +def build_model( + images, + model_name, + training, + override_params=None, + model_dir=None, + fine_tuning=False, + features_only=False, +): + """A helper function to create a model and return predicted logits. Args: images: input images tensor. model_name: string, the predefined model name.
@@ -222,34 +233,36 @@ def build_model(images, When model_name specified an undefined model, raises NotImplementedError. When override_params has invalid fields, raises ValueError. """ - assert isinstance(images, tf.Tensor) - if not training or fine_tuning: - if not override_params: - override_params = {} - override_params['batch_norm'] = utils.BatchNormalization - override_params['relu_fn'] = functools.partial(swish, use_native=False) - blocks_args, global_params = get_model_params(model_name, override_params) + assert isinstance(images, tf.Tensor) + if not training or fine_tuning: + if not override_params: + override_params = {} + override_params["batch_norm"] = utils.BatchNormalization + override_params["relu_fn"] = functools.partial(swish, use_native=False) + blocks_args, global_params = get_model_params(model_name, override_params) - if model_dir: - param_file = os.path.join(model_dir, 'model_params.txt') - if not tf.gfile.Exists(param_file): - if not tf.gfile.Exists(model_dir): - tf.gfile.MakeDirs(model_dir) - with tf.gfile.GFile(param_file, 'w') as f: - tf.logging.info('writing to %s' % param_file) - f.write('model_name= %s\n\n' % model_name) - f.write('global_params= %s\n\n' % str(global_params)) - f.write('blocks_args= %s\n\n' % str(blocks_args)) + if model_dir: + param_file = os.path.join(model_dir, "model_params.txt") + if not tf.gfile.Exists(param_file): + if not tf.gfile.Exists(model_dir): + tf.gfile.MakeDirs(model_dir) + with tf.gfile.GFile(param_file, "w") as f: + tf.logging.info("writing to %s" % param_file) + f.write("model_name= %s\n\n" % model_name) + f.write("global_params= %s\n\n" % str(global_params)) + f.write("blocks_args= %s\n\n" % str(blocks_args)) - with tf.variable_scope(model_name): - model = efficientnet_model.Model(blocks_args, global_params) - outputs = model(images, training=training, features_only=features_only) - outputs = tf.identity(outputs, 'features' if features_only else 'logits') - return outputs, model.endpoints + with tf.variable_scope(model_name): + model = efficientnet_model.Model(blocks_args, global_params) + outputs = model(images, training=training, features_only=features_only) + outputs = tf.identity(outputs, "features" if features_only else "logits") + return outputs, model.endpoints -def build_model_base(images, model_name, use_batch_norm=False, drop_out=False, override_params=None): - """A helper functiion to create a base model and return global_pool. +def build_model_base( + images, model_name, use_batch_norm=False, drop_out=False, override_params=None +): + """A helper function to create a base model and return global_pool. Args: images: input images tensor. model_name: string, the predefined model name. @@ -263,12 +276,14 @@ def build_model_base(images, model_name, use_batch_norm=False, drop_out=False, o When model_name specified an undefined model, raises NotImplementedError. When override_params has invalid fields, raises ValueError.
""" - assert isinstance(images, tf.Tensor) - blocks_args, global_params = get_model_params(model_name, override_params) + assert isinstance(images, tf.Tensor) + blocks_args, global_params = get_model_params(model_name, override_params) - with tf.variable_scope(model_name): - model = efficientnet_model.Model(blocks_args, global_params) - features = model(images, use_batch_norm=use_batch_norm, drop_out=drop_out, features_only=True) + with tf.variable_scope(model_name): + model = efficientnet_model.Model(blocks_args, global_params) + features = model( + images, use_batch_norm=use_batch_norm, drop_out=drop_out, features_only=True + ) - features = tf.identity(features, 'features') - return features, model.endpoints + features = tf.identity(features, "features") + return features, model.endpoints diff --git a/deeplabcut/pose_estimation_tensorflow/nnet/efficientnet_model.py b/deeplabcut/pose_estimation_tensorflow/nnet/efficientnet_model.py index c4a854272..677ddf25b 100644 --- a/deeplabcut/pose_estimation_tensorflow/nnet/efficientnet_model.py +++ b/deeplabcut/pose_estimation_tensorflow/nnet/efficientnet_model.py @@ -31,25 +31,47 @@ import deeplabcut.pose_estimation_tensorflow.nnet.utils as utils -GlobalParams = collections.namedtuple('GlobalParams', [ - 'batch_norm_momentum', 'batch_norm_epsilon', 'dropout_rate', 'data_format', - 'num_classes', 'width_coefficient', 'depth_coefficient', - 'depth_divisor', 'min_depth', 'drop_connect_rate', 'relu_fn', - 'batch_norm', 'use_se', -]) +GlobalParams = collections.namedtuple( + "GlobalParams", + [ + "batch_norm_momentum", + "batch_norm_epsilon", + "dropout_rate", + "data_format", + "num_classes", + "width_coefficient", + "depth_coefficient", + "depth_divisor", + "min_depth", + "drop_connect_rate", + "relu_fn", + "batch_norm", + "use_se", + ], +) GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields) -BlockArgs = collections.namedtuple('BlockArgs', [ - 'kernel_size', 'num_repeat', 'input_filters', 'output_filters', - 'expand_ratio', 'id_skip', 'strides', 'se_ratio', 'conv_type', -]) +BlockArgs = collections.namedtuple( + "BlockArgs", + [ + "kernel_size", + "num_repeat", + "input_filters", + "output_filters", + "expand_ratio", + "id_skip", + "strides", + "se_ratio", + "conv_type", + ], +) # defaults will be a public argument for namedtuple in Python 3.7 # https://docs.python.org/3/library/collections.html#collections.namedtuple BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields) def conv_kernel_initializer(shape, dtype=None, partition_info=None): - """Initialization for convolutional kernels. + """Initialization for convolutional kernels. The main difference with tf.variance_scaling_initializer is that tf.variance_scaling_initializer uses a truncated normal with an uncorrected standard deviation, whereas here we use a normal distribution. 
Similarly, @@ -62,15 +84,14 @@ def conv_kernel_initializer(shape, dtype=None, partition_info=None): Returns: an initialization for the variable """ - del partition_info - kernel_height, kernel_width, _, out_filters = shape - fan_out = int(kernel_height * kernel_width * out_filters) - return tf.random_normal( - shape, mean=0.0, stddev=np.sqrt(2.0 / fan_out), dtype=dtype) + del partition_info + kernel_height, kernel_width, _, out_filters = shape + fan_out = int(kernel_height * kernel_width * out_filters) + return tf.random_normal(shape, mean=0.0, stddev=np.sqrt(2.0 / fan_out), dtype=dtype) def dense_kernel_initializer(shape, dtype=None, partition_info=None): - """Initialization for dense kernels. + """Initialization for dense kernels. This initialization is equal to tf.variance_scaling_initializer(scale=1.0/3.0, mode='fan_out', distribution='uniform'). @@ -82,159 +103,173 @@ def dense_kernel_initializer(shape, dtype=None, partition_info=None): Returns: an initialization for the variable """ - del partition_info - init_range = 1.0 / np.sqrt(shape[1]) - return tf.random_uniform(shape, -init_range, init_range, dtype=dtype) + del partition_info + init_range = 1.0 / np.sqrt(shape[1]) + return tf.random_uniform(shape, -init_range, init_range, dtype=dtype) def round_filters(filters, global_params): - """Round number of filters based on depth multiplier.""" - orig_f = filters - multiplier = global_params.width_coefficient - divisor = global_params.depth_divisor - min_depth = global_params.min_depth - if not multiplier: - return filters - - filters *= multiplier - min_depth = min_depth or divisor - new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) - # Make sure that round down does not go down by more than 10%. - if new_filters < 0.9 * filters: - new_filters += divisor - tf.logging.info('round_filter input={} output={}'.format(orig_f, new_filters)) - return int(new_filters) + """Round number of filters based on depth multiplier.""" + orig_f = filters + multiplier = global_params.width_coefficient + divisor = global_params.depth_divisor + min_depth = global_params.min_depth + if not multiplier: + return filters + + filters *= multiplier + min_depth = min_depth or divisor + new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) + # Make sure that round down does not go down by more than 10%. + if new_filters < 0.9 * filters: + new_filters += divisor + tf.logging.info("round_filter input={} output={}".format(orig_f, new_filters)) + return int(new_filters) def round_repeats(repeats, global_params): - """Round number of filters based on depth multiplier.""" - multiplier = global_params.depth_coefficient - if not multiplier: - return repeats - return int(math.ceil(multiplier * repeats)) + """Round number of filters based on depth multiplier.""" + multiplier = global_params.depth_coefficient + if not multiplier: + return repeats + return int(math.ceil(multiplier * repeats)) class MBConvBlock(tf.keras.layers.Layer): - """A class of MBConv: Mobile Inverted Residual Bottleneck. + """A class of MBConv: Mobile Inverted Residual Bottleneck. Attributes: endpoints: dict. A list of internal tensors. """ - def __init__(self, block_args, global_params): - """Initializes a MBConv block. + def __init__(self, block_args, global_params): + """Initializes a MBConv block. Args: block_args: BlockArgs, arguments to create a Block. global_params: GlobalParams, a set of global parameters. 
""" - super(MBConvBlock, self).__init__() - self._block_args = block_args - self._batch_norm_momentum = global_params.batch_norm_momentum - self._batch_norm_epsilon = global_params.batch_norm_epsilon - self._batch_norm = global_params.batch_norm - self._data_format = global_params.data_format - if self._data_format == 'channels_first': - self._channel_axis = 1 - self._spatial_dims = [2, 3] - else: - self._channel_axis = -1 - self._spatial_dims = [1, 2] - - self._relu_fn = global_params.relu_fn or tf.nn.swish - self._has_se = ( - global_params.use_se and self._block_args.se_ratio is not None and - 0 < self._block_args.se_ratio <= 1) - - self.endpoints = None - - # Builds the block accordings to arguments. - self._build() - - def block_args(self): - return self._block_args - - def _build(self): - """Builds block according to the arguments.""" - filters = self._block_args.input_filters * self._block_args.expand_ratio - if self._block_args.expand_ratio != 1: - # Expansion phase: - self._expand_conv = tf.layers.Conv2D( - filters, - kernel_size=[1, 1], - strides=[1, 1], - kernel_initializer=conv_kernel_initializer, - padding='same', - data_format=self._data_format, - use_bias=False) - self._bn0 = self._batch_norm( - axis=self._channel_axis, - momentum=self._batch_norm_momentum, - epsilon=self._batch_norm_epsilon) - - kernel_size = self._block_args.kernel_size - # Depth-wise convolution phase: - self._depthwise_conv = utils.DepthwiseConv2D( - [kernel_size, kernel_size], - strides=self._block_args.strides, - depthwise_initializer=conv_kernel_initializer, - padding='same', - data_format=self._data_format, - use_bias=False) - self._bn1 = self._batch_norm( - axis=self._channel_axis, - momentum=self._batch_norm_momentum, - epsilon=self._batch_norm_epsilon) - - if self._has_se: - num_reduced_filters = max( - 1, int(self._block_args.input_filters * self._block_args.se_ratio)) - # Squeeze and Excitation layer. - self._se_reduce = tf.layers.Conv2D( - num_reduced_filters, - kernel_size=[1, 1], - strides=[1, 1], - kernel_initializer=conv_kernel_initializer, - padding='same', - data_format=self._data_format, - use_bias=True) - self._se_expand = tf.layers.Conv2D( - filters, - kernel_size=[1, 1], - strides=[1, 1], - kernel_initializer=conv_kernel_initializer, - padding='same', - data_format=self._data_format, - use_bias=True) - - # Output phase: - filters = self._block_args.output_filters - self._project_conv = tf.layers.Conv2D( - filters, - kernel_size=[1, 1], - strides=[1, 1], - kernel_initializer=conv_kernel_initializer, - padding='same', - data_format=self._data_format, - use_bias=False) - self._bn2 = self._batch_norm( - axis=self._channel_axis, - momentum=self._batch_norm_momentum, - epsilon=self._batch_norm_epsilon) - - def _call_se(self, input_tensor): - """Call Squeeze and Excitation layer. + super(MBConvBlock, self).__init__() + self._block_args = block_args + self._batch_norm_momentum = global_params.batch_norm_momentum + self._batch_norm_epsilon = global_params.batch_norm_epsilon + self._batch_norm = global_params.batch_norm + self._data_format = global_params.data_format + if self._data_format == "channels_first": + self._channel_axis = 1 + self._spatial_dims = [2, 3] + else: + self._channel_axis = -1 + self._spatial_dims = [1, 2] + + self._relu_fn = global_params.relu_fn or tf.nn.swish + self._has_se = ( + global_params.use_se + and self._block_args.se_ratio is not None + and 0 < self._block_args.se_ratio <= 1 + ) + + self.endpoints = None + + # Builds the block accordings to arguments. 
+ self._build() + + def block_args(self): + return self._block_args + + def _build(self): + """Builds block according to the arguments.""" + filters = self._block_args.input_filters * self._block_args.expand_ratio + if self._block_args.expand_ratio != 1: + # Expansion phase: + self._expand_conv = tf.layers.Conv2D( + filters, + kernel_size=[1, 1], + strides=[1, 1], + kernel_initializer=conv_kernel_initializer, + padding="same", + data_format=self._data_format, + use_bias=False, + ) + self._bn0 = self._batch_norm( + axis=self._channel_axis, + momentum=self._batch_norm_momentum, + epsilon=self._batch_norm_epsilon, + ) + + kernel_size = self._block_args.kernel_size + # Depth-wise convolution phase: + self._depthwise_conv = utils.DepthwiseConv2D( + [kernel_size, kernel_size], + strides=self._block_args.strides, + depthwise_initializer=conv_kernel_initializer, + padding="same", + data_format=self._data_format, + use_bias=False, + ) + self._bn1 = self._batch_norm( + axis=self._channel_axis, + momentum=self._batch_norm_momentum, + epsilon=self._batch_norm_epsilon, + ) + + if self._has_se: + num_reduced_filters = max( + 1, int(self._block_args.input_filters * self._block_args.se_ratio) + ) + # Squeeze and Excitation layer. + self._se_reduce = tf.layers.Conv2D( + num_reduced_filters, + kernel_size=[1, 1], + strides=[1, 1], + kernel_initializer=conv_kernel_initializer, + padding="same", + data_format=self._data_format, + use_bias=True, + ) + self._se_expand = tf.layers.Conv2D( + filters, + kernel_size=[1, 1], + strides=[1, 1], + kernel_initializer=conv_kernel_initializer, + padding="same", + data_format=self._data_format, + use_bias=True, + ) + + # Output phase: + filters = self._block_args.output_filters + self._project_conv = tf.layers.Conv2D( + filters, + kernel_size=[1, 1], + strides=[1, 1], + kernel_initializer=conv_kernel_initializer, + padding="same", + data_format=self._data_format, + use_bias=False, + ) + self._bn2 = self._batch_norm( + axis=self._channel_axis, + momentum=self._batch_norm_momentum, + epsilon=self._batch_norm_epsilon, + ) + + def _call_se(self, input_tensor): + """Call Squeeze and Excitation layer. Args: input_tensor: Tensor, a single input tensor for Squeeze/Excitation layer. Returns: A output tensor, which should have the same shape as input. """ - se_tensor = tf.reduce_mean(input_tensor, self._spatial_dims, keepdims=True) - se_tensor = self._se_expand(self._relu_fn(self._se_reduce(se_tensor))) - tf.logging.info('Built Squeeze and Excitation with tensor shape: %s' % - (se_tensor.shape)) - return tf.sigmoid(se_tensor) * input_tensor - - def call(self, inputs, use_batch_norm=False, drop_out=False, drop_connect_rate=None): - """Implementation of call(). + se_tensor = tf.reduce_mean(input_tensor, self._spatial_dims, keepdims=True) + se_tensor = self._se_expand(self._relu_fn(self._se_reduce(se_tensor))) + tf.logging.info( + "Built Squeeze and Excitation with tensor shape: %s" % (se_tensor.shape) + ) + return tf.sigmoid(se_tensor) * input_tensor + + def call( + self, inputs, use_batch_norm=False, drop_out=False, drop_connect_rate=None + ): + """Implementation of call(). Args: inputs: the inputs tensor. training: boolean, whether the model is constructed for training. @@ -242,71 +277,80 @@ def call(self, inputs, use_batch_norm=False, drop_out=False, drop_connect_rate=N Returns: A output tensor. 
""" - tf.logging.info('Block input: %s shape: %s' % (inputs.name, inputs.shape)) - if self._block_args.expand_ratio != 1: - x = self._relu_fn(self._bn0(self._expand_conv(inputs), training=use_batch_norm)) - else: - x = inputs - tf.logging.info('Expand: %s shape: %s' % (x.name, x.shape)) - - x = self._relu_fn(self._bn1(self._depthwise_conv(x), training=use_batch_norm)) - tf.logging.info('DWConv: %s shape: %s' % (x.name, x.shape)) - - if self._has_se: - with tf.variable_scope('se'): - x = self._call_se(x) - - self.endpoints = {'expansion_output': x} - - x = self._bn2(self._project_conv(x), training=use_batch_norm) - if self._block_args.id_skip: - if all( - s == 1 for s in self._block_args.strides - ) and self._block_args.input_filters == self._block_args.output_filters: - # only apply drop_connect if skip presents. - if drop_connect_rate: - x = utils.drop_connect(x, drop_out, drop_connect_rate) - x = tf.add(x, inputs) - tf.logging.info('Project: %s shape: %s' % (x.name, x.shape)) - return x + tf.logging.info("Block input: %s shape: %s" % (inputs.name, inputs.shape)) + if self._block_args.expand_ratio != 1: + x = self._relu_fn( + self._bn0(self._expand_conv(inputs), training=use_batch_norm) + ) + else: + x = inputs + tf.logging.info("Expand: %s shape: %s" % (x.name, x.shape)) + + x = self._relu_fn(self._bn1(self._depthwise_conv(x), training=use_batch_norm)) + tf.logging.info("DWConv: %s shape: %s" % (x.name, x.shape)) + + if self._has_se: + with tf.variable_scope("se"): + x = self._call_se(x) + + self.endpoints = {"expansion_output": x} + + x = self._bn2(self._project_conv(x), training=use_batch_norm) + if self._block_args.id_skip: + if ( + all(s == 1 for s in self._block_args.strides) + and self._block_args.input_filters == self._block_args.output_filters + ): + # only apply drop_connect if skip presents. + if drop_connect_rate: + x = utils.drop_connect(x, drop_out, drop_connect_rate) + x = tf.add(x, inputs) + tf.logging.info("Project: %s shape: %s" % (x.name, x.shape)) + return x class MBConvBlockWithoutDepthwise(MBConvBlock): - """MBConv-like block without depthwise convolution and squeeze-and-excite.""" - - def _build(self): - """Builds block according to the arguments.""" - filters = self._block_args.input_filters * self._block_args.expand_ratio - if self._block_args.expand_ratio != 1: - # Expansion phase: - self._expand_conv = tf.layers.Conv2D( - filters, - kernel_size=[3, 3], - strides=[1, 1], - kernel_initializer=conv_kernel_initializer, - padding='same', - use_bias=False) - self._bn0 = self._batch_norm( - axis=self._channel_axis, - momentum=self._batch_norm_momentum, - epsilon=self._batch_norm_epsilon) - - # Output phase: - filters = self._block_args.output_filters - self._project_conv = tf.layers.Conv2D( - filters, - kernel_size=[1, 1], - strides=self._block_args.strides, - kernel_initializer=conv_kernel_initializer, - padding='same', - use_bias=False) - self._bn1 = self._batch_norm( - axis=self._channel_axis, - momentum=self._batch_norm_momentum, - epsilon=self._batch_norm_epsilon) - - def call(self, inputs, use_batch_norm=False, drop_out=False, drop_connect_rate=None): - """Implementation of call(). 
+ """MBConv-like block without depthwise convolution and squeeze-and-excite.""" + + def _build(self): + """Builds block according to the arguments.""" + filters = self._block_args.input_filters * self._block_args.expand_ratio + if self._block_args.expand_ratio != 1: + # Expansion phase: + self._expand_conv = tf.layers.Conv2D( + filters, + kernel_size=[3, 3], + strides=[1, 1], + kernel_initializer=conv_kernel_initializer, + padding="same", + use_bias=False, + ) + self._bn0 = self._batch_norm( + axis=self._channel_axis, + momentum=self._batch_norm_momentum, + epsilon=self._batch_norm_epsilon, + ) + + # Output phase: + filters = self._block_args.output_filters + self._project_conv = tf.layers.Conv2D( + filters, + kernel_size=[1, 1], + strides=self._block_args.strides, + kernel_initializer=conv_kernel_initializer, + padding="same", + use_bias=False, + ) + self._bn1 = self._batch_norm( + axis=self._channel_axis, + momentum=self._batch_norm_momentum, + epsilon=self._batch_norm_epsilon, + ) + + def call( + self, inputs, use_batch_norm=False, drop_out=False, drop_connect_rate=None + ): + """Implementation of call(). Args: inputs: the inputs tensor. training: boolean, whether the model is constructed for training. @@ -314,132 +358,141 @@ def call(self, inputs, use_batch_norm=False, drop_out=False, drop_connect_rate=N Returns: A output tensor. """ - tf.logging.info('Block input: %s shape: %s' % (inputs.name, inputs.shape)) - if self._block_args.expand_ratio != 1: - x = self._relu_fn(self._bn0(self._expand_conv(inputs), training=use_batch_norm)) - else: - x = inputs - tf.logging.info('Expand: %s shape: %s' % (x.name, x.shape)) - - self.endpoints = {'expansion_output': x} - - x = self._bn1(self._project_conv(x), training=use_batch_norm) - if self._block_args.id_skip: - if all( - s == 1 for s in self._block_args.strides - ) and self._block_args.input_filters == self._block_args.output_filters: - # only apply drop_connect if skip presents. - if drop_connect_rate: - x = utils.drop_connect(x, drop_out, drop_connect_rate) - x = tf.add(x, inputs) - tf.logging.info('Project: %s shape: %s' % (x.name, x.shape)) - return x + tf.logging.info("Block input: %s shape: %s" % (inputs.name, inputs.shape)) + if self._block_args.expand_ratio != 1: + x = self._relu_fn( + self._bn0(self._expand_conv(inputs), training=use_batch_norm) + ) + else: + x = inputs + tf.logging.info("Expand: %s shape: %s" % (x.name, x.shape)) + + self.endpoints = {"expansion_output": x} + + x = self._bn1(self._project_conv(x), training=use_batch_norm) + if self._block_args.id_skip: + if ( + all(s == 1 for s in self._block_args.strides) + and self._block_args.input_filters == self._block_args.output_filters + ): + # only apply drop_connect if skip presents. + if drop_connect_rate: + x = utils.drop_connect(x, drop_out, drop_connect_rate) + x = tf.add(x, inputs) + tf.logging.info("Project: %s shape: %s" % (x.name, x.shape)) + return x class Model(tf.keras.Model): - """A class implements tf.keras.Model for MNAS-like model. + """A class implements tf.keras.Model for MNAS-like model. Reference: https://arxiv.org/abs/1807.11626 """ - def __init__(self, blocks_args=None, global_params=None): - """Initializes an `Model` instance. + def __init__(self, blocks_args=None, global_params=None): + """Initializes an `Model` instance. Args: blocks_args: A list of BlockArgs to construct block modules. global_params: GlobalParams, a set of global parameters. Raises: ValueError: when blocks_args is not specified as a list. 
""" - super(Model, self).__init__() - if not isinstance(blocks_args, list): - raise ValueError('blocks_args should be a list.') - self._global_params = global_params - self._blocks_args = blocks_args - self._relu_fn = global_params.relu_fn or tf.nn.swish - self._batch_norm = global_params.batch_norm - - self.endpoints = None - - self._build() - - def _get_conv_block(self, conv_type): - conv_block_map = {0: MBConvBlock, 1: MBConvBlockWithoutDepthwise} - return conv_block_map[conv_type] - - def _build(self): - """Builds a model.""" - self._blocks = [] - # Builds blocks. - for block_args in self._blocks_args: - assert block_args.num_repeat > 0 - # Update block input and output filters based on depth multiplier. - block_args = block_args._replace( - input_filters=round_filters(block_args.input_filters, - self._global_params), - output_filters=round_filters(block_args.output_filters, - self._global_params), - num_repeat=round_repeats(block_args.num_repeat, self._global_params)) - - # The first block needs to take care of stride and filter size increase. - conv_block = self._get_conv_block(block_args.conv_type) - self._blocks.append(conv_block(block_args, self._global_params)) - if block_args.num_repeat > 1: - # pylint: disable=protected-access - block_args = block_args._replace( - input_filters=block_args.output_filters, strides=[1, 1]) - # pylint: enable=protected-access - for _ in xrange(block_args.num_repeat - 1): - self._blocks.append(conv_block(block_args, self._global_params)) - - batch_norm_momentum = self._global_params.batch_norm_momentum - batch_norm_epsilon = self._global_params.batch_norm_epsilon - if self._global_params.data_format == 'channels_first': - channel_axis = 1 - else: - channel_axis = -1 - - # Stem part. - self._conv_stem = tf.layers.Conv2D( - filters=round_filters(32, self._global_params), - kernel_size=[3, 3], - strides=[2, 2], - kernel_initializer=conv_kernel_initializer, - padding='same', - data_format=self._global_params.data_format, - use_bias=False) - self._bn0 = self._batch_norm( - axis=channel_axis, - momentum=batch_norm_momentum, - epsilon=batch_norm_epsilon) - - # Head part. - self._conv_head = tf.layers.Conv2D( - filters=round_filters(1280, self._global_params), - kernel_size=[1, 1], - strides=[1, 1], - kernel_initializer=conv_kernel_initializer, - padding='same', - use_bias=False) - self._bn1 = self._batch_norm( - axis=channel_axis, - momentum=batch_norm_momentum, - epsilon=batch_norm_epsilon) - - self._avg_pooling = tf.keras.layers.GlobalAveragePooling2D( - data_format=self._global_params.data_format) - if self._global_params.num_classes: - self._fc = tf.layers.Dense( - self._global_params.num_classes, - kernel_initializer=dense_kernel_initializer) - else: - self._fc = None - - if self._global_params.dropout_rate > 0: - self._dropout = tf.keras.layers.Dropout(self._global_params.dropout_rate) - else: - self._dropout = None - - def call(self, inputs, use_batch_norm=False, drop_out=False, features_only=None): - """Implementation of call(). 
+ super(Model, self).__init__() + if not isinstance(blocks_args, list): + raise ValueError("blocks_args should be a list.") + self._global_params = global_params + self._blocks_args = blocks_args + self._relu_fn = global_params.relu_fn or tf.nn.swish + self._batch_norm = global_params.batch_norm + + self.endpoints = None + + self._build() + + def _get_conv_block(self, conv_type): + conv_block_map = {0: MBConvBlock, 1: MBConvBlockWithoutDepthwise} + return conv_block_map[conv_type] + + def _build(self): + """Builds a model.""" + self._blocks = [] + # Builds blocks. + for block_args in self._blocks_args: + assert block_args.num_repeat > 0 + # Update block input and output filters based on depth multiplier. + block_args = block_args._replace( + input_filters=round_filters( + block_args.input_filters, self._global_params + ), + output_filters=round_filters( + block_args.output_filters, self._global_params + ), + num_repeat=round_repeats(block_args.num_repeat, self._global_params), + ) + + # The first block needs to take care of stride and filter size increase. + conv_block = self._get_conv_block(block_args.conv_type) + self._blocks.append(conv_block(block_args, self._global_params)) + if block_args.num_repeat > 1: + # pylint: disable=protected-access + block_args = block_args._replace( + input_filters=block_args.output_filters, strides=[1, 1] + ) + # pylint: enable=protected-access + for _ in xrange(block_args.num_repeat - 1): + self._blocks.append(conv_block(block_args, self._global_params)) + + batch_norm_momentum = self._global_params.batch_norm_momentum + batch_norm_epsilon = self._global_params.batch_norm_epsilon + if self._global_params.data_format == "channels_first": + channel_axis = 1 + else: + channel_axis = -1 + + # Stem part. + self._conv_stem = tf.layers.Conv2D( + filters=round_filters(32, self._global_params), + kernel_size=[3, 3], + strides=[2, 2], + kernel_initializer=conv_kernel_initializer, + padding="same", + data_format=self._global_params.data_format, + use_bias=False, + ) + self._bn0 = self._batch_norm( + axis=channel_axis, momentum=batch_norm_momentum, epsilon=batch_norm_epsilon + ) + + # Head part. + self._conv_head = tf.layers.Conv2D( + filters=round_filters(1280, self._global_params), + kernel_size=[1, 1], + strides=[1, 1], + kernel_initializer=conv_kernel_initializer, + padding="same", + use_bias=False, + ) + self._bn1 = self._batch_norm( + axis=channel_axis, momentum=batch_norm_momentum, epsilon=batch_norm_epsilon + ) + + self._avg_pooling = tf.keras.layers.GlobalAveragePooling2D( + data_format=self._global_params.data_format + ) + if self._global_params.num_classes: + self._fc = tf.layers.Dense( + self._global_params.num_classes, + kernel_initializer=dense_kernel_initializer, + ) + else: + self._fc = None + + if self._global_params.dropout_rate > 0: + self._dropout = tf.keras.layers.Dropout(self._global_params.dropout_rate) + else: + self._dropout = None + + def call(self, inputs, use_batch_norm=False, drop_out=False, features_only=None): + """Implementation of call(). Args: inputs: input tensors. training: boolean, whether the model is constructed for training. @@ -447,51 +500,58 @@ def call(self, inputs, use_batch_norm=False, drop_out=False, features_only=None) Returns: output tensors. 
""" - outputs = None - self.endpoints = {} - # Calls Stem layers - with tf.variable_scope('stem'): - outputs = self._relu_fn( - self._bn0(self._conv_stem(inputs), training=use_batch_norm)) - tf.logging.info('Built stem layers with output shape: %s' % outputs.shape) - self.endpoints['stem'] = outputs - - # Calls blocks. - reduction_idx = 0 - for idx, block in enumerate(self._blocks): - is_reduction = False - if ((idx == len(self._blocks) - 1) or - self._blocks[idx + 1].block_args().strides[0] > 1): - is_reduction = True - reduction_idx += 1 - - with tf.variable_scope('blocks_%s' % idx): - drop_rate = self._global_params.drop_connect_rate - if drop_rate: - drop_rate *= float(idx) / len(self._blocks) - tf.logging.info('block_%s drop_connect_rate: %s' % (idx, drop_rate)) - outputs = block.call( - outputs, use_batch_norm=use_batch_norm, drop_out=drop_out, drop_connect_rate=drop_rate) - self.endpoints['block_%s' % idx] = outputs - if is_reduction: - self.endpoints['reduction_%s' % reduction_idx] = outputs - if block.endpoints: - for k, v in six.iteritems(block.endpoints): - self.endpoints['block_%s/%s' % (idx, k)] = v - if is_reduction: - self.endpoints['reduction_%s/%s' % (reduction_idx, k)] = v - self.endpoints['features'] = outputs - - if not features_only: - # Calls final layers and returns logits. - with tf.variable_scope('head'): - outputs = self._relu_fn( - self._bn1(self._conv_head(outputs), training=use_batch_norm)) - outputs = self._avg_pooling(outputs) - if self._dropout: - outputs = self._dropout(outputs, training=drop_out) - self.endpoints['global_pool'] = outputs - if self._fc: - outputs = self._fc(outputs) - self.endpoints['head'] = outputs - return outputs + outputs = None + self.endpoints = {} + # Calls Stem layers + with tf.variable_scope("stem"): + outputs = self._relu_fn( + self._bn0(self._conv_stem(inputs), training=use_batch_norm) + ) + tf.logging.info("Built stem layers with output shape: %s" % outputs.shape) + self.endpoints["stem"] = outputs + + # Calls blocks. + reduction_idx = 0 + for idx, block in enumerate(self._blocks): + is_reduction = False + if (idx == len(self._blocks) - 1) or self._blocks[ + idx + 1 + ].block_args().strides[0] > 1: + is_reduction = True + reduction_idx += 1 + + with tf.variable_scope("blocks_%s" % idx): + drop_rate = self._global_params.drop_connect_rate + if drop_rate: + drop_rate *= float(idx) / len(self._blocks) + tf.logging.info("block_%s drop_connect_rate: %s" % (idx, drop_rate)) + outputs = block.call( + outputs, + use_batch_norm=use_batch_norm, + drop_out=drop_out, + drop_connect_rate=drop_rate, + ) + self.endpoints["block_%s" % idx] = outputs + if is_reduction: + self.endpoints["reduction_%s" % reduction_idx] = outputs + if block.endpoints: + for k, v in six.iteritems(block.endpoints): + self.endpoints["block_%s/%s" % (idx, k)] = v + if is_reduction: + self.endpoints["reduction_%s/%s" % (reduction_idx, k)] = v + self.endpoints["features"] = outputs + + if not features_only: + # Calls final layers and returns logits. 
+ with tf.variable_scope("head"): + outputs = self._relu_fn( + self._bn1(self._conv_head(outputs), training=use_batch_norm) + ) + outputs = self._avg_pooling(outputs) + if self._dropout: + outputs = self._dropout(outputs, training=drop_out) + self.endpoints["global_pool"] = outputs + if self._fc: + outputs = self._fc(outputs) + self.endpoints["head"] = outputs + return outputs diff --git a/deeplabcut/pose_estimation_tensorflow/nnet/net_factory.py b/deeplabcut/pose_estimation_tensorflow/nnet/net_factory.py index bb7248dfe..1b50711e9 100755 --- a/deeplabcut/pose_estimation_tensorflow/nnet/net_factory.py +++ b/deeplabcut/pose_estimation_tensorflow/nnet/net_factory.py @@ -10,7 +10,7 @@ def pose_net(cfg): - net_type = cfg['net_type'] + net_type = cfg["net_type"] if "mobilenet" in net_type: # multi currently not supported if ( cfg.get("stride", 8) < 8 @@ -23,6 +23,7 @@ def pose_net(cfg): from deeplabcut.pose_estimation_tensorflow.nnet.pose_net_mobilenet import ( PoseNet, ) + cls = PoseNet elif "resnet" in net_type: @@ -40,7 +41,7 @@ def pose_net(cfg): from deeplabcut.pose_estimation_tensorflow.nnet.pose_net import PoseNet cls = PoseNet - elif 'efficientnet' in net_type: + elif "efficientnet" in net_type: if ( cfg.get("stride", 8) < 8 ): # this supports multianimal (with PAFs) or pairwise prediction @@ -49,7 +50,10 @@ def pose_net(cfg): cls = PoseNet else: print("Initializing Efficientnet") - from deeplabcut.pose_estimation_tensorflow.nnet.pose_net_efficientnet import PoseNet + from deeplabcut.pose_estimation_tensorflow.nnet.pose_net_efficientnet import ( + PoseNet, + ) + cls = PoseNet else: raise Exception('Unsupported class of network: "{}"'.format(net_type)) diff --git a/deeplabcut/pose_estimation_tensorflow/nnet/pose_net.py b/deeplabcut/pose_estimation_tensorflow/nnet/pose_net.py index 596822242..839f40482 100644 --- a/deeplabcut/pose_estimation_tensorflow/nnet/pose_net.py +++ b/deeplabcut/pose_estimation_tensorflow/nnet/pose_net.py @@ -39,7 +39,7 @@ def prediction_layer(cfg, input, name, num_outputs): padding="SAME", activation_fn=None, normalizer_fn=None, - weights_regularizer=slim.l2_regularizer(cfg['weight_decay']), + weights_regularizer=slim.l2_regularizer(cfg["weight_decay"]), ): with tf.variable_scope(name): pred = slim.conv2d_transpose( @@ -49,9 +49,9 @@ def prediction_layer(cfg, input, name, num_outputs): def get_batch_spec(cfg): - num_joints = cfg['num_joints'] - num_limbs = cfg['num_limbs'] - batch_size = cfg['batch_size'] + num_joints = cfg["num_joints"] + num_limbs = cfg["num_limbs"] + batch_size = cfg["batch_size"] batch_spec = { Batch.inputs: [batch_size, None, None, 3], Batch.part_score_targets: [ @@ -67,13 +67,13 @@ def get_batch_spec(cfg): num_joints + cfg.get("num_idchannel", 0), ], } - if cfg['location_refinement']: + if cfg["location_refinement"]: batch_spec[Batch.locref_targets] = [batch_size, None, None, num_joints * 2] batch_spec[Batch.locref_mask] = [batch_size, None, None, num_joints * 2] - if cfg['pairwise_predict']: - print("Getting specs", cfg['dataset_type'], num_limbs, num_joints) + if cfg["pairwise_predict"]: + print("Getting specs", cfg["dataset_type"], num_limbs, num_joints) if ( - "multi-animal" not in cfg['dataset_type'] + "multi-animal" not in cfg["dataset_type"] ): # this can be used for pairwise conditional batch_spec[Batch.pairwise_targets] = [ batch_size, @@ -88,18 +88,8 @@ def get_batch_spec(cfg): num_joints * (num_joints - 1) * 2, ] else: # train partaffinity fields - batch_spec[Batch.pairwise_targets] = [ - batch_size, - None, - None, - num_limbs * 
2, - ] - batch_spec[Batch.pairwise_mask] = [ - batch_size, - None, - None, - num_limbs * 2, - ] + batch_spec[Batch.pairwise_targets] = [batch_size, None, None, num_limbs * 2] + batch_spec[Batch.pairwise_mask] = [batch_size, None, None, num_limbs * 2] return batch_spec @@ -108,10 +98,13 @@ def __init__(self, cfg): self.cfg = cfg def extract_features(self, inputs): - net_fun = net_funcs[self.cfg['net_type']] + net_fun = net_funcs[self.cfg["net_type"]] mean = tf.constant( - self.cfg['mean_pixel'], dtype=tf.float32, shape=[1, 1, 1, 3], name="img_mean" + self.cfg["mean_pixel"], + dtype=tf.float32, + shape=[1, 1, 1, 3], + name="img_mean", ) im_centered = inputs - mean @@ -137,8 +130,8 @@ def prediction_layers( self, features, end_points, reuse=None, no_interm=False, scope="pose" ): cfg = self.cfg - n_joints = cfg['num_joints'] - num_layers = re.findall("resnet_([0-9]*)", cfg['net_type'])[0] + n_joints = cfg["num_joints"] + num_layers = re.findall("resnet_([0-9]*)", cfg["net_type"])[0] layer_name = ( "resnet_v1_{}".format(num_layers) + "/block{}/unit_{}/bottleneck_v1" ) @@ -148,23 +141,25 @@ def prediction_layers( out["part_pred"] = prediction_layer( cfg, features, "part_pred", n_joints + cfg.get("num_idchannel", 0) ) - if cfg['location_refinement']: + if cfg["location_refinement"]: out["locref"] = prediction_layer( cfg, features, "locref_pred", n_joints * 2 ) - if cfg['pairwise_predict'] and "multi-animal" not in cfg['dataset_type']: + if cfg["pairwise_predict"] and "multi-animal" not in cfg["dataset_type"]: out["pairwise_pred"] = prediction_layer( - cfg, - features, - "pairwise_pred", - n_joints * (n_joints - 1) * 2, + cfg, features, "pairwise_pred", n_joints * (n_joints - 1) * 2 ) - if cfg['partaffinityfield_predict'] and "multi-animal" in cfg['dataset_type']: + if ( + cfg["partaffinityfield_predict"] + and "multi-animal" in cfg["dataset_type"] + ): out["pairwise_pred"] = prediction_layer( - cfg, features, "pairwise_pred", cfg['num_limbs'] * 2 + cfg, features, "pairwise_pred", cfg["num_limbs"] * 2 + ) + if cfg["intermediate_supervision"] and not no_interm: + interm_name = layer_name.format( + 3, cfg["intermediate_supervision_layer"] ) - if cfg['intermediate_supervision'] and not no_interm: - interm_name = layer_name.format(3, cfg['intermediate_supervision_layer']) block_interm_out = end_points[interm_name] out["part_pred_interm"] = prediction_layer( cfg, @@ -191,7 +186,7 @@ def inference(self, inputs): locref = heads["locref"] probs = tf.sigmoid(heads["part_pred"]) - if self.cfg['batch_size'] == 1: + if self.cfg["batch_size"] == 1: # assuming batchsize 1 here! 
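# --- Editor's note: illustrative sketch, not part of the patch. ---
# The inference code in the hunks that follow converts score-map cell
# coordinates back to image pixels as
#   pose = stride * loc + stride / 2 + offset * locref_stdev.
# The same arithmetic in NumPy; locref_stdev = 7.2801 is the value DLC
# pose configs typically use, and the other numbers are made up:
import numpy as np

stride, locref_stdev = 8.0, 7.2801
loc = np.array([12.0, 34.0])    # argmax cell (x, y) on the score map
offset = np.array([0.3, -1.1])  # predicted sub-cell refinement
pose_xy = stride * loc + stride * 0.5 + offset * locref_stdev
# pose_xy ~= [102.18, 267.99]: the cell center in pixels plus the refinement
# --- End editor's note. ---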
probs = tf.squeeze(probs, axis=0) locref = tf.squeeze(locref, axis=0) @@ -216,9 +211,9 @@ def inference(self, inputs): likelihood = tf.reshape(tf.gather_nd(probs, indices), (-1, 1)) pose = ( - self.cfg['stride'] * tf.cast(tf.transpose(loc), dtype=tf.float32) - + self.cfg['stride'] * 0.5 - + offset * self.cfg['locref_stdev'] + self.cfg["stride"] * tf.cast(tf.transpose(loc), dtype=tf.float32) + + self.cfg["stride"] * 0.5 + + offset * self.cfg["locref_stdev"] ) pose = tf.concat([pose, likelihood], axis=1) @@ -257,9 +252,9 @@ def inference(self, inputs): likelihood = tf.reshape(tf.gather_nd(probs, indices), (-1, 1)) pose = ( - self.cfg['stride'] * tf.cast(tf.transpose(loc), dtype=tf.float32) - + self.cfg['stride'] * 0.5 - + offset * self.cfg['locref_stdev'] + self.cfg["stride"] * tf.cast(tf.transpose(loc), dtype=tf.float32) + + self.cfg["stride"] * 0.5 + + offset * self.cfg["locref_stdev"] ) pose = tf.concat([pose, likelihood], axis=1) return {"pose": pose} @@ -268,9 +263,9 @@ def add_inference_layers(self, heads): """ initialized during inference """ prob = tf.sigmoid(heads["part_pred"]) outputs = {"part_prob": prob} - if self.cfg['location_refinement']: + if self.cfg["location_refinement"]: outputs["locref"] = heads["locref"] - if self.cfg['pairwise_predict'] or self.cfg['partaffinityfield_predict']: + if self.cfg["pairwise_predict"] or self.cfg["partaffinityfield_predict"]: outputs["pairwise_pred"] = heads["pairwise_pred"] return outputs @@ -278,7 +273,7 @@ def train(self, batch): cfg = self.cfg heads = self.get_net(batch[Batch.inputs]) - weigh_part_predictions = cfg['weigh_part_predictions'] + weigh_part_predictions = cfg["weigh_part_predictions"] part_score_weights = ( batch[Batch.part_score_weights] if weigh_part_predictions else 1.0 ) @@ -291,25 +286,25 @@ def add_part_loss(pred_layer): loss = {} loss["part_loss"] = add_part_loss("part_pred") total_loss = loss["part_loss"] - if cfg['intermediate_supervision']: + if cfg["intermediate_supervision"]: loss["part_loss_interm"] = add_part_loss("part_pred_interm") total_loss = total_loss + loss["part_loss_interm"] - if cfg['location_refinement']: + if cfg["location_refinement"]: locref_pred = heads["locref"] locref_targets = batch[Batch.locref_targets] locref_weights = batch[Batch.locref_mask] loss_func = ( losses.huber_loss - if cfg['locref_huber_loss'] + if cfg["locref_huber_loss"] else tf.losses.mean_squared_error ) - loss["locref_loss"] = cfg['locref_loss_weight'] * loss_func( + loss["locref_loss"] = cfg["locref_loss_weight"] * loss_func( locref_targets, locref_pred, locref_weights ) total_loss = total_loss + loss["locref_loss"] - if cfg['pairwise_predict'] or cfg['partaffinityfield_predict']: + if cfg["pairwise_predict"] or cfg["partaffinityfield_predict"]: # setting pairwise bodypart loss pairwise_pred = heads["pairwise_pred"] pairwise_targets = batch[Batch.pairwise_targets] @@ -317,10 +312,10 @@ def add_part_loss(pred_layer): loss_func = ( losses.huber_loss - if cfg['pairwise_huber_loss'] + if cfg["pairwise_huber_loss"] else tf.losses.mean_squared_error ) - loss["pairwise_loss"] = cfg['pairwise_loss_weight'] * loss_func( + loss["pairwise_loss"] = cfg["pairwise_loss_weight"] * loss_func( pairwise_targets, pairwise_pred, pairwise_weights ) total_loss = total_loss + loss["pairwise_loss"] diff --git a/deeplabcut/pose_estimation_tensorflow/nnet/pose_net_efficientnet.py b/deeplabcut/pose_estimation_tensorflow/nnet/pose_net_efficientnet.py index eba5b5121..e065c81b8 100644 --- 
a/deeplabcut/pose_estimation_tensorflow/nnet/pose_net_efficientnet.py +++ b/deeplabcut/pose_estimation_tensorflow/nnet/pose_net_efficientnet.py @@ -1,4 +1,4 @@ -''' +""" DeepLabCut 2.1.9 Toolbox (deeplabcut.org) © A. & M. Mathis Labs https://github.com/AlexEMG/DeepLabCut @@ -14,7 +14,7 @@ Efficient Nets added by T. Biasi & AM See https://openaccess.thecvf.com/content/WACV2021/html/Mathis_Pretraining_Boosts_Out-of-Domain_Robustness_for_Pose_Estimation_WACV_2021_paper.html -''' +""" import re import tensorflow as tf @@ -25,19 +25,23 @@ def prediction_layer(cfg, input, name, num_outputs): - with slim.arg_scope([slim.conv2d, slim.conv2d_transpose], padding='SAME', - activation_fn=None, normalizer_fn=None, - weights_regularizer=slim.l2_regularizer(cfg['weight_decay'])): + with slim.arg_scope( + [slim.conv2d, slim.conv2d_transpose], + padding="SAME", + activation_fn=None, + normalizer_fn=None, + weights_regularizer=slim.l2_regularizer(cfg["weight_decay"]), + ): with tf.variable_scope(name): - pred = slim.conv2d_transpose(input, num_outputs, - kernel_size=[3, 3], stride=2, - scope='block4') + pred = slim.conv2d_transpose( + input, num_outputs, kernel_size=[3, 3], stride=2, scope="block4" + ) return pred def get_batch_spec(cfg): - num_joints = cfg['num_joints'] - batch_size = cfg['batch_size'] + num_joints = cfg["num_joints"] + batch_size = cfg["batch_size"] batch_spec = { Batch.inputs: [batch_size, None, None, 3], Batch.part_score_targets: [ @@ -53,13 +57,13 @@ def get_batch_spec(cfg): num_joints + cfg.get("num_idchannel", 0), ], } - if cfg['location_refinement']: + if cfg["location_refinement"]: batch_spec[Batch.locref_targets] = [batch_size, None, None, num_joints * 2] batch_spec[Batch.locref_mask] = [batch_size, None, None, num_joints * 2] - if cfg['pairwise_predict']: - print("Getting specs", cfg['dataset_type'], cfg['num_limbs'], cfg['num_joints']) + if cfg["pairwise_predict"]: + print("Getting specs", cfg["dataset_type"], cfg["num_limbs"], cfg["num_joints"]) if ( - "multi-animal" not in cfg['dataset_type'] + "multi-animal" not in cfg["dataset_type"] ): # this can be used for pairwise conditional batch_spec[Batch.pairwise_targets] = [ batch_size, @@ -78,13 +82,13 @@ def get_batch_spec(cfg): batch_size, None, None, - cfg['num_limbs'] * 2, + cfg["num_limbs"] * 2, ] batch_spec[Batch.pairwise_mask] = [ batch_size, None, None, - cfg['num_limbs'] * 2, + cfg["num_limbs"] * 2, ] return batch_spec @@ -92,45 +96,63 @@ def get_batch_spec(cfg): class PoseNet: def __init__(self, cfg): self.cfg = cfg - if 'use_batch_norm' not in self.cfg.keys(): - self.cfg['use_batch_norm'] = False - if 'use_drop_out' not in self.cfg.keys(): - self.cfg['use_drop_out'] = False + if "use_batch_norm" not in self.cfg.keys(): + self.cfg["use_batch_norm"] = False + if "use_drop_out" not in self.cfg.keys(): + self.cfg["use_drop_out"] = False def extract_features(self, inputs, use_batch_norm=False, use_drop_out=False): - mean = tf.constant(self.cfg['mean_pixel'], - dtype=tf.float32, shape=[1, 1, 1, 3], name='img_mean') + mean = tf.constant( + self.cfg["mean_pixel"], + dtype=tf.float32, + shape=[1, 1, 1, 3], + name="img_mean", + ) im_centered = inputs - mean - im_centered /= tf.constant( - eff.STDDEV_RGB, shape=[1, 1, 3]) + im_centered /= tf.constant(eff.STDDEV_RGB, shape=[1, 1, 3]) with tf.variable_scope("efficientnet"): - eff_net_type = self.cfg['net_type'].replace('_','-') - net, end_points = eff.build_model_base(im_centered, eff_net_type, use_batch_norm=use_batch_norm, drop_out=use_drop_out) + eff_net_type = 
self.cfg["net_type"].replace("_", "-") + net, end_points = eff.build_model_base( + im_centered, + eff_net_type, + use_batch_norm=use_batch_norm, + drop_out=use_drop_out, + ) return net, end_points def prediction_layers(self, features, end_points, reuse=None): cfg = self.cfg out = {} - with tf.variable_scope('pose', reuse=reuse): - out['part_pred'] = prediction_layer(cfg, features, 'part_pred', - cfg['num_joints'] + cfg.get("num_idchannel", 0)) - if cfg['location_refinement']: - out['locref'] = prediction_layer(cfg, features, 'locref_pred', - cfg['num_joints'] * 2) - if cfg['pairwise_predict'] and "multi-animal" not in cfg['dataset_type']: + with tf.variable_scope("pose", reuse=reuse): + out["part_pred"] = prediction_layer( + cfg, + features, + "part_pred", + cfg["num_joints"] + cfg.get("num_idchannel", 0), + ) + if cfg["location_refinement"]: + out["locref"] = prediction_layer( + cfg, features, "locref_pred", cfg["num_joints"] * 2 + ) + if cfg["pairwise_predict"] and "multi-animal" not in cfg["dataset_type"]: out["pairwise_pred"] = prediction_layer( cfg, features, "pairwise_pred", - cfg['num_joints'] * (cfg['num_joints'] - 1) * 2, + cfg["num_joints"] * (cfg["num_joints"] - 1) * 2, ) - if cfg['partaffinityfield_predict'] and "multi-animal" in cfg['dataset_type']: + if ( + cfg["partaffinityfield_predict"] + and "multi-animal" in cfg["dataset_type"] + ): out["pairwise_pred"] = prediction_layer( - cfg, features, "pairwise_pred", cfg['num_limbs'] * 2 + cfg, features, "pairwise_pred", cfg["num_limbs"] * 2 + ) + if cfg["intermediate_supervision"]: + raise NotImplementedError( + "Intermediate supervision is currently disabled." ) - if cfg['intermediate_supervision']: - raise NotImplementedError("Intermediate supervision is currently disabled.") return out @@ -139,7 +161,9 @@ def get_net(self, inputs, use_batch_norm, use_drop_out): return self.prediction_layers(net, end_points) def test(self, inputs): - heads = self.get_net(inputs, self.cfg['use_batch_norm'], self.cfg['use_drop_out']) + heads = self.get_net( + inputs, self.cfg["use_batch_norm"], self.cfg["use_drop_out"] + ) return self.add_inference_layers(heads) def inference(self, inputs): @@ -150,7 +174,7 @@ def inference(self, inputs): locref = heads["locref"] probs = tf.sigmoid(heads["part_pred"]) - if self.cfg['batch_size'] == 1: + if self.cfg["batch_size"] == 1: # assuming batchsize 1 here! 
probs = tf.squeeze(probs, axis=0) locref = tf.squeeze(locref, axis=0) @@ -175,9 +199,9 @@ def inference(self, inputs): likelihood = tf.reshape(tf.gather_nd(probs, indices), (-1, 1)) pose = ( - self.cfg['stride'] * tf.cast(tf.transpose(loc), dtype=tf.float32) - + self.cfg['stride'] * 0.5 - + offset * self.cfg['locref_stdev'] + self.cfg["stride"] * tf.cast(tf.transpose(loc), dtype=tf.float32) + + self.cfg["stride"] * 0.5 + + offset * self.cfg["locref_stdev"] ) pose = tf.concat([pose, likelihood], axis=1) @@ -216,9 +240,9 @@ def inference(self, inputs): likelihood = tf.reshape(tf.gather_nd(probs, indices), (-1, 1)) pose = ( - self.cfg['stride'] * tf.cast(tf.transpose(loc), dtype=tf.float32) - + self.cfg['stride'] * 0.5 - + offset * self.cfg['locref_stdev'] + self.cfg["stride"] * tf.cast(tf.transpose(loc), dtype=tf.float32) + + self.cfg["stride"] * 0.5 + + offset * self.cfg["locref_stdev"] ) pose = tf.concat([pose, likelihood], axis=1) return {"pose": pose} @@ -227,41 +251,51 @@ def add_inference_layers(self, heads): """ initialized during inference """ prob = tf.sigmoid(heads["part_pred"]) outputs = {"part_prob": prob} - if self.cfg['location_refinement']: + if self.cfg["location_refinement"]: outputs["locref"] = heads["locref"] - if self.cfg['pairwise_predict'] or self.cfg['partaffinityfield_predict']: + if self.cfg["pairwise_predict"] or self.cfg["partaffinityfield_predict"]: outputs["pairwise_pred"] = heads["pairwise_pred"] return outputs def train(self, batch): cfg = self.cfg - heads = self.get_net(batch[Batch.inputs], self.cfg['use_batch_norm'], self.cfg['use_drop_out']) + heads = self.get_net( + batch[Batch.inputs], self.cfg["use_batch_norm"], self.cfg["use_drop_out"] + ) - weigh_part_predictions = cfg['weigh_part_predictions'] - part_score_weights = batch[Batch.part_score_weights] if weigh_part_predictions else 1.0 + weigh_part_predictions = cfg["weigh_part_predictions"] + part_score_weights = ( + batch[Batch.part_score_weights] if weigh_part_predictions else 1.0 + ) def add_part_loss(pred_layer): - return tf.losses.sigmoid_cross_entropy(batch[Batch.part_score_targets], - heads[pred_layer], - part_score_weights) + return tf.losses.sigmoid_cross_entropy( + batch[Batch.part_score_targets], heads[pred_layer], part_score_weights + ) loss = {} - loss['part_loss'] = add_part_loss('part_pred') - total_loss = loss['part_loss'] - if cfg['intermediate_supervision']: + loss["part_loss"] = add_part_loss("part_pred") + total_loss = loss["part_loss"] + if cfg["intermediate_supervision"]: raise NotImplementedError("Intermediate supervision is currently disabled.") - if cfg['location_refinement']: - locref_pred = heads['locref'] + if cfg["location_refinement"]: + locref_pred = heads["locref"] locref_targets = batch[Batch.locref_targets] locref_weights = batch[Batch.locref_mask] - loss_func = losses.huber_loss if cfg['locref_huber_loss'] else tf.losses.mean_squared_error - loss['locref_loss'] = cfg['locref_loss_weight'] * loss_func(locref_targets, locref_pred, locref_weights) - total_loss = total_loss + loss['locref_loss'] + loss_func = ( + losses.huber_loss + if cfg["locref_huber_loss"] + else tf.losses.mean_squared_error + ) + loss["locref_loss"] = cfg["locref_loss_weight"] * loss_func( + locref_targets, locref_pred, locref_weights + ) + total_loss = total_loss + loss["locref_loss"] - if cfg['pairwise_predict'] or cfg['partaffinityfield_predict']: + if cfg["pairwise_predict"] or cfg["partaffinityfield_predict"]: # setting pairwise bodypart loss pairwise_pred = heads["pairwise_pred"] pairwise_targets 
= batch[Batch.pairwise_targets] @@ -269,14 +303,14 @@ def add_part_loss(pred_layer): loss_func = ( losses.huber_loss - if cfg['pairwise_huber_loss'] + if cfg["pairwise_huber_loss"] else tf.losses.mean_squared_error ) - loss["pairwise_loss"] = cfg['pairwise_loss_weight'] * loss_func( + loss["pairwise_loss"] = cfg["pairwise_loss_weight"] * loss_func( pairwise_targets, pairwise_pred, pairwise_weights ) total_loss = total_loss + loss["pairwise_loss"] # loss['total_loss'] = slim.losses.get_total_loss(add_regularization_losses=params.regularize) - loss['total_loss'] = total_loss + loss["total_loss"] = total_loss return loss diff --git a/deeplabcut/pose_estimation_tensorflow/nnet/pose_net_mobilenet.py b/deeplabcut/pose_estimation_tensorflow/nnet/pose_net_mobilenet.py index 951b7a50b..0ee47fbb3 100755 --- a/deeplabcut/pose_estimation_tensorflow/nnet/pose_net_mobilenet.py +++ b/deeplabcut/pose_estimation_tensorflow/nnet/pose_net_mobilenet.py @@ -67,7 +67,7 @@ def prediction_layer(cfg, input, name, num_outputs): padding="SAME", activation_fn=None, normalizer_fn=None, - weights_regularizer=slim.l2_regularizer(cfg['weight_decay']), + weights_regularizer=slim.l2_regularizer(cfg["weight_decay"]), ): with tf.variable_scope(name): pred = slim.conv2d_transpose( @@ -75,10 +75,11 @@ def prediction_layer(cfg, input, name, num_outputs): ) return pred + def get_batch_spec(cfg): - num_joints = cfg['num_joints'] - num_limbs = cfg['num_limbs'] - batch_size = cfg['batch_size'] + num_joints = cfg["num_joints"] + num_limbs = cfg["num_limbs"] + batch_size = cfg["batch_size"] batch_spec = { Batch.inputs: [batch_size, None, None, 3], Batch.part_score_targets: [ @@ -94,13 +95,13 @@ def get_batch_spec(cfg): num_joints + cfg.get("num_idchannel", 0), ], } - if cfg['location_refinement']: + if cfg["location_refinement"]: batch_spec[Batch.locref_targets] = [batch_size, None, None, num_joints * 2] batch_spec[Batch.locref_mask] = [batch_size, None, None, num_joints * 2] - if cfg['pairwise_predict']: - print("Getting specs", cfg['dataset_type'], num_limbs, num_joints) + if cfg["pairwise_predict"]: + print("Getting specs", cfg["dataset_type"], num_limbs, num_joints) if ( - "multi-animal" not in cfg['dataset_type'] + "multi-animal" not in cfg["dataset_type"] ): # this can be used for pairwise conditional batch_spec[Batch.pairwise_targets] = [ batch_size, @@ -115,28 +116,22 @@ def get_batch_spec(cfg): num_joints * (num_joints - 1) * 2, ] else: # train partaffinity fields - batch_spec[Batch.pairwise_targets] = [ - batch_size, - None, - None, - num_limbs * 2, - ] - batch_spec[Batch.pairwise_mask] = [ - batch_size, - None, - None, - num_limbs * 2, - ] + batch_spec[Batch.pairwise_targets] = [batch_size, None, None, num_limbs * 2] + batch_spec[Batch.pairwise_mask] = [batch_size, None, None, num_limbs * 2] return batch_spec + class PoseNet: def __init__(self, cfg): self.cfg = cfg def extract_features(self, inputs): - net_fun, net_arg_scope = networks[self.cfg['net_type']] + net_fun, net_arg_scope = networks[self.cfg["net_type"]] mean = tf.constant( - self.cfg['mean_pixel'], dtype=tf.float32, shape=[1, 1, 1, 3], name="img_mean" + self.cfg["mean_pixel"], + dtype=tf.float32, + shape=[1, 1, 1, 3], + name="img_mean", ) im_centered = inputs - mean with slim.arg_scope(net_arg_scope()): @@ -146,29 +141,27 @@ def extract_features(self, inputs): def prediction_layers(self, features, end_points, reuse=None): cfg = self.cfg - num_joints = cfg['num_joints'] + num_joints = cfg["num_joints"] out = {} with tf.variable_scope("pose", reuse=reuse): - 
out["part_pred"] = prediction_layer( - cfg, features, "part_pred", num_joints - ) - if cfg['location_refinement']: + out["part_pred"] = prediction_layer(cfg, features, "part_pred", num_joints) + if cfg["location_refinement"]: out["locref"] = prediction_layer( cfg, features, "locref_pred", num_joints * 2 ) - if cfg['pairwise_predict'] and "multi-animal" not in cfg['dataset_type']: + if cfg["pairwise_predict"] and "multi-animal" not in cfg["dataset_type"]: out["pairwise_pred"] = prediction_layer( - cfg, - features, - "pairwise_pred", - num_joints * (num_joints - 1) * 2, + cfg, features, "pairwise_pred", num_joints * (num_joints - 1) * 2 ) - if cfg['partaffinityfield_predict'] and "multi-animal" in cfg['dataset_type']: + if ( + cfg["partaffinityfield_predict"] + and "multi-animal" in cfg["dataset_type"] + ): out["pairwise_pred"] = prediction_layer( - cfg, features, "pairwise_pred", cfg['num_limbs'] * 2 + cfg, features, "pairwise_pred", cfg["num_limbs"] * 2 ) - if cfg['intermediate_supervision']: + if cfg["intermediate_supervision"]: # print(end_points.keys()) >> to see what else is available. out["part_pred_interm"] = prediction_layer( cfg, @@ -194,7 +187,7 @@ def inference(self, inputs): locref = heads["locref"] probs = tf.sigmoid(heads["part_pred"]) - if cfg['batch_size'] == 1: + if cfg["batch_size"] == 1: probs = tf.squeeze(probs, axis=0) locref = tf.squeeze(locref, axis=0) l_shape = tf.shape(probs) @@ -218,9 +211,9 @@ def inference(self, inputs): likelihood = tf.reshape(tf.gather_nd(probs, indices), (-1, 1)) pose = ( - self.cfg['stride'] * tf.cast(tf.transpose(loc), dtype=tf.float32) - + self.cfg['stride'] * 0.5 - + offset * cfg['locref_stdev'] + self.cfg["stride"] * tf.cast(tf.transpose(loc), dtype=tf.float32) + + self.cfg["stride"] * 0.5 + + offset * cfg["locref_stdev"] ) pose = tf.concat([pose, likelihood], axis=1) @@ -259,9 +252,9 @@ def inference(self, inputs): likelihood = tf.reshape(tf.gather_nd(probs, indices), (-1, 1)) pose = ( - self.cfg['stride'] * tf.cast(tf.transpose(loc), dtype=tf.float32) - + self.cfg['stride'] * 0.5 - + offset * cfg['locref_stdev'] + self.cfg["stride"] * tf.cast(tf.transpose(loc), dtype=tf.float32) + + self.cfg["stride"] * 0.5 + + offset * cfg["locref_stdev"] ) pose = tf.concat([pose, likelihood], axis=1) return {"pose": pose} @@ -270,9 +263,9 @@ def add_inference_layers(self, heads): """ initialized during inference """ prob = tf.sigmoid(heads["part_pred"]) outputs = {"part_prob": prob} - if self.cfg['location_refinement']: + if self.cfg["location_refinement"]: outputs["locref"] = heads["locref"] - if self.cfg['pairwise_predict'] or self.cfg['partaffinityfield_predict']: + if self.cfg["pairwise_predict"] or self.cfg["partaffinityfield_predict"]: outputs["pairwise_pred"] = heads["pairwise_pred"] return outputs @@ -281,7 +274,7 @@ def train(self, batch): heads = self.get_net(batch[Batch.inputs]) - weigh_part_predictions = cfg['weigh_part_predictions'] + weigh_part_predictions = cfg["weigh_part_predictions"] part_score_weights = ( batch[Batch.part_score_weights] if weigh_part_predictions else 1.0 ) @@ -294,26 +287,26 @@ def add_part_loss(pred_layer): loss = {} loss["part_loss"] = add_part_loss("part_pred") total_loss = loss["part_loss"] - if cfg['intermediate_supervision']: + if cfg["intermediate_supervision"]: loss["part_loss_interm"] = add_part_loss("part_pred_interm") total_loss = total_loss + loss["part_loss_interm"] - if cfg['location_refinement']: + if cfg["location_refinement"]: locref_pred = heads["locref"] locref_targets = 
batch[Batch.locref_targets] locref_weights = batch[Batch.locref_mask] loss_func = ( losses.huber_loss - if cfg['locref_huber_loss'] + if cfg["locref_huber_loss"] else tf.losses.mean_squared_error ) - loss["locref_loss"] = cfg['locref_loss_weight'] * loss_func( + loss["locref_loss"] = cfg["locref_loss_weight"] * loss_func( locref_targets, locref_pred, locref_weights ) total_loss = total_loss + loss["locref_loss"] - if cfg['pairwise_predict'] or cfg['partaffinityfield_predict']: + if cfg["pairwise_predict"] or cfg["partaffinityfield_predict"]: # setting pairwise bodypart loss pairwise_pred = heads["pairwise_pred"] pairwise_targets = batch[Batch.pairwise_targets] @@ -321,10 +314,10 @@ def add_part_loss(pred_layer): loss_func = ( losses.huber_loss - if cfg['pairwise_huber_loss'] + if cfg["pairwise_huber_loss"] else tf.losses.mean_squared_error ) - loss["pairwise_loss"] = cfg['pairwise_loss_weight'] * loss_func( + loss["pairwise_loss"] = cfg["pairwise_loss_weight"] * loss_func( pairwise_targets, pairwise_pred, pairwise_weights ) total_loss = total_loss + loss["pairwise_loss"] diff --git a/deeplabcut/pose_estimation_tensorflow/nnet/pose_netmulti.py b/deeplabcut/pose_estimation_tensorflow/nnet/pose_netmulti.py index fd50bf059..8b52e1e19 100755 --- a/deeplabcut/pose_estimation_tensorflow/nnet/pose_netmulti.py +++ b/deeplabcut/pose_estimation_tensorflow/nnet/pose_netmulti.py @@ -20,7 +20,11 @@ from deeplabcut.pose_estimation_tensorflow.dataset.pose_dataset import Batch import deeplabcut.pose_estimation_tensorflow.nnet.efficientnet_builder as eff -from deeplabcut.pose_estimation_tensorflow.nnet import mobilenet_v2, mobilenet, conv_blocks +from deeplabcut.pose_estimation_tensorflow.nnet import ( + mobilenet_v2, + mobilenet, + conv_blocks, +) from deeplabcut.pose_estimation_tensorflow.nnet import losses vers = (tf.__version__).split(".") @@ -30,44 +34,61 @@ TF = tf # Change the stride from 2 to 1 to get 16x downscaling instead of 32x. 
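# MobileNetV2 otherwise halves the resolution five times (32x overall);
# keeping spec layer 14 at stride 1 leaves the backbone output at 1/16 of the
# input, the resolution the decoder and the stride-based decoding assume.
# For intuition (hypothetical input size): a 640 x 480 frame then yields
# ceil(480 / 16) x ceil(640 / 16) = 30 x 40 score maps rather than 15 x 20.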
-mobilenet_v2.V2_DEF["spec"][14] = mobilenet.op(conv_blocks.expanded_conv, stride=1, num_outputs=160) +mobilenet_v2.V2_DEF["spec"][14] = mobilenet.op( + conv_blocks.expanded_conv, stride=1, num_outputs=160 +) + + def wrapped_partial(func, *args, **kwargs): partial_func = functools.partial(func, *args, **kwargs) functools.update_wrapper(partial_func, func) return partial_func + net_funcs = { "resnet_50": resnet_v1.resnet_v1_50, "resnet_101": resnet_v1.resnet_v1_101, "resnet_152": resnet_v1.resnet_v1_152, - 'mobilenet_v2_1.0': mobilenet_v2.mobilenet_base, - 'mobilenet_v2_0.75': wrapped_partial(mobilenet_v2.mobilenet_base, - depth_multiplier=0.75, - final_endpoint="layer_19", - finegrain_classification_mode=True), - 'mobilenet_v2_0.5': wrapped_partial(mobilenet_v2.mobilenet_base, - depth_multiplier=0.5, - final_endpoint="layer_19", - finegrain_classification_mode=True), - 'mobilenet_v2_0.35': wrapped_partial(mobilenet_v2.mobilenet_base, - depth_multiplier=0.35, - final_endpoint="layer_19", - finegrain_classification_mode=True), - 'mobilenet_v2_0.1': wrapped_partial(mobilenet_v2.mobilenet_base, - depth_multiplier=0.1, - final_endpoint="layer_19", - finegrain_classification_mode=True), - 'mobilenet_v2_0.35_10': wrapped_partial(mobilenet_v2.mobilenet_base, - depth_multiplier=0.35, - final_endpoint="layer_10", - finegrain_classification_mode=True), - 'mobilenet_v2_0.1_10': wrapped_partial(mobilenet_v2.mobilenet_base, - depth_multiplier=0.1, - final_endpoint="layer_10", - finegrain_classification_mode=True) + "mobilenet_v2_1.0": mobilenet_v2.mobilenet_base, + "mobilenet_v2_0.75": wrapped_partial( + mobilenet_v2.mobilenet_base, + depth_multiplier=0.75, + final_endpoint="layer_19", + finegrain_classification_mode=True, + ), + "mobilenet_v2_0.5": wrapped_partial( + mobilenet_v2.mobilenet_base, + depth_multiplier=0.5, + final_endpoint="layer_19", + finegrain_classification_mode=True, + ), + "mobilenet_v2_0.35": wrapped_partial( + mobilenet_v2.mobilenet_base, + depth_multiplier=0.35, + final_endpoint="layer_19", + finegrain_classification_mode=True, + ), + "mobilenet_v2_0.1": wrapped_partial( + mobilenet_v2.mobilenet_base, + depth_multiplier=0.1, + final_endpoint="layer_19", + finegrain_classification_mode=True, + ), + "mobilenet_v2_0.35_10": wrapped_partial( + mobilenet_v2.mobilenet_base, + depth_multiplier=0.35, + final_endpoint="layer_10", + finegrain_classification_mode=True, + ), + "mobilenet_v2_0.1_10": wrapped_partial( + mobilenet_v2.mobilenet_base, + depth_multiplier=0.1, + final_endpoint="layer_10", + finegrain_classification_mode=True, + ), } -#https://towardsdatascience.com/complete-architectural-details-of-all-efficientnet-models-5fd5b736142 +# https://towardsdatascience.com/complete-architectural-details-of-all-efficientnet-models-5fd5b736142 parallel_layers = { "b0": "4", "b1": "7", @@ -76,8 +97,8 @@ def wrapped_partial(func, *args, **kwargs): "b4": "9", "b5": "12", "b6": "14", - "b7": "17" - } + "b7": "17", +} def prediction_layer(cfg, input, name, num_outputs): @@ -86,7 +107,7 @@ def prediction_layer(cfg, input, name, num_outputs): padding="SAME", activation_fn=None, normalizer_fn=None, - weights_regularizer=slim.l2_regularizer(cfg['weight_decay']), + weights_regularizer=slim.l2_regularizer(cfg["weight_decay"]), ): with tf.variable_scope(name): pred = slim.conv2d_transpose( @@ -96,8 +117,8 @@ def prediction_layer(cfg, input, name, num_outputs): def get_batch_spec(cfg): - num_joints = cfg['num_joints'] - batch_size = cfg['batch_size'] + num_joints = cfg["num_joints"] + batch_size = 
cfg["batch_size"] batch_spec = { Batch.inputs: [batch_size, None, None, 3], Batch.part_score_targets: [ @@ -113,13 +134,13 @@ def get_batch_spec(cfg): num_joints + cfg.get("num_idchannel", 0), ], } - if cfg['location_refinement']: + if cfg["location_refinement"]: batch_spec[Batch.locref_targets] = [batch_size, None, None, num_joints * 2] batch_spec[Batch.locref_mask] = [batch_size, None, None, num_joints * 2] - if cfg['pairwise_predict']: - print("Getting specs", cfg['dataset_type'], cfg['num_limbs'], cfg['num_joints']) + if cfg["pairwise_predict"]: + print("Getting specs", cfg["dataset_type"], cfg["num_limbs"], cfg["num_joints"]) if ( - "multi-animal" not in cfg['dataset_type'] + "multi-animal" not in cfg["dataset_type"] ): # this can be used for pairwise conditional batch_spec[Batch.pairwise_targets] = [ batch_size, @@ -138,13 +159,13 @@ def get_batch_spec(cfg): batch_size, None, None, - cfg['num_limbs'] * 2, + cfg["num_limbs"] * 2, ] batch_spec[Batch.pairwise_mask] = [ batch_size, None, None, - cfg['num_limbs'] * 2, + cfg["num_limbs"] * 2, ] return batch_spec @@ -152,26 +173,31 @@ def get_batch_spec(cfg): class PoseNet: def __init__(self, cfg): self.cfg = cfg - if 'use_batch_norm' not in self.cfg.keys(): - self.cfg['use_batch_norm'] = False - if 'use_drop_out' not in self.cfg.keys(): - self.cfg['use_drop_out'] = False + if "use_batch_norm" not in self.cfg.keys(): + self.cfg["use_batch_norm"] = False + if "use_drop_out" not in self.cfg.keys(): + self.cfg["use_drop_out"] = False def extract_features(self, inputs): mean = tf.constant( - self.cfg['mean_pixel'], dtype=tf.float32, shape=[1, 1, 1, 3], name="img_mean" + self.cfg["mean_pixel"], + dtype=tf.float32, + shape=[1, 1, 1, 3], + name="img_mean", ) im_centered = inputs - mean - if 'resnet' in self.cfg['net_type']: + if "resnet" in self.cfg["net_type"]: # The next part of the code depends upon which tensorflow version you have. vers = tf.__version__ vers = vers.split( "." ) # Updated based on https://github.com/AlexEMG/DeepLabCut/issues/44 - net_fun = net_funcs[self.cfg['net_type']] - if int(vers[0]) == 1 and int(vers[1]) < 4: # check if lower than version 1.4. + net_fun = net_funcs[self.cfg["net_type"]] + if ( + int(vers[0]) == 1 and int(vers[1]) < 4 + ): # check if lower than version 1.4. 
with slim.arg_scope(resnet_v1.resnet_arg_scope(False)): net, end_points = net_fun( im_centered, global_pool=False, output_stride=16 @@ -179,18 +205,23 @@ def extract_features(self, inputs): else: with slim.arg_scope(resnet_v1.resnet_arg_scope()): net, end_points = net_fun( - im_centered, global_pool=False, output_stride=16, is_training=False + im_centered, + global_pool=False, + output_stride=16, + is_training=False, ) - elif 'mobilenet' in self.cfg['net_type']: - net_fun = net_funcs[self.cfg['net_type']] + elif "mobilenet" in self.cfg["net_type"]: + net_fun = net_funcs[self.cfg["net_type"]] with slim.arg_scope(mobilenet_v2.training_scope()): net, end_points = net_fun(im_centered) - elif 'efficientnet' in self.cfg['net_type']: + elif "efficientnet" in self.cfg["net_type"]: im_centered /= tf.constant(eff.STDDEV_RGB, shape=[1, 1, 3]) - net, end_points = eff.build_model_base(im_centered, - self.cfg['net_type'], - use_batch_norm=self.cfg['use_batch_norm'], - drop_out=self.cfg['use_drop_out']) + net, end_points = eff.build_model_base( + im_centered, + self.cfg["net_type"], + use_batch_norm=self.cfg["use_batch_norm"], + drop_out=self.cfg["use_drop_out"], + ) return net, end_points def prediction_layers( @@ -203,20 +234,18 @@ def prediction_layers( scope="pose", ): cfg = self.cfg - if "resnet" in cfg['net_type']: - num_layers = re.findall("resnet_([0-9]*)", cfg['net_type'])[0] + if "resnet" in cfg["net_type"]: + num_layers = re.findall("resnet_([0-9]*)", cfg["net_type"])[0] layer_name = ( "resnet_v1_{}".format(num_layers) + "/block{}/unit_{}/bottleneck_v1" ) - mid_pt = layer_name.format(2,3) - elif "mobilenet" in cfg['net_type']: + mid_pt = layer_name.format(2, 3) + elif "mobilenet" in cfg["net_type"]: mid_pt = "layer_7" - elif "efficientnet" in cfg['net_type']: - mid_pt = "block_"+parallel_layers[cfg['net_type'].split('-')[1]] + elif "efficientnet" in cfg["net_type"]: + mid_pt = "block_" + parallel_layers[cfg["net_type"].split("-")[1]] - final_dims = tf.ceil( - tf.divide(input_shape[1:3], tf.convert_to_tensor(16)) - ) + final_dims = tf.ceil(tf.divide(input_shape[1:3], tf.convert_to_tensor(16))) interim_dims = tf.scalar_mul(2, final_dims) interim_dims = tf.cast(interim_dims, tf.int32) bank_3 = end_points[mid_pt] @@ -226,57 +255,72 @@ def prediction_layers( [slim.conv2d], padding="SAME", normalizer_fn=None, - weights_regularizer=slim.l2_regularizer(cfg['weight_decay']), + weights_regularizer=slim.l2_regularizer(cfg["weight_decay"]), ): with tf.variable_scope("decoder_filters"): - bank_3 = slim.conv2d(bank_3, cfg['bank3'], 1, scope="decoder_parallel_1") + bank_3 = slim.conv2d( + bank_3, cfg["bank3"], 1, scope="decoder_parallel_1" + ) with slim.arg_scope( [slim.conv2d_transpose], padding="SAME", normalizer_fn=None, - weights_regularizer=slim.l2_regularizer(cfg['weight_decay']), + weights_regularizer=slim.l2_regularizer(cfg["weight_decay"]), ): with tf.variable_scope("upsampled_features"): upsampled_features = slim.conv2d_transpose( - features, cfg['bank5'], kernel_size=[3, 3], stride=2, scope="block4" + features, cfg["bank5"], kernel_size=[3, 3], stride=2, scope="block4" ) net = tf.concat([bank_3, upsampled_features], 3) out = {} with tf.variable_scope(scope, reuse=reuse): out["part_pred"] = prediction_layer( - cfg, net, "part_pred", cfg['num_joints'] + cfg.get("num_idchannel", 0) + cfg, net, "part_pred", cfg["num_joints"] + cfg.get("num_idchannel", 0) ) - if cfg['location_refinement']: + if cfg["location_refinement"]: out["locref"] = prediction_layer( - cfg, net, "locref_pred", cfg['num_joints'] * 
2 + cfg, net, "locref_pred", cfg["num_joints"] * 2 ) - if cfg['pairwise_predict'] and "multi-animal" not in cfg['dataset_type']: + if cfg["pairwise_predict"] and "multi-animal" not in cfg["dataset_type"]: out["pairwise_pred"] = prediction_layer( - cfg, net, "pairwise_pred", cfg['num_joints'] * (cfg['num_joints'] - 1) * 2 + cfg, + net, + "pairwise_pred", + cfg["num_joints"] * (cfg["num_joints"] - 1) * 2, ) - if cfg['partaffinityfield_predict'] and "multi-animal" in cfg['dataset_type']: + if ( + cfg["partaffinityfield_predict"] + and "multi-animal" in cfg["dataset_type"] + ): out["pairwise_pred"] = prediction_layer( - cfg, net, "pairwise_pred", cfg['num_limbs'] * 2 + cfg, net, "pairwise_pred", cfg["num_limbs"] * 2 ) - if cfg['intermediate_supervision'] and "efficientnet" not in cfg['net_type']: - if "mobilenet" in cfg['net_type']: + if ( + cfg["intermediate_supervision"] + and "efficientnet" not in cfg["net_type"] + ): + if "mobilenet" in cfg["net_type"]: out["part_pred_interm"] = prediction_layer( cfg, - end_points["layer_" + str(cfg["intermediate_supervision_layer"])], + end_points[ + "layer_" + str(cfg["intermediate_supervision_layer"]) + ], "intermediate_supervision", - cfg['num_joints'], + cfg["num_joints"], + ) + elif "resnet" in cfg["net_type"]: + interm_name = layer_name.format( + 3, cfg["intermediate_supervision_layer"] ) - elif "resnet" in cfg['net_type']: - interm_name = layer_name.format(3, cfg['intermediate_supervision_layer']) block_interm_out = end_points[interm_name] out["part_pred_interm"] = prediction_layer( cfg, block_interm_out, "intermediate_supervision", - cfg['num_joints'] + cfg.get("num_idchannel", 0), + cfg["num_joints"] + cfg.get("num_idchannel", 0), ) return out @@ -294,9 +338,9 @@ def add_inference_layers(self, heads): """ initialized during inference """ prob = tf.sigmoid(heads["part_pred"]) outputs = {"part_prob": prob} - if self.cfg['location_refinement']: + if self.cfg["location_refinement"]: outputs["locref"] = heads["locref"] - if self.cfg['pairwise_predict'] or self.cfg['partaffinityfield_predict']: + if self.cfg["pairwise_predict"] or self.cfg["partaffinityfield_predict"]: outputs["pairwise_pred"] = heads["pairwise_pred"] return outputs @@ -304,7 +348,7 @@ def train(self, batch): cfg = self.cfg heads = self.get_net(batch[Batch.inputs]) - weigh_part_predictions = cfg['weigh_part_predictions'] + weigh_part_predictions = cfg["weigh_part_predictions"] part_score_weights = ( batch[Batch.part_score_weights] if weigh_part_predictions else 1.0 ) @@ -317,25 +361,25 @@ def add_part_loss(pred_layer): loss = {} loss["part_loss"] = add_part_loss("part_pred") total_loss = loss["part_loss"] - if cfg['intermediate_supervision'] and "efficientnet" not in cfg['net_type']: + if cfg["intermediate_supervision"] and "efficientnet" not in cfg["net_type"]: loss["part_loss_interm"] = add_part_loss("part_pred_interm") total_loss = total_loss + loss["part_loss_interm"] - if cfg['location_refinement']: + if cfg["location_refinement"]: locref_pred = heads["locref"] locref_targets = batch[Batch.locref_targets] locref_weights = batch[Batch.locref_mask] loss_func = ( losses.huber_loss - if cfg['locref_huber_loss'] + if cfg["locref_huber_loss"] else tf.losses.mean_squared_error ) - loss["locref_loss"] = cfg['locref_loss_weight'] * loss_func( + loss["locref_loss"] = cfg["locref_loss_weight"] * loss_func( locref_targets, locref_pred, locref_weights ) total_loss = total_loss + loss["locref_loss"] - if cfg['pairwise_predict'] or cfg['partaffinityfield_predict']: + if cfg["pairwise_predict"] 
or cfg["partaffinityfield_predict"]: "setting pw bodypart loss..." pairwise_pred = heads["pairwise_pred"] pairwise_targets = batch[Batch.pairwise_targets] @@ -343,10 +387,10 @@ def add_part_loss(pred_layer): loss_func = ( losses.huber_loss - if cfg['pairwise_huber_loss'] + if cfg["pairwise_huber_loss"] else tf.losses.mean_squared_error ) - loss["pairwise_loss"] = cfg['pairwise_loss_weight'] * loss_func( + loss["pairwise_loss"] = cfg["pairwise_loss_weight"] * loss_func( pairwise_targets, pairwise_pred, pairwise_weights ) total_loss = total_loss + loss["pairwise_loss"] diff --git a/deeplabcut/pose_estimation_tensorflow/nnet/predict.py b/deeplabcut/pose_estimation_tensorflow/nnet/predict.py index 75ea8789e..b4884f582 100644 --- a/deeplabcut/pose_estimation_tensorflow/nnet/predict.py +++ b/deeplabcut/pose_estimation_tensorflow/nnet/predict.py @@ -35,13 +35,13 @@ def setup_pose_prediction(cfg): TF.reset_default_graph() - inputs = TF.placeholder(tf.float32, shape=[cfg['batch_size'], None, None, 3]) + inputs = TF.placeholder(tf.float32, shape=[cfg["batch_size"], None, None, 3]) net_heads = pose_net(cfg).test(inputs) outputs = [net_heads["part_prob"]] - if cfg['location_refinement']: + if cfg["location_refinement"]: outputs.append(net_heads["locref"]) - if ("multi-animal" in cfg['dataset_type']) and cfg['partaffinityfield_predict']: + if ("multi-animal" in cfg["dataset_type"]) and cfg["partaffinityfield_predict"]: print("Activating extracting of PAFs") outputs.append(net_heads["pairwise_pred"]) @@ -51,7 +51,7 @@ def setup_pose_prediction(cfg): sess.run(TF.local_variables_initializer()) # Restore variables from disk. - restorer.restore(sess, cfg['init_weights']) + restorer.restore(sess, cfg["init_weights"]) return sess, inputs, outputs @@ -61,11 +61,11 @@ def extract_cnn_output(outputs_np, cfg): scmap = outputs_np[0] scmap = np.squeeze(scmap) locref = None - if cfg['location_refinement']: + if cfg["location_refinement"]: locref = np.squeeze(outputs_np[1]) shape = locref.shape locref = np.reshape(locref, (shape[0], shape[1], -1, 2)) - locref *= cfg['locref_stdev'] + locref *= cfg["locref_stdev"] if len(scmap.shape) == 2: # for single body part! scmap = np.expand_dims(scmap, axis=2) return scmap, locref @@ -116,9 +116,9 @@ def getpose(image, cfg, sess, inputs, outputs, outall=False): scmap, locref = extract_cnn_output(outputs_np, cfg) num_outputs = cfg.get("num_outputs", 1) if num_outputs > 1: - pose = multi_pose_predict(scmap, locref, cfg['stride'], num_outputs) + pose = multi_pose_predict(scmap, locref, cfg["stride"], num_outputs) else: - pose = argmax_pose_predict(scmap, locref, cfg['stride']) + pose = argmax_pose_predict(scmap, locref, cfg["stride"]) if outall: return scmap, locref, pose else: @@ -131,11 +131,11 @@ def extract_cnn_outputmulti(outputs_np, cfg): Dimensions: image batch x imagedim1 x imagedim2 x bodypart""" scmap = outputs_np[0] locref = None - if cfg['location_refinement']: + if cfg["location_refinement"]: locref = outputs_np[1] shape = locref.shape locref = np.reshape(locref, (shape[0], shape[1], shape[2], -1, 2)) - locref *= cfg['locref_stdev'] + locref *= cfg["locref_stdev"] if len(scmap.shape) == 2: # for single body part! 
scmap = np.expand_dims(scmap, axis=2) return scmap, locref @@ -180,8 +180,8 @@ def getposeNP(image, cfg, sess, inputs, outputs, outall=False): DZ[m, l, k, :2] = locref[l, y, x, k, :] DZ[m, l, k, 2] = scmap[l, y, x, k] - X = X.astype("float32") * cfg['stride'] + 0.5 * cfg['stride'] + DZ[:, :, :, 0] - Y = Y.astype("float32") * cfg['stride'] + 0.5 * cfg['stride'] + DZ[:, :, :, 1] + X = X.astype("float32") * cfg["stride"] + 0.5 * cfg["stride"] + DZ[:, :, :, 0] + Y = Y.astype("float32") * cfg["stride"] + 0.5 * cfg["stride"] + DZ[:, :, :, 1] P = DZ[:, :, :, 2] Xs = X.swapaxes(0, 2).swapaxes(0, 1) @@ -204,7 +204,7 @@ def getposeNP(image, cfg, sess, inputs, outputs, outall=False): ### Code for TF inference on GPU def setup_GPUpose_prediction(cfg): tf.reset_default_graph() - inputs = tf.placeholder(tf.float32, shape=[cfg['batch_size'], None, None, 3]) + inputs = tf.placeholder(tf.float32, shape=[cfg["batch_size"], None, None, 3]) net_heads = pose_net(cfg).inference(inputs) outputs = [net_heads["pose"]] @@ -215,7 +215,7 @@ def setup_GPUpose_prediction(cfg): sess.run(tf.local_variables_initializer()) # Restore variables from disk. - restorer.restore(sess, cfg['init_weights']) + restorer.restore(sess, cfg["init_weights"]) return sess, inputs, outputs diff --git a/deeplabcut/pose_estimation_tensorflow/nnet/predict_multianimal.py b/deeplabcut/pose_estimation_tensorflow/nnet/predict_multianimal.py index 3307ad7ff..74b7206d3 100755 --- a/deeplabcut/pose_estimation_tensorflow/nnet/predict_multianimal.py +++ b/deeplabcut/pose_estimation_tensorflow/nnet/predict_multianimal.py @@ -28,14 +28,14 @@ def extract_cnn_output(outputs_np, cfg): """ extract locref, scmap and partaffinityfield from network """ scmap = outputs_np[0] scmap = np.squeeze(scmap) - if cfg['location_refinement']: + if cfg["location_refinement"]: locref = np.squeeze(outputs_np[1]) shape = locref.shape locref = np.reshape(locref, (shape[0], shape[1], -1, 2)) - locref *= cfg['locref_stdev'] + locref *= cfg["locref_stdev"] else: locref = None - if cfg['partaffinityfield_predict'] and ("multi-animal" in cfg['dataset_type']): + if cfg["partaffinityfield_predict"] and ("multi-animal" in cfg["dataset_type"]): paf = np.squeeze(outputs_np[2]) else: paf = None @@ -51,8 +51,8 @@ def AssociationCosts( """ Association costs for detections based on PAFs """ Distances = {} ny, nx, nlimbs = np.shape(partaffinitymaps) - for l in range(cfg['num_limbs']): - bp1, bp2 = cfg['partaffinityfield_graph'][l] # [(0,1),(1,2) + for l in range(cfg["num_limbs"]): + bp1, bp2 = cfg["partaffinityfield_graph"][l] # [(0,1),(1,2) # get coordinates for bp1 and bp2 C1 = coordinates[bp1] C2 = coordinates[bp2] @@ -135,9 +135,9 @@ def extract_detections(cfg, scmap, locref, pafs, nms_radius, det_min_score): from nms_grid import nms_grid # this needs to be installed (C-code) Detections = {} - stride = cfg['stride'] + stride = cfg["stride"] halfstride = stride * 0.5 - num_joints = cfg['num_joints'] + num_joints = cfg["num_joints"] dist_grid = make_nms_grid(nms_radius) unProb = [None] * num_joints unPos = [None] * num_joints @@ -174,10 +174,7 @@ def extract_detections(cfg, scmap, locref, pafs, nms_radius, det_min_score): def find_local_maxima(scmap, radius, threshold): peak_idx = peak_local_max( - scmap, - min_distance=radius, - threshold_abs=threshold, - exclude_border=False, + scmap, min_distance=radius, threshold_abs=threshold, exclude_border=False ) grid = np.zeros_like(scmap, dtype=bool) grid[tuple(peak_idx.T)] = True @@ -188,9 +185,9 @@ def find_local_maxima(scmap, radius, 
threshold): def extract_detections_python(cfg, scmap, locref, pafs, radius, threshold): Detections = {} - stride = cfg['stride'] + stride = cfg["stride"] halfstride = stride * 0.5 - num_joints = cfg['num_joints'] + num_joints = cfg["num_joints"] unProb = [None] * num_joints unPos = [None] * num_joints @@ -253,9 +250,9 @@ def extract_detection_withgroundtruth( Detections = {} num_idchannel = cfg.get("num_idchannel", 0) - stride = cfg['stride'] + stride = cfg["stride"] halfstride = stride * 0.5 - num_joints = cfg['num_joints'] + num_joints = cfg["num_joints"] # get dist_grid dist_grid = make_nms_grid(nms_radius) unProb = [None] * num_joints @@ -308,9 +305,9 @@ def extract_detection_withgroundtruth_python( cfg, groundtruthcoordinates, scmap, locref, pafs, radius, threshold ): Detections = {} - stride = cfg['stride'] + stride = cfg["stride"] halfstride = stride * 0.5 - num_joints = cfg['num_joints'] + num_joints = cfg["num_joints"] num_idchannel = cfg.get("num_idchannel", 0) unProb = [None] * num_joints unPos = [None] * num_joints @@ -391,14 +388,14 @@ def extract_cnn_outputmulti(outputs_np, cfg): """ extract locref + scmap from network Dimensions: image batch x imagedim1 x imagedim2 x bodypart""" scmap = outputs_np[0] - if cfg['location_refinement']: + if cfg["location_refinement"]: locref = outputs_np[1] shape = locref.shape locref = np.reshape(locref, (shape[0], shape[1], shape[2], -1, 2)) - locref *= cfg['locref_stdev'] + locref *= cfg["locref_stdev"] else: locref = None - if cfg['partaffinityfield_predict'] and ("multi-animal" in cfg['dataset_type']): + if cfg["partaffinityfield_predict"] and ("multi-animal" in cfg["dataset_type"]): paf = outputs_np[2] else: paf = None @@ -467,9 +464,9 @@ def extract_batchdetections( def extract_batchdetections_python(cfg, scmap, locref, pafs, radius, threshold): Detections = {} - stride = cfg['stride'] + stride = cfg["stride"] halfstride = stride * 0.5 - num_joints = cfg['num_joints'] + num_joints = cfg["num_joints"] num_idchannel = cfg.get("num_idchannel", 0) unProb = [None] * num_joints unPos = [None] * num_joints diff --git a/deeplabcut/pose_estimation_tensorflow/nnet/utils.py b/deeplabcut/pose_estimation_tensorflow/nnet/utils.py index 85d7bbeb9..a8d89b50f 100644 --- a/deeplabcut/pose_estimation_tensorflow/nnet/utils.py +++ b/deeplabcut/pose_estimation_tensorflow/nnet/utils.py @@ -28,201 +28,212 @@ from tensorflow.contrib.tpu.python.tpu import tpu_function -def build_learning_rate(initial_lr, - global_step, - steps_per_epoch=None, - lr_decay_type='exponential', - decay_factor=0.97, - decay_epochs=2.4, - total_steps=None, - warmup_epochs=5): - """Build learning rate.""" - if lr_decay_type == 'exponential': - assert steps_per_epoch is not None - decay_steps = steps_per_epoch * decay_epochs - lr = tf.train.exponential_decay( - initial_lr, global_step, decay_steps, decay_factor, staircase=True) - elif lr_decay_type == 'cosine': - assert total_steps is not None - lr = 0.5 * initial_lr * ( - 1 + tf.cos(np.pi * tf.cast(global_step, tf.float32) / total_steps)) - elif lr_decay_type == 'constant': - lr = initial_lr - else: - assert False, 'Unknown lr_decay_type : %s' % lr_decay_type - - if warmup_epochs: - tf.logging.info('Learning rate warmup_epochs: %d' % warmup_epochs) - warmup_steps = int(warmup_epochs * steps_per_epoch) - warmup_lr = ( - initial_lr * tf.cast(global_step, tf.float32) / tf.cast( - warmup_steps, tf.float32)) - lr = tf.cond(global_step < warmup_steps, lambda: warmup_lr, lambda: lr) - - return lr - - -def build_optimizer(learning_rate, - 
optimizer_name='rmsprop', - decay=0.9, - epsilon=0.001, - momentum=0.9): - """Build optimizer.""" - if optimizer_name == 'sgd': - tf.logging.info('Using SGD optimizer') - optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate) - elif optimizer_name == 'momentum': - tf.logging.info('Using Momentum optimizer') - optimizer = tf.train.MomentumOptimizer( - learning_rate=learning_rate, momentum=momentum) - elif optimizer_name == 'rmsprop': - tf.logging.info('Using RMSProp optimizer') - optimizer = tf.train.RMSPropOptimizer(learning_rate, decay, momentum, - epsilon) - else: - tf.logging.fatal('Unknown optimizer:', optimizer_name) - - return optimizer +def build_learning_rate( + initial_lr, + global_step, + steps_per_epoch=None, + lr_decay_type="exponential", + decay_factor=0.97, + decay_epochs=2.4, + total_steps=None, + warmup_epochs=5, +): + """Build learning rate.""" + if lr_decay_type == "exponential": + assert steps_per_epoch is not None + decay_steps = steps_per_epoch * decay_epochs + lr = tf.train.exponential_decay( + initial_lr, global_step, decay_steps, decay_factor, staircase=True + ) + elif lr_decay_type == "cosine": + assert total_steps is not None + lr = ( + 0.5 + * initial_lr + * (1 + tf.cos(np.pi * tf.cast(global_step, tf.float32) / total_steps)) + ) + elif lr_decay_type == "constant": + lr = initial_lr + else: + assert False, "Unknown lr_decay_type : %s" % lr_decay_type + + if warmup_epochs: + tf.logging.info("Learning rate warmup_epochs: %d" % warmup_epochs) + warmup_steps = int(warmup_epochs * steps_per_epoch) + warmup_lr = ( + initial_lr + * tf.cast(global_step, tf.float32) + / tf.cast(warmup_steps, tf.float32) + ) + lr = tf.cond(global_step < warmup_steps, lambda: warmup_lr, lambda: lr) + + return lr + + +def build_optimizer( + learning_rate, optimizer_name="rmsprop", decay=0.9, epsilon=0.001, momentum=0.9 +): + """Build optimizer.""" + if optimizer_name == "sgd": + tf.logging.info("Using SGD optimizer") + optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate) + elif optimizer_name == "momentum": + tf.logging.info("Using Momentum optimizer") + optimizer = tf.train.MomentumOptimizer( + learning_rate=learning_rate, momentum=momentum + ) + elif optimizer_name == "rmsprop": + tf.logging.info("Using RMSProp optimizer") + optimizer = tf.train.RMSPropOptimizer(learning_rate, decay, momentum, epsilon) + else: + tf.logging.fatal("Unknown optimizer:", optimizer_name) + + return optimizer class TpuBatchNormalization(tf.layers.BatchNormalization): - # class TpuBatchNormalization(tf.layers.BatchNormalization): - """Cross replica batch normalization.""" - - def __init__(self, fused=False, **kwargs): - if fused in (True, None): - raise ValueError('TpuBatchNormalization does not support fused=True.') - super(TpuBatchNormalization, self).__init__(fused=fused, **kwargs) - - def _cross_replica_average(self, t, num_shards_per_group): - """Calculates the average value of input tensor across TPU replicas.""" - num_shards = tpu_function.get_tpu_context().number_of_shards - group_assignment = None - if num_shards_per_group > 1: - if num_shards % num_shards_per_group != 0: - raise ValueError('num_shards: %d mod shards_per_group: %d, should be 0' - % (num_shards, num_shards_per_group)) - num_groups = num_shards // num_shards_per_group - group_assignment = [[ - x for x in range(num_shards) if x // num_shards_per_group == y - ] for y in range(num_groups)] - return tpu_ops.cross_replica_sum(t, group_assignment) / tf.cast( - num_shards_per_group, t.dtype) - - def 
_moments(self, inputs, reduction_axes, keep_dims): - """Compute the mean and variance: it overrides the original _moments.""" - shard_mean, shard_variance = super(TpuBatchNormalization, self)._moments( - inputs, reduction_axes, keep_dims=keep_dims) - - num_shards = tpu_function.get_tpu_context().number_of_shards or 1 - if num_shards <= 8: # Skip cross_replica for 2x2 or smaller slices. - num_shards_per_group = 1 - else: - num_shards_per_group = max(8, num_shards // 8) - tf.logging.info('TpuBatchNormalization with num_shards_per_group %s', - num_shards_per_group) - if num_shards_per_group > 1: - # Compute variance using: Var[X]= E[X^2] - E[X]^2. - shard_square_of_mean = tf.math.square(shard_mean) - shard_mean_of_square = shard_variance + shard_square_of_mean - group_mean = self._cross_replica_average( - shard_mean, num_shards_per_group) - group_mean_of_square = self._cross_replica_average( - shard_mean_of_square, num_shards_per_group) - group_variance = group_mean_of_square - tf.math.square(group_mean) - return (group_mean, group_variance) - else: - return (shard_mean, shard_variance) + # class TpuBatchNormalization(tf.layers.BatchNormalization): + """Cross replica batch normalization.""" + + def __init__(self, fused=False, **kwargs): + if fused in (True, None): + raise ValueError("TpuBatchNormalization does not support fused=True.") + super(TpuBatchNormalization, self).__init__(fused=fused, **kwargs) + + def _cross_replica_average(self, t, num_shards_per_group): + """Calculates the average value of input tensor across TPU replicas.""" + num_shards = tpu_function.get_tpu_context().number_of_shards + group_assignment = None + if num_shards_per_group > 1: + if num_shards % num_shards_per_group != 0: + raise ValueError( + "num_shards: %d mod shards_per_group: %d, should be 0" + % (num_shards, num_shards_per_group) + ) + num_groups = num_shards // num_shards_per_group + group_assignment = [ + [x for x in range(num_shards) if x // num_shards_per_group == y] + for y in range(num_groups) + ] + return tpu_ops.cross_replica_sum(t, group_assignment) / tf.cast( + num_shards_per_group, t.dtype + ) + + def _moments(self, inputs, reduction_axes, keep_dims): + """Compute the mean and variance: it overrides the original _moments.""" + shard_mean, shard_variance = super(TpuBatchNormalization, self)._moments( + inputs, reduction_axes, keep_dims=keep_dims + ) + + num_shards = tpu_function.get_tpu_context().number_of_shards or 1 + if num_shards <= 8: # Skip cross_replica for 2x2 or smaller slices. + num_shards_per_group = 1 + else: + num_shards_per_group = max(8, num_shards // 8) + tf.logging.info( + "TpuBatchNormalization with num_shards_per_group %s", num_shards_per_group + ) + if num_shards_per_group > 1: + # Compute variance using: Var[X]= E[X^2] - E[X]^2. 
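# Each shard contributes E[X] and Var[X]. Since E[X^2] = Var[X] + E[X]^2,
# averaging E[X^2] across the replica group and subtracting the squared group
# mean recovers the exact group variance. Quick check: two shards with means
# 1 and 3 and zero within-shard variance give E[X^2] = (1 + 9) / 2 = 5 and
# group mean 2, hence variance 5 - 4 = 1, the variance of the pooled data.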
+ shard_square_of_mean = tf.math.square(shard_mean) + shard_mean_of_square = shard_variance + shard_square_of_mean + group_mean = self._cross_replica_average(shard_mean, num_shards_per_group) + group_mean_of_square = self._cross_replica_average( + shard_mean_of_square, num_shards_per_group + ) + group_variance = group_mean_of_square - tf.math.square(group_mean) + return (group_mean, group_variance) + else: + return (shard_mean, shard_variance) class BatchNormalization(tf.layers.BatchNormalization): - """Fixed default name of BatchNormalization to match TpuBatchNormalization.""" + """Fixed default name of BatchNormalization to match TpuBatchNormalization.""" - def __init__(self, name='tpu_batch_normalization', **kwargs): - super(BatchNormalization, self).__init__(name=name, **kwargs) + def __init__(self, name="tpu_batch_normalization", **kwargs): + super(BatchNormalization, self).__init__(name=name, **kwargs) def drop_connect(inputs, is_training, drop_connect_rate): - """Apply drop connect.""" - if not is_training: - return inputs + """Apply drop connect.""" + if not is_training: + return inputs - # Compute keep_prob - # TODO(tanmingxing): add support for training progress. - keep_prob = 1.0 - drop_connect_rate + # Compute keep_prob + # TODO(tanmingxing): add support for training progress. + keep_prob = 1.0 - drop_connect_rate - # Compute drop_connect tensor - batch_size = tf.shape(inputs)[0] - random_tensor = keep_prob - random_tensor += tf.random_uniform([batch_size, 1, 1, 1], dtype=inputs.dtype) - binary_tensor = tf.floor(random_tensor) - output = tf.div(inputs, keep_prob) * binary_tensor - return output + # Compute drop_connect tensor + batch_size = tf.shape(inputs)[0] + random_tensor = keep_prob + random_tensor += tf.random_uniform([batch_size, 1, 1, 1], dtype=inputs.dtype) + binary_tensor = tf.floor(random_tensor) + output = tf.div(inputs, keep_prob) * binary_tensor + return output def archive_ckpt(ckpt_eval, ckpt_objective, ckpt_path): - """Archive a checkpoint if the metric is better.""" - ckpt_dir, ckpt_name = os.path.split(ckpt_path) - - saved_objective_path = os.path.join(ckpt_dir, 'best_objective.txt') - saved_objective = float('-inf') - if tf.gfile.Exists(saved_objective_path): - with tf.gfile.GFile(saved_objective_path, 'r') as f: - saved_objective = float(f.read()) - if saved_objective > ckpt_objective: - tf.logging.info('Ckpt %s is worse than %s', ckpt_objective, saved_objective) - return False - - filenames = tf.gfile.Glob(ckpt_path + '.*') - if filenames is None: - tf.logging.info('No files to copy for checkpoint %s', ckpt_path) - return False - - # Clear the old folder. - dst_dir = os.path.join(ckpt_dir, 'archive') - if tf.gfile.Exists(dst_dir): - tf.gfile.DeleteRecursively(dst_dir) - tf.gfile.MakeDirs(dst_dir) - - # Write checkpoints. - for f in filenames: - dest = os.path.join(dst_dir, os.path.basename(f)) - tf.gfile.Copy(f, dest, overwrite=True) - ckpt_state = tf.train.generate_checkpoint_state_proto( - dst_dir, - model_checkpoint_path=ckpt_name, - all_model_checkpoint_paths=[ckpt_name]) - with tf.gfile.GFile(os.path.join(dst_dir, 'checkpoint'), 'w') as f: - f.write(str(ckpt_state)) - with tf.gfile.GFile(os.path.join(dst_dir, 'best_eval.txt'), 'w') as f: - f.write('%s' % ckpt_eval) - - # Update the best objective. 
- with tf.gfile.GFile(saved_objective_path, 'w') as f: - f.write('%f' % ckpt_objective) - - tf.logging.info('Copying checkpoint %s to %s', ckpt_path, dst_dir) - return True + """Archive a checkpoint if the metric is better.""" + ckpt_dir, ckpt_name = os.path.split(ckpt_path) + + saved_objective_path = os.path.join(ckpt_dir, "best_objective.txt") + saved_objective = float("-inf") + if tf.gfile.Exists(saved_objective_path): + with tf.gfile.GFile(saved_objective_path, "r") as f: + saved_objective = float(f.read()) + if saved_objective > ckpt_objective: + tf.logging.info("Ckpt %s is worse than %s", ckpt_objective, saved_objective) + return False + + filenames = tf.gfile.Glob(ckpt_path + ".*") + if filenames is None: + tf.logging.info("No files to copy for checkpoint %s", ckpt_path) + return False + + # Clear the old folder. + dst_dir = os.path.join(ckpt_dir, "archive") + if tf.gfile.Exists(dst_dir): + tf.gfile.DeleteRecursively(dst_dir) + tf.gfile.MakeDirs(dst_dir) + + # Write checkpoints. + for f in filenames: + dest = os.path.join(dst_dir, os.path.basename(f)) + tf.gfile.Copy(f, dest, overwrite=True) + ckpt_state = tf.train.generate_checkpoint_state_proto( + dst_dir, model_checkpoint_path=ckpt_name, all_model_checkpoint_paths=[ckpt_name] + ) + with tf.gfile.GFile(os.path.join(dst_dir, "checkpoint"), "w") as f: + f.write(str(ckpt_state)) + with tf.gfile.GFile(os.path.join(dst_dir, "best_eval.txt"), "w") as f: + f.write("%s" % ckpt_eval) + + # Update the best objective. + with tf.gfile.GFile(saved_objective_path, "w") as f: + f.write("%f" % ckpt_objective) + + tf.logging.info("Copying checkpoint %s to %s", ckpt_path, dst_dir) + return True def get_ema_vars(): - """Get all exponential moving average (ema) variables.""" - ema_vars = tf.trainable_variables() + tf.get_collection('moving_vars') - for v in tf.global_variables(): - # We maintain mva for batch norm moving mean and variance as well. - if 'moving_mean' in v.name or 'moving_variance' in v.name: - ema_vars.append(v) - return list(set(ema_vars)) + """Get all exponential moving average (ema) variables.""" + ema_vars = tf.trainable_variables() + tf.get_collection("moving_vars") + for v in tf.global_variables(): + # We maintain mva for batch norm moving mean and variance as well. + if "moving_mean" in v.name or "moving_variance" in v.name: + ema_vars.append(v) + return list(set(ema_vars)) class DepthwiseConv2D(tf.keras.layers.DepthwiseConv2D, tf.layers.Layer): - """Wrap keras DepthwiseConv2D to tf.layers.""" + """Wrap keras DepthwiseConv2D to tf.layers.""" - pass + pass class EvalCkptDriver(object): - """A driver for running eval inference. + """A driver for running eval inference. Attributes: model_name: str. Model name to eval. batch_size: int. Eval batch size. @@ -231,106 +242,103 @@ class EvalCkptDriver(object): include_background_label: whether to include extra background label. 
""" - def __init__(self, - model_name, - batch_size=1, - image_size=224, - num_classes=1000, - include_background_label=False): - """Initialize internal variables.""" - self.model_name = model_name - self.batch_size = batch_size - self.num_classes = num_classes - self.include_background_label = include_background_label - self.image_size = image_size - - def restore_model(self, sess, ckpt_dir, enable_ema=True, export_ckpt=None): - """Restore variables from checkpoint dir.""" - sess.run(tf.global_variables_initializer()) - checkpoint = tf.train.latest_checkpoint(ckpt_dir) - if enable_ema: - ema = tf.train.ExponentialMovingAverage(decay=0.0) - ema_vars = get_ema_vars() - var_dict = ema.variables_to_restore(ema_vars) - ema_assign_op = ema.apply(ema_vars) - else: - var_dict = get_ema_vars() - ema_assign_op = None - - tf.train.get_or_create_global_step() - sess.run(tf.global_variables_initializer()) - saver = tf.train.Saver(var_dict, max_to_keep=1) - saver.restore(sess, checkpoint) - - if export_ckpt: - if ema_assign_op is not None: - sess.run(ema_assign_op) - saver = tf.train.Saver(max_to_keep=1, save_relative_paths=True) - saver.save(sess, export_ckpt) - - def build_model(self, features, is_training): - """Build model with input features.""" - del features, is_training - raise ValueError('Must be implemented by subclasses.') - - def get_preprocess_fn(self): - raise ValueError('Must be implemented by subclsses.') - - def build_dataset(self, filenames, labels, is_training): - """Build input dataset.""" - filenames = tf.constant(filenames) - labels = tf.constant(labels) - dataset = tf.data.Dataset.from_tensor_slices((filenames, labels)) - - def _parse_function(filename, label): - image_string = tf.read_file(filename) - preprocess_fn = self.get_preprocess_fn() - image_decoded = preprocess_fn( - image_string, is_training, image_size=self.image_size) - image = tf.cast(image_decoded, tf.float32) - return image, label - - dataset = dataset.map(_parse_function) - dataset = dataset.batch(self.batch_size) - - iterator = dataset.make_one_shot_iterator() - images, labels = iterator.get_next() - return images, labels - - def run_inference(self, - ckpt_dir, - image_files, - labels, - enable_ema=True, - export_ckpt=None): - """Build and run inference on the target images and labels.""" - label_offset = 1 if self.include_background_label else 0 - with tf.Graph().as_default(), tf.Session() as sess: - images, labels = self.build_dataset(image_files, labels, False) - probs = self.build_model(images, is_training=False) - if isinstance(probs, tuple): - probs = probs[0] - - self.restore_model(sess, ckpt_dir, enable_ema, export_ckpt) - - prediction_idx = [] - prediction_prob = [] - for _ in range(len(image_files) // self.batch_size): - out_probs = sess.run(probs) - idx = np.argsort(out_probs)[::-1] - prediction_idx.append(idx[:5] - label_offset) - prediction_prob.append([out_probs[pid] for pid in idx[:5]]) - - # Return the top 5 predictions (idx and prob) for each image. - return prediction_idx, prediction_prob - - def eval_example_images(self, - ckpt_dir, - image_files, - labels_map_file, - enable_ema=True, - export_ckpt=None): - """Eval a list of example images. 
+    def __init__(
+        self,
+        model_name,
+        batch_size=1,
+        image_size=224,
+        num_classes=1000,
+        include_background_label=False,
+    ):
+        """Initialize internal variables."""
+        self.model_name = model_name
+        self.batch_size = batch_size
+        self.num_classes = num_classes
+        self.include_background_label = include_background_label
+        self.image_size = image_size
+
+    def restore_model(self, sess, ckpt_dir, enable_ema=True, export_ckpt=None):
+        """Restore variables from checkpoint dir."""
+        sess.run(tf.global_variables_initializer())
+        checkpoint = tf.train.latest_checkpoint(ckpt_dir)
+        if enable_ema:
+            ema = tf.train.ExponentialMovingAverage(decay=0.0)
+            ema_vars = get_ema_vars()
+            var_dict = ema.variables_to_restore(ema_vars)
+            ema_assign_op = ema.apply(ema_vars)
+        else:
+            var_dict = get_ema_vars()
+            ema_assign_op = None
+
+        tf.train.get_or_create_global_step()
+        sess.run(tf.global_variables_initializer())
+        saver = tf.train.Saver(var_dict, max_to_keep=1)
+        saver.restore(sess, checkpoint)
+
+        if export_ckpt:
+            if ema_assign_op is not None:
+                sess.run(ema_assign_op)
+            saver = tf.train.Saver(max_to_keep=1, save_relative_paths=True)
+            saver.save(sess, export_ckpt)
+
+    def build_model(self, features, is_training):
+        """Build model with input features."""
+        del features, is_training
+        raise ValueError("Must be implemented by subclasses.")
+
+    def get_preprocess_fn(self):
+        raise ValueError("Must be implemented by subclasses.")
+
+    def build_dataset(self, filenames, labels, is_training):
+        """Build input dataset."""
+        filenames = tf.constant(filenames)
+        labels = tf.constant(labels)
+        dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
+
+        def _parse_function(filename, label):
+            image_string = tf.read_file(filename)
+            preprocess_fn = self.get_preprocess_fn()
+            image_decoded = preprocess_fn(
+                image_string, is_training, image_size=self.image_size
+            )
+            image = tf.cast(image_decoded, tf.float32)
+            return image, label
+
+        dataset = dataset.map(_parse_function)
+        dataset = dataset.batch(self.batch_size)
+
+        iterator = dataset.make_one_shot_iterator()
+        images, labels = iterator.get_next()
+        return images, labels
+
+    def run_inference(
+        self, ckpt_dir, image_files, labels, enable_ema=True, export_ckpt=None
+    ):
+        """Build and run inference on the target images and labels."""
+        label_offset = 1 if self.include_background_label else 0
+        with tf.Graph().as_default(), tf.Session() as sess:
+            images, labels = self.build_dataset(image_files, labels, False)
+            probs = self.build_model(images, is_training=False)
+            if isinstance(probs, tuple):
+                probs = probs[0]
+
+            self.restore_model(sess, ckpt_dir, enable_ema, export_ckpt)
+
+            prediction_idx = []
+            prediction_prob = []
+            for _ in range(len(image_files) // self.batch_size):
+                out_probs = sess.run(probs)
+                idx = np.argsort(out_probs)[::-1]
+                prediction_idx.append(idx[:5] - label_offset)
+                prediction_prob.append([out_probs[pid] for pid in idx[:5]])
+
+            # Return the top 5 predictions (idx and prob) for each image.
+            return prediction_idx, prediction_prob
+
+    def eval_example_images(
+        self, ckpt_dir, image_files, labels_map_file, enable_ema=True, export_ckpt=None
+    ):
+        """Eval a list of example images.

        Args:
          ckpt_dir: str. Checkpoint directory path.
          image_files: List[str]. A list of image file paths.
@@ -341,19 +349,30 @@ def eval_example_images(self,
          A tuple (pred_idx, and pred_prob), where pred_idx is the top 5
          prediction index and pred_prob is the top 5 prediction probability.
""" - classes = json.loads(tf.gfile.Open(labels_map_file).read()) - pred_idx, pred_prob = self.run_inference( - ckpt_dir, image_files, [0] * len(image_files), enable_ema, export_ckpt) - for i in range(len(image_files)): - print('predicted class for image {}: '.format(image_files[i])) - for j, idx in enumerate(pred_idx[i]): - print(' -> top_{} ({:4.2f}%): {} '.format(j, pred_prob[i][j] * 100, - classes[str(idx)])) - return pred_idx, pred_prob - - def eval_imagenet(self, ckpt_dir, imagenet_eval_glob, - imagenet_eval_label, num_images, enable_ema, export_ckpt): - """Eval ImageNet images and report top1/top5 accuracy. + classes = json.loads(tf.gfile.Open(labels_map_file).read()) + pred_idx, pred_prob = self.run_inference( + ckpt_dir, image_files, [0] * len(image_files), enable_ema, export_ckpt + ) + for i in range(len(image_files)): + print("predicted class for image {}: ".format(image_files[i])) + for j, idx in enumerate(pred_idx[i]): + print( + " -> top_{} ({:4.2f}%): {} ".format( + j, pred_prob[i][j] * 100, classes[str(idx)] + ) + ) + return pred_idx, pred_prob + + def eval_imagenet( + self, + ckpt_dir, + imagenet_eval_glob, + imagenet_eval_label, + num_images, + enable_ema, + export_ckpt, + ): + """Eval ImageNet images and report top1/top5 accuracy. Args: ckpt_dir: str. Checkpoint directory path. imagenet_eval_glob: str. File path glob for all eval images. @@ -365,23 +384,27 @@ def eval_imagenet(self, ckpt_dir, imagenet_eval_glob, Returns: A tuple (top1, top5) for top1 and top5 accuracy. """ - imagenet_val_labels = [int(i) for i in tf.gfile.GFile(imagenet_eval_label)] - imagenet_filenames = sorted(tf.gfile.Glob(imagenet_eval_glob)) - if num_images < 0: - num_images = len(imagenet_filenames) - image_files = imagenet_filenames[:num_images] - labels = imagenet_val_labels[:num_images] - - pred_idx, _ = self.run_inference( - ckpt_dir, image_files, labels, enable_ema, export_ckpt) - top1_cnt, top5_cnt = 0.0, 0.0 - for i, label in enumerate(labels): - top1_cnt += label in pred_idx[i][:1] - top5_cnt += label in pred_idx[i][:5] - if i % 100 == 0: - print('Step {}: top1_acc = {:4.2f}% top5_acc = {:4.2f}%'.format( - i, 100 * top1_cnt / (i + 1), 100 * top5_cnt / (i + 1))) - sys.stdout.flush() - top1, top5 = 100 * top1_cnt / num_images, 100 * top5_cnt / num_images - print('Final: top1_acc = {:4.2f}% top5_acc = {:4.2f}%'.format(top1, top5)) - return top1, top5 + imagenet_val_labels = [int(i) for i in tf.gfile.GFile(imagenet_eval_label)] + imagenet_filenames = sorted(tf.gfile.Glob(imagenet_eval_glob)) + if num_images < 0: + num_images = len(imagenet_filenames) + image_files = imagenet_filenames[:num_images] + labels = imagenet_val_labels[:num_images] + + pred_idx, _ = self.run_inference( + ckpt_dir, image_files, labels, enable_ema, export_ckpt + ) + top1_cnt, top5_cnt = 0.0, 0.0 + for i, label in enumerate(labels): + top1_cnt += label in pred_idx[i][:1] + top5_cnt += label in pred_idx[i][:5] + if i % 100 == 0: + print( + "Step {}: top1_acc = {:4.2f}% top5_acc = {:4.2f}%".format( + i, 100 * top1_cnt / (i + 1), 100 * top5_cnt / (i + 1) + ) + ) + sys.stdout.flush() + top1, top5 = 100 * top1_cnt / num_images, 100 * top5_cnt / num_images + print("Final: top1_acc = {:4.2f}% top5_acc = {:4.2f}%".format(top1, top5)) + return top1, top5 diff --git a/deeplabcut/pose_estimation_tensorflow/predict_multianimal.py b/deeplabcut/pose_estimation_tensorflow/predict_multianimal.py index 4bf24629e..e7f710514 100644 --- a/deeplabcut/pose_estimation_tensorflow/predict_multianimal.py +++ 
b/deeplabcut/pose_estimation_tensorflow/predict_multianimal.py @@ -144,11 +144,11 @@ def GetPoseandCostsF( PredicteData = {} # initializing constants - dist_grid = predict.make_nms_grid(dlc_cfg['nmsradius']) - stride = dlc_cfg['stride'] + dist_grid = predict.make_nms_grid(dlc_cfg["nmsradius"]) + stride = dlc_cfg["stride"] halfstride = stride * 0.5 - num_joints = dlc_cfg['num_joints'] - det_min_score = dlc_cfg['minconfidence'] + num_joints = dlc_cfg["num_joints"] + det_min_score = dlc_cfg["minconfidence"] num_idchannel = dlc_cfg.get("num_idchannel", 0) while cap.video.isOpened(): @@ -208,12 +208,12 @@ def GetPoseandCostsF( cap.close() pbar.close() PredicteData["metadata"] = { - "nms radius": dlc_cfg['nmsradius'], - "minimal confidence": dlc_cfg['minconfidence'], - "PAFgraph": dlc_cfg['partaffinityfield_graph'], - "all_joints": [[i] for i in range(len(dlc_cfg['all_joints']))], + "nms radius": dlc_cfg["nmsradius"], + "minimal confidence": dlc_cfg["minconfidence"], + "PAFgraph": dlc_cfg["partaffinityfield_graph"], + "all_joints": [[i] for i in range(len(dlc_cfg["all_joints"]))], "all_joints_names": [ - dlc_cfg['all_joints_names'][i] for i in range(len(dlc_cfg['all_joints'])) + dlc_cfg["all_joints_names"][i] for i in range(len(dlc_cfg["all_joints"])) ], "nframes": nframes, "c_engine": c_engine, @@ -246,8 +246,8 @@ def GetPoseandCostsS(cfg, dlc_cfg, sess, inputs, outputs, cap, nframes, c_engine inputs, outputs, outall=False, - nms_radius=dlc_cfg['nmsradius'], - det_min_score=dlc_cfg['minconfidence'], + nms_radius=dlc_cfg["nmsradius"], + det_min_score=dlc_cfg["minconfidence"], c_engine=c_engine, ) elif counter >= nframes: @@ -256,12 +256,12 @@ def GetPoseandCostsS(cfg, dlc_cfg, sess, inputs, outputs, cap, nframes, c_engine pbar.close() PredicteData["metadata"] = { - "nms radius": dlc_cfg['nmsradius'], - "minimal confidence": dlc_cfg['minconfidence'], - "PAFgraph": dlc_cfg['partaffinityfield_graph'], - "all_joints": [[i] for i in range(len(dlc_cfg['all_joints']))], + "nms radius": dlc_cfg["nmsradius"], + "minimal confidence": dlc_cfg["minconfidence"], + "PAFgraph": dlc_cfg["partaffinityfield_graph"], + "all_joints": [[i] for i in range(len(dlc_cfg["all_joints"]))], "all_joints_names": [ - dlc_cfg['all_joints_names'][i] for i in range(len(dlc_cfg['all_joints'])) + dlc_cfg["all_joints_names"][i] for i in range(len(dlc_cfg["all_joints"])) ], "nframes": nframes, } diff --git a/deeplabcut/pose_estimation_tensorflow/predict_videos.py b/deeplabcut/pose_estimation_tensorflow/predict_videos.py index 53f2ae480..7f5a9b67b 100755 --- a/deeplabcut/pose_estimation_tensorflow/predict_videos.py +++ b/deeplabcut/pose_estimation_tensorflow/predict_videos.py @@ -335,7 +335,9 @@ def analyze_videos( print( "If the tracking is not satisfactory for some videos, consider expanding the training set. You can use the function 'extract_outlier_frames' to extract a few representative outlier frames." ) - return DLCscorer # note: this is either DLCscorer or DLCscorerlegacy depending on what was used! + return ( + DLCscorer + ) # note: this is either DLCscorer or DLCscorerlegacy depending on what was used! else: print("No video(s) were found. 
Please check your paths and/or 'video_type'.") return DLCscorer @@ -1421,7 +1423,7 @@ def convert_detections2tracklets( mot_tracker = trackingutils.SORTEllipse( inferencecfg.get("max_age", 1), inferencecfg.get("min_hits", 5), - inferencecfg.get("iou_threshold", 0.6) + inferencecfg.get("iou_threshold", 0.6), ) tracklets = {} if cfg[ @@ -1465,10 +1467,8 @@ def convert_detections2tracklets( for a in animals ): single = np.full((numjoints, 3), np.nan) - single_dets = ( - inferenceutils.convertdetectiondict2listoflist( - data[imname], inds_unique - ) + single_dets = inferenceutils.convertdetectiondict2listoflist( + data[imname], inds_unique ) for ind, dets in zip(inds_unique, single_dets): if len(dets) == 1: diff --git a/deeplabcut/pose_estimation_tensorflow/test.py b/deeplabcut/pose_estimation_tensorflow/test.py index 75a4acb7e..918cee274 100644 --- a/deeplabcut/pose_estimation_tensorflow/test.py +++ b/deeplabcut/pose_estimation_tensorflow/test.py @@ -35,7 +35,7 @@ def test_net(visualise, cache_scoremaps): sess, inputs, outputs = setup_pose_prediction(cfg) if cache_scoremaps: - out_dir = cfg['scoremap_dir'] + out_dir = cfg["scoremap_dir"] if not os.path.exists(out_dir): os.makedirs(out_dir) @@ -51,10 +51,10 @@ def test_net(visualise, cache_scoremaps): scmap, locref = extract_cnn_output(outputs_np, cfg) - pose = argmax_pose_predict(scmap, locref, cfg['stride']) + pose = argmax_pose_predict(scmap, locref, cfg["stride"]) pose_refscale = np.copy(pose) - pose_refscale[:, 0:2] /= cfg['global_scale'] + pose_refscale[:, 0:2] /= cfg["global_scale"] predictions[k] = pose_refscale if visualise: @@ -69,7 +69,7 @@ def test_net(visualise, cache_scoremaps): scipy.io.savemat(out_fn, mdict={"scoremaps": scmap.astype("float32")}) out_fn = os.path.join(out_dir, raw_name + "_locreg" + ".mat") - if cfg['location_refinement']: + if cfg["location_refinement"]: scipy.io.savemat( out_fn, mdict={"locreg_pred": locref.astype("float32")} ) diff --git a/deeplabcut/pose_estimation_tensorflow/train.py b/deeplabcut/pose_estimation_tensorflow/train.py index 0d087d7e6..21e9f3435 100755 --- a/deeplabcut/pose_estimation_tensorflow/train.py +++ b/deeplabcut/pose_estimation_tensorflow/train.py @@ -37,7 +37,7 @@ class LearningRate(object): def __init__(self, cfg): - self.steps = cfg['multi_step'] + self.steps = cfg["multi_step"] self.current_step = 0 def get_lr(self, iteration): @@ -49,8 +49,8 @@ def get_lr(self, iteration): def get_batch_spec(cfg): - num_joints = cfg['num_joints'] - batch_size = cfg['batch_size'] + num_joints = cfg["num_joints"] + batch_size = cfg["batch_size"] return { Batch.inputs: [batch_size, None, None, 3], Batch.part_score_targets: [batch_size, None, None, num_joints], @@ -103,25 +103,24 @@ def start_preloading(sess, enqueue_op, dataset, placeholders): def get_optimizer(loss_op, cfg): - tstep = tf.placeholder(tf.int32,shape=[],name='tstep') - if 'efficientnet' in cfg['net_type']: + tstep = tf.placeholder(tf.int32, shape=[], name="tstep") + if "efficientnet" in cfg["net_type"]: print("Switching to cosine decay schedule with adam!") - cfg['optimizer'] = "adam" - learning_rate = tf.train.cosine_decay(cfg['lr_init'], - tstep, - cfg['decay_steps'], - alpha=cfg['alpha_r']) + cfg["optimizer"] = "adam" + learning_rate = tf.train.cosine_decay( + cfg["lr_init"], tstep, cfg["decay_steps"], alpha=cfg["alpha_r"] + ) else: learning_rate = tf.placeholder(tf.float32, shape=[]) - if cfg['optimizer'] == "sgd": + if cfg["optimizer"] == "sgd": optimizer = TF.train.MomentumOptimizer( learning_rate=learning_rate, momentum=0.9 ) - 
elif cfg['optimizer'] == "adam": + elif cfg["optimizer"] == "adam": optimizer = TF.train.AdamOptimizer(learning_rate) else: - raise ValueError("unknown optimizer {}".format(cfg['optimizer'])) + raise ValueError("unknown optimizer {}".format(cfg["optimizer"])) train_op = slim.learning.create_train_op(loss_op, optimizer) return learning_rate, train_op, tstep @@ -130,14 +129,14 @@ def get_optimizer(loss_op, cfg): def get_optimizer_with_freeze(loss_op, cfg): learning_rate = TF.placeholder(tf.float32, shape=[]) - if cfg['optimizer'] == "sgd": + if cfg["optimizer"] == "sgd": optimizer = TF.train.MomentumOptimizer( learning_rate=learning_rate, momentum=0.9 ) - elif cfg['optimizer'] == "adam": + elif cfg["optimizer"] == "adam": optimizer = TF.train.AdamOptimizer(learning_rate) else: - raise ValueError("unknown optimizer {}".format(cfg['optimizer'])) + raise ValueError("unknown optimizer {}".format(cfg["optimizer"])) train_unfrozen_op = slim.learning.create_train_op(loss_op, optimizer) variables_unfrozen = TF.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "pose") @@ -165,8 +164,8 @@ def train( setup_logging() cfg = load_config(config_yaml) - net_type = cfg['net_type'] - if cfg['dataset_type'] in ("scalecrop", "tensorpack", "deterministic"): + net_type = cfg["net_type"] + if cfg["dataset_type"] in ("scalecrop", "tensorpack", "deterministic"): print( "Switching batchsize to 1, as tensorpack/scalecrop/deterministic loaders do not support batches >1. Use imgaug/default loader." ) @@ -183,7 +182,7 @@ def train( TF.summary.scalar(k, t) merged_summaries = TF.summary.merge_all() - if "snapshot" in Path(cfg['init_weights']).stem and keepdeconvweights: + if "snapshot" in Path(cfg["init_weights"]).stem and keepdeconvweights: print("Loading already trained DLC with backbone:", net_type) variables_to_restore = slim.get_variables_to_restore() else: @@ -195,11 +194,15 @@ def train( variables_to_restore = slim.get_variables_to_restore( include=["MobilenetV2"] ) - elif 'efficientnet' in net_type: - variables_to_restore = slim.get_variables_to_restore(include=["efficientnet"]) + elif "efficientnet" in net_type: + variables_to_restore = slim.get_variables_to_restore( + include=["efficientnet"] + ) variables_to_restore = { - var.op.name.replace("efficientnet/", "") - + '/ExponentialMovingAverage':var for var in variables_to_restore} + var.op.name.replace("efficientnet/", "") + + "/ExponentialMovingAverage": var + for var in variables_to_restore + } else: print("Wait for DLC 2.3.") @@ -216,10 +219,10 @@ def train( sess = TF.Session() coord, thread = start_preloading(sess, enqueue_op, dataset, placeholders) - train_writer = TF.summary.FileWriter(cfg['log_dir'], sess.graph) + train_writer = TF.summary.FileWriter(cfg["log_dir"], sess.graph) if cfg.get("freezeencoder", False): - if 'efficientnet' in net_type: + if "efficientnet" in net_type: print("Freezing ONLY supported MobileNet/ResNet currently!!") learning_rate, train_op, tstep = get_optimizer(total_loss, cfg) @@ -232,22 +235,22 @@ def train( sess.run(TF.local_variables_initializer()) # Restore variables from disk. 
-    restorer.restore(sess, cfg['init_weights'])
+    restorer.restore(sess, cfg["init_weights"])

    if maxiters == None:
-        max_iter = int(cfg['multi_step'][-1][1])
+        max_iter = int(cfg["multi_step"][-1][1])
    else:
-        max_iter = min(int(cfg['multi_step'][-1][1]), int(maxiters))
+        max_iter = min(int(cfg["multi_step"][-1][1]), int(maxiters))
        # display_iters = max(1,int(displayiters))
        print("Max_iters overwritten as", max_iter)

    if displayiters == None:
-        display_iters = max(1, int(cfg['display_iters']))
+        display_iters = max(1, int(cfg["display_iters"]))
    else:
        display_iters = max(1, int(displayiters))
        print("Display_iters overwritten as", display_iters)

    if saveiters == None:
-        save_iters = max(1, int(cfg['save_iters']))
+        save_iters = max(1, int(cfg["save_iters"]))
    else:
        save_iters = max(1, int(saveiters))

@@ -263,16 +266,15 @@ def train(
    print(cfg)
    print("Starting training....")
    for it in range(max_iter + 1):
-        if 'efficientnet' in net_type:
-            dict={tstep: it}
-            current_lr = sess.run(learning_rate,feed_dict=dict)
+        if "efficientnet" in net_type:
+            dict = {tstep: it}
+            current_lr = sess.run(learning_rate, feed_dict=dict)
        else:
            current_lr = lr_gen.get_lr(it)
-            dict={learning_rate: current_lr}
+            dict = {learning_rate: current_lr}

        [_, loss_val, summary] = sess.run(
-            [train_op, total_loss, merged_summaries],
-            feed_dict=dict,
+            [train_op, total_loss, merged_summaries], feed_dict=dict
        )
        cum_loss += loss_val
        train_writer.add_summary(summary, it)
@@ -290,7 +292,7 @@ def train(

        # Save snapshot
        if (it % save_iters == 0 and it != 0) or it == max_iter:
-            model_name = cfg['snapshot_prefix']
+            model_name = cfg["snapshot_prefix"]
            saver.save(sess, model_name, global_step=it)

    lrf.close()
diff --git a/deeplabcut/pose_estimation_tensorflow/train_multianimal.py b/deeplabcut/pose_estimation_tensorflow/train_multianimal.py
index 2cfc53365..7ddd65165 100755
--- a/deeplabcut/pose_estimation_tensorflow/train_multianimal.py
+++ b/deeplabcut/pose_estimation_tensorflow/train_multianimal.py
@@ -36,7 +36,7 @@ class LearningRate(object):

    def __init__(self, cfg):
-        self.steps = cfg['multi_step']
+        self.steps = cfg["multi_step"]
        self.current_step = 0

    def get_lr(self, iteration):
@@ -89,25 +89,24 @@ def start_preloading(sess, enqueue_op, dataset, placeholders):

 def get_optimizer(loss_op, cfg):
-    tstep = tf.placeholder(tf.int32,shape=[],name='tstep')
-    if 'efficientnet' in cfg['net_type']:
+    tstep = tf.placeholder(tf.int32, shape=[], name="tstep")
+    if "efficientnet" in cfg["net_type"]:
        print("Switching to cosine decay schedule with adam!")
-        cfg['optimizer'] == "adam"
-        learning_rate = tf.train.cosine_decay(cfg['lr_init'],
-                                              tstep,
-                                              cfg['decay_steps'],
-                                              alpha=cfg['alpha_r'])
+        cfg["optimizer"] = "adam"
+        learning_rate = tf.train.cosine_decay(
+            cfg["lr_init"], tstep, cfg["decay_steps"], alpha=cfg["alpha_r"]
+        )
    else:
        learning_rate = tf.placeholder(tf.float32, shape=[])

-    if cfg['optimizer'] == "sgd":
+    if cfg["optimizer"] == "sgd":
        optimizer = TF.train.MomentumOptimizer(
            learning_rate=learning_rate, momentum=0.9
        )
-    elif cfg['optimizer'] == "adam":
+    elif cfg["optimizer"] == "adam":
        optimizer = TF.train.AdamOptimizer(learning_rate)
    else:
-        raise ValueError("unknown optimizer {}".format(cfg['optimizer']))
+        raise ValueError("unknown optimizer {}".format(cfg["optimizer"]))
    train_op = slim.learning.create_train_op(loss_op, optimizer)

    return learning_rate, train_op, tstep
@@ -153,9 +152,9 @@ def train(
    for k, t in losses.items():
        TF.summary.scalar(k, t)
    merged_summaries = TF.summary.merge_all()
-    net_type = cfg['net_type']
+    net_type = cfg["net_type"]

-    if "snapshot" in Path(cfg['init_weights']).stem and keepdeconvweights:
+    if "snapshot" in Path(cfg["init_weights"]).stem and keepdeconvweights:
        print("Loading already trained DLC with backbone:", net_type)
        variables_to_restore = slim.get_variables_to_restore()
    else:
@@ -167,11 +166,15 @@ def train(
            variables_to_restore = slim.get_variables_to_restore(
                include=["MobilenetV2"]
            )
-        elif 'efficientnet' in net_type:
-            variables_to_restore = slim.get_variables_to_restore(include=["efficientnet"])
+        elif "efficientnet" in net_type:
+            variables_to_restore = slim.get_variables_to_restore(
+                include=["efficientnet"]
+            )
            variables_to_restore = {
-                var.op.name.replace("efficientnet/", "")
-                + '/ExponentialMovingAverage':var for var in variables_to_restore}
+                var.op.name.replace("efficientnet/", "")
+                + "/ExponentialMovingAverage": var
+                for var in variables_to_restore
+            }
        else:
            print("Wait for DLC 2.3.")
@@ -188,34 +191,34 @@ def train(
    sess = TF.Session()

    coord, thread = start_preloading(sess, enqueue_op, dataset, placeholders)
-    train_writer = TF.summary.FileWriter(cfg['log_dir'], sess.graph)
+    train_writer = TF.summary.FileWriter(cfg["log_dir"], sess.graph)
    learning_rate, train_op, tstep = get_optimizer(total_loss, cfg)

    sess.run(TF.global_variables_initializer())
    sess.run(TF.local_variables_initializer())

    # Restore variables from disk.
-    if 'efficientnet' in net_type:
-        init_weights = os.path.join(cfg['init_weights'],"model.ckpt")
+    if "efficientnet" in net_type:
+        init_weights = os.path.join(cfg["init_weights"], "model.ckpt")
    else:
-        init_weights = cfg['init_weights']
+        init_weights = cfg["init_weights"]
    restorer.restore(sess, init_weights)

    if maxiters == None:
-        max_iter = int(cfg['multi_step'][-1][1])
+        max_iter = int(cfg["multi_step"][-1][1])
    else:
-        max_iter = min(int(cfg['multi_step'][-1][1]), int(maxiters))
+        max_iter = min(int(cfg["multi_step"][-1][1]), int(maxiters))
        # display_iters = max(1,int(displayiters))
        print("Max_iters overwritten as", max_iter)

    if displayiters == None:
-        display_iters = max(1, int(cfg['display_iters']))
+        display_iters = max(1, int(cfg["display_iters"]))
    else:
        display_iters = max(1, int(displayiters))
        print("Display_iters overwritten as", display_iters)

    if saveiters == None:
-        save_iters = max(1, int(cfg['save_iters']))
+        save_iters = max(1, int(cfg["save_iters"]))
    else:
        save_iters = max(1, int(saveiters))

@@ -230,23 +233,22 @@ def train(
    print(cfg)
    print("Starting multi-animal training....")
    for it in range(max_iter + 1):
-        if 'efficientnet' in net_type:
-            dict={tstep: it}
-            current_lr = sess.run(learning_rate,feed_dict=dict)
+        if "efficientnet" in net_type:
+            dict = {tstep: it}
+            current_lr = sess.run(learning_rate, feed_dict=dict)
        else:
            current_lr = lr_gen.get_lr(it)
-            dict={learning_rate: current_lr}
+            dict = {learning_rate: current_lr}

        # [_, loss_val, summary] = sess.run([train_op, total_loss, merged_summaries],feed_dict={learning_rate: current_lr})
        [_, alllosses, loss_val, summary] = sess.run(
-            [train_op, losses, total_loss, merged_summaries],
-            feed_dict=dict,
+            [train_op, losses, total_loss, merged_summaries], feed_dict=dict
        )

        partloss += alllosses["part_loss"]  # scoremap loss
-        if cfg['location_refinement']:
+        if cfg["location_refinement"]:
            locrefloss += alllosses["locref_loss"]
-        if cfg['pairwise_predict']:  # paf loss
+        if cfg["pairwise_predict"]:  # paf loss
            pwloss += alllosses["pairwise_loss"]
        cumloss += loss_val

@@ -280,7 +282,7 @@ def train(

        # Save snapshot
        if (it % save_iters == 0 and it != 0) or it == max_iter:
-            model_name = cfg['snapshot_prefix']
+            model_name = cfg["snapshot_prefix"]
            saver.save(sess, model_name, global_step=it)

    lrf.close()
diff --git a/deeplabcut/pose_estimation_tensorflow/util/visualize.py b/deeplabcut/pose_estimation_tensorflow/util/visualize.py
index 19a1ddfcd..e660ac2eb 100644
--- a/deeplabcut/pose_estimation_tensorflow/util/visualize.py
+++ b/deeplabcut/pose_estimation_tensorflow/util/visualize.py
@@ -69,8 +69,8 @@ def visualize_joints(image, pose):

 def show_heatmaps(cfg, img, scmap, pose, cmap="jet"):
    interp = "bilinear"
-    all_joints = cfg['all_joints']
-    all_joints_names = cfg['all_joints_names']
+    all_joints = cfg["all_joints"]
+    all_joints_names = cfg["all_joints_names"]
    subplot_width = 3
    subplot_height = math.ceil((len(all_joints) + 1) / subplot_width)
    f, axarr = plt.subplots(subplot_height, subplot_width)
diff --git a/deeplabcut/pose_estimation_tensorflow/vis_dataset.py b/deeplabcut/pose_estimation_tensorflow/vis_dataset.py
index 7b9421f6e..5655ecdcb 100644
--- a/deeplabcut/pose_estimation_tensorflow/vis_dataset.py
+++ b/deeplabcut/pose_estimation_tensorflow/vis_dataset.py
@@ -51,7 +51,7 @@ def display_dataset():
            curr_plot = axarr[plot_j, plot_i]
            curr_plot.axis("off")

-            if j >= cfg['num_joints']:
+            if j >= cfg["num_joints"]:
                continue

            scmap_part = scmap[:, :, j]
diff --git a/deeplabcut/pose_estimation_tensorflow/visualizemaps.py b/deeplabcut/pose_estimation_tensorflow/visualizemaps.py
index 8214470f8..9d9b2cc68 100644
--- a/deeplabcut/pose_estimation_tensorflow/visualizemaps.py
+++ b/deeplabcut/pose_estimation_tensorflow/visualizemaps.py
@@ -467,11 +467,11 @@ def extract_save_all_maps(
                list_of_inds.append(
                    [(2 * n, 2 * n + 1), (bptnames[edge[0]], bptnames[edge[1]])]
                )
-            if len(to_plot)>1:
+            if len(to_plot) > 1:
                map_ = scmap[:, :, to_plot].sum(axis=2)
                locref_x_ = locref_x[:, :, to_plot].sum(axis=2)
                locref_y_ = locref_y[:, :, to_plot].sum(axis=2)
-            elif len(to_plot)==1 and len(bptnames)>1:
+            elif len(to_plot) == 1 and len(bptnames) > 1:
                map_ = scmap[:, :, to_plot]
                locref_x_ = locref_x[:, :, to_plot]
                locref_y_ = locref_y[:, :, to_plot]
diff --git a/deeplabcut/refine_training_dataset/stitch.py b/deeplabcut/refine_training_dataset/stitch.py
index d3b580caa..e0a265bb0 100644
--- a/deeplabcut/refine_training_dataset/stitch.py
+++ b/deeplabcut/refine_training_dataset/stitch.py
@@ -28,16 +28,18 @@ def __init__(self, data, inds):
            Corresponding time frame indices.
        """
        if data.ndim != 3 or data.shape[-1] not in (3, 4):
-            raise ValueError('Data must of shape (nframes, nbodyparts, 3 or 4)')
+            raise ValueError("Data must be of shape (nframes, nbodyparts, 3 or 4)")

        if data.shape[0] != len(inds):
-            raise ValueError('Data and corresponding indices must have the same length.')
+            raise ValueError(
+                "Data and corresponding indices must have the same length."
+ ) self.data = data.astype(np.float64) self.inds = np.array(inds) monotonically_increasing = all(a < b for a, b in zip(inds, inds[1:])) if not monotonically_increasing: - idx = np.argsort(inds, kind='mergesort') # For stable sort with duplicates + idx = np.argsort(inds, kind="mergesort") # For stable sort with duplicates self.inds = self.inds[idx] self.data = self.data[idx] self._centroid = None @@ -75,8 +77,10 @@ def __contains__(self, other_tracklet): return np.isin(self.inds, other_tracklet.inds, assume_unique=True).any() def __repr__(self): - return f'Tracklet of length {len(self)} from {self.start} to {self.end} ' \ - f'with reliability {self.likelihood:.3f}' + return ( + f"Tracklet of length {len(self)} from {self.start} to {self.end} " + f"with reliability {self.likelihood:.3f}" + ) @property def xy(self): @@ -108,7 +112,7 @@ def likelihood(self): def identity(self): """Return the average predicted identity of all Tracklet detections.""" try: - return mode(self.data[..., 3], axis=None, nan_policy='omit')[0][0] + return mode(self.data[..., 3], axis=None, nan_policy="omit")[0][0] except IndexError: return -1 @@ -140,7 +144,7 @@ def del_data_at(self, ind): def interpolate(self, max_gap=1): if max_gap < 1: - raise ValueError('Gap should be a strictly positive integer.') + raise ValueError("Gap should be a strictly positive integer.") gaps = np.diff(self.inds) - 1 valid_gaps = (0 < gaps) & (gaps <= max_gap) @@ -168,21 +172,25 @@ def contains_duplicates(self, return_indices=False): return has_duplicates return has_duplicates, np.flatnonzero(np.diff(self.inds) == 0) - def calc_velocity(self, where='head', norm=True): + def calc_velocity(self, where="head", norm=True): """ Calculate the linear velocity of either the `head` or `tail` of the Tracklet, computed over the last or first three frames, respectively. If `norm`, return the absolute speed rather than a 2D vector. """ - if where == 'tail': - vel = (np.diff(self.centroid[:3], axis=0) - / np.diff(self.inds[:3])[:, np.newaxis]) - elif where == 'head': - vel = (np.diff(self.centroid[-3:], axis=0) - / np.diff(self.inds[-3:])[:, np.newaxis]) + if where == "tail": + vel = ( + np.diff(self.centroid[:3], axis=0) + / np.diff(self.inds[:3])[:, np.newaxis] + ) + elif where == "head": + vel = ( + np.diff(self.centroid[-3:], axis=0) + / np.diff(self.inds[-3:])[:, np.newaxis] + ) else: - raise ValueError(f'Unknown where={where}') + raise ValueError(f"Unknown where={where}") if norm: return np.sqrt(np.sum(vel ** 2, axis=1)).mean() return vel.mean(axis=0) @@ -192,13 +200,13 @@ def maximal_velocity(self): vel = np.diff(self.centroid, axis=0) / np.diff(self.inds)[:, np.newaxis] return np.sqrt(np.max(np.sum(vel ** 2, axis=1))) - def calc_rate_of_turn(self, where='head'): + def calc_rate_of_turn(self, where="head"): """ Calculate the rate of turn (or angular velocity) of either the `head` or `tail` of the Tracklet, computed over the last or first three frames, respectively. """ - if where == 'tail': + if where == "tail": v = np.diff(self.centroid[:3], axis=0) else: v = np.diff(self.centroid[-3:], axis=0) @@ -225,13 +233,19 @@ def distance_to(self, other_tracklet): of one to the tail/head of the other. 
""" if self in other_tracklet: - dist = (self.centroid[np.isin(self.inds, other_tracklet.inds)] - - other_tracklet.centroid[np.isin(other_tracklet.inds, self.inds)]) + dist = ( + self.centroid[np.isin(self.inds, other_tracklet.inds)] + - other_tracklet.centroid[np.isin(other_tracklet.inds, self.inds)] + ) return np.sqrt(np.sum(dist ** 2, axis=1)).mean() elif self < other_tracklet: - return np.sqrt(np.sum((self.centroid[-1] - other_tracklet.centroid[0]) ** 2)) + return np.sqrt( + np.sum((self.centroid[-1] - other_tracklet.centroid[0]) ** 2) + ) else: - return np.sqrt(np.sum((self.centroid[0] - other_tracklet.centroid[-1]) ** 2)) + return np.sqrt( + np.sum((self.centroid[0] - other_tracklet.centroid[-1]) ** 2) + ) def motion_affinity_with(self, other_tracklet): """ @@ -244,12 +258,16 @@ def motion_affinity_with(self, other_tracklet): if time_gap > 0: if self < other_tracklet: d1 = self.centroid[-1] + time_gap * self.calc_velocity(norm=False) - d2 = other_tracklet.centroid[0] - time_gap * other_tracklet.calc_velocity('tail', False) + d2 = other_tracklet.centroid[ + 0 + ] - time_gap * other_tracklet.calc_velocity("tail", False) delta1 = other_tracklet.centroid[0] - d1 delta2 = self.centroid[-1] - d2 else: - d1 = other_tracklet.centroid[-1] + time_gap * other_tracklet.calc_velocity(norm=False) - d2 = self.centroid[0] - time_gap * self.calc_velocity('tail', False) + d1 = other_tracklet.centroid[ + -1 + ] + time_gap * other_tracklet.calc_velocity(norm=False) + d2 = self.centroid[0] - time_gap * self.calc_velocity("tail", False) delta1 = self.centroid[0] - d1 delta2 = other_tracklet.centroid[-1] - d2 return (np.sqrt(np.sum(delta1 ** 2)) + np.sqrt(np.sum(delta2 ** 2))) / 2 @@ -291,8 +309,7 @@ def box_overlap_with(self, other_tracklet): @staticmethod def undirected_hausdorff(u, v): - return max(directed_hausdorff(u, v)[0], - directed_hausdorff(v, u)[0]) + return max(directed_hausdorff(u, v)[0], directed_hausdorff(v, u)[0]) @staticmethod def iou(bbox1, bbox2): @@ -303,9 +320,11 @@ def iou(bbox1, bbox2): w = max(0, x2 - x1) h = max(0, y2 - y1) wh = w * h - return wh / ((bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1]) - + (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1]) - - wh) + return wh / ( + (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1]) + + (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1]) + - wh + ) def calc_bbox(self, ind): xy = self.xy[ind] @@ -414,13 +433,13 @@ def __init__( raise IOError("Tracklets are empty.") if n_tracks < 2: - raise ValueError('There must at least be two tracks to reconstruct.') + raise ValueError("There must at least be two tracks to reconstruct.") if min_length < 3: - raise ValueError('A tracklet must have a minimal length of 3.') + raise ValueError("A tracklet must have a minimal length of 3.") self.min_length = min_length - self.filename = '' + self.filename = "" self.header = None self.single = None self.n_tracks = n_tracks @@ -452,15 +471,18 @@ def __init__( # Note that if tracklets are very short, some may actually be part of the same track # and thus incorrectly reflect separate track endpoints... - self._first_tracklets = sorted(self, key=lambda t: t.start)[:self.n_tracks] - self._last_tracklets = sorted(self, key=lambda t: t.end)[-self.n_tracks:] + self._first_tracklets = sorted(self, key=lambda t: t.start)[: self.n_tracks] + self._last_tracklets = sorted(self, key=lambda t: t.end)[-self.n_tracks :] # Map each Tracklet to an entry and output nodes and vice versa, # which is convenient once the tracklets are stitched. 
- self._mapping = {tracklet: {'in': f'{i}in', 'out': f'{i}out'} - for i, tracklet in enumerate(self)} - self._mapping_inv = {label: k for k, v in self._mapping.items() - for label in v.values()} + self._mapping = { + tracklet: {"in": f"{i}in", "out": f"{i}out"} + for i, tracklet in enumerate(self) + } + self._mapping_inv = { + label: k for k, v in self._mapping.items() for label in v.values() + } def __getitem__(self, item): return self.tracklets[item] @@ -477,7 +499,7 @@ def from_pickle( split_tracklets=True, prestitch_residuals=True, ): - with open(pickle_file, 'rb') as file: + with open(pickle_file, "rb") as file: tracklets = pickle.load(file) class_ = cls.from_dict_of_dict( tracklets, n_tracks, min_length, split_tracklets, prestitch_residuals @@ -495,7 +517,7 @@ def from_dict_of_dict( prestitch_residuals=True, ): tracklets = [] - header = dict_of_dict.pop('header', None) + header = dict_of_dict.pop("header", None) single = None for k, dict_ in dict_of_dict.items(): inds, data = zip(*[(cls.get_frame_ind(k), v) for k, v in dict_.items()]) @@ -507,16 +529,12 @@ def from_dict_of_dict( except ValueError: pass tracklet = Tracklet(data, inds) - if k == 'single': + if k == "single": single = tracklet else: tracklets.append(Tracklet(data, inds)) class_ = cls( - tracklets, - n_tracks, - min_length, - split_tracklets, - prestitch_residuals, + tracklets, n_tracks, min_length, split_tracklets, prestitch_residuals ) class_.header = header class_.single = single @@ -565,14 +583,14 @@ def build_graph(self, max_gap=None, weight_func=None): max_gap = int(1.5 * self.compute_max_gap()) self.G = nx.DiGraph() - self.G.add_node('source', demand=-self.n_tracks) - self.G.add_node('sink', demand=self.n_tracks) + self.G.add_node("source", demand=-self.n_tracks) + self.G.add_node("sink", demand=self.n_tracks) nodes_in, nodes_out = zip(*[v.values() for v in self._mapping.values()]) self.G.add_nodes_from(nodes_in, demand=1) self.G.add_nodes_from(nodes_out, demand=-1) self.G.add_edges_from(zip(nodes_in, nodes_out), capacity=1) - self.G.add_edges_from(zip(['source'] * len(self), nodes_in), capacity=1) - self.G.add_edges_from(zip(nodes_out, ['sink'] * len(self)), capacity=1) + self.G.add_edges_from(zip(["source"] * len(self), nodes_in), capacity=1) + self.G.add_edges_from(zip(nodes_out, ["sink"] * len(self)), capacity=1) if weight_func is None: weight_func = self.calculate_edge_weight for i in trange(len(self)): @@ -585,54 +603,69 @@ def build_graph(self, max_gap=None, weight_func=None): elif gap > 0: # The algorithm works better with integer weights w = int(100 * weight_func(self[i], self[j])) - self.G.add_edge(self._mapping[self[i]]['out'], - self._mapping[self[j]]['in'], - weight=w, capacity=1) + self.G.add_edge( + self._mapping[self[i]]["out"], + self._mapping[self[j]]["in"], + weight=w, + capacity=1, + ) def _update_edge_weights(self, weight_func): if self.G is None: - raise ValueError('Inexistent graph. Call `build_graph` first') + raise ValueError("Inexistent graph. Call `build_graph` first") - for node1, node2, weight in self.G.edges.data('weight'): + for node1, node2, weight in self.G.edges.data("weight"): if weight is not None: w = weight_func(self._mapping_inv[node1], self._mapping_inv[node2]) - self.G.edges[(node1, node2)]['weight'] = w + self.G.edges[(node1, node2)]["weight"] = w def stitch(self, add_back_residuals=True): if self.G is None: - raise ValueError('Inexistent graph. Call `build_graph` first') + raise ValueError("Inexistent graph. 
Call `build_graph` first") try: _, self.flow = nx.capacity_scaling(self.G) self.paths = self.reconstruct_paths() except nx.exception.NetworkXUnfeasible: - print('No optimal solution found. Employing black magic...') + print("No optimal solution found. Employing black magic...") # Let us prune the graph by removing all source and sink edges # but those connecting the `n_tracks` first and last tracklets. - in_to_keep = [self._mapping[first_tracklet]['in'] - for first_tracklet in self._first_tracklets] - out_to_keep = [self._mapping[last_tracklet]['out'] - for last_tracklet in self._last_tracklets] - in_to_remove = (set(node for _, node in self.G.out_edges('source')) - .difference(in_to_keep)) - out_to_remove = (set(node for node, _ in self.G.in_edges('sink')) - .difference(out_to_keep)) - self.G.remove_edges_from(zip(['source'] * len(in_to_remove), in_to_remove)) - self.G.remove_edges_from(zip(out_to_remove, ['sink'] * len(out_to_remove))) + in_to_keep = [ + self._mapping[first_tracklet]["in"] + for first_tracklet in self._first_tracklets + ] + out_to_keep = [ + self._mapping[last_tracklet]["out"] + for last_tracklet in self._last_tracklets + ] + in_to_remove = set( + node for _, node in self.G.out_edges("source") + ).difference(in_to_keep) + out_to_remove = set(node for node, _ in self.G.in_edges("sink")).difference( + out_to_keep + ) + self.G.remove_edges_from(zip(["source"] * len(in_to_remove), in_to_remove)) + self.G.remove_edges_from(zip(out_to_remove, ["sink"] * len(out_to_remove))) # Preflow push seems to work slightly better than shortest # augmentation path..., and is more computationally efficient. paths = [] - for path in nx.node_disjoint_paths(self.G, 'source', 'sink', - preflow_push, self.n_tracks): + for path in nx.node_disjoint_paths( + self.G, "source", "sink", preflow_push, self.n_tracks + ): temp = set() for node in path[1:-1]: self.G.remove_node(node) temp.add(self._mapping_inv[node]) paths.append(list(temp)) incomplete_tracks = self.n_tracks - len(paths) - if incomplete_tracks == 1: # All remaining nodes ought to belong to the same track - nodes = set(self._mapping_inv[node] for node in self.G - if node not in ('source', 'sink')) + if ( + incomplete_tracks == 1 + ): # All remaining nodes ought to belong to the same track + nodes = set( + self._mapping_inv[node] + for node in self.G + if node not in ("source", "sink") + ) # Verify whether there are overlapping tracklets for t1, t2 in combinations(nodes, 2): if t1 in t2: @@ -672,7 +705,9 @@ def _finalize_tracks(self): n_max = len(residuals) while n_attemps < n_max: for res in tqdm(residuals[::-1]): - easy_fit = [i for i, track in enumerate(self.tracks) if res not in track] + easy_fit = [ + i for i, track in enumerate(self.tracks) if res not in track + ] if not easy_fit: residuals.remove(res) continue @@ -704,8 +739,9 @@ def _finalize_tracks(self): elif right_gap <= 3: dist = np.linalg.norm(track.centroid[e] - c1[1]) else: - dist = (np.linalg.norm(track.centroid[s] - c1[0]) - + np.linalg.norm(track.centroid[e] - c1[1])) + dist = np.linalg.norm(track.centroid[s] - c1[0]) + np.linalg.norm( + track.centroid[e] - c1[1] + ) dists.append((n, dist)) if not dists: continue @@ -745,7 +781,7 @@ def _prestitch_residuals(self, max_gap=5): def concatenate_data(self): if self.tracks is None: - raise ValueError('No tracks were found. Call `stitch` first') + raise ValueError("No tracks were found. 
Call `stitch` first") # Refresh temporal bounds self._first_frame = min(self.tracks, key=lambda t: t.start).start @@ -760,36 +796,38 @@ def concatenate_data(self): def format_df(self): data = self.concatenate_data() - individuals = [f'ind{i}' for i in range(1, self.n_tracks + 1)] - coords = ['x', 'y', 'likelihood'] + individuals = [f"ind{i}" for i in range(1, self.n_tracks + 1)] + coords = ["x", "y", "likelihood"] if self.header is not None: - scorer = self.header.get_level_values('scorer').unique().to_list() - bpts = self.header.get_level_values('bodyparts').unique().to_list() + scorer = self.header.get_level_values("scorer").unique().to_list() + bpts = self.header.get_level_values("bodyparts").unique().to_list() else: - scorer = ['scorer'] + scorer = ["scorer"] n_bpts = data.shape[1] // (len(individuals) * len(coords)) - bpts = [f'bpt{i}' for i in range(1, n_bpts + 1)] + bpts = [f"bpt{i}" for i in range(1, n_bpts + 1)] columns = pd.MultiIndex.from_product( [scorer, individuals, bpts, coords], - names=['scorer', 'individuals', 'bodyparts', 'coords'] + names=["scorer", "individuals", "bodyparts", "coords"], ) inds = range(self._first_frame, self._last_frame + 1) df = pd.DataFrame(data, columns=columns, index=inds) if self.single is not None: n_dets = self.single.data.shape[1] columns = pd.MultiIndex.from_product( - [scorer, ['single'], [f'bpt{i}' for i in range(1, n_dets + 1)], coords], - names=['scorer', 'individuals', 'bodyparts', 'coords'] + [scorer, ["single"], [f"bpt{i}" for i in range(1, n_dets + 1)], coords], + names=["scorer", "individuals", "bodyparts", "coords"], + ) + df2 = pd.DataFrame( + self.single.flat_data, columns=columns, index=self.single.inds ) - df2 = pd.DataFrame(self.single.flat_data, columns=columns, index=self.single.inds) - df = df.join(df2, how='outer') + df = df.join(df2, how="outer") return df - def write_tracks(self, output_name=''): + def write_tracks(self, output_name=""): df = self.format_df() if not output_name: - output_name = self.filename.replace('pickle', 'h5') - df.to_hdf(output_name, 'tracks', format='table', mode='w') + output_name = self.filename.replace("pickle", "h5") + df.to_hdf(output_name, "tracks", format="table", mode="w") @staticmethod def calculate_edge_weight(tracklet1, tracklet2): @@ -799,27 +837,27 @@ def calculate_edge_weight(tracklet1, tracklet2): @property def weights(self): if self.G is None: - raise ValueError('Inexistent graph. Call `build_graph` first') + raise ValueError("Inexistent graph. Call `build_graph` first") - return nx.get_edge_attributes(self.G, 'weight') + return nx.get_edge_attributes(self.G, "weight") def draw_graph(self, with_weights=False): if self.G is None: - raise ValueError('Inexistent graph. Call `build_graph` first') + raise ValueError("Inexistent graph. Call `build_graph` first") pos = nx.spring_layout(self.G) nx.draw_networkx(self.G, pos) if with_weights: nx.draw_networkx_edge_labels(self.G, pos, edge_labels=self.weights) - def plot_paths(self, colormap='Set2'): + def plot_paths(self, colormap="Set2"): if self.paths is None: - raise ValueError('No paths were found. Call `stitch` first') + raise ValueError("No paths were found. 
Call `stitch` first") fig, ax = plt.subplots() ax.set_yticks([]) for loc, spine in ax.spines.items(): - if loc != 'bottom': + if loc != "bottom": spine.set_visible(False) for path in self.paths: length = len(path) @@ -827,26 +865,26 @@ def plot_paths(self, colormap='Set2'): for tracklet, color in zip(path, colors): tracklet.plot(color=color, ax=ax) - def plot_tracks(self, colormap='viridis'): + def plot_tracks(self, colormap="viridis"): if self.tracks is None: - raise ValueError('No tracks were found. Call `stitch` first') + raise ValueError("No tracks were found. Call `stitch` first") fig, ax = plt.subplots() ax.set_yticks([]) for loc, spine in ax.spines.items(): - if loc != 'bottom': + if loc != "bottom": spine.set_visible(False) colors = plt.get_cmap(colormap, self.n_tracks)(range(self.n_tracks)) for track, color in zip(self.tracks, colors): track.plot(color=color, ax=ax) - def plot_tracklets(self, colormap='Paired'): + def plot_tracklets(self, colormap="Paired"): fig, axes = plt.subplots(ncols=2, figsize=(14, 4)) axes[0].set_yticks([]) for loc, spine in axes[0].spines.items(): - if loc != 'bottom': + if loc != "bottom": spine.set_visible(False) - axes[1].axis('off') + axes[1].axis("off") cmap = plt.get_cmap(colormap) colors = cycle(cmap.colors) @@ -864,9 +902,9 @@ def plot_tracklets(self, colormap='Paired'): def reconstruct_paths(self): paths = [] - for node, flow in self.flow['source'].items(): + for node, flow in self.flow["source"].items(): if flow == 1: - path = self.reconstruct_path(node.replace('in', 'out')) + path = self.reconstruct_path(node.replace("in", "out")) paths.append([self._mapping_inv[tracklet] for tracklet in path]) return paths @@ -874,9 +912,9 @@ def reconstruct_path(self, source): path = [source] for node, flow in self.flow[source].items(): if flow == 1: - if node != 'sink': + if node != "sink": self.flow[source][node] -= 1 - path.extend(self.reconstruct_path(node.replace('in', 'out'))) + path.extend(self.reconstruct_path(node.replace("in", "out"))) return path @@ -887,7 +925,7 @@ def stitch_tracklets( split_tracklets=True, prestitch_residuals=True, weight_func=None, - output_name='', + output_name="", ): """ Stitch sparse tracklets into full tracks via a graph-based, diff --git a/deeplabcut/utils/auxfun_models.py b/deeplabcut/utils/auxfun_models.py index d495a6389..5f168317a 100755 --- a/deeplabcut/utils/auxfun_models.py +++ b/deeplabcut/utils/auxfun_models.py @@ -40,23 +40,26 @@ def Check4weights(modeltype, parent_path, num_shuffles): + "_224.ckpt", ) ) - elif 'efficientnet' in modeltype: + elif "efficientnet" in modeltype: model_path = Path( - os.path.join(parent_path, - 'pose_estimation_tensorflow/models/pretrained/' - + modeltype.replace('_','-'))) + os.path.join( + parent_path, + "pose_estimation_tensorflow/models/pretrained/" + + modeltype.replace("_", "-"), + ) + ) else: print( - "Currently ResNet (50, 101, 152), MobilenetV2 (1, 0.75, 0.5 and 0.35) and EfficientNet (b0-b6) are supported, please change 'resnet' entry in config.yaml!" + "Currently ResNet (50, 101, 152), MobilenetV2 (1, 0.75, 0.5 and 0.35) and EfficientNet (b0-b6) are supported, please change 'resnet' entry in config.yaml!" ) num_shuffles = -1 # thus the loop below is empty... 
model_path = parent_path if num_shuffles > 0: - if 'efficientnet' in modeltype: + if "efficientnet" in modeltype: if not os.path.isdir(model_path): - Downloadweights(modeltype,model_path) - model_path = os.path.join(model_path, 'model.ckpt') + Downloadweights(modeltype, model_path) + model_path = os.path.join(model_path, "model.ckpt") else: if not model_path.is_file(): Downloadweights(modeltype, model_path) @@ -77,9 +80,9 @@ def Downloadweights(modeltype, model_path): target_dir / "pretrained_model_urls.yaml" ) try: - if 'efficientnet' in modeltype: - url = neturls['efficientnet'] - url = url + modeltype.replace('_','-') + '.tar.gz' + if "efficientnet" in modeltype: + url = neturls["efficientnet"] + url = url + modeltype.replace("_", "-") + ".tar.gz" else: url = neturls[modeltype] print("Downloading a ImageNet-pretrained model from {}....".format(url)) diff --git a/deeplabcut/utils/auxfun_multianimal.py b/deeplabcut/utils/auxfun_multianimal.py index c19e4ae8f..544db9938 100644 --- a/deeplabcut/utils/auxfun_multianimal.py +++ b/deeplabcut/utils/auxfun_multianimal.py @@ -68,7 +68,7 @@ def getpafgraph(cfg, printnames=True): print("Attention, parts do not exist!", link) unconnected = set(range(len(multianimalbodyparts))).difference(connected) - if unconnected and len(multianimalbodyparts)>1: #for single bpt not important! + if unconnected and len(multianimalbodyparts) > 1: # for single bpt not important! raise ValueError( f'Unconnected {", ".join(multianimalbodyparts[i] for i in unconnected)}. ' f"For multi-animal projects, all multianimalbodyparts should be connected. " diff --git a/deeplabcut/utils/auxfun_videos.py b/deeplabcut/utils/auxfun_videos.py index 328d3a9c3..9cd91d2e0 100644 --- a/deeplabcut/utils/auxfun_videos.py +++ b/deeplabcut/utils/auxfun_videos.py @@ -294,22 +294,28 @@ def crop(self, suffix="crop", dest_folder=None): return output_path def rescale( - self, width, height=-1, rotateccw="No", angle=0.0, suffix="rescale", dest_folder=None + self, + width, + height=-1, + rotateccw="No", + angle=0.0, + suffix="rescale", + dest_folder=None, ): output_path = self.make_output_path(suffix, dest_folder) command = ( - f'ffmpeg -n -i {self.video_path} -filter:v ' + f"ffmpeg -n -i {self.video_path} -filter:v " f'"scale={width}:{height}{{}}" -c:a copy {output_path}' ) # Rotate, see: https://stackoverflow.com/questions/3937387/rotating-videos-with-ffmpeg # interesting option to just update metadata. if rotateccw == "Arbitrary": angle = np.deg2rad(angle) - command = (command.format(f', rotate={angle}')) + command = command.format(f", rotate={angle}") elif rotateccw == "Yes": - command = (command.format(f', transpose=1')) + command = command.format(f", transpose=1") else: - command = (command.format('')) + command = command.format("") subprocess.call(command, shell=True) return output_path @@ -451,7 +457,13 @@ def CropVideo( def DownSampleVideo( - vname, width=-1, height=200, outsuffix="downsampled", outpath=None, rotateccw="No", angle=0.0 + vname, + width=-1, + height=200, + outsuffix="downsampled", + outpath=None, + rotateccw="No", + angle=0.0, ): """ Auxiliary function to downsample a video and output it to the same folder with "outsuffix" appended in its name. 
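To sanity-check what the reformatted `rescale` method above actually executes, here is a minimal standalone sketch of the same ffmpeg command assembly. It only mirrors the template visible in this diff; the helper name `build_rescale_command` and the sample file names are illustrative stand-ins, not part of the DeepLabCut API.

    import numpy as np

    def build_rescale_command(
        video_path, output_path, width, height=-1, rotateccw="No", angle=0.0
    ):
        # Same template as in the diff: a scale filter, with an optional
        # rotation spliced into the {} placeholder via str.format.
        command = (
            f"ffmpeg -n -i {video_path} -filter:v "
            f'"scale={width}:{height}{{}}" -c:a copy {output_path}'
        )
        if rotateccw == "Arbitrary":
            # ffmpeg's rotate filter expects radians, hence np.deg2rad.
            return command.format(f", rotate={np.deg2rad(angle)}")
        elif rotateccw == "Yes":
            return command.format(", transpose=1")  # 90-degree transpose
        return command.format("")

    # Downsample to a height of 200 px, preserving aspect ratio (width=-1):
    print(build_rescale_command("in.mp4", "out.mp4", width=-1, height=200))
    # -> ffmpeg -n -i in.mp4 -filter:v "scale=-1:200" -c:a copy out.mp4
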
diff --git a/deeplabcut/utils/auxiliaryfunctions.py b/deeplabcut/utils/auxiliaryfunctions.py index 67a1147a8..e5b4bb8af 100755 --- a/deeplabcut/utils/auxiliaryfunctions.py +++ b/deeplabcut/utils/auxiliaryfunctions.py @@ -470,10 +470,10 @@ def GetEvaluationFolder(trainFraction, shuffle, cfg, modelprefix=""): Task = cfg["Task"] date = cfg["date"] iterate = "iteration-" + str(cfg["iteration"]) - if 'eval_prefix' in cfg: - eval_prefix = cfg['eval_prefix']+'/' + if "eval_prefix" in cfg: + eval_prefix = cfg["eval_prefix"] + "/" else: - eval_prefix = 'evaluation-results'+'/' + eval_prefix = "evaluation-results" + "/" return Path( modelprefix, eval_prefix @@ -708,7 +708,7 @@ def find_analyzed_data(folder, videoname, scorer, filtered=False, track_method=" tracker = "_sk" elif track_method == "box": tracker = "_bx" - elif track_method == 'ellipse': + elif track_method == "ellipse": tracker = "_el" else: tracker = "" @@ -764,8 +764,8 @@ def load_detection_data(video, scorer, track_method): tracker = "sk" elif track_method == "box": tracker = "bx" - elif track_method == 'ellipse': - tracker = 'el' + elif track_method == "ellipse": + tracker = "el" else: raise ValueError(f"Unrecognized track_method={track_method}") diff --git a/deeplabcut/utils/conversioncode.py b/deeplabcut/utils/conversioncode.py index d0e0c00d4..4aadd18fa 100644 --- a/deeplabcut/utils/conversioncode.py +++ b/deeplabcut/utils/conversioncode.py @@ -170,8 +170,12 @@ def analyze_videos_converth5_to_csv(video_folder, videotype=".mp4"): deeplabcut.analyze_videos_converth5_to_csv('/media/alex/experimentaldata/cheetahvideos','.mp4') """ - h5_files = list(auxiliaryfunctions.grab_files_in_folder(video_folder, "h5", relative=False)) - videos = auxiliaryfunctions.grab_files_in_folder(video_folder, videotype, relative=False) + h5_files = list( + auxiliaryfunctions.grab_files_in_folder(video_folder, "h5", relative=False) + ) + videos = auxiliaryfunctions.grab_files_in_folder( + video_folder, videotype, relative=False + ) for video in videos: if "_labeled" in video: continue diff --git a/deeplabcut/utils/skeleton.py b/deeplabcut/utils/skeleton.py index add136355..4c4b48d66 100644 --- a/deeplabcut/utils/skeleton.py +++ b/deeplabcut/utils/skeleton.py @@ -62,7 +62,7 @@ def __init__(self, config_path): found = True break if self.df is None: - raise IOError('No labeled data were found.') + raise IOError("No labeled data were found.") self.bpts = self.df.columns.get_level_values("bodyparts").unique() if not found: diff --git a/setup.py b/setup.py index 8f04c5f43..b523ef88e 100644 --- a/setup.py +++ b/setup.py @@ -56,11 +56,9 @@ "tensorpack==0.9.8", "tqdm", "moviepy<=1.0.1", - "bayesian-optimization" + "bayesian-optimization", ], - extras_require={ - "gui": ["wxpython<4.1"] - }, + extras_require={"gui": ["wxpython<4.1"]}, scripts=["deeplabcut/pose_estimation_tensorflow/models/pretrained/download.sh"], packages=setuptools.find_packages(), data_files=[ diff --git a/tests/test_stitcher.py b/tests/test_stitcher.py index b22d9a8b1..58fc46029 100644 --- a/tests/test_stitcher.py +++ b/tests/test_stitcher.py @@ -30,22 +30,17 @@ def fake_stitcher(): track = Tracklet(data, inds) idx = np.linspace(0, inds.size, N_TRACKLETS + 1, dtype=int) tracklets = TrackletStitcher.split_tracklet(track, idx[1:-1]) - return TrackletStitcher( - tracklets, - n_tracks=2, - ) + return TrackletStitcher(tracklets, n_tracks=2) def test_tracklet_wrong_inputs(fake_tracklet): with pytest.raises(ValueError): _ = Tracklet(fake_tracklet.data[..., :2], fake_tracklet.inds) - _ = 
Tracklet(fake_tracklet.data[:TRACKLET_LEN - 2], fake_tracklet.inds) + _ = Tracklet(fake_tracklet.data[: TRACKLET_LEN - 2], fake_tracklet.inds) def test_tracklet_monotonic_indices(fake_tracklet): - tracklet_inv = Tracklet( - fake_tracklet.data[::-1], fake_tracklet.inds[::-1] - ) + tracklet_inv = Tracklet(fake_tracklet.data[::-1], fake_tracklet.inds[::-1]) np.testing.assert_equal(fake_tracklet.inds, tracklet_inv.inds) np.testing.assert_equal(fake_tracklet.xy, tracklet_inv.xy) @@ -57,13 +52,9 @@ def test_tracklet(fake_tracklet): assert fake_tracklet.start == TRACKLET_START assert fake_tracklet.end == TRACKLET_START + TRACKLET_LEN - 1 np.testing.assert_equal( - fake_tracklet.centroid, - np.full((TRACKLET_LEN, 2), np.arange(N_DETS).mean()) - ) - fake_tracklet2 = Tracklet( - fake_tracklet.data, - fake_tracklet.inds + TRACKLET_LEN + fake_tracklet.centroid, np.full((TRACKLET_LEN, 2), np.arange(N_DETS).mean()) ) + fake_tracklet2 = Tracklet(fake_tracklet.data, fake_tracklet.inds + TRACKLET_LEN) assert fake_tracklet not in fake_tracklet2 tracklet = fake_tracklet + fake_tracklet2 tracklet -= fake_tracklet @@ -83,27 +74,19 @@ def test_tracklet_data_access(fake_tracklet): fake_tracklet.get_data_at(TRACKLET_START), fake_tracklet.data[0] ) fake_tracklet.set_data_at(TRACKLET_START + 1, fake_tracklet.data[0] * 2) - np.testing.assert_equal( - fake_tracklet.data[1], fake_tracklet.data[0] * 2 - ) + np.testing.assert_equal(fake_tracklet.data[1], fake_tracklet.data[0] * 2) fake_tracklet.del_data_at(TRACKLET_START + 1) assert not fake_tracklet.is_continuous assert TRACKLET_START + 1 not in fake_tracklet.inds -@pytest.mark.parametrize( - "where, norm", - [("head", False), ("tail", True)] -) +@pytest.mark.parametrize("where, norm", [("head", False), ("tail", True)]) def test_tracklet_calc_velocity(fake_tracklet, where, norm): _ = fake_tracklet.calc_velocity(where, norm) def test_tracklet_affinities(fake_tracklet): - other_tracklet = Tracklet( - fake_tracklet.data, - fake_tracklet.inds + TRACKLET_LEN - ) + other_tracklet = Tracklet(fake_tracklet.data, fake_tracklet.inds + TRACKLET_LEN) _ = fake_tracklet.dynamic_similarity_with(other_tracklet) _ = fake_tracklet.dynamic_dissimilarity_with(other_tracklet) _ = fake_tracklet.shape_dissimilarity_with(other_tracklet) @@ -134,5 +117,5 @@ def test_stitcher(tmpdir_factory, fake_stitcher): assert fake_stitcher.compute_max_gap() == 1 fake_stitcher.build_graph(max_gap=1) fake_stitcher.stitch(add_back_residuals=True) - output_name = tmpdir_factory.mktemp('data').join('fake.h5') + output_name = tmpdir_factory.mktemp("data").join("fake.h5") fake_stitcher.write_tracks(output_name) diff --git a/tests/test_trackingutils.py b/tests/test_trackingutils.py index 5a7dc6e35..66b18fa16 100644 --- a/tests/test_trackingutils.py +++ b/tests/test_trackingutils.py @@ -8,13 +8,7 @@ @pytest.fixture() def ellipse(): - params = { - 'x': 0, - 'y': 0, - 'width': 2, - 'height': 4, - 'theta': np.pi / 2, - } + params = {"x": 0, "y": 0, "width": 2, "height": 4, "theta": np.pi / 2} return trackingutils.Ellipse(**params) @@ -22,8 +16,7 @@ def test_ellipse(ellipse): assert ellipse.aspect_ratio == 2 assert ellipse.geometry is not None np.testing.assert_equal( - ellipse.contains_points(np.asarray([[0, 0], [10, 10]])), - [True, False], + ellipse.contains_points(np.asarray([[0, 0], [10, 10]])), [True, False] ) @@ -61,22 +54,15 @@ def test_sort_ellipse(): poses = np.random.rand(2, 10, 3) trackers = mot.track(poses[..., :2]) assert trackers.shape == (2, 7) - trackingutils.fill_tracklets( - tracklets, - trackers, - 
poses, - imname=0 - ) + trackingutils.fill_tracklets(tracklets, trackers, poses, imname=0) assert all(id_ in tracklets for id_ in trackers[:, -2]) def test_calc_bboxes_from_keypoints(): xy = np.asarray([[[0, 0, 1]]]) np.testing.assert_equal( - trackingutils.calc_bboxes_from_keypoints(xy, 10), - [[-10, -10, 10, 10, 1]], + trackingutils.calc_bboxes_from_keypoints(xy, 10), [[-10, -10, 10, 10, 1]] ) np.testing.assert_equal( - trackingutils.calc_bboxes_from_keypoints(xy, 20, 10), - [[-10, -20, 30, 20, 1]], + trackingutils.calc_bboxes_from_keypoints(xy, 20, 10), [[-10, -20, 30, 20, 1]] ) diff --git a/testscript_cli.py b/testscript_cli.py index 5a015f966..319344625 100644 --- a/testscript_cli.py +++ b/testscript_cli.py @@ -9,15 +9,17 @@ It produces nothing of interest scientifically. """ -task='Testcore' # Enter the name of your experiment Task -scorer='Mackenzie' # Enter the name of the experimenter/labeler +task = "Testcore" # Enter the name of your experiment Task +scorer = "Mackenzie" # Enter the name of the experimenter/labeler import os, subprocess, sys + def install(package): subprocess.check_call([sys.executable, "-m", "pip", "install", package]) -install('tensorflow==1.13.1') + +install("tensorflow==1.13.1") import deeplabcut as dlc @@ -32,86 +34,127 @@ def install(package): videoname = "reachingvideo1" video = [ os.path.join( - basepath, "examples", "Reaching-Mackenzie-2018-08-30", "videos", videoname + ".avi" + basepath, + "examples", + "Reaching-Mackenzie-2018-08-30", + "videos", + videoname + ".avi", ) ] # For testing a color video: -#videoname='baby4hin2min' -#video=[os.path.join('/home/alex/Desktop/Data',videoname+'.mp4')] -#to test destination folder: -#dfolder=basepath +# videoname='baby4hin2min' +# video=[os.path.join('/home/alex/Desktop/Data',videoname+'.mp4')] +# to test destination folder: +# dfolder=basepath print(video) -dfolder=None -net_type='resnet_50' #'mobilenet_v2_0.35' #'resnet_50' -augmenter_type='default' -augmenter_type2='imgaug' +dfolder = None +net_type = "resnet_50" #'mobilenet_v2_0.35' #'resnet_50' +augmenter_type = "default" +augmenter_type2 = "imgaug" -if platform.system() == 'Darwin' or platform.system()=='Windows': +if platform.system() == "Darwin" or platform.system() == "Windows": print("On Windows/OSX tensorpack is not tested by default.") - augmenter_type3='imgaug' + augmenter_type3 = "imgaug" else: - augmenter_type3='tensorpack' #Does not work on WINDOWS + augmenter_type3 = "tensorpack" # Does not work on WINDOWS -numiter=3 +numiter = 3 print("CREATING PROJECT") -path_config_file=dlc.create_new_project(task,scorer,video, copy_videos=True) +path_config_file = dlc.create_new_project(task, scorer, video, copy_videos=True) -cfg=dlc.auxiliaryfunctions.read_config(path_config_file) -cfg['numframes2pick']=5 -cfg['pcutoff']=0.01 -cfg['TrainingFraction']=[.8] -cfg['skeleton']=[['bodypart1','bodypart2'],['bodypart1','bodypart3']] +cfg = dlc.auxiliaryfunctions.read_config(path_config_file) +cfg["numframes2pick"] = 5 +cfg["pcutoff"] = 0.01 +cfg["TrainingFraction"] = [0.8] +cfg["skeleton"] = [["bodypart1", "bodypart2"], ["bodypart1", "bodypart3"]] -dlc.auxiliaryfunctions.write_config(path_config_file,cfg) +dlc.auxiliaryfunctions.write_config(path_config_file, cfg) print("EXTRACTING FRAMES") -dlc.extract_frames(path_config_file,mode='automatic',userfeedback=False) +dlc.extract_frames(path_config_file, mode="automatic", userfeedback=False) print("CREATING SOME LABELS FOR THE FRAMES") -frames=os.listdir(os.path.join(cfg['project_path'],'labeled-data',videoname)) -#As 
this next step is manual, we update the labels by putting them on the diagonal (fixed for all frames) -for index,bodypart in enumerate(cfg['bodyparts']): - columnindex = pd.MultiIndex.from_product([[scorer], [bodypart], ['x', 'y']],names=['scorer', 'bodyparts', 'coords']) - frame = pd.DataFrame(100+np.ones((len(frames),2))*50*index, columns = columnindex, index = [os.path.join('labeled-data',videoname,fn) for fn in frames]) - if index==0: - dataFrame=frame - else: - dataFrame = pd.concat([dataFrame, frame],axis=1) - -dataFrame.to_csv(os.path.join(cfg['project_path'],'labeled-data',videoname,"CollectedData_" + scorer + ".csv")) -dataFrame.to_hdf(os.path.join(cfg['project_path'],'labeled-data',videoname,"CollectedData_" + scorer + '.h5'),'df_with_missing',format='table', mode='w') +frames = os.listdir(os.path.join(cfg["project_path"], "labeled-data", videoname)) +# As this next step is manual, we update the labels by putting them on the diagonal (fixed for all frames) +for index, bodypart in enumerate(cfg["bodyparts"]): + columnindex = pd.MultiIndex.from_product( + [[scorer], [bodypart], ["x", "y"]], names=["scorer", "bodyparts", "coords"] + ) + frame = pd.DataFrame( + 100 + np.ones((len(frames), 2)) * 50 * index, + columns=columnindex, + index=[os.path.join("labeled-data", videoname, fn) for fn in frames], + ) + if index == 0: + dataFrame = frame + else: + dataFrame = pd.concat([dataFrame, frame], axis=1) + +dataFrame.to_csv( + os.path.join( + cfg["project_path"], + "labeled-data", + videoname, + "CollectedData_" + scorer + ".csv", + ) +) +dataFrame.to_hdf( + os.path.join( + cfg["project_path"], + "labeled-data", + videoname, + "CollectedData_" + scorer + ".h5", + ), + "df_with_missing", + format="table", + mode="w", +) print("Plot labels...") dlc.check_labels(path_config_file) print("CREATING TRAININGSET") -dlc.create_training_dataset(path_config_file,net_type=net_type,augmenter_type=augmenter_type) - -posefile=os.path.join(cfg['project_path'],'dlc-models/iteration-'+str(cfg['iteration'])+'/'+ cfg['Task'] + cfg['date'] + '-trainset' + str(int(cfg['TrainingFraction'][0] * 100)) + 'shuffle' + str(1),'train/pose_cfg.yaml') - -DLC_config=dlc.auxiliaryfunctions.read_plainconfig(posefile) -DLC_config['save_iters']=numiter -DLC_config['display_iters']=2 -DLC_config['multi_step']=[[0.001,numiter]] +dlc.create_training_dataset( + path_config_file, net_type=net_type, augmenter_type=augmenter_type +) + +posefile = os.path.join( + cfg["project_path"], + "dlc-models/iteration-" + + str(cfg["iteration"]) + + "/" + + cfg["Task"] + + cfg["date"] + + "-trainset" + + str(int(cfg["TrainingFraction"][0] * 100)) + + "shuffle" + + str(1), + "train/pose_cfg.yaml", +) + +DLC_config = dlc.auxiliaryfunctions.read_plainconfig(posefile) +DLC_config["save_iters"] = numiter +DLC_config["display_iters"] = 2 +DLC_config["multi_step"] = [[0.001, numiter]] print("CHANGING training parameters to end quickly!") -dlc.auxiliaryfunctions.write_plainconfig(posefile,DLC_config) +dlc.auxiliaryfunctions.write_plainconfig(posefile, DLC_config) print("TRAIN") dlc.train_network(path_config_file) print("EVALUATE") -dlc.evaluate_network(path_config_file,plotting=True) +dlc.evaluate_network(path_config_file, plotting=True) -videotest = os.path.join(cfg['project_path'],'videos',videoname + ".avi") +videotest = os.path.join(cfg["project_path"], "videos", videoname + ".avi") print(videotest) # quicker variant -''' +""" print("VIDEO ANALYSIS") dlc.analyze_videos(path_config_file, [videotest], save_as_csv=True) @@ -143,9 +186,11 @@ def 
install(package):
 print("ANALYZING some individual frames")
 dlc.analyze_time_lapse_frames(path_config_file,os.path.join(cfg['project_path'],'labeled-data/reachingvideo1/'))
-'''
+"""
 
 print("Export model...")
-dlc.export_model(path_config_file,shuffle=1,make_tar=False)
+dlc.export_model(path_config_file, shuffle=1, make_tar=False)
 
-print("ALL DONE!!! - default/imgaug cases of DLCcore training and evaluation are functional (no extract outlier or refinement tested).")
+print(
+    "ALL DONE!!! - default/imgaug cases of DLCcore training and evaluation are functional (outlier extraction and refinement not tested)."
+)
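
The label-faking loop in testscript_cli.py grows dataFrame through repeated pd.concat calls inside the loop; the same result can be had by collecting one column block per bodypart and concatenating once at the end. A minimal standalone sketch of that idiom follows — the scorer, bodypart, and frame names are placeholders, not values read from a real project config:

import os
import numpy as np
import pandas as pd

scorer = "Mackenzie"  # placeholder, as in the script above
videoname = "reachingvideo1"
bodyparts = ["bodypart1", "bodypart2", "bodypart3"]  # placeholder bodypart list
frames = ["img000.png", "img001.png"]  # placeholder frame file names

blocks = []
for index, bodypart in enumerate(bodyparts):
    # One (scorer, bodypart, x/y) column block per bodypart.
    columnindex = pd.MultiIndex.from_product(
        [[scorer], [bodypart], ["x", "y"]], names=["scorer", "bodyparts", "coords"]
    )
    blocks.append(
        pd.DataFrame(
            100 + np.ones((len(frames), 2)) * 50 * index,  # labels on a diagonal
            columns=columnindex,
            index=[os.path.join("labeled-data", videoname, fn) for fn in frames],
        )
    )
dataFrame = pd.concat(blocks, axis=1)  # one concat instead of one per bodypart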
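
For reference, the expectations in test_calc_bboxes_from_keypoints can be reproduced with a few lines of NumPy. The function below is a hypothetical re-derivation inferred from those expected values only (the real trackingutils implementation may differ in detail): the box is the keypoints' extent padded by a slack margin, optionally shifted along x, with the mean keypoint confidence appended as a fifth column.

import numpy as np

def bboxes_from_keypoints(xy, slack=0, offset=0):
    # xy: (n_animals, n_keypoints, 3) array of x, y, confidence.
    x1 = xy[..., 0].min(axis=1) - slack + offset
    y1 = xy[..., 1].min(axis=1) - slack
    x2 = xy[..., 0].max(axis=1) + slack + offset
    y2 = xy[..., 1].max(axis=1) + slack
    conf = xy[..., 2].mean(axis=1)  # mean keypoint confidence
    return np.stack([x1, y1, x2, y2, conf], axis=1)

xy = np.asarray([[[0, 0, 1]]])
print(bboxes_from_keypoints(xy, 10))      # expected: [[-10, -10, 10, 10, 1]]
print(bboxes_from_keypoints(xy, 20, 10))  # expected: [[-10, -20, 30, 20, 1]]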