From 7391b48166f22292f078f1cc0cece090813a8d2e Mon Sep 17 00:00:00 2001
From: Xiaoyu Liu
Date: Fri, 18 May 2018 10:55:53 -0700
Subject: [PATCH] Simplify the way to get file names and add top_k_by_category
 (#38)

* simplify the way to get video names

* add top_k_by_category

* change to meaningful variable name

* display recall and precision by category

* move implementation to tools.py

* change input variables

* remove duplicate precision/recall output
---
 main.py                          | 40 ++++++++++++++++++++++++------
 st_gcn/feeder/feeder.py          |  6 +++++
 st_gcn/feeder/feeder_kinetics.py |  8 ++++++
 st_gcn/feeder/tools.py           | 42 +++++++++++++++++++++++++++++++-
 tools/convert_poses.py           | 32 ++++++++++++------------
 5 files changed, 105 insertions(+), 23 deletions(-)

diff --git a/main.py b/main.py
index dd5b07a52..24b6cc0a6 100644
--- a/main.py
+++ b/main.py
@@ -15,7 +15,6 @@ def get_parser():
-    # parameter priority: command line > config > default
     parser = argparse.ArgumentParser(
         description='Spatial Temporal Graph Convolution Network')
@@ -139,6 +138,16 @@ def get_parser():
         type=float,
         default=0.0005,
         help='weight decay for optimizer')
+    parser.add_argument(
+        '--display_by_category',
+        type=str2bool,
+        default=True,
+        help='if true, the top k accuracy by category will be displayed')
+    parser.add_argument(
+        '--display_recall_precision',
+        type=str2bool,
+        default=True,
+        help='if true, recall and precision by category will be displayed')
 
     return parser
@@ -242,7 +251,7 @@ def save_arg(self):
     def adjust_learning_rate(self, epoch):
         if self.arg.optimizer == 'SGD' or self.arg.optimizer == 'Adam':
             lr = self.arg.base_lr * (
-                0.1**np.sum(epoch >= np.array(self.arg.step)))
+                0.1 ** np.sum(epoch >= np.array(self.arg.step)))
             for param_group in self.optimizer.param_groups:
                 param_group['lr'] = lr
             return lr
@@ -281,7 +290,7 @@ def train(self, epoch, save_model=False):
         self.record_time()
         timer = dict(dataloader=0.001, model=0.001, statistics=0.001)
         for batch_idx, (data, label) in enumerate(loader):
-
+            print('{}/{}'.format(batch_idx, len(loader)))
             # get data
             data = Variable(
                 data.float().cuda(self.output_device), requires_grad=False)
@@ -333,6 +342,7 @@ def eval(self, epoch, save_score=False, loader_name=['test']):
             loss_value = []
             score_frag = []
             for batch_idx, (data, label) in enumerate(self.data_loader[ln]):
+                print('{}/{}'.format(batch_idx, len(self.data_loader[ln])))
                 data = Variable(
                     data.float().cuda(self.output_device),
                     requires_grad=False,
@@ -350,9 +360,25 @@ def eval(self, epoch, save_score=False, loader_name=['test']):
                 zip(self.data_loader[ln].dataset.sample_name, score))
             self.print_log('\tMean {} loss of {} batches: {}.'.format(
                 ln, len(self.data_loader[ln]), np.mean(loss_value)))
+
+            if self.arg.display_recall_precision:
+                precision, recall = self.data_loader[ln].dataset.calculate_recall_precision(score)
+                for i in range(len(precision)):
+                    self.print_log('\tClass{} Precision: {:.2f}%, Recall: {:.2f}%'.format(
+                        i + 1, 100 * precision[i], 100 * recall[i]
+                    ))
+
             for k in self.arg.show_topk:
-                self.print_log('\tTop{}: {:.2f}%'.format(
-                    k, 100 * self.data_loader[ln].dataset.top_k(score, k)))
+                if self.arg.display_by_category:
+                    accuracy = self.data_loader[ln].dataset.top_k_by_category(score, k)
+                    for i in range(score.shape[1]):
+                        self.print_log('\tClass{} Top{}: {:.2f}%'.format(
+                            i + 1, k, 100 * accuracy[i]))
+                    self.print_log('\tTop{}: {:.2f}%'.format(k, 100 * sum(accuracy) / len(accuracy)))
+                else:
+                    self.print_log('\tTop{}: {:.2f}%'.format(
+                        k, 100 * self.data_loader[ln].dataset.top_k(score, k)))
+
 
             if save_score:
                 with open('{}/epoch{}_{}_score.pkl'.format(
@@ -364,9 +390,9 @@ def start(self):
             self.print_log('Parameters:\n{}\n'.format(str(vars(self.arg))))
             for epoch in range(self.arg.start_epoch, self.arg.num_epoch):
                 save_model = ((epoch + 1) % self.arg.save_interval == 0) or (
-                    epoch + 1 == self.arg.num_epoch)
+                        epoch + 1 == self.arg.num_epoch)
                 eval_model = ((epoch + 1) % self.arg.eval_interval == 0) or (
-                    epoch + 1 == self.arg.num_epoch)
+                        epoch + 1 == self.arg.num_epoch)
 
                 self.train(epoch, save_model=save_model)
diff --git a/st_gcn/feeder/feeder.py b/st_gcn/feeder/feeder.py
index f833c95a5..43dc02301 100644
--- a/st_gcn/feeder/feeder.py
+++ b/st_gcn/feeder/feeder.py
@@ -125,6 +125,12 @@ def top_k(self, score, top_k):
         hit_top_k = [l in rank[i, -top_k:] for i, l in enumerate(self.label)]
         return sum(hit_top_k) * 1.0 / len(hit_top_k)
 
+    def top_k_by_category(self, score, top_k):
+        return tools.top_k_by_category(self.label, score, top_k)
+
+    def calculate_recall_precision(self, score):
+        return tools.calculate_recall_precision(self.label, score)
+
 
 def test(data_path, label_path, vid=None):
     import matplotlib.pyplot as plt
diff --git a/st_gcn/feeder/feeder_kinetics.py b/st_gcn/feeder/feeder_kinetics.py
index 2bd465f12..1014a95bc 100644
--- a/st_gcn/feeder/feeder_kinetics.py
+++ b/st_gcn/feeder/feeder_kinetics.py
@@ -160,6 +160,14 @@ def top_k(self, score, top_k):
         hit_top_k = [l in rank[i, -top_k:] for i, l in enumerate(self.label)]
         return sum(hit_top_k) * 1.0 / len(hit_top_k)
 
+    def top_k_by_category(self, score, top_k):
+        assert (all(self.label >= 0))
+        return tools.top_k_by_category(self.label, score, top_k)
+
+    def calculate_recall_precision(self, score):
+        assert (all(self.label >= 0))
+        return tools.calculate_recall_precision(self.label, score)
+
 
 def test(data_path, label_path, vid=None, graph=None):
     import matplotlib.pyplot as plt
diff --git a/st_gcn/feeder/tools.py b/st_gcn/feeder/tools.py
index b1c5dc1c5..6de3136f5 100644
--- a/st_gcn/feeder/tools.py
+++ b/st_gcn/feeder/tools.py
@@ -217,4 +217,44 @@ def openpose_match(data_numpy):
     # for t, m in path:
     #     new_data_numpy[:, t, :, i] = data_numpy[:, t, :, m]
-    # data_numpy = new_data_numpy
\ No newline at end of file
+    # data_numpy = new_data_numpy
+
+
+def top_k_by_category(label, score, top_k):
+    instance_num, class_num = score.shape
+    rank = score.argsort()
+    hit_top_k = [[] for i in range(class_num)]
+    for i in range(instance_num):
+        l = label[i]
+        hit_top_k[l].append(l in rank[i, -top_k:])
+
+    accuracy_list = []
+    for hit_per_category in hit_top_k:
+        if hit_per_category:
+            accuracy_list.append(sum(hit_per_category) * 1.0 / len(hit_per_category))
+        else:
+            accuracy_list.append(0.0)
+    return accuracy_list
+
+
+def calculate_recall_precision(label, score):
+    instance_num, class_num = score.shape
+    rank = score.argsort()
+    confusion_matrix = np.zeros([class_num, class_num])
+
+    for i in range(instance_num):
+        true_l = label[i]
+        pred_l = rank[i, -1]
+        confusion_matrix[true_l][pred_l] += 1
+
+    precision = []
+    recall = []
+
+    for i in range(class_num):
+        true_p = confusion_matrix[i][i]
+        false_n = sum(confusion_matrix[i, :]) - true_p
+        false_p = sum(confusion_matrix[:, i]) - true_p
+        precision.append(true_p * 1.0 / (true_p + false_p))
+        recall.append(true_p * 1.0 / (true_p + false_n))
+
+    return precision, recall
\ No newline at end of file
diff --git a/tools/convert_poses.py b/tools/convert_poses.py
index 0f11e3509..553aceb9c 100644
--- a/tools/convert_poses.py
+++ b/tools/convert_poses.py
@@ -7,14 +7,13 @@ def list_video_names(videos_path, video_list_file):
     """
-    Creates a file containing the list of all the video names for the videos in 'video_path'
+    Creates a file containing the list of all the video names (no suffix) for the videos in 'videos_path'
     """
     with open(video_list_file, 'w') as f:
         p = Path(videos_path)
         for path in p.glob('*.avi'):
-            video_path = str(path)
-            video_name = (video_path.split('/')[-1]).split('.')[0]
-            f.write(video_name + '\n')
+            f.write(path.stem + '\n')
+
 
 
 if __name__ == "__main__":
@@ -35,26 +34,29 @@ def list_video_names(videos_path, video_list_file):
     list_video_names(arg.videos_path, arg.video_list_file)
 
     p = Path(arg.openpose_json_path)
-
+
+    if not os.path.exists(arg.stgcn_json_path):
+        os.mkdir(arg.stgcn_json_path)
+
     labels = {}
     with open(arg.video_list_file, 'r') as f:
         for line in f:
-            video_name = line.strip('\n')
+            video_name = line.strip('\n')
             arg.stgcn_data_array = []
             stgcn_data = {}
-            dest_path = arg.stgcn_json_path + video_name + '.json'
-            for path in p.glob(video_name + '*.json'):  # each json file for this video
+            dest_path = os.path.join(arg.stgcn_json_path, video_name + '.json')
+            for path in p.glob(video_name + '*.json'):  # each json file for this video
                 json_path = str(path)
-                frame_id = int(((json_path.split('/')[-1]).split('.')[0]).split('_')[1])
+                frame_id = int(path.stem.split('_')[1])
                 frame_data = {'frame_index': frame_id}
                 data = json.load(open(json_path))
-                skeletons = []
+                skeletons = []
                 for person in data['people']:
                     score, coordinates = [], []
                     skeleton = {}
                     keypoints = person['pose_keypoints_2d']
                     for i in range(0, len(keypoints), 3):
-                        coordinates += [keypoints[i], keypoints[i + 1]]
+                        coordinates += [keypoints[i], keypoints[i + 1]]
                         score += [keypoints[i + 2]]
                     skeleton['pose'] = coordinates
                     skeleton['score'] = score
@@ -62,9 +64,9 @@ def list_video_names(videos_path, video_list_file):
                 frame_data['skeleton'] = skeletons
                 arg.stgcn_data_array += [frame_data]
 
-            labels[video_name] = {"has_skeleton": True,
-                                  "label": "fake_label",
-                                  "label_index": 0}
+            labels[video_name] = {"has_skeleton": True,
+                                  "label": "fake_label",
+                                  "label_index": 0}
             stgcn_data['data'] = arg.stgcn_data_array
             stgcn_data['label'] = 'fake_label'
             stgcn_data['label_index'] = 0
@@ -72,4 +74,4 @@ def list_video_names(videos_path, video_list_file):
             json.dump(stgcn_data, outfile)
 
     with open(arg.labels_file, 'w') as label_file:
-        json.dump(labels, label_file)
\ No newline at end of file
+        json.dump(labels, label_file)
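
A quick way to sanity-check the two helpers this patch adds to st_gcn/feeder/tools.py is to run them on a tiny synthetic score matrix. The snippet below is not part of the patch; it assumes the repository root is on PYTHONPATH, and the labels and scores are made up purely for illustration. Note that calculate_recall_precision divides by the per-class prediction count, so a class that is never predicted as top-1 would produce a NaN precision (with a NumPy divide-by-zero warning) on data like this.

    # Standalone sanity check for the new helpers (illustrative data only).
    import numpy as np

    from st_gcn.feeder.tools import top_k_by_category, calculate_recall_precision

    # 4 instances, 3 classes; label[i] is the ground-truth class of instance i.
    label = np.array([0, 0, 1, 2])
    score = np.array([
        [0.7, 0.2, 0.1],   # predicted 0, true 0 -> correct
        [0.1, 0.8, 0.1],   # predicted 1, true 0 -> wrong
        [0.2, 0.6, 0.2],   # predicted 1, true 1 -> correct
        [0.1, 0.1, 0.8],   # predicted 2, true 2 -> correct
    ])

    # Per-class top-1 accuracy: class 0 gets 1 of 2 right, classes 1 and 2 get 1 of 1.
    print(top_k_by_category(label, score, 1))           # [0.5, 1.0, 1.0]

    # Per-class precision/recall from the top-1 confusion matrix.
    precision, recall = calculate_recall_precision(label, score)
    print(precision)                                    # [1.0, 0.5, 1.0]
    print(recall)                                       # [0.5, 1.0, 1.0]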