diff --git a/README.md b/README.md index 42eb413f0..e26329e47 100644 --- a/README.md +++ b/README.md @@ -65,8 +65,10 @@ where the `````` points to the 3D skeletons modality of NTU RGB ### Kinetics-skeleton [Kinetics](https://deepmind.com/research/open-source/open-source-datasets/kinetics/) is a video-based dataset for action recognition which only provides raw video clips without skeleton data. To obtain the joint locations, we first resized all videos to a resolution of 340x256 and converted the frame rate to 30 fps. Then, we extracted skeletons from each frame in Kinetics with [Openpose](https://github.com/CMU-Perceptual-Computing-Lab/openpose). The extracted skeleton data, which we call **Kinetics-skeleton** (7.5GB), can be downloaded directly from [GoogleDrive](https://drive.google.com/open?id=1SPQ6FmFsjGg3f59uCWfdUWI-5HJM_YhZ) or [BaiduYun](https://pan.baidu.com/s/1dwKG2TLvG-R1qeIiE4MjeA#list/path=%2FShare%2FAAAI18%2Fkinetics-skeleton&parentPath=%2FShare). -It is highly recommended storing data in the **SSD** rather than HDD for efficiency. - +After uncompressing, rebuild the database with this command: +``` +python tools/kinetics_gendata.py --data_path +``` ## Testing Pretrained Models ### Get trained models diff --git a/config/baseline/kinetics-skeleton-from-rawdata/test.yaml b/config/baseline/kinetics-skeleton-from-rawdata/test.yaml new file mode 100644 index 000000000..16e57bcbb --- /dev/null +++ b/config/baseline/kinetics-skeleton-from-rawdata/test.yaml @@ -0,0 +1,25 @@ +# feeder +# read data from original kinetics-skeleton dataset +feeder: st_gcn.feeder.Feeder_kinetics +test_feeder_args: + data_path: ./data/Kinetics/kinetics-skeleton/kinetics_val + label_path : ./data/Kinetics/kinetics-skeleton/kinetics_val_label.json + + +# model +model: st_gcn.net.TCN +model_args: + num_class: 400 + channel: 108 + window_size: 300 + use_data_bn: True + + +# test +phase: test +device: 0 +test_batch_size: 64 +weights: ./model/kinetics-tcn.pt + + + diff --git a/config/baseline/kinetics-skeleton-from-rawdata/train.yaml b/config/baseline/kinetics-skeleton-from-rawdata/train.yaml new file mode 100644 index 000000000..c56e0012a --- /dev/null +++ b/config/baseline/kinetics-skeleton-from-rawdata/train.yaml @@ -0,0 +1,35 @@ +work_dir: ./work_dir/Kinetics/TCN + +# feeder +# read data from original kinetics-skeleton dataset +feeder: st_gcn.feeder.Feeder_kinetics +train_feeder_args: + random_move: True + data_path: ./data/Kinetics/kinetics-skeleton/kinetics_train + label_path : ./data/Kinetics/kinetics-skeleton/kinetics_train_label.json +test_feeder_args: + data_path: ./data/Kinetics/kinetics-skeleton/kinetics_val + label_path : ./data/Kinetics/kinetics-skeleton/kinetics_val_label.json + + +# model +model: st_gcn.net.TCN +model_args: + num_class: 400 + channel: 108 + window_size: 300 + use_data_bn: True + +#optim +weight_decay: 0.0001 +base_lr: 0.1 +step: [10, 60] + +# training +device: [0,1,2,3] +batch_size: 512 +test_batch_size: 512 +num_epoch: 60 +nesterov: True + + diff --git a/config/baseline/kinetics-skeleton/test.yaml b/config/baseline/kinetics-skeleton/test.yaml index 5447ca812..931385112 100644 --- a/config/baseline/kinetics-skeleton/test.yaml +++ b/config/baseline/kinetics-skeleton/test.yaml @@ -1,10 +1,8 @@ # feeder -feeder: st_gcn.feeder.Feeder_kinetics +feeder: st_gcn.feeder.Feeder test_feeder_args: - mode: test - window_size: 300 - data_path: ./data/kinetics-skeleton/kinetics_val - label_path : ./data/kinetics-skeleton/kinetics_val_label.json + data_path: ./data/Kinetics/kinetics-skeleton/val_data.npy
+ label_path: ./data/Kinetics/kinetics-skeleton/val_label.pkl # model diff --git a/config/baseline/kinetics-skeleton/train.yaml b/config/baseline/kinetics-skeleton/train.yaml index 9702184d8..e6eae5f6b 100644 --- a/config/baseline/kinetics-skeleton/train.yaml +++ b/config/baseline/kinetics-skeleton/train.yaml @@ -1,20 +1,14 @@ -work_dir: ./work_dir/Kinetics/TCN-p2 +work_dir: ./work_dir/Kinetics/TCN # feeder -feeder: st_gcn.feeder.Feeder_kinetics +feeder: st_gcn.feeder.Feeder train_feeder_args: - mode: train - random_choose: True random_move: True - window_size: 300 - data_path: ./data/kinetics-skeleton/kinetics_train - label_path : ./data/kinetics-skeleton/kinetics_train_label.json + data_path: ./data/Kinetics/kinetics-skeleton/train_data.npy + label_path: ./data/Kinetics/kinetics-skeleton/train_label.pkl test_feeder_args: - mode: test - window_size: 300 - data_path: ./data/kinetics-skeleton/kinetics_val - label_path : ./data/kinetics-skeleton/kinetics_val_label.json - + data_path: ./data/Kinetics/kinetics-skeleton/val_data.npy + label_path: ./data/Kinetics/kinetics-skeleton/val_label.pkl # model model: st_gcn.net.TCN diff --git a/config/baseline/nturgbd-cross-subject/test.yaml b/config/baseline/nturgbd-cross-subject/test.yaml index 74fc5cf8f..e8d81b7b7 100644 --- a/config/baseline/nturgbd-cross-subject/test.yaml +++ b/config/baseline/nturgbd-cross-subject/test.yaml @@ -1,8 +1,6 @@ # feeder feeder: st_gcn.feeder.Feeder test_feeder_args: - mode: test - window_size: 300 data_path: ./data/NTU-RGB-D/xsub/val_data.npy label_path: ./data/NTU-RGB-D/xsub/val_label.pkl diff --git a/config/baseline/nturgbd-cross-subject/train.yaml b/config/baseline/nturgbd-cross-subject/train.yaml index 381ae50c3..f5e91ba5c 100644 --- a/config/baseline/nturgbd-cross-subject/train.yaml +++ b/config/baseline/nturgbd-cross-subject/train.yaml @@ -2,13 +2,9 @@ work_dir: ./work_dir/NTU-RGB-D/xsub/TCN # feeder feeder: st_gcn.feeder.Feeder train_feeder_args: - mode: train - window_size: 300 data_path: ./data/NTU-RGB-D/xsub/train_data.npy label_path: ./data/NTU-RGB-D/xsub/train_label.pkl test_feeder_args: - mode: test - window_size: 300 data_path: ./data/NTU-RGB-D/xsub/val_data.npy label_path: ./data/NTU-RGB-D/xsub/val_label.pkl diff --git a/config/baseline/nturgbd-cross-view/test.yaml b/config/baseline/nturgbd-cross-view/test.yaml index a7ba9ff6c..078dbc473 100644 --- a/config/baseline/nturgbd-cross-view/test.yaml +++ b/config/baseline/nturgbd-cross-view/test.yaml @@ -1,8 +1,6 @@ # feeder feeder: st_gcn.feeder.Feeder test_feeder_args: - mode: test - window_size: 300 data_path: ./data/NTU-RGB-D/xview/val_data.npy label_path: ./data/NTU-RGB-D/xview/val_label.pkl diff --git a/config/baseline/nturgbd-cross-view/train.yaml b/config/baseline/nturgbd-cross-view/train.yaml index 1eb066d15..8bc4648ad 100644 --- a/config/baseline/nturgbd-cross-view/train.yaml +++ b/config/baseline/nturgbd-cross-view/train.yaml @@ -3,13 +3,9 @@ work_dir: ./work_dir/NTU-RGB-D/xview/TCN # feeder feeder: st_gcn.feeder.Feeder train_feeder_args: - mode: train - window_size: 300 data_path: ./data/NTU-RGB-D/xview/train_data.npy label_path: ./data/NTU-RGB-D/xview/train_label.pkl test_feeder_args: - mode: test - window_size: 300 data_path: ./data/NTU-RGB-D/xview/val_data.npy label_path: ./data/NTU-RGB-D/xview/val_label.pkl diff --git a/config/st_gcn/kinetics-skeleton-from-rawdata/test.yaml b/config/st_gcn/kinetics-skeleton-from-rawdata/test.yaml new file mode 100644 index 000000000..0ae41cb29 --- /dev/null +++ 
b/config/st_gcn/kinetics-skeleton-from-rawdata/test.yaml @@ -0,0 +1,31 @@ +# feeder +# read data from original kinetics-skeleton dataset +feeder: st_gcn.feeder.Feeder_kinetics +test_feeder_args: + data_path: ./data/Kinetics/kinetics-skeleton/kinetics_val + label_path : ./data/Kinetics/kinetics-skeleton/kinetics_val_label.json + +# model +model: st_gcn.net.ST_GCN +model_args: + num_class: 400 + channel: 3 + window_size: 150 + num_person: 2 + num_point: 18 + dropout: 0 + graph: st_gcn.graph.Kinetics + graph_args: + labeling_mode: 'spatial' + mask_learning: True + use_data_bn: True + + +# test +phase: test +device: 0 +test_batch_size: 64 +weights: ./model/kinetics-st_gcn.pt + + + diff --git a/config/st_gcn/kinetics-skeleton-from-rawdata/train.yaml b/config/st_gcn/kinetics-skeleton-from-rawdata/train.yaml new file mode 100644 index 000000000..fc242bc15 --- /dev/null +++ b/config/st_gcn/kinetics-skeleton-from-rawdata/train.yaml @@ -0,0 +1,45 @@ +work_dir: ./work_dir/Kinetics/ST_GCN + +# feeder +# read data from original kinetics-skeleton dataset +feeder: st_gcn.feeder.Feeder_kinetics +train_feeder_args: + data_path: ./data/Kinetics/kinetics-skeleton/kinetics_train + label_path : ./data/Kinetics/kinetics-skeleton/kinetics_train_label.json + random_choose: True + random_move: True + window_size: 150 +test_feeder_args: + data_path: ./data/Kinetics/kinetics-skeleton/kinetics_val + label_path : ./data/Kinetics/kinetics-skeleton/kinetics_val_label.json + +# model +model: st_gcn.net.ST_GCN +model_args: + num_class: 400 + channel: 3 + window_size: 150 + num_person: 2 + num_point: 18 + dropout: 0 + graph: st_gcn.graph.Kinetics + graph_args: + labeling_mode: 'spatial' + mask_learning: True + use_data_bn: True + +#optim +weight_decay: 0.0001 +base_lr: 0.1 +step: [20, 30, 40, 50] + +# training +device: [0,1,2,3] +batch_size: 256 +test_batch_size: 256 +num_epoch: 60 +nesterov: True +save_interval: 10 + + + diff --git a/config/st_gcn/kinetics-skeleton/test.yaml b/config/st_gcn/kinetics-skeleton/test.yaml index e21e03fee..26c96486a 100644 --- a/config/st_gcn/kinetics-skeleton/test.yaml +++ b/config/st_gcn/kinetics-skeleton/test.yaml @@ -1,10 +1,9 @@ # feeder -feeder: st_gcn.feeder.Feeder_kinetics +feeder: st_gcn.feeder.Feeder test_feeder_args: - mode: test - data_path: ./data/kinetics-skeleton/kinetics_val - label_path : ./data/kinetics-skeleton/kinetics_val_label.json - window_size: 150 + data_path: ./data/Kinetics/kinetics-skeleton/val_data.npy + label_path: ./data/Kinetics/kinetics-skeleton/val_label.pkl + # model model: st_gcn.net.ST_GCN diff --git a/config/st_gcn/kinetics-skeleton/train.yaml b/config/st_gcn/kinetics-skeleton/train.yaml index d9c16b99b..1b17ff7c9 100644 --- a/config/st_gcn/kinetics-skeleton/train.yaml +++ b/config/st_gcn/kinetics-skeleton/train.yaml @@ -1,19 +1,16 @@ work_dir: ./work_dir/Kinetics/ST_GCN # feeder -feeder: st_gcn.feeder.Feeder_kinetics +feeder: st_gcn.feeder.Feeder train_feeder_args: - mode: train - data_path: ./data/kinetics-skeleton/kinetics_train - label_path : ./data/kinetics-skeleton/kinetics_train_label.json random_choose: True random_move: True window_size: 150 + data_path: ./data/Kinetics/kinetics-skeleton/train_data.npy + label_path: ./data/Kinetics/kinetics-skeleton/train_label.pkl test_feeder_args: - mode: test - data_path: ./data/kinetics-skeleton/kinetics_val - label_path : ./data/kinetics-skeleton/kinetics_val_label.json - window_size: 150 + data_path: ./data/Kinetics/kinetics-skeleton/val_data.npy + label_path: ./data/Kinetics/kinetics-skeleton/val_label.pkl
# model model: st_gcn.net.ST_GCN diff --git a/config/st_gcn/nturgbd-cross-subject/test.yaml b/config/st_gcn/nturgbd-cross-subject/test.yaml index cae171903..9aab87ddb 100644 --- a/config/st_gcn/nturgbd-cross-subject/test.yaml +++ b/config/st_gcn/nturgbd-cross-subject/test.yaml @@ -1,8 +1,6 @@ # feeder feeder: st_gcn.feeder.Feeder test_feeder_args: - mode: test - window_size: 300 data_path: ./data/NTU-RGB-D/xsub/val_data.npy label_path: ./data/NTU-RGB-D/xsub/val_label.pkl diff --git a/config/st_gcn/nturgbd-cross-subject/train.yaml b/config/st_gcn/nturgbd-cross-subject/train.yaml index 578666582..89d97eec4 100644 --- a/config/st_gcn/nturgbd-cross-subject/train.yaml +++ b/config/st_gcn/nturgbd-cross-subject/train.yaml @@ -3,13 +3,9 @@ work_dir: ./work_dir/NTU-RGB-D/xsub/ST_GCN # feeder feeder: st_gcn.feeder.Feeder train_feeder_args: - mode: train - window_size: 300 data_path: ./data/NTU-RGB-D/xsub/train_data.npy label_path: ./data/NTU-RGB-D/xsub/train_label.pkl test_feeder_args: - mode: test - window_size: 300 data_path: ./data/NTU-RGB-D/xsub/val_data.npy label_path: ./data/NTU-RGB-D/xsub/val_label.pkl diff --git a/config/st_gcn/nturgbd-cross-view/test.yaml b/config/st_gcn/nturgbd-cross-view/test.yaml index 9e9d34138..99cd83bdf 100644 --- a/config/st_gcn/nturgbd-cross-view/test.yaml +++ b/config/st_gcn/nturgbd-cross-view/test.yaml @@ -1,8 +1,6 @@ # feeder feeder: st_gcn.feeder.Feeder test_feeder_args: - mode: test - window_size: 300 data_path: ./data/NTU-RGB-D/xview/val_data.npy label_path: ./data/NTU-RGB-D/xview/val_label.pkl diff --git a/config/st_gcn/nturgbd-cross-view/train.yaml b/config/st_gcn/nturgbd-cross-view/train.yaml index 4e7135d13..d6b485ef4 100644 --- a/config/st_gcn/nturgbd-cross-view/train.yaml +++ b/config/st_gcn/nturgbd-cross-view/train.yaml @@ -3,13 +3,9 @@ work_dir: ./work_dir/NTU-RGB-D/xview/ST_GCN # feeder feeder: st_gcn.feeder.Feeder train_feeder_args: - mode: train - window_size: 300 data_path: ./data/NTU-RGB-D/xview/train_data.npy label_path: ./data/NTU-RGB-D/xview/train_label.pkl test_feeder_args: - mode: test - window_size: 300 data_path: ./data/NTU-RGB-D/xview/val_data.npy label_path: ./data/NTU-RGB-D/xview/val_label.pkl diff --git a/st_gcn/feeder/feeder.py b/st_gcn/feeder/feeder.py index d81ab9e78..f833c95a5 100644 --- a/st_gcn/feeder/feeder.py +++ b/st_gcn/feeder/feeder.py @@ -17,7 +17,7 @@ import time # operation -from .tools import * +from . 
import tools class Feeder(torch.utils.data.Dataset): @@ -25,12 +25,9 @@ class Feeder(torch.utils.data.Dataset): Arguments: data_path: the path to '.npy' data, the shape of data should be (N, C, T, V, M) label_path: the path to label - mode: must be train or test random_choose: If true, randomly choose a portion of the input sequence random_shift: If true, randomly pad zeros at the begining or end of sequence window_size: The length of the output sequence - temporal_downsample_step: Step for down sampling the output sequence - mean_subtraction: The value of bias should be subtracted from output data normalization: If true, normalize input sequence debug: If true, only use the first 100 samples """ @@ -38,23 +35,19 @@ class Feeder(torch.utils.data.Dataset): def __init__(self, data_path, label_path, - mode, random_choose=False, random_shift=False, + random_move=False, window_size=-1, - temporal_downsample_step=1, - mean_subtraction=0, normalization=False, debug=False): self.debug = debug - self.mode = mode self.data_path = data_path self.label_path = label_path self.random_choose = random_choose self.random_shift = random_shift + self.random_move = random_move self.window_size = window_size - self.mean_subtraction = mean_subtraction - self.temporal_downsample_step = temporal_downsample_step self.normalization = normalization self.load_data() @@ -116,22 +109,14 @@ def __getitem__(self, index): data_numpy = (data_numpy - self.mean_map) / self.std_map # processing - if self.temporal_downsample_step != 1: - if self.mode is 'train': - data_numpy = downsample(data_numpy, - self.temporal_downsample_step) - else: - data_numpy = temporal_slice(data_numpy, - self.temporal_downsample_step) - if self.mode is 'train': - if self.random_shift: - data_numpy = random_shift(data_numpy) - if self.random_choose: - data_numpy = random_choose(data_numpy, self.window_size) - - # mean subtraction - if self.mean_subtraction != 0: - data_numpy = mean_subtractor(data_numpy, self.mean_subtraction) + if self.random_shift: + data_numpy = tools.random_shift(data_numpy) + if self.random_choose: + data_numpy = tools.random_choose(data_numpy, self.window_size) + elif self.window_size > 0: + data_numpy = tools.auto_pading(data_numpy, self.window_size) + if self.random_move: + data_numpy = tools.random_move(data_numpy) return data_numpy, label @@ -144,7 +129,7 @@ def top_k(self, score, top_k): def test(data_path, label_path, vid=None): import matplotlib.pyplot as plt loader = torch.utils.data.DataLoader( - dataset=Feeder(data_path, label_path, mode='val'), + dataset=Feeder(data_path, label_path), batch_size=64, shuffle=False, num_workers=2) diff --git a/st_gcn/feeder/feeder_kinetics.py b/st_gcn/feeder/feeder_kinetics.py index c618d66b9..2bd465f12 100644 --- a/st_gcn/feeder/feeder_kinetics.py +++ b/st_gcn/feeder/feeder_kinetics.py @@ -25,7 +25,6 @@ class Feeder_kinetics(torch.utils.data.Dataset): Arguments: data_path: the path to '.npy' data, the shape of data should be (N, C, T, V, M) label_path: the path to label - mode: must be train or test random_choose: If true, randomly choose a portion of the input sequence random_shift: If true, randomly pad zeros at the begining or end of sequence random_move: If true, perform randomly but continuously changed transformation to input sequence @@ -33,14 +32,12 @@ class Feeder_kinetics(torch.utils.data.Dataset): pose_matching: If ture, match the pose between two frames num_person_in: The number of people the feeder can observe in the input sequence num_person_out: The number of people 
the feeder in the output sequence - temporal_downsample_step: Step for down sampling the output sequence debug: If true, only use the first 100 samples """ def __init__(self, data_path, label_path, - mode, ignore_empty_sample=True, random_choose=False, random_shift=False, @@ -49,17 +46,14 @@ def __init__(self, pose_matching=False, num_person_in=5, num_person_out=2, - temporal_downsample_step=1, debug=False): self.debug = debug - self.mode = mode self.data_path = data_path self.label_path = label_path self.random_choose = random_choose self.random_shift = random_shift self.random_move = random_move self.window_size = window_size - self.temporal_downsample_step = temporal_downsample_step self.num_person_in = num_person_in self.num_person_out = num_person_out self.pose_matching = pose_matching @@ -136,14 +130,6 @@ def __getitem__(self, index): label = video_info['label_index'] assert (self.label[index] == label) - # processing - if self.temporal_downsample_step != 1: - if self.mode is 'train': - data_numpy = tools.downsample(data_numpy, - self.temporal_downsample_step) - else: - data_numpy = tools.temporal_slice( - data_numpy, self.temporal_downsample_step) # data augmentation if self.random_shift: data_numpy = tools.random_shift(data_numpy) @@ -179,11 +165,7 @@ def test(data_path, label_path, vid=None, graph=None): import matplotlib.pyplot as plt loader = torch.utils.data.DataLoader( dataset=Feeder_kinetics( - data_path, - label_path, - mode='val', - pose_matching=False, - num_person=10), + data_path, label_path, pose_matching=False, num_person=10), batch_size=64, shuffle=False, num_workers=2) diff --git a/st_gcn/feeder/tools.py b/st_gcn/feeder/tools.py index e70155500..b1c5dc1c5 100644 --- a/st_gcn/feeder/tools.py +++ b/st_gcn/feeder/tools.py @@ -54,21 +54,19 @@ def random_choose(data_numpy, size, auto_pad=True): return data_numpy[:, begin:begin + size, :, :] -def random_move( - data_numpy, - angle_candidate=[-10., -5., 0., 5., 10.], - scale_candidate=[0.9,1.0,1.1], - transform_candidate=[-0.2, -0.1, 0.0, 0.1, 0.2], - move_time_candidate=[1]): +def random_move(data_numpy, + angle_candidate=[-10., -5., 0., 5., 10.], + scale_candidate=[0.9, 1.0, 1.1], + transform_candidate=[-0.2, -0.1, 0.0, 0.1, 0.2], + move_time_candidate=[1]): # input: C,T,V,M C, T, V, M = data_numpy.shape - - move_time = random.choice(move_time_candidate) + move_time = random.choice(move_time_candidate) node = np.arange(0, T, T * 1.0 / move_time).round().astype(int) node = np.append(node, T) num_node = len(node) - A = np.random.choice(angle_candidate,num_node) + A = np.random.choice(angle_candidate, num_node) S = np.random.choice(scale_candidate, num_node) T_x = np.random.choice(transform_candidate, num_node) T_y = np.random.choice(transform_candidate, num_node) @@ -83,25 +81,24 @@ def random_move( a[node[i]:node[i + 1]] = np.linspace( A[i], A[i + 1], node[i + 1] - node[i]) * np.pi / 180 s[node[i]:node[i + 1]] = np.linspace(S[i], S[i + 1], - node[i + 1] - node[i]) + node[i + 1] - node[i]) t_x[node[i]:node[i + 1]] = np.linspace(T_x[i], T_x[i + 1], - node[i + 1] - node[i]) + node[i + 1] - node[i]) t_y[node[i]:node[i + 1]] = np.linspace(T_y[i], T_y[i + 1], - node[i + 1] - node[i]) + node[i + 1] - node[i]) theta = np.array([[np.cos(a) * s, -np.sin(a) * s], - [np.sin(a) * s, np.cos(a) * s]]) + [np.sin(a) * s, np.cos(a) * s]]) # perform transformation for i_frame in range(T): xy = data_numpy[0:2, i_frame, :, :] - new_xy = np.dot(theta[:,:,i_frame], xy.reshape(2, -1)) + new_xy = np.dot(theta[:, :, i_frame], xy.reshape(2, -1)) 
new_xy[0] += t_x[i_frame] new_xy[1] += t_y[i_frame] data_numpy[0:2, i_frame, :, :] = new_xy.reshape(2, V, M) return data_numpy - def random_shift(data_numpy): @@ -121,43 +118,44 @@ def random_shift(data_numpy): def openpose_match(data_numpy): C, T, V, M = data_numpy.shape - assert(C==3) - score = data_numpy[2,:,:,:].sum(axis=1) + assert (C == 3) + score = data_numpy[2, :, :, :].sum(axis=1) # the rank of body confidence in each frame (shape: T-1, M) - rank = (-score[0:T-1]).argsort(axis = 1).reshape(T-1, M) + rank = (-score[0:T - 1]).argsort(axis=1).reshape(T - 1, M) # data of frame 1 - xy1 = data_numpy[0:2, 0:T-1, :, :].reshape(2, T-1, V, M, 1) + xy1 = data_numpy[0:2, 0:T - 1, :, :].reshape(2, T - 1, V, M, 1) # data of frame 2 - xy2 = data_numpy[0:2, 1:T, :, :].reshape(2, T-1, V, 1, M) + xy2 = data_numpy[0:2, 1:T, :, :].reshape(2, T - 1, V, 1, M) # square of distance between frame 1&2 (shape: T-1, M, M) - distance = ((xy2-xy1)**2).sum(axis=2).sum(axis=0) + distance = ((xy2 - xy1)**2).sum(axis=2).sum(axis=0) # match pose forward_map = np.zeros((T, M), dtype=int) - 1 forward_map[0] = range(M) for m in range(M): - choose = (rank == m) - forward = distance[choose].argmin(axis=1) - for t in range(T-1): - distance[t, :,forward[t]] = np.inf + choose = (rank == m) + forward = distance[choose].argmin(axis=1) + for t in range(T - 1): + distance[t, :, forward[t]] = np.inf forward_map[1:][choose] = forward - assert(np.all(forward_map >= 0)) + assert (np.all(forward_map >= 0)) # string data - for t in range(T-1): - forward_map[t+1] = forward_map[t+1][forward_map[t]] - + for t in range(T - 1): + forward_map[t + 1] = forward_map[t + 1][forward_map[t]] + # generate data new_data_numpy = np.zeros(data_numpy.shape) for t in range(T): - new_data_numpy[:,t,:,:] = data_numpy[:,t,:,forward_map[t]].transpose(1,2,0) + new_data_numpy[:, t, :, :] = data_numpy[:, t, :, forward_map[ + t]].transpose(1, 2, 0) data_numpy = new_data_numpy # score sort - trace_score = data_numpy[2,:,:,:].sum(axis=1).sum(axis=0) + trace_score = data_numpy[2, :, :, :].sum(axis=1).sum(axis=0) rank = (-trace_score).argsort() - data_numpy = data_numpy[:,:,:,rank] + data_numpy = data_numpy[:, :, :, rank] return data_numpy diff --git a/st_gcn/net/st_gcn.py b/st_gcn/net/st_gcn.py index 426dab9fa..6176e64a4 100644 --- a/st_gcn/net/st_gcn.py +++ b/st_gcn/net/st_gcn.py @@ -156,8 +156,9 @@ def forward(self, x): x = F.avg_pool2d(x, kernel_size=(1, V)) # M pooling - x = x.view(N, M, x.size(1), x.size(2)) - x = x.mean(dim=1) + c = x.size(1) + t = x.size(2) + x = x.view(N, M, c, t).mean(dim=1).view(N, c, t) # T pooling x = F.avg_pool1d(x, kernel_size=x.size()[2]) diff --git a/tools/kinetics_gendata.py b/tools/kinetics_gendata.py new file mode 100644 index 000000000..3c9ab8e4c --- /dev/null +++ b/tools/kinetics_gendata.py @@ -0,0 +1,84 @@ +import numpy as np +import argparse +import os +import sys +import pickle +sys.path.append( + os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))) +from st_gcn.feeder.feeder_kinetics import Feeder_kinetics +from numpy.lib.format import open_memmap +import pickle + +toolbar_width = 30 + + +def print_toolbar(rate, annotation=''): + # setup toolbar + sys.stdout.write("{}[".format(annotation)) + for i in range(toolbar_width): + if i * 1.0 / toolbar_width > rate: + sys.stdout.write(' ') + else: + sys.stdout.write('-') + sys.stdout.flush() + sys.stdout.write(']\r') + + +def end_toolbar(): + sys.stdout.write("\n") + + +def gendata( + data_path, + label_path, + data_out_path, + label_out_path, + 
num_person_in=5, #observe the first 5 persons + num_person_out=2, #then choose 2 persons with the highest score + max_frame=300): + + feeder = Feeder_kinetics( + data_path=data_path, + label_path=label_path, + num_person_in=num_person_in, + num_person_out=num_person_out, + window_size=max_frame) + + sample_name = feeder.sample_name + sample_label = [] + + fp = open_memmap( + data_out_path, + dtype='float32', + mode='w+', + shape=(len(sample_name), 3, max_frame, 18, num_person_out)) + + for i, s in enumerate(sample_name): + data, label = feeder[i] + print_toolbar(i * 1.0 / len(sample_name), + '({:>5}/{:<5}) Processing data: '.format( + i + 1, len(sample_name))) + fp[i, :, 0:data.shape[1], :, :] = data + sample_label.append(label) + + with open(label_out_path, 'wb') as f: + pickle.dump((sample_name, list(sample_label)), f) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description='Kinetics-skeleton Data Converter.') + parser.add_argument( + '--data_path', default='data/Kinetics/kinetics-skeleton') + parser.add_argument( + '--out_folder', default='data/Kinetics/kinetics-skeleton') + arg = parser.parse_args() + + part = ['train', 'val'] + for p in part: + data_path = '{}/kinetics_{}'.format(arg.data_path, p) + label_path = '{}/kinetics_{}_label.json'.format(arg.data_path, p) + data_out_path = '{}/{}_data.npy'.format(arg.out_folder, p) + label_out_path = '{}/{}_label.pkl'.format(arg.out_folder, p) + + gendata(data_path, label_path, data_out_path, label_out_path) \ No newline at end of file diff --git a/tools/ntu_gendata.py b/tools/ntu_gendata.py index dd0c18cee..45d1c990e 100644 --- a/tools/ntu_gendata.py +++ b/tools/ntu_gendata.py @@ -43,7 +43,7 @@ def gendata(data_path, line.strip() + '.skeleton' for line in f.readlines() ] else: - ignored_samples = [] + ignored_samples = [] sample_name = [] sample_label = [] for filename in os.listdir(data_path): @@ -75,7 +75,7 @@ def gendata(data_path, sample_label.append(action_class - 1) with open('{}/{}_label.pkl'.format(out_path, part), 'w') as f: - pickle.dump((sample_name,list(sample_label)), f) + pickle.dump((sample_name, list(sample_label)), f) # np.save('{}/{}_label.npy'.format(out_path, part), sample_label) fp = open_memmap( @@ -96,13 +96,15 @@ def gendata(data_path, if __name__ == '__main__': - parser = argparse.ArgumentParser( - description='NTU-RGB-D Data Converter.') - parser.add_argument('--data_path', default='data/NTU-RGB-D/nturgb+d_skeletons') - parser.add_argument('--ignored_sample_path', default='data/NTU-RGB-D/samples_with_missing_skeletons.txt') + parser = argparse.ArgumentParser(description='NTU-RGB-D Data Converter.') + parser.add_argument( + '--data_path', default='data/NTU-RGB-D/nturgb+d_skeletons') + parser.add_argument( + '--ignored_sample_path', + default='data/NTU-RGB-D/samples_with_missing_skeletons.txt') parser.add_argument('--out_folder', default='data/NTU-RGB-D') - benchmark = [ 'xsub', 'xview'] + benchmark = ['xsub', 'xview'] part = ['train', 'val'] arg = parser.parse_args() @@ -112,4 +114,8 @@ def gendata(data_path, if not os.path.exists(out_path): os.makedirs(out_path) gendata( - arg.data_path, out_path, arg.ignored_sample_path, benchmark=b, part=p) + arg.data_path, + out_path, + arg.ignored_sample_path, + benchmark=b, + part=p)
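A quick way to sanity-check the files produced by `tools/kinetics_gendata.py` above: the script writes `train_data.npy`/`val_data.npy` as float32 memmaps of shape (N, 3, 300, 18, 2) plus matching `(sample_name, label)` pickles. The snippet below is a minimal sketch, assuming the script's default `--out_folder` of `data/Kinetics/kinetics-skeleton`:
```
import pickle
import numpy as np

# Location written by tools/kinetics_gendata.py (assumed default --out_folder).
out_folder = 'data/Kinetics/kinetics-skeleton'

for part in ['train', 'val']:
    # open_memmap stored float32 arrays of shape (N, C=3, T=300, V=18, M=2).
    data = np.load('{}/{}_data.npy'.format(out_folder, part), mmap_mode='r')
    with open('{}/{}_label.pkl'.format(out_folder, part), 'rb') as f:
        sample_name, label = pickle.load(f)
    assert data.shape[0] == len(label) == len(sample_name)
    print(part, data.shape, data.dtype, len(label), 'labels')
```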
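The reworked `Feeder` drops the `mode` argument; augmentation is now driven only by `random_shift`, `random_choose` (falling back to `tools.auto_pading` when a `window_size` is set), and `random_move`, applied in that order. Below is a minimal sketch of calling it directly with the arguments from `config/st_gcn/kinetics-skeleton/train.yaml`; it assumes the converted files above exist and that `st_gcn` is importable:
```
import torch
from st_gcn.feeder import Feeder

# Mirrors train_feeder_args in config/st_gcn/kinetics-skeleton/train.yaml.
dataset = Feeder(
    data_path='./data/Kinetics/kinetics-skeleton/train_data.npy',
    label_path='./data/Kinetics/kinetics-skeleton/train_label.pkl',
    random_choose=True,   # crop a random temporal window of length window_size
    random_move=True,     # then apply a smoothly varying rotation/scale/shift
    window_size=150)

loader = torch.utils.data.DataLoader(dataset, batch_size=4, shuffle=True)
data, label = next(iter(loader))
print(data.shape)  # expected: (4, 3, 150, 18, 2) -> N, C, T, V, M
```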
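For the `st_gcn/net/st_gcn.py` change: after joint (V) pooling the activations have shape (N*M, C, T, 1), and the new code averages over the person dimension M and reshapes explicitly to (N, C, T) before the temporal `avg_pool1d`. A toy shape walk-through, with made-up dimensions:
```
import torch
import torch.nn.functional as F

N, M, C, T = 2, 2, 256, 75           # batch, persons, channels, frames (assumed)
x = torch.randn(N * M, C, T, 1)      # activations after V (joint) pooling

c, t = x.size(1), x.size(2)
x = x.view(N, M, c, t).mean(dim=1).view(N, c, t)  # M pooling -> (N, C, T)
x = F.avg_pool1d(x, kernel_size=x.size()[2])      # T pooling -> (N, C, 1)
print(x.shape)  # torch.Size([2, 256, 1])
```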