Add new data converters #298

Open
wants to merge 14 commits into base: main

fix linting
panghuien committed Jan 9, 2023
commit a432ef1d91f1e54fe6c5a8f66cbafcc82b3e46a8
11 changes: 5 additions & 6 deletions mmhuman3d/data/data_converters/aic.py
@@ -13,10 +13,10 @@

@DATA_CONVERTERS.register_module()
class AicConverter(BaseModeConverter):
"""AI Challenger dataset
`Ai challenger: A large-scale dataset for going deeper in image
understanding' arXiv'2017
More details can be found in the `paper
"""AI Challenger dataset `Ai challenger: A large-scale dataset for going
deeper in image understanding' arXiv'2017 More details can be found in the
`paper.

<https://arxiv.org/abs/1711.06475>`__ .

Args:
@@ -88,8 +88,7 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
bbox_xywh_ = np.array(bbox_xywh_).reshape((-1, 4))
bbox_xywh_ = np.hstack([bbox_xywh_, np.ones([bbox_xywh_.shape[0], 1])])
keypoints2d_ = np.array(keypoints2d_).reshape((-1, 14, 3))
keypoints2d_, mask = convert_kps(keypoints2d_, 'aic',
'human_data')
keypoints2d_, mask = convert_kps(keypoints2d_, 'aic', 'human_data')

human_data['image_path'] = image_path_
human_data['keypoints2d_mask'] = mask
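A note for reviewers on the pattern touched above: `convert_kps` remaps a dataset-specific keypoint layout into the unified `human_data` convention and also returns a mask flagging which target slots the source convention actually fills. A minimal sketch with dummy values:

```python
import numpy as np

from mmhuman3d.core.conventions.keypoints_mapping import convert_kps

# Dummy (N, 14, 3) keypoints in the 'aic' convention: x, y, confidence.
keypoints2d = np.zeros((1, 14, 3))
# Remap to 'human_data'; `mask` marks which target keypoints 'aic' provides.
keypoints2d, mask = convert_kps(keypoints2d, 'aic', 'human_data')
print(keypoints2d.shape, mask.shape)  # (1, K, 3), (K,) for K target slots
```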
27 changes: 10 additions & 17 deletions mmhuman3d/data/data_converters/instavariety.py
@@ -1,13 +1,11 @@
from .base_converter import BaseModeConverter
from typing import List
import json
import os
import glob
import os
from typing import List

import cv2
import numpy as np
from tqdm import tqdm
import tensorflow as tf
from tqdm import tqdm

from mmhuman3d.core.conventions.keypoints_mapping import convert_kps
from mmhuman3d.data.data_structures.human_data import HumanData
@@ -28,9 +26,7 @@ class InstaVarietyConverter(BaseModeConverter):
"""
ACCEPTED_MODES = ['train', 'test']

def __init__(self,
modes: List = [],
extract_img: bool = False) -> None:
def __init__(self, modes: List = [], extract_img: bool = False) -> None:
super(InstaVarietyConverter, self).__init__(modes)
self.extract_img = extract_img

@@ -86,13 +82,13 @@ def convert_by_mode(self, dataset_path: str, out_path: str,

toe_pts = np.array(toe_pts).reshape(-1, 3, 6)

visibles = example.features.feature[
vis = example.features.feature[
'image/visibilities'].int64_list.value
visibles = np.array(visibles).reshape(-1, 1, 14)
vis = np.array(vis).reshape(-1, 1, 14)

for i in tqdm(range(N)):
image = tf.image.decode_jpeg(images_data[i], channels=3)
kp = np.vstack((xys[i], visibles[i]))
kp = np.vstack((xys[i], vis[i]))
faces = face_pts[i]

toes = toe_pts[i]
@@ -118,22 +114,20 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
bbox_xyxy = self._bbox_expand(bbox_xyxy, scale_factor=1.2)
bbox_xywh = self._xyxy2xywh(bbox_xyxy)

image_path = images_name[i].decode(
"utf-8").replace('/data2/Data/instagram_download/frames_raw/', 'images/')

image_path = images_name[i].decode('utf-8').replace(
'/data2/Data/instagram_download/frames_raw/', 'images/')

if self.extract_img:
image_abs_path = os.path.join(dataset_path, image_path)
folder = os.path.dirname(image_abs_path)
if not os.path.exists(folder):
os.makedirs(folder, exist_ok=True)
cv2.imwrite(image_abs_path, np.array(image))

image_path_.append(image_path)
keypoints2d_.append(keypoints2d)
bbox_xywh_.append(bbox_xywh)


# convert keypoints
bbox_xywh_ = np.array(bbox_xywh_).reshape((-1, 4))
bbox_xywh_ = np.hstack([bbox_xywh_, np.ones([bbox_xywh_.shape[0], 1])])
@@ -152,4 +146,3 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
os.makedirs(out_path)
out_file = os.path.join(out_path, f'instavariety_{mode}.npz')
human_data.dump(out_file)
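For context on the `example.features.feature[...]` accesses in this converter, a hedged sketch of the record-reading loop, assuming the tfrecords parse as plain `tf.train.Example` protos (which the field access above suggests); the file name is illustrative only:

```python
import numpy as np
import tensorflow as tf

# Iterate raw records and parse each one as a tf.train.Example proto.
for raw in tf.data.TFRecordDataset(['insta_variety_train_00.tfrecord']):
    example = tf.train.Example()
    example.ParseFromString(raw.numpy())
    # Same key as in the converter above: per-frame joint visibilities.
    vis = example.features.feature['image/visibilities'].int64_list.value
    vis = np.array(vis).reshape(-1, 1, 14)  # frames x 1 x 14 joints
    break  # first record only, for illustration
```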

39 changes: 20 additions & 19 deletions mmhuman3d/data/data_converters/mtp.py
@@ -1,12 +1,10 @@
import json
import os
import pickle
import xml.etree.ElementTree as ET
from typing import List

import numpy as np
from tqdm import tqdm
import json
import pickle

from mmhuman3d.core.conventions.keypoints_mapping import convert_kps
from mmhuman3d.data.data_structures.human_data import HumanData
@@ -16,18 +14,17 @@

@DATA_CONVERTERS.register_module()
class MtpConverter(BaseModeConverter):
"""MTP dataset
`On Self-Contact and Human Pose' CVPR`2021
More details can be found in the `paper
"""MTP dataset `On Self-Contact and Human Pose' CVPR`2021 More details can
be found in the `paper.

<https://arxiv.org/pdf/2104.03176.pdf>`__.

Args:
modes (list): 'valid' or 'train' for accepted modes
"""
ACCEPTED_MODES = ['train', 'val']

def __init__(self,
modes: List = []) -> None:
def __init__(self, modes: List = []) -> None:
super(MtpConverter, self).__init__(modes)

def convert_by_mode(self, dataset_path: str, out_path: str,
@@ -63,17 +60,23 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
for img_id in tqdm(json_data):
part_name = img_id.split('_')[1]
image_path = f'images/{part_name}/{img_id}.png'
keypoints_file = os.path.join(dataset_path, f'keypoints/openpose/{part_name}/{img_id}.json')
smpl_file = os.path.join(dataset_path, f'smplify-xmc/smpl/params/{part_name}/{img_id}.pkl')
keypoints_file = os.path.join(
dataset_path, f'keypoints/openpose/{part_name}/{img_id}.json')
smpl_file = os.path.join(
dataset_path,
f'smplify-xmc/smpl/params/{part_name}/{img_id}.pkl')

with open(keypoints_file) as f:
data = json.load(f)
if len(data['people']) <1 :
if len(data['people']) < 1:
continue
keypoints2d = np.array(data['people'][0]['pose_keypoints_2d']).reshape(25, 3)
keypoints2d[keypoints2d[:, 2] > 0.15, 2] = 1 # set based on keypoints confidence

vis_keypoints2d = keypoints2d[np.where(keypoints2d[:, 2]>0)[0]]
keypoints2d = np.array(
data['people'][0]['pose_keypoints_2d']).reshape(25, 3)
keypoints2d[keypoints2d[:, 2] > 0.15,
2] = 1 # set based on keypoints confidence

vis_keypoints2d = keypoints2d[np.where(
keypoints2d[:, 2] > 0)[0]]
# bbox
bbox_xyxy = [
min(vis_keypoints2d[:, 0]),
@@ -95,12 +98,11 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
keypoints2d_.append(keypoints2d)
bbox_xywh_.append(bbox_xywh)

smpl['body_pose'] = np.array(smpl['body_pose']).reshape(
(-1, 23, 3))
smpl['body_pose'] = np.array(smpl['body_pose']).reshape((-1, 23, 3))
smpl['global_orient'] = np.array(smpl['global_orient']).reshape(
(-1, 3))
smpl['betas'] = np.array(smpl['betas']).reshape((-1, 10))

bbox_xywh_ = np.array(bbox_xywh_).reshape((-1, 4))
bbox_xywh_ = np.hstack([bbox_xywh_, np.ones([bbox_xywh_.shape[0], 1])])
keypoints2d_ = np.array(keypoints2d_).reshape((-1, 25, 3))
@@ -121,4 +123,3 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
file_name = 'mtp_{}.npz'.format(mode)
out_file = os.path.join(out_path, file_name)
human_data.dump(out_file)
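The confidence handling reformatted above is worth spelling out: OpenPose scores above 0.15 are binarized to 1, and the bounding box is fitted only to keypoints with non-zero confidence (OpenPose reports 0 for joints it failed to detect). A minimal sketch with dummy data:

```python
import numpy as np

# Dummy (25, 3) OpenPose body keypoints: x, y, confidence.
keypoints2d = np.random.rand(25, 3)
# Binarize confident detections, as in the converter above.
keypoints2d[keypoints2d[:, 2] > 0.15, 2] = 1
# Fit the bbox to visible keypoints only.
vis_keypoints2d = keypoints2d[keypoints2d[:, 2] > 0]
bbox_xyxy = [vis_keypoints2d[:, 0].min(), vis_keypoints2d[:, 1].min(),
             vis_keypoints2d[:, 0].max(), vis_keypoints2d[:, 1].max()]
```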

11 changes: 5 additions & 6 deletions mmhuman3d/data/data_converters/muco3dhp.py
@@ -15,12 +15,12 @@

@DATA_CONVERTERS.register_module()
class Muco3dhpConverter(BaseConverter):
"""MuCo-3DHP dataset `Single-Shot Multi-Person 3D Pose Estimation
From Monocular RGB' 3DV'2018
More details can be found in the `paper.
"""MuCo-3DHP dataset `Single-Shot Multi-Person 3D Pose Estimation From
Monocular RGB' 3DV'2018 More details can be found in the `paper.

<https://arxiv.org/abs/1712.03453>`__ .
"""

@staticmethod
def get_intrinsic_matrix(f: List[float],
c: List[float],
@@ -90,8 +90,7 @@ def convert(self, dataset_path: str, out_path: str) -> dict:
smpl_param = smpl_params[str(pid)]
pose, shape, trans = np.array(
smpl_param['pose']), np.array(
smpl_param['shape']), np.array(
smpl_param['trans'])
smpl_param['shape']), np.array(smpl_param['trans'])
sum = pose.sum() + shape.sum() + trans.sum()
if np.isnan(sum):
continue
@@ -142,4 +141,4 @@ def convert(self, dataset_path: str, out_path: str) -> dict:

file_name = 'muco3dhp_train.npz'
out_file = os.path.join(out_path, file_name)
human_data.dump(out_file)
human_data.dump(out_file)
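One detail in the hunk above: the NaN screen sums every SMPL parameter into a single scalar, which goes NaN if any one entry is NaN, so corrupted annotations are skipped cheaply. A sketch with dummy arrays (note that the committed code's `sum` variable shadows the Python builtin):

```python
import numpy as np

# Dummy SMPL parameters with one corrupted translation entry.
pose, shape, trans = np.zeros(72), np.zeros(10), np.array([0., np.nan, 0.])
# A single NaN anywhere poisons the scalar sum, flagging the annotation.
if np.isnan(pose.sum() + shape.sum() + trans.sum()):
    print('skipping corrupted annotation')
```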
31 changes: 14 additions & 17 deletions mmhuman3d/data/data_converters/mupots3d.py
@@ -1,9 +1,6 @@
import glob
import os
from typing import List, Tuple

import cv2
import h5py
import numpy as np
import scipy.io as sio
from tqdm import tqdm
@@ -16,32 +13,34 @@

@DATA_CONVERTERS.register_module()
class Mupots3dConverter(BaseConverter):
"""MuPoTs-3D dataset `Single-Shot Multi-Person 3D Pose Estimation
From Monocular RGB' 3DV'2018
More details can be found in the `paper.
"""MuPoTs-3D dataset `Single-Shot Multi-Person 3D Pose Estimation From
Monocular RGB' 3DV'2018 More details can be found in the `paper.

<https://arxiv.org/abs/1712.03453>`__ .
"""

@staticmethod
def load_annot(fname):

def parse_pose(dt):
res = {}
annot2 = dt['annot2'][0,0]
annot3 = dt['annot3'][0,0]
annot3_univ = dt['univ_annot3'][0,0]
is_valid = dt['isValidFrame'][0,0][0,0]
annot2 = dt['annot2'][0, 0]
annot3 = dt['annot3'][0, 0]
annot3_univ = dt['univ_annot3'][0, 0]
is_valid = dt['isValidFrame'][0, 0][0, 0]
res['annot2'] = annot2
res['annot3'] = annot3
res['annot3_univ'] = annot3_univ
res['is_valid'] = is_valid
return res
return res

data = sio.loadmat(fname)['annotations']
results = []
num_frames, num_inst = data.shape[0], data.shape[1]
for j in range(num_inst):
buff = []
for i in range(num_frames):
buff.append(parse_pose(data[i,j]))
buff.append(parse_pose(data[i, j]))
results.append(buff)
return results

@@ -94,7 +93,6 @@ def convert(self, dataset_path: str, out_path: str) -> dict:

image_path_, bbox_xywh_, keypoints2d_, keypoints3d_ = [], [], [], []


# test data
user_list = range(1, 21)

@@ -131,14 +129,13 @@ def convert(self, dataset_path: str, out_path: str) -> dict:
keypoints3d_.append(keypoints3d)

bbox_xywh_ = np.array(bbox_xywh_).reshape((-1, 4))
bbox_xywh_ = np.hstack(
[bbox_xywh_, np.ones([bbox_xywh_.shape[0], 1])])
bbox_xywh_ = np.hstack([bbox_xywh_, np.ones([bbox_xywh_.shape[0], 1])])
keypoints2d_ = np.array(keypoints2d_).reshape((-1, 17, 3))
keypoints2d_, mask = convert_kps(keypoints2d_, 'mpi_inf_3dhp_test',
'human_data')
'human_data')
keypoints3d_ = np.array(keypoints3d_).reshape((-1, 17, 4))
keypoints3d_, _ = convert_kps(keypoints3d_, 'mpi_inf_3dhp_test',
'human_data')
'human_data')

human_data['image_path'] = image_path_
human_data['bbox_xywh'] = bbox_xywh_
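The repeated `[0, 0]` indexing in `parse_pose` reflects how `scipy.io.loadmat` returns MATLAB structs: each field of a struct element comes back wrapped in a 2-D object array. A hedged sketch (the annotation path is illustrative):

```python
import scipy.io as sio

# MATLAB struct arrays load as 2-D arrays of records; each field of an
# element is unwrapped with [0, 0], matching parse_pose above.
data = sio.loadmat('TS1/annot.mat')['annotations']
num_frames, num_inst = data.shape
annot2 = data[0, 0]['annot2'][0, 0]  # 2D joints, frame 0, person 0
```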
3 changes: 1 addition & 2 deletions mmhuman3d/data/data_converters/ochuman.py
@@ -81,8 +81,7 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
bbox_xywh_ = np.array(bbox_xywh_).reshape((-1, 4))
bbox_xywh_ = np.hstack([bbox_xywh_, np.ones([bbox_xywh_.shape[0], 1])])
keypoints2d_ = np.array(keypoints2d_).reshape((-1, 19, 3))
keypoints2d_, mask = convert_kps(keypoints2d_, 'ochuman',
'human_data')
keypoints2d_, mask = convert_kps(keypoints2d_, 'ochuman', 'human_data')

human_data['image_path'] = image_path_
human_data['keypoints2d_mask'] = mask
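The `np.hstack` call that recurs in every converter here appends a confidence column of ones, giving the `(x, y, w, h, conf)` layout that `HumanData` expects for `bbox_xywh`. A minimal sketch with dummy boxes:

```python
import numpy as np

# Two dummy boxes in (x, y, w, h) format.
bbox_xywh_ = np.array([[10., 20., 50., 80.],
                       [5., 5., 30., 60.]])
# Append a confidence column -> shape (N, 5).
bbox_xywh_ = np.hstack([bbox_xywh_, np.ones([bbox_xywh_.shape[0], 1])])
print(bbox_xywh_.shape)  # (2, 5)
```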
33 changes: 15 additions & 18 deletions mmhuman3d/data/data_converters/oh50k3d.py
@@ -1,8 +1,8 @@
import json
import os
import cv2
from typing import List

import cv2
import numpy as np
from tqdm import tqdm

@@ -15,10 +15,9 @@

@DATA_CONVERTERS.register_module()
class OH50k3DConverter(BaseModeConverter):
"""3DOH50K dataset
`Object-Occluded Human Shape and Pose Estimation from a Single Color
Image' CVPR'2020
More details can be found in the `paper
"""3DOH50K dataset `Object-Occluded Human Shape and Pose Estimation from a
Single Color Image' CVPR'2020 More details can be found in the `paper.

<https://www.yangangwang.com/papers/ZHANG-OOH-2020-03.pdf>`__ .

Args:
@@ -48,7 +47,8 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
human_data = HumanData()

# structs we use
image_path_, bbox_xywh_, keypoints2d_, keypoints3d_, cam_param_ = [], [], [], [], []
image_path_, bbox_xywh_, keypoints2d_, keypoints3d_, cam_param_ \
= [], [], [], [], []

smpl = {}
smpl['body_pose'] = []
@@ -70,11 +70,10 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
betas = np.array(annot['betas']).reshape(-1)
pose = np.array(annot['pose']).reshape(-1)
trans = np.array(annot['trans']).reshape(-1)
scale = np.array(annot['scale'])
smpl_joints_2d = np.array(annot['smpl_joints_2d']) # 24x2
smpl_joints_3d = np.array(annot['smpl_joints_3d']) # 24x3
lsp_joints_2d = np.array(annot['lsp_joints_2d']) # 14x2
lsp_joints_3d = np.array(annot['lsp_joints_3d']) # 14x3
smpl_joints_2d = np.array(annot['smpl_joints_2d']) # 24x2
smpl_joints_3d = np.array(annot['smpl_joints_3d']) # 24x3
lsp_joints_2d = np.array(annot['lsp_joints_2d']) # 14x2
lsp_joints_3d = np.array(annot['lsp_joints_3d']) # 14x3

# fix keypoints3d
smpl_joints_3d = smpl_joints_3d - smpl_joints_3d[0]
@@ -84,7 +83,7 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
h, w, _ = cv2.imread(f'{dataset_path}/{img_path}').shape

# scale and center
bbox_xyxy = np.array(annot['bbox']).reshape(-1) # 2x2 - check foramt
bbox_xyxy = np.array(annot['bbox']).reshape(-1)
bbox_xyxy = self._bbox_expand(bbox_xyxy, scale_factor=1.2)
bbox_xywh = self._xyxy2xywh(bbox_xyxy)
smpl_joints_2d = np.hstack([smpl_joints_2d, np.ones([24, 1])])
@@ -98,9 +97,9 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
camera = CameraParameter(H=h, W=w)
camera.set_KRT(K, R, T)
parameter_dict = camera.to_dict()
pose[:3] = cv2.Rodrigues(
np.dot(R,
cv2.Rodrigues(pose[:3])[0]))[0].T[0]
pose[:3] = cv2.Rodrigues(np.dot(R,
cv2.Rodrigues(
pose[:3])[0]))[0].T[0]

# store data
image_path_.append(img_path)
@@ -119,13 +118,11 @@ def convert_by_mode(self, dataset_path: str, out_path: str,
smpl['betas'] = np.array(smpl['betas']).reshape((-1, 10))
smpl['transl'] = np.array(smpl['transl']).reshape((-1, 3))


# convert keypoints
bbox_xywh_ = np.array(bbox_xywh_).reshape((-1, 4))
bbox_xywh_ = np.hstack([bbox_xywh_, np.ones([bbox_xywh_.shape[0], 1])])
keypoints2d_ = np.array(keypoints2d_).reshape((-1, 24, 3))
keypoints2d_, mask = convert_kps(keypoints2d_, 'smpl',
'human_data')
keypoints2d_, mask = convert_kps(keypoints2d_, 'smpl', 'human_data')
keypoints3d_ = np.array(keypoints3d_).reshape((-1, 24, 4))
keypoints3d_, _ = convert_kps(keypoints3d_, 'smpl', 'human_data')

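The reflowed `cv2.Rodrigues` expression above composes the camera rotation with the SMPL global orient so the pose is expressed in the camera frame. An equivalent, more readable sketch (`R` is a placeholder identity; the converter uses the extrinsic from the annotation):

```python
import cv2
import numpy as np

R = np.eye(3)        # placeholder camera rotation
pose = np.zeros(72)  # dummy SMPL pose; pose[:3] is the global orient

root_mat = cv2.Rodrigues(pose[:3])[0]           # axis-angle -> 3x3 matrix
pose[:3] = cv2.Rodrigues(R @ root_mat)[0].T[0]  # rotate, back to axis-angle
```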