Commit e53a741

Add pose estimation and refine code

1 parent 09d138b commit e53a741

16 files changed: +254 -44 lines changed

.gitignore

+1 -2
@@ -1,6 +1,7 @@
 .idea/
 *.pyc
 __pycache__/
+utils/__pycache__/
 test.data/
 training/snapshot/
 training/logs/
@@ -21,5 +22,3 @@ models/shape_predictor_68_face_landmarks.dat
 
 demo_obama/
 todo.md
-
-utils/__pycacje__/

benchmark_aflw.py

-3
@@ -1,11 +1,8 @@
 #!/usr/bin/env python3
 # coding: utf-8
 
-import os
 import os.path as osp
 import numpy as np
-import sys
-from glob import glob
 from math import sqrt
 from utils.io import _load
 
main.py

+25 -9
@@ -1,5 +1,9 @@
 #!/usr/bin/env python3
 # coding: utf-8
+import sys
+
+from utils.cv_plot import plot_pose_box
+
 __author__ = 'cleardusk'
 
 """
@@ -10,8 +14,6 @@
 1. CPU optimization: https://pmchojnacki.wordpress.com/2018/10/07/slow-pytorch-cpu-performance
 """
 
-# import modules
-
 import torch
 import torchvision.transforms as transforms
 import mobilenet_v1
@@ -22,9 +24,12 @@
 import scipy.io as sio
 from utils.inference import get_suffix, calc_roi_box, crop_img, predict_68pts, dump_to_ply, dump_vertex, draw_landmarks, \
     predict_dense
+from utils.estimate_pose import parse_pose
 import argparse
 import torch.backends.cudnn as cudnn
 
+STD_SIZE = 120
+
 
 def main(args):
     # 1. load pre-tained model
@@ -51,6 +56,7 @@ def main(args):
 
     # 3. forward
     tri = sio.loadmat('visualize/tri.mat')['tri']
+    transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
     for img_fp in args.files:
         img_ori = cv2.imread(img_fp)
         if args.dlib_bbox:
@@ -67,22 +73,21 @@ def main(args):
                 rect = dlib.rectangle(l, r, t, b)
                 rects.append(rect)
 
-        pts_dlib = []
         pts_res = []
+        Ps = []  # Camera matrix collection
+        poses = []  # pose collection, [todo: validate it]
         ind = 0
         suffix = get_suffix(img_fp)
         for rect in rects:
             # landmark & crop
             pts = face_regressor(img_ori, rect).parts()
             pts = np.array([[pt.x, pt.y] for pt in pts]).T
-            pts_dlib.append(pts)
 
             roi_box = calc_roi_box(pts)
             img = crop_img(img_ori, roi_box)
 
             # forward: one step
-            img = cv2.resize(img, dsize=(120, 120), interpolation=cv2.INTER_LINEAR)
-            transform = transforms.Compose([ToTensorGjz(), NormalizeGjz(mean=127.5, std=128)])
+            img = cv2.resize(img, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
             input = transform(img).unsqueeze(0)
             with torch.no_grad():
                 if args.mode == 'gpu':
@@ -97,16 +102,20 @@
             if args.box_init == 'two':
                 roi_box = calc_roi_box(pts68)
                 img_step2 = crop_img(img_ori, roi_box)
-                img_step2 = cv2.resize(img_step2, dsize=(120, 120), interpolation=cv2.INTER_LINEAR)
+                img_step2 = cv2.resize(img_step2, dsize=(STD_SIZE, STD_SIZE), interpolation=cv2.INTER_LINEAR)
                 input = transform(img_step2).unsqueeze(0)
                 with torch.no_grad():
                     if args.mode == 'gpu':
                         input = input.cuda()
                     param = model(input)
                     param = param.squeeze().cpu().numpy().flatten().astype(np.float32)
+
                 pts68 = predict_68pts(param, roi_box)
 
             pts_res.append(pts68)
+            P, pose = parse_pose(param)
+            Ps.append(P)
+            poses.append(pose)
 
             # dense face vertices
             if args.dump_ply or args.dump_vertex:
@@ -123,8 +132,14 @@ def main(args):
                 wfp = '{}_{}.roibox'.format(img_fp.replace(suffix, ''), ind)
                 np.savetxt(wfp, roi_box, fmt='%.3f')
                 print('Save roi box to {}'.format(wfp))
-
             ind += 1
+
+        if args.dump_pose:
+            # P, pose = parse_pose(param)  # Camera matrix (without scale), and pose (yaw, pitch, roll, to verify)
+            img_pose = plot_pose_box(img_ori, Ps, pts_res)
+            wfp = img_fp.replace(suffix, '_pose.jpg')
+            cv2.imwrite(wfp, img_pose)
+            print('Dump to {}'.format(wfp))
         if args.dump_res:
            draw_landmarks(img_ori, pts_res, wfp=img_fp.replace(suffix, '_3DDFA.jpg'), show_flg=args.show_flg)
 
@@ -137,11 +152,12 @@ def main(args):
     parser.add_argument('--show_flg', default='True', type=str2bool, help='whether show the visualization result')
     parser.add_argument('--box_init', default='one', type=str, help='one|two: one-step bbox initialization or two-step')
     parser.add_argument('--dump_res', default='true', type=str2bool, help='whether write out the visualization image')
-    parser.add_argument('--dump_vertex', default='true', type=str2bool,
+    parser.add_argument('--dump_vertex', default='false', type=str2bool,
                         help='whether write out the dense face vertices to mat')
     parser.add_argument('--dump_ply', default='true', type=str2bool)
     parser.add_argument('--dump_pts', default='true', type=str2bool)
     parser.add_argument('--dump_roi_box', default='false', type=str2bool)
+    parser.add_argument('--dump_pose', default='true', type=str2bool)
     parser.add_argument('--dlib_bbox', default='true', type=str2bool, help='whether use dlib to predict bbox')
 
     args = parser.parse_args()
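
For orientation, a minimal sketch (not part of this commit) of the pose path that `main.py` now wires up: `parse_pose` maps the regressed 62-dim `param` (12 pose values padded with 50 shape/expression values, per `utils/ddfa.py` below) to a (3, 4) camera matrix `P` plus rough (yaw, pitch, roll) angles, and `plot_pose_box` draws the projected box. `img_ori`, `param`, and `pts68` here are random stand-ins, not real detector or model output:

```python
# Minimal sketch of the new pose path, run from the repo root.
# parse_pose and plot_pose_box are the functions imported in main.py above;
# img_ori, param, and pts68 are random stand-ins, not real model output.
import cv2
import numpy as np

from utils.estimate_pose import parse_pose
from utils.cv_plot import plot_pose_box

img_ori = np.zeros((480, 640, 3), dtype=np.uint8)  # stand-in for cv2.imread(img_fp)
param = np.random.randn(62).astype(np.float32)     # stand-in for the model's 62-dim output
pts68 = np.random.rand(3, 68) * 100                # stand-in for predict_68pts(param, roi_box)

P, pose = parse_pose(param)                        # camera matrix (without scale) and pose angles
img_pose = plot_pose_box(img_ori, [P], [pts68])    # one list entry per detected face
cv2.imwrite('sample_pose.jpg', img_pose)
print(pose)                                        # (yaw, pitch, roll), "to verify" per the commit
```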

readme.md

+15 -9
@@ -8,6 +8,7 @@
 
 **\[Updates\]**
 
+- `2018.12.1`: Add pose estimation and refine code, see [utils/estimate_pose.py](./utils/estimate_pose.py) for more details.
 - `2018.11.17`: Refine code and map the 3d vertex to original image space.
 - `2018.11.11`: **Update end-to-end inference pipeline: infer/serialize 3D face shape and 68 landmarks given one arbitrary image, please see readme.md below for more details.**
 - `2018.11.9`: Update trained model with higher performance in [models](./models).
@@ -76,23 +77,26 @@ In addition, I strongly recommend using Python3.6+ instead of older version for
 ```
 If you can see these output log in terminal, you run it successfully.
 ```
-Dump to samples/test1_0.ply
-Dump to samples/test1_0.mat
+Dump tp samples/test1_0.ply
 Save 68 3d landmarks to samples/test1_0.txt
-Dump to samples/test1_1.ply
-Dump to samples/test1_1.mat
+Dump tp samples/test1_1.ply
 Save 68 3d landmarks to samples/test1_1.txt
+Dump to samples/test1_pose.jpg
 Save visualization result to samples/test1_3DDFA.jpg
 ```
 
 Because `test1.jpg` has two faces, there are two `mat` (stores dense face vertices, can be rendered by Matlab, see [visualize](./visualize)) and `ply` files (can be rendered by Meshlab or Microsoft 3D Builder) predicted.
 
 Please run `python3 main.py -h` or review the code for more details.
 
-The result `samples/test1_3DDFA.jpg` is shown below
+The 68 landmarks visualization result `samples/test1_3DDFA.jpg` and pose estimation result `samples/test1_pose.jpg` are shown below
 
 <p align="center">
-  <img src="samples/test1_3DDFA.jpg" alt="samples" width="700px">
+  <img src="samples/test1_3DDFA.jpg" alt="samples" width="650px">
+</p>
+
+<p align="center">
+  <img src="samples/test1_pose.jpg" alt="samples" width="650px">
 </p>
 
 3. Additional example
@@ -102,7 +106,11 @@ In addition, I strongly recommend using Python3.6+ instead of older version for
 ```
 
 <p align="center">
-  <img src="samples/emma_input_3DDFA.jpg" alt="samples" width="700px">
+  <img src="samples/emma_input_3DDFA.jpg" alt="samples" width="750px">
+</p>
+
+<p align="center">
+  <img src="samples/emma_input_pose.jpg" alt="samples" width="750px">
 </p>
 
 ## Citation
@@ -122,8 +130,6 @@ In addition, I strongly recommend using Python3.6+ instead of older version for
 }
 
 
-
-
 ## Inference speed
 When batch size is 128, the inference time of MobileNet-V1 takes about 34.7ms. The average speed is about **0.27ms/pic**.

samples/emma_input_3DDFA.jpg

47 KB

samples/emma_input_pose.jpg

1.04 MB

samples/test1_3DDFA.jpg

14.8 KB

samples/test1_pose.jpg

178 KB

utils/cv_plot.py

+99
@@ -0,0 +1,99 @@
+#!/usr/bin/env python3
+# coding: utf-8
+
+
+"""
+Modified from: https://sourcegraph.com/github.com/YadiraF/PRNet@master/-/blob/utils/cv_plot.py
+"""
+
+import numpy as np
+import cv2
+
+from utils.inference import calc_hypotenuse
+
+end_list = np.array([17, 22, 27, 42, 48, 31, 36, 68], dtype=np.int32) - 1
+
+
+def plot_kpt(image, kpt):
+    ''' Draw 68 key points
+    Args:
+        image: the input image
+        kpt: (68, 3).
+    '''
+    image = image.copy()
+    kpt = np.round(kpt).astype(np.int32)
+    for i in range(kpt.shape[0]):
+        st = kpt[i, :2]
+        image = cv2.circle(image, (st[0], st[1]), 1, (0, 0, 255), 2)
+        if i in end_list:
+            continue
+        ed = kpt[i + 1, :2]
+        image = cv2.line(image, (st[0], st[1]), (ed[0], ed[1]), (255, 255, 255), 1)
+    return image
+
+
+def build_camera_box(rear_size=90):
+    point_3d = []
+    rear_depth = 0
+    point_3d.append((-rear_size, -rear_size, rear_depth))
+    point_3d.append((-rear_size, rear_size, rear_depth))
+    point_3d.append((rear_size, rear_size, rear_depth))
+    point_3d.append((rear_size, -rear_size, rear_depth))
+    point_3d.append((-rear_size, -rear_size, rear_depth))
+
+    front_size = int(4 / 3 * rear_size)
+    front_depth = int(4 / 3 * rear_size)
+    point_3d.append((-front_size, -front_size, front_depth))
+    point_3d.append((-front_size, front_size, front_depth))
+    point_3d.append((front_size, front_size, front_depth))
+    point_3d.append((front_size, -front_size, front_depth))
+    point_3d.append((-front_size, -front_size, front_depth))
+    point_3d = np.array(point_3d, dtype=np.float).reshape(-1, 3)
+
+    return point_3d
+
+
+def plot_pose_box(image, Ps, pts68s, color=(40, 255, 0), line_width=2):
+    ''' Draw a 3D box as annotation of pose. Ref: https://github.com/yinguobing/head-pose-estimation/blob/master/pose_estimator.py
+    Args:
+        image: the input image
+        P: (3, 4). Affine Camera Matrix.
+        kpt: (2, 68) or (3, 68)
+    '''
+    image = image.copy()
+    if not isinstance(pts68s, list):
+        pts68s = [pts68s]
+    if not isinstance(Ps, list):
+        Ps = [Ps]
+    for i in range(len(pts68s)):
+        pts68 = pts68s[i]
+        llength = calc_hypotenuse(pts68)
+        point_3d = build_camera_box(llength)
+        P = Ps[i]
+
+        # Map to 2d image points
+        point_3d_homo = np.hstack((point_3d, np.ones([point_3d.shape[0], 1])))  # n x 4
+        point_2d = point_3d_homo.dot(P.T)[:, :2]
+
+        point_2d[:, 1] = - point_2d[:, 1]
+        point_2d[:, :2] = point_2d[:, :2] - np.mean(point_2d[:4, :2], 0) + np.mean(pts68[:2, :27], 1)
+        point_2d = np.int32(point_2d.reshape(-1, 2))
+
+        # Draw all the lines
+        cv2.polylines(image, [point_2d], True, color, line_width, cv2.LINE_AA)
+        cv2.line(image, tuple(point_2d[1]), tuple(
+            point_2d[6]), color, line_width, cv2.LINE_AA)
+        cv2.line(image, tuple(point_2d[2]), tuple(
+            point_2d[7]), color, line_width, cv2.LINE_AA)
+        cv2.line(image, tuple(point_2d[3]), tuple(
+            point_2d[8]), color, line_width, cv2.LINE_AA)
+
+    return image
+
+
+def main():
+    pass
+
+
+if __name__ == '__main__':
+    main()
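
For reference, the projection step inside `plot_pose_box` above is plain affine geometry: stack the 3D box corners as homogeneous rows, multiply by the transpose of the (3, 4) camera matrix, keep x/y, and flip y because image rows grow downward. A self-contained numpy sketch with a made-up `P`, not one produced by this repo:

```python
# Standalone illustration of the point projection used in plot_pose_box.
# P here is a toy affine camera matrix, not repo output.
import numpy as np

P = np.array([[1., 0., 0., 50.],
              [0., 1., 0., 80.],
              [0., 0., 1., 0.]])  # (3, 4), last column is translation

point_3d = np.array([(-90., -90., 0.), (90., 90., 0.), (0., 0., 120.)])
point_3d_homo = np.hstack((point_3d, np.ones((point_3d.shape[0], 1))))  # n x 4
point_2d = point_3d_homo.dot(P.T)[:, :2]  # project, drop the third coordinate
point_2d[:, 1] = -point_2d[:, 1]          # flip y: image rows grow downward
print(point_2d)  # [[-40. 10.], [140. -170.], [50. -80.]]
```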

utils/ddfa.py

+9 -6
@@ -11,13 +11,14 @@
 import pickle
 import argparse
 from .io import _numpy_to_tensor, _load_cpu, _load_gpu
-from params import *
+from utils.params import *
 
 
-def reconstruct_vertex(param, whitening=True, dense=False):
+def reconstruct_vertex(param, whitening=True, dense=False, transform=True):
     """Whitening param -> 3d vertex, based on the 3dmm param: u_base, w_shp, w_exp
     dense: if True, return dense vertex, else return 68 sparse landmarks. All dense or sparse vertex is transformed to
     image coordinate space, but without alignment caused by face cropping.
+    transform: whether transform to image space
     """
     if len(param) == 12:
         param = np.concatenate((param, [0] * 50))
@@ -36,14 +37,16 @@ def reconstruct_vertex(param, whitening=True, dense=False):
     if dense:
         vertex = p @ (u + w_shp @ alpha_shp + w_exp @ alpha_exp).reshape(3, -1, order='F') + offset
 
-        # transform to image coordinate space
-        vertex[1, :] = std_size + 1 - vertex[1, :]
+        if transform:
+            # transform to image coordinate space
+            vertex[1, :] = std_size + 1 - vertex[1, :]
     else:
         """For 68 pts"""
         vertex = p @ (u_base + w_shp_base @ alpha_shp + w_exp_base @ alpha_exp).reshape(3, -1, order='F') + offset
 
-        # transform to image coordinate space
-        vertex[1, :] = std_size + 1 - vertex[1, :]
+        if transform:
+            # transform to image coordinate space
+            vertex[1, :] = std_size + 1 - vertex[1, :]
 
     return vertex
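
A toy illustration of what the new `transform` flag gates: with `transform=True` the y-coordinates are flipped into image space via `std_size + 1 - y` (presumably the same 120-pixel network input size as `STD_SIZE` in `main.py`); with `transform=False` the vertices stay in model space. The vertex values below are made up:

```python
# Toy numbers showing the y-flip that the new transform flag gates.
# std_size = 120 mirrors STD_SIZE in main.py; the vertex values are invented.
import numpy as np

std_size = 120
vertex = np.array([[10., 50.],    # x
                   [20., 100.],   # y
                   [5., 7.]])     # z

vertex[1, :] = std_size + 1 - vertex[1, :]  # y -> 121 - y
print(vertex[1, :])  # [101. 21.]
```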