Skip to content
This repository was archived by the owner on Nov 13, 2024. It is now read-only.

Commit fbf39d2

Browse files
committed
added new extractor: S3FD,
all extractors now produce fewer false-positive faces
1 parent 9440224 commit fbf39d2

10 files changed

+83
-112
lines changed

.gitignore

-9
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,5 @@
44
!*.txt
55
!*.jpg
66
!requirements*
7-
!doc
8-
!facelib
9-
!gpufmkmgr
10-
!localization
11-
!mainscripts
12-
!mathlib
13-
!models
14-
!nnlib
15-
!utils
167
!Dockerfile*
178
!*.sh

doc/manual_ru.pdf

-925 Bytes
Binary file not shown.

doc/manual_ru_source.xml

+2-2
Large diffs are not rendered by default.

facelib/2DFAN-4.h5

22.3 KB
Binary file not shown.

facelib/LandmarksExtractor.py

+51-89
Original file line numberDiff line numberDiff line change
@@ -3,101 +3,18 @@
33
import cv2
44
from pathlib import Path
55

6-
def transform(point, center, scale, resolution):
7-
pt = np.array ( [point[0], point[1], 1.0] )
8-
h = 200.0 * scale
9-
m = np.eye(3)
10-
m[0,0] = resolution / h
11-
m[1,1] = resolution / h
12-
m[0,2] = resolution * ( -center[0] / h + 0.5 )
13-
m[1,2] = resolution * ( -center[1] / h + 0.5 )
14-
m = np.linalg.inv(m)
15-
return np.matmul (m, pt)[0:2]
16-
17-
def crop(image, center, scale, resolution=256.0):
18-
ul = transform([1, 1], center, scale, resolution).astype( np.int )
19-
br = transform([resolution, resolution], center, scale, resolution).astype( np.int )
20-
if image.ndim > 2:
21-
newDim = np.array([br[1] - ul[1], br[0] - ul[0], image.shape[2]], dtype=np.int32)
22-
newImg = np.zeros(newDim, dtype=np.uint8)
23-
else:
24-
newDim = np.array([br[1] - ul[1], br[0] - ul[0]], dtype=np.int)
25-
newImg = np.zeros(newDim, dtype=np.uint8)
26-
ht = image.shape[0]
27-
wd = image.shape[1]
28-
newX = np.array([max(1, -ul[0] + 1), min(br[0], wd) - ul[0]], dtype=np.int32)
29-
newY = np.array([max(1, -ul[1] + 1), min(br[1], ht) - ul[1]], dtype=np.int32)
30-
oldX = np.array([max(1, ul[0] + 1), min(br[0], wd)], dtype=np.int32)
31-
oldY = np.array([max(1, ul[1] + 1), min(br[1], ht)], dtype=np.int32)
32-
newImg[newY[0] - 1:newY[1], newX[0] - 1:newX[1] ] = image[oldY[0] - 1:oldY[1], oldX[0] - 1:oldX[1], :]
33-
newImg = cv2.resize(newImg, dsize=(int(resolution), int(resolution)), interpolation=cv2.INTER_LINEAR)
34-
return newImg
35-
36-
def get_pts_from_predict(a, center, scale):
37-
b = a.reshape ( (a.shape[0], a.shape[1]*a.shape[2]) )
38-
c = b.argmax(1).reshape ( (a.shape[0], 1) ).repeat(2, axis=1).astype(np.float)
39-
c[:,0] %= a.shape[2]
40-
c[:,1] = np.apply_along_axis ( lambda x: np.floor(x / a.shape[2]), 0, c[:,1] )
41-
42-
for i in range(a.shape[0]):
43-
pX, pY = int(c[i,0]), int(c[i,1])
44-
if pX > 0 and pX < 63 and pY > 0 and pY < 63:
45-
diff = np.array ( [a[i,pY,pX+1]-a[i,pY,pX-1], a[i,pY+1,pX]-a[i,pY-1,pX]] )
46-
c[i] += np.sign(diff)*0.25
47-
48-
c += 0.5
49-
return [ transform (c[i], center, scale, a.shape[2]) for i in range(a.shape[0]) ]
506

51-
527
class LandmarksExtractor(object):
538
def __init__ (self, keras):
549
self.keras = keras
5510
K = self.keras.backend
56-
class TorchBatchNorm2D(self.keras.layers.Layer):
57-
def __init__(self, axis=-1, momentum=0.99, epsilon=1e-3, **kwargs):
58-
super(TorchBatchNorm2D, self).__init__(**kwargs)
59-
self.supports_masking = True
60-
self.axis = axis
61-
self.momentum = momentum
62-
self.epsilon = epsilon
63-
64-
def build(self, input_shape):
65-
dim = input_shape[self.axis]
66-
if dim is None:
67-
raise ValueError('Axis ' + str(self.axis) + ' of ' 'input tensor should have a defined dimension ' 'but the layer received an input with shape ' + str(input_shape) + '.')
68-
shape = (dim,)
69-
self.gamma = self.add_weight(shape=shape, name='gamma', initializer='ones', regularizer=None, constraint=None)
70-
self.beta = self.add_weight(shape=shape, name='beta', initializer='zeros', regularizer=None, constraint=None)
71-
self.moving_mean = self.add_weight(shape=shape, name='moving_mean', initializer='zeros', trainable=False)
72-
self.moving_variance = self.add_weight(shape=shape, name='moving_variance', initializer='ones', trainable=False)
73-
self.built = True
74-
75-
def call(self, inputs, training=None):
76-
input_shape = K.int_shape(inputs)
77-
78-
broadcast_shape = [1] * len(input_shape)
79-
broadcast_shape[self.axis] = input_shape[self.axis]
80-
81-
broadcast_moving_mean = K.reshape(self.moving_mean, broadcast_shape)
82-
broadcast_moving_variance = K.reshape(self.moving_variance, broadcast_shape)
83-
broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
84-
broadcast_beta = K.reshape(self.beta, broadcast_shape)
85-
invstd = K.ones (shape=broadcast_shape, dtype='float32') / K.sqrt(broadcast_moving_variance + K.constant(self.epsilon, dtype='float32'))
86-
87-
return (inputs - broadcast_moving_mean) * invstd * broadcast_gamma + broadcast_beta
88-
89-
def get_config(self):
90-
config = { 'axis': self.axis, 'momentum': self.momentum, 'epsilon': self.epsilon }
91-
base_config = super(TorchBatchNorm2D, self).get_config()
92-
return dict(list(base_config.items()) + list(config.items()))
93-
self.TorchBatchNorm2D = TorchBatchNorm2D
94-
11+
9512
def __enter__(self):
9613
keras_model_path = Path(__file__).parent / "2DFAN-4.h5"
9714
if not keras_model_path.exists():
9815
return None
9916

100-
self.keras_model = self.keras.models.load_model ( str(keras_model_path), custom_objects={'TorchBatchNorm2D': self.TorchBatchNorm2D} )
17+
self.keras_model = self.keras.models.load_model (str(keras_model_path))
10118

10219
return self
10320

@@ -116,13 +33,58 @@ def extract_from_bgr (self, input_image, rects):
11633
center[1] -= (bottom - top) * 0.12
11734
scale = (right - left + bottom - top) / 195.0
11835

119-
image = crop(input_image, center, scale).transpose ( (2,0,1) ).astype(np.float32) / 255.0
36+
image = self.crop(input_image, center, scale).astype(np.float32)
12037
image = np.expand_dims(image, 0)
12138

122-
predicted = self.keras_model.predict (image)
123-
124-
pts_img = get_pts_from_predict ( predicted[-1], center, scale)
39+
predicted = self.keras_model.predict (image).transpose (0,3,1,2)
40+
41+
pts_img = self.get_pts_from_predict ( predicted[-1], center, scale)
12542
pts_img = [ ( int(pt[0]), int(pt[1]) ) for pt in pts_img ]
12643
landmarks.append ( ( (left, top, right, bottom),pts_img ) )
12744

12845
return landmarks
46+
47+
def transform(self, point, center, scale, resolution):
    """Map a point from crop coordinates back to source-image coordinates.

    A forward affine matrix is built that sends the square image region of
    side ``200 * scale`` centred on ``center`` onto a
    ``resolution`` x ``resolution`` crop (so ``center`` lands on
    ``resolution / 2``).  That matrix is inverted and applied to ``point``.

    Args:
        point: indexable with ``point[0]`` = x and ``point[1]`` = y, in crop
            coordinates.
        center: indexable with the x, y of the region centre in the image.
        scale: region size factor; the region side is ``200 * scale``.
        resolution: side length of the crop.

    Returns:
        A length-2 numpy array with the x, y position in image coordinates.
    """
    side = 200.0 * scale
    zoom = resolution / side

    # Forward (image -> crop) transform in homogeneous coordinates.
    image_to_crop = np.array([
        [zoom, 0.0, resolution * (-center[0] / side + 0.5)],
        [0.0, zoom, resolution * (-center[1] / side + 0.5)],
        [0.0, 0.0, 1.0],
    ])
    crop_to_image = np.linalg.inv(image_to_crop)

    homogeneous = np.array([point[0], point[1], 1.0])
    return (crop_to_image @ homogeneous)[:2]
57+
58+
def crop(self, image, center, scale, resolution=256.0):
    """Cut a square region out of ``image`` and resize it to ``resolution``.

    The region corners are obtained by mapping the crop corners ``(1, 1)``
    and ``(resolution, resolution)`` back to image coordinates with
    ``self.transform``.  Parts of the region that fall outside the image
    are left as zeros.

    Args:
        image: numpy array indexed as ``[row, column]`` or
            ``[row, column, channel]``.
        center: x, y of the region centre in image coordinates.
        scale: region size factor, forwarded to ``self.transform``.
        resolution: side length of the returned crop.

    Returns:
        The region resized with ``cv2.resize`` (bilinear) to
        ``int(resolution)`` x ``int(resolution)``.  The intermediate canvas
        is allocated as ``uint8``.
    """
    # ``np.int`` was only an alias of the builtin ``int`` and was removed in
    # NumPy 1.24, so the builtin is used directly.
    ul = self.transform([1, 1], center, scale, resolution).astype(int)
    br = self.transform([resolution, resolution], center, scale, resolution).astype(int)

    region_h = br[1] - ul[1]
    region_w = br[0] - ul[0]
    if image.ndim > 2:
        canvas_shape = (region_h, region_w, image.shape[2])
    else:
        canvas_shape = (region_h, region_w)
    newImg = np.zeros(canvas_shape, dtype=np.uint8)

    ht = image.shape[0]
    wd = image.shape[1]

    # 1-based, inclusive [first, last] ranges: new* address the canvas,
    # old* address the source image, both clipped to the image bounds.
    newX = (max(1, -ul[0] + 1), min(br[0], wd) - ul[0])
    newY = (max(1, -ul[1] + 1), min(br[1], ht) - ul[1])
    oldX = (max(1, ul[0] + 1), min(br[0], wd))
    oldY = (max(1, ul[1] + 1), min(br[1], ht))

    # No trailing ``:`` on the source slice: it is redundant for 3-D images
    # and raised IndexError for 2-D (grayscale) ones.
    newImg[newY[0] - 1:newY[1], newX[0] - 1:newX[1]] = \
        image[oldY[0] - 1:oldY[1], oldX[0] - 1:oldX[1]]

    return cv2.resize(newImg, dsize=(int(resolution), int(resolution)), interpolation=cv2.INTER_LINEAR)
76+
77+
def get_pts_from_predict(self, a, center, scale):
    """Convert a stack of heatmaps into points in image coordinates.

    For each heatmap the arg-max cell is located, nudged a quarter cell
    toward the larger of its two neighbours along each axis (only when the
    peak is not on the border), moved to the cell centre (+0.5) and mapped
    back to the image with ``self.transform``.

    Args:
        a: numpy array indexed as ``[heatmap, row, column]``.
        center: x, y of the crop centre, forwarded to ``self.transform``.
        scale: crop size factor, forwarded to ``self.transform``.

    Returns:
        A list with one ``self.transform`` result per heatmap.
    """
    num_pts, height, width = a.shape[0], a.shape[1], a.shape[2]

    # Flat arg-max per heatmap, split into column (x) and row (y).
    flat_peak = a.reshape((num_pts, height * width)).argmax(1)
    # Builtin ``float`` instead of ``np.float`` (alias removed in NumPy 1.24).
    c = np.empty((num_pts, 2), dtype=float)
    c[:, 0] = flat_peak % width
    c[:, 1] = flat_peak // width

    for i in range(num_pts):
        pX, pY = int(c[i, 0]), int(c[i, 1])
        # Border test derived from the heatmap size instead of a fixed 63,
        # so maps other than 64x64 neither index out of range nor skip
        # refinement for interior peaks.
        if 0 < pX < width - 1 and 0 < pY < height - 1:
            diff = np.array([a[i, pY, pX + 1] - a[i, pY, pX - 1],
                             a[i, pY + 1, pX] - a[i, pY - 1, pX]])
            c[i] += np.sign(diff) * 0.25

    c += 0.5
    return [self.transform(c[i], center, scale, width) for i in range(num_pts)]

facelib/S3FD.h5

85.8 MB
Binary file not shown.

facelib/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from .FaceType import FaceType
22
from .DLIBExtractor import DLIBExtractor
33
from .MTCExtractor import MTCExtractor
4+
from .S3FDExtractor import S3FDExtractor
45
from .LandmarksExtractor import LandmarksExtractor

main.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ def process_extract(arguments):
3939
extract_parser.add_argument('--output-dir', required=True, action=fixPathAction, dest="output_dir", help="Output directory. This is where the extracted files will be stored.")
4040
extract_parser.add_argument('--debug', action="store_true", dest="debug", default=False, help="Writes debug images to [output_dir]_debug\ directory.")
4141
extract_parser.add_argument('--face-type', dest="face_type", choices=['half_face', 'full_face', 'head', 'avatar', 'mark_only'], default='full_face', help="Default 'full_face'. Don't change this option, currently all models uses 'full_face'")
42-
extract_parser.add_argument('--detector', dest="detector", choices=['dlib','mt','manual'], default='dlib', help="Type of detector. Default 'dlib'. 'mt' (MTCNNv1) - faster, better, almost no jitter, perfect for gathering thousands faces for src-set. It is also good for dst-set, but can generate false faces in frames where main face not recognized! In this case for dst-set use either 'dlib' with '--manual-fix' or '--detector manual'. Manual detector suitable only for dst-set.")
42+
extract_parser.add_argument('--detector', dest="detector", choices=['dlib','mt','s3fd','manual'], default='dlib', help="Type of detector. Default 'dlib'. 'mt' (MTCNNv1) - faster, better, almost no jitter, perfect for gathering thousands faces for src-set. It is also good for dst-set, but can generate false faces in frames where main face not recognized! In this case for dst-set use either 'dlib' with '--manual-fix' or '--detector manual'. Manual detector suitable only for dst-set.")
4343
extract_parser.add_argument('--multi-gpu', action="store_true", dest="multi_gpu", default=False, help="Enables multi GPU.")
4444
extract_parser.add_argument('--manual-fix', action="store_true", dest="manual_fix", default=False, help="Enables manual extract only frames where faces were not recognized.")
4545
extract_parser.add_argument('--manual-output-debug-fix', action="store_true", dest="manual_output_debug_fix", default=False, help="Performs manual reextract input-dir frames which were deleted from [output_dir]_debug\ dir.")

mainscripts/Extractor.py

+24-10
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import shutil
77
from pathlib import Path
88
import numpy as np
9+
import mathlib
910
import cv2
1011
from utils import Path_utils
1112
from utils.DFLJPG import DFLJPG
@@ -47,6 +48,9 @@ def on_initialize(self, client_dict):
4748
elif self.detector == 'dlib':
4849
nnlib.import_dlib (device_config)
4950
self.e = facelib.DLIBExtractor(nnlib.dlib)
51+
elif self.detector == 's3fd':
52+
nnlib.import_all (device_config)
53+
self.e = facelib.S3FDExtractor()
5054
else:
5155
raise ValueError ("Wrond detector type.")
5256

@@ -104,22 +108,32 @@ def process_data(self, data):
104108
debug_output_file = '{}{}'.format( str(Path(str(self.output_path) + '_debug') / filename_path.stem), '.jpg')
105109
debug_image = image.copy()
106110

107-
for (face_idx, face) in enumerate(faces):
108-
output_file = '{}_{}{}'.format(str(self.output_path / filename_path.stem), str(face_idx), '.jpg')
109-
110-
rect = face[0]
111+
face_idx = 0
112+
for face in faces:
113+
rect = np.array(face[0])
111114
image_landmarks = np.array(face[1])
112115

113-
if self.debug:
114-
LandmarksProcessor.draw_rect_landmarks (debug_image, rect, image_landmarks, self.image_size, self.face_type)
115-
116116
if self.face_type == FaceType.MARK_ONLY:
117117
face_image = image
118118
face_image_landmarks = image_landmarks
119119
else:
120120
image_to_face_mat = LandmarksProcessor.get_transform_mat (image_landmarks, self.image_size, self.face_type)
121121
face_image = cv2.warpAffine(image, image_to_face_mat, (self.image_size, self.image_size), cv2.INTER_LANCZOS4)
122122
face_image_landmarks = LandmarksProcessor.transform_points (image_landmarks, image_to_face_mat)
123+
124+
landmarks_bbox = LandmarksProcessor.transform_points ( [ (0,0), (0,self.image_size-1), (self.image_size-1, self.image_size-1), (self.image_size-1,0) ], image_to_face_mat, True)
125+
126+
rect_area = mathlib.polygon_area(np.array(rect[[0,2,2,0]]), np.array(rect[[1,1,3,3]]))
127+
landmarks_area = mathlib.polygon_area(landmarks_bbox[:,0], landmarks_bbox[:,1] )
128+
129+
if landmarks_area > 4*rect_area: #get rid of faces which umeyama-landmark-area > 4*detector-rect-area
130+
continue
131+
132+
if self.debug:
133+
LandmarksProcessor.draw_rect_landmarks (debug_image, rect, image_landmarks, self.image_size, self.face_type)
134+
135+
output_file = '{}_{}{}'.format(str(self.output_path / filename_path.stem), str(face_idx), '.jpg')
136+
face_idx += 1
123137

124138
if src_dflimg is not None:
125139
#if extracting from dflimg just copy it in order not to lose quality
@@ -199,21 +213,21 @@ def get_devices_for_type (self, type, multi_gpu, cpu_only):
199213
cpu_only = True
200214

201215
if not cpu_only and (type == 'rects' or type == 'landmarks'):
202-
if type == 'rects' and self.detector == 'mt' and nnlib.device.backend == "plaidML":
216+
if type == 'rects' and (self.detector == 'mt' or self.detector == 's3fd') and nnlib.device.backend == "plaidML":
203217
cpu_only = True
204218
else:
205219
if multi_gpu:
206220
devices = nnlib.device.getValidDevicesWithAtLeastTotalMemoryGB(2)
207221
if not multi_gpu or len(devices) == 0:
208-
devices = [nnlib.device.getBestValidDeviceIdx()]
222+
devices = [nnlib.device.getBestValidDeviceIdx()]
209223
if len(devices) == 0:
210224
devices = [0]
211225

212226
for idx in devices:
213227
dev_name = nnlib.device.getDeviceName(idx)
214228
dev_vram = nnlib.device.getDeviceVRAMTotalGb(idx)
215229

216-
if not self.manual and ( (self.type == 'rects') ):
230+
if not self.manual and ( self.type == 'rects' and self.detector != 's3fd' ):
217231
for i in range ( int (max (1, dev_vram / 2) ) ):
218232
yield (idx, 'GPU', '%s #%d' % (dev_name,i) , dev_vram)
219233
else:

mathlib/__init__.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,7 @@ def rotationMatrixToEulerAngles(R) :
1919
x = math.atan2(-R[1,2], R[1,1])
2020
y = math.atan2(-R[2,0], sy)
2121
z = 0
22-
return np.array([x, y, z])
22+
return np.array([x, y, z])
23+
24+
def polygon_area(x,y):
    """Return the area of a polygon using the shoelace formula.

    Args:
        x: sequence of vertex x coordinates, in traversal order.
        y: sequence of vertex y coordinates, same order and length as ``x``.

    Returns:
        The absolute area, so the result does not depend on whether the
        vertices are listed clockwise or counter-clockwise.
    """
    # Rolling by one pairs every vertex with its predecessor (wrapping around).
    prev_y = np.roll(y, 1)
    prev_x = np.roll(x, 1)
    signed_twice_area = np.dot(x, prev_y) - np.dot(y, prev_x)
    return 0.5 * np.abs(signed_twice_area)

0 commit comments

Comments
 (0)