-
Notifications
You must be signed in to change notification settings - Fork 0
/
encode_faces.py
113 lines (78 loc) · 3.13 KB
/
encode_faces.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import face_recognition
import pandas as pd
import numpy as np
import os
"""
Take a dataset of images and create a face encoding for each image using the
face_recognition library.
The dataset has the pipe-separated columns "path|name|dob|gender"; this script adds an "encoding" column.
"""
# Path to dataset
DATASET_PATH = r'./dataset.csv'

# Read the current dataset (pipe-separated; a 'path' column is required below)
df = pd.read_csv(DATASET_PATH, sep='|')

# Name of the directory (relative to the working directory) holding the
# encodings. Used both for the location files are written to and for the
# relative path recorded in the dataset, so the two cannot drift apart.
ENCODINGS_DIR_NAME = 'encodings'

# Directory to save the encodings to
SAVE_DIR_BASE = os.path.join(os.getcwd(), ENCODINGS_DIR_NAME)
# open(..., 'wb') does not create missing directories, so make sure the
# output directory exists before the first encoding is written.
os.makedirs(SAVE_DIR_BASE, exist_ok=True)

# Range of indexes in the dataset to create encodings for
START_INC = 0
STOP_EXC = 100000

# Number of rows between backup copies of the dataset.
# This is used in case an error occurs during the encoding process.
BACKUP_LENGTH = 500

# Size of the margin kept around the detected face, as a multiple of the
# face's height/width; half of it is added on each side.
BORDER_MULT = 1.5

# Make sure the output column exists so per-row assignments below land in
# the DataFrame itself and are included in every backup and in the final CSV.
if 'encoding' not in df.columns:
    df['encoding'] = None

# Iterate through each row in the dataset
for idx, row in df.iterrows():
    # Skip rows whose index is not in the requested range
    if not (START_INC <= idx < STOP_EXC):
        continue

    # Save a backup of the dataset every BACKUP_LENGTH rows
    if idx % BACKUP_LENGTH == 0:
        df.to_csv(f"./dataset-{START_INC}-{idx}.csv", sep='|', index=False)

    # Tell user what index is being processed
    print("Starting: #", idx)

    # Load image from row
    image = face_recognition.load_image_file(row['path'])

    # Get all face locations; each one is unpacked as (top, right, bottom, left)
    face_locations = face_recognition.face_locations(image)
    if not face_locations:
        # No face detected in this image: nothing to encode
        continue

    # Pick the largest face by bounding-box area (height * width).
    # max() returns the first of equally large faces, which is the same
    # choice a stable descending sort followed by [0] would make.
    top, right, bottom, left = max(
        face_locations,
        key=lambda box: (box[2] - box[0]) * (box[1] - box[3]),
    )

    # Margin to add on each side of the face
    vertical_border = int((bottom - top) * BORDER_MULT / 2)
    horizontal_border = int((right - left) * BORDER_MULT / 2)

    # Grow the box by the margin, clamped to the image (np.array.shape -> y, x)
    top = max(0, top - vertical_border)
    bottom = min(image.shape[0], bottom + vertical_border)
    left = max(0, left - horizontal_border)
    right = min(image.shape[1], right + horizontal_border)

    # Crop image to the largest face (plus margin)
    face_image = image[top:bottom, left:right]

    # Create encoding; the crop may still yield no encodings at all
    my_face_encodings = face_recognition.face_encodings(face_image)
    try:
        my_face_encoding = my_face_encodings[0]
    except IndexError:
        with open('error.log', 'a') as outfile:
            outfile.write(f"{row['path']} had no face encodings.\n")
        continue

    # We could use pickle... but efficiency :(
    # Build a flat file name from the part of the path after 'wiki' and
    # before '.jpg', replacing directory separators with underscores.
    filename = row['path'].partition('wiki')[-1].partition('.jpg')[0] + '.npy'
    filename = '/' + filename.replace('/', '_')
    path_to_open = SAVE_DIR_BASE + filename

    # Save the file
    print(f"Saving to {filename}")
    with open(path_to_open, 'wb') as outfile:
        np.save(outfile, my_face_encoding)

    # Record where the encoding was stored. df.at writes into the DataFrame
    # itself; chained indexing (df.iloc[idx]['encoding'] = ...) would assign
    # to a temporary row copy and the value would be lost.
    df.at[idx, 'encoding'] = f"./{ENCODINGS_DIR_NAME}{filename}"

    print()
    print(' -------------------------------- ')
    print()

# Save the completed dataset
df.to_csv(f"./dataset-{START_INC}-DONE.csv", sep='|', index=False)
print("Done!")