Skip to content

Commit

Permalink
Updated training
Browse files Browse the repository at this point in the history
  • Loading branch information
AdamMiltonBarker committed Jan 28, 2018
1 parent b60ca19 commit 183d6ce
Show file tree
Hide file tree
Showing 3 changed files with 330 additions and 62 deletions.
291 changes: 288 additions & 3 deletions InceptionFlow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
# Label files for the object model. All paths below are built from
# os.getcwd(), so they depend on the directory the program is started from.
OBJECT_MODEL_LABELS_PATH = os.getcwd()+"/model/imagenet/imagenet_2012_challenge_label_map_proto.pbtxt"
OBJECT_MODEL_LABELSH_PATH = os.getcwd()+"/model/imagenet/imagenet_synset_to_human_label_map.txt"

# Facial model locations: the model directory, the graph file (.pb) and the
# matching labels file (.txt).
FACIAL_MODEL_DIR = os.getcwd()+"/model"
FACIAL_MODEL_PATH = os.getcwd()+"/model/InceptionFlow.pb"
FACIAL_MODEL_LABELS_PATH = os.getcwd()+"/model/InceptionFlow.txt"

Expand Down Expand Up @@ -164,6 +165,7 @@ class InceptionFlow():
def __init__(self):

self.confs = {}
self.bottleneck_path_2_bottleneck_values = {}

with open('data/confs.json') as confs:

Expand Down Expand Up @@ -419,7 +421,7 @@ def _progress(count, block_size, total_size):

tarfile.open(filepath, 'r:gz').extractall(dest_directory)

def create_image_lists(image_dir, testing_percentage, validation_percentage):
def create_image_lists(self, image_dir, testing_percentage, validation_percentage):

"""
Builds a list of training images from the file system.
Expand Down Expand Up @@ -533,7 +535,7 @@ def create_image_lists(image_dir, testing_percentage, validation_percentage):
}
return result

def get_image_path(image_lists, label_name, index, image_dir, category):
def get_image_path(self, image_lists, label_name, index, image_dir, category):

""""
Returns a path to an image for a label at the given index.
Expand Down Expand Up @@ -572,4 +574,287 @@ def get_image_path(image_lists, label_name, index, image_dir, category):
base_name = category_list[mod_index]
sub_dir = label_lists['dir']
full_path = os.path.join(image_dir, sub_dir, base_name)
return full_path
return full_path

def get_bottleneck_path(self, image_lists, label_name, index, bottleneck_dir, category, architecture):

    """
    Returns a path to a bottleneck file for a label at the given index.

    The path is the one get_image_path() produces when given bottleneck_dir
    as the root folder, with '_<architecture>.txt' appended.

    Args:
    image_lists: Dictionary of training images for each label.
    label_name: Label string we want to get an image for.
    index: Integer offset of the image we want. This will be moduloed by the
    available number of images for the label, so it can be arbitrarily large.
    bottleneck_dir: Folder string holding cached files of bottleneck values.
    category: Name string of set to pull images from - training, testing, or
    validation.
    architecture: The name of the model architecture.
    Returns:
    File system path string to the bottleneck file that meets the requested
    parameters.
    """

    # get_image_path is a method of this class, so it must be reached
    # through self; a bare call raises NameError.
    image_path = self.get_image_path(
        image_lists,
        label_name,
        index,
        bottleneck_dir,
        category)

    return image_path + '_' + architecture + '.txt'

def create_model_graph(self, model_info):

    """
    Loads a saved GraphDef file into a new Graph and returns it.

    Args:
    model_info: Dictionary describing the model architecture. The keys read
    here are 'model_file_name', 'bottleneck_tensor_name' and
    'resized_input_tensor_name'.
    Returns:
    Tuple of (graph, bottleneck_tensor, resized_input_tensor): the Graph the
    GraphDef was imported into and the two tensors looked up in it by name.
    """

    graph = tf.Graph()

    with graph.as_default():

        model_path = os.path.join(FACIAL_MODEL_DIR, model_info['model_file_name'])
        print('Model path: ', model_path)

        # Read the serialized graph first, then parse it once the file is closed.
        with gfile.FastGFile(model_path, 'rb') as model_file:
            serialized_graph = model_file.read()

        graph_def = tf.GraphDef()
        graph_def.ParseFromString(serialized_graph)

        # The import happens while `graph` is the default graph, so the
        # returned tensors belong to it.
        imported_tensors = tf.import_graph_def(
            graph_def,
            name='',
            return_elements=[
                model_info['bottleneck_tensor_name'],
                model_info['resized_input_tensor_name'],
            ])
        bottleneck_tensor, resized_input_tensor = imported_tensors

    return graph, bottleneck_tensor, resized_input_tensor

def run_bottleneck_on_image(self, sess, image_data, image_data_tensor,
                            decoded_image_tensor, resized_input_tensor,
                            bottleneck_tensor):

    """
    Extracts the 'bottleneck' summary layer values for a single image.

    Args:
    sess: Current active TensorFlow Session.
    image_data: String of raw JPEG data.
    image_data_tensor: Input data layer in the graph.
    decoded_image_tensor: Output of initial image resizing and preprocessing.
    resized_input_tensor: The input node of the recognition graph.
    bottleneck_tensor: Layer before the final softmax.
    Returns:
    Numpy array of bottleneck values with single-dimensional axes removed.
    """

    # Stage 1: feed the raw image data in and fetch the decoded / resized image.
    preprocessed_image = sess.run(
        decoded_image_tensor,
        {image_data_tensor: image_data})

    # Stage 2: feed that image to the recognition network and fetch the
    # bottleneck layer.
    raw_bottleneck = sess.run(
        bottleneck_tensor,
        {resized_input_tensor: preprocessed_image})

    return np.squeeze(raw_bottleneck)

def ensure_dir_exists(self, dir_name):

    """Makes sure the folder exists on disk.

    Creates the folder, including any missing parent folders, when the path
    does not exist yet. Nothing is done when the path already exists.

    Args:
    dir_name: Path string to the folder we want to create.
    Raises:
    OSError: If the folder could not be created and the path still does not
    exist afterwards.
    """

    if os.path.exists(dir_name):
        return

    try:
        os.makedirs(dir_name)
    except OSError:
        # Something else may have created the path between the check above
        # and makedirs(); only a path that is still missing is a real failure.
        if not os.path.exists(dir_name):
            raise

def create_bottleneck_file(self, bottleneck_path, image_lists, label_name, index,
                           image_dir, category, sess, jpeg_data_tensor,
                           decoded_image_tensor, resized_input_tensor,
                           bottleneck_tensor):

    """
    Create a single bottleneck file.

    Reads the image selected by (label_name, index, category), computes its
    bottleneck values and writes them to bottleneck_path as one line of
    comma-separated numbers.

    Args:
    bottleneck_path: Path string of the bottleneck file to write.
    image_lists: Dictionary of training images for each label.
    label_name: Label string we want to get an image for.
    index: Integer offset of the image we want.
    image_dir: Root folder string of the subfolders containing the training
    images.
    category: Name string of which set to pull images from - training, testing,
    or validation.
    sess: The current active TensorFlow Session.
    jpeg_data_tensor: The tensor to feed loaded jpeg data into.
    decoded_image_tensor: The output of decoding and resizing the image.
    resized_input_tensor: The input node of the recognition graph.
    bottleneck_tensor: The output tensor for the bottleneck values.
    Raises:
    RuntimeError: If computing the bottleneck values fails for any reason; the
    message carries the image path and the original error text.
    """

    tf.logging.info('Creating bottleneck at ' + bottleneck_path)
    image_path = self.get_image_path(
        image_lists,
        label_name,
        index,
        image_dir,
        category)

    if not gfile.Exists(image_path):

        # NOTE(review): this looks like it only logs; presumably the read
        # below is what actually fails for a missing file - confirm.
        tf.logging.fatal('File does not exist %s', image_path)

    # Read through a context manager so the file handle is always closed.
    with gfile.FastGFile(image_path, 'rb') as image_file:

        image_data = image_file.read()

    try:

        bottleneck_values = self.run_bottleneck_on_image(
            sess,
            image_data,
            jpeg_data_tensor,
            decoded_image_tensor,
            resized_input_tensor,
            bottleneck_tensor)

    except Exception as e:

        # Re-raise with the offending image path so the bad file can be found.
        raise RuntimeError('Error during processing file %s (%s)' % (image_path, str(e)))

    bottleneck_string = ','.join(str(x) for x in bottleneck_values)

    with open(bottleneck_path, 'w') as bottleneck_file:

        bottleneck_file.write(bottleneck_string)

def get_or_create_bottleneck(self, sess, image_lists, label_name, index, image_dir,
                             category, bottleneck_dir, jpeg_data_tensor,
                             decoded_image_tensor, resized_input_tensor,
                             bottleneck_tensor, architecture):

    """
    Retrieves or calculates bottleneck values for an image.
    If a cached version of the bottleneck data exists on-disk, return that,
    otherwise calculate the data and save it to disk for future use. A cached
    file that cannot be parsed as floats is recreated once and read again.
    Args:
    sess: The current active TensorFlow Session.
    image_lists: Dictionary of training images for each label.
    label_name: Label string we want to get an image for.
    index: Integer offset of the image we want. This will be modulo-ed by the
    available number of images for the label, so it can be arbitrarily large.
    image_dir: Root folder string of the subfolders containing the training
    images.
    category: Name string of which set to pull images from - training, testing,
    or validation.
    bottleneck_dir: Folder string holding cached files of bottleneck values.
    jpeg_data_tensor: The tensor to feed loaded jpeg data into.
    decoded_image_tensor: The output of decoding and resizing the image.
    resized_input_tensor: The input node of the recognition graph.
    bottleneck_tensor: The output tensor for the bottleneck values.
    architecture: The name of the model architecture.
    Returns:
    List of floats produced by the bottleneck layer for the image.
    Raises:
    ValueError: If the bottleneck file still cannot be parsed after it has
    been recreated.
    """

    label_lists = image_lists[label_name]
    sub_dir_path = os.path.join(bottleneck_dir, label_lists['dir'])
    self.ensure_dir_exists(sub_dir_path)
    bottleneck_path = self.get_bottleneck_path(
        image_lists,
        label_name,
        index,
        bottleneck_dir,
        category,
        architecture)

    def write_bottleneck_file():
        # (Re)compute the values for this image and store them on disk.
        self.create_bottleneck_file(
            bottleneck_path,
            image_lists,
            label_name,
            index,
            image_dir,
            category,
            sess,
            jpeg_data_tensor,
            decoded_image_tensor,
            resized_input_tensor,
            bottleneck_tensor)

    def read_bottleneck_values():
        # Parse the cached comma-separated values; ValueError on bad content.
        with open(bottleneck_path, 'r') as bottleneck_file:
            bottleneck_string = bottleneck_file.read()
        return [float(x) for x in bottleneck_string.split(',')]

    if not os.path.exists(bottleneck_path):

        write_bottleneck_file()

    try:

        return read_bottleneck_values()

    except ValueError:

        tf.logging.warning('Invalid float found, recreating bottleneck')

    write_bottleneck_file()

    # Allow exceptions to propagate here, since they shouldn't happen after a fresh creation
    return read_bottleneck_values()

def cache_bottlenecks(self, sess, image_lists, image_dir, bottleneck_dir,
                      jpeg_data_tensor, decoded_image_tensor,
                      resized_input_tensor, bottleneck_tensor, architecture):

    """
    Ensures all the training, testing, and validation bottlenecks are cached.
    Because we're likely to read the same image multiple times (if there are no
    distortions applied during training) it can speed things up a lot if we
    calculate the bottleneck layer values once for each image during
    preprocessing, and then just read those cached values repeatedly during
    training. Here we go through all the images we've found, calculate those
    values, and save them off.
    Args:
    sess: The current active TensorFlow Session.
    image_lists: Dictionary of training images for each label.
    image_dir: Root folder string of the subfolders containing the training
    images.
    bottleneck_dir: Folder string holding cached files of bottleneck values.
    jpeg_data_tensor: Input tensor for jpeg data from file.
    decoded_image_tensor: The output of decoding and resizing the image.
    resized_input_tensor: The input node of the recognition graph.
    bottleneck_tensor: The penultimate output layer of the graph.
    architecture: The name of the model architecture.
    Returns:
    Nothing.
    """

    how_many_bottlenecks = 0
    self.ensure_dir_exists(bottleneck_dir)

    for label_name, label_lists in image_lists.items():

        for category in ['training', 'testing', 'validation']:

            category_list = label_lists[category]

            # Only the position in the list is needed; the lookup by index
            # happens inside get_or_create_bottleneck.
            for index, unused_base_name in enumerate(category_list):

                self.get_or_create_bottleneck(
                    sess,
                    image_lists,
                    label_name,
                    index,
                    image_dir,
                    category,
                    bottleneck_dir,
                    jpeg_data_tensor,
                    decoded_image_tensor,
                    resized_input_tensor,
                    bottleneck_tensor,
                    architecture)

                how_many_bottlenecks += 1

                # Progress message every 100 processed images.
                if how_many_bottlenecks % 100 == 0:

                    tf.logging.info(
                        str(how_many_bottlenecks) + ' bottleneck files created.')
23 changes: 3 additions & 20 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,6 @@ Published: 38
```
InceptionFlow\InceptionFlow/data/captures/2018-01-28/13/00-07.jpg
TOP PREDICTIONS:
studio couch, day bed (score = 0.55105)
quilt, comforter, comfort, puff (score = 0.21434)
Expand All @@ -306,33 +305,28 @@ Published to Device Sensors Channel
Published: 32
InceptionFlow\InceptionFlow/data/captures/2018-01-28/13/00-15.jpg
TOP PREDICTIONS:
whippet (score = 0.28307)
studio couch, day bed (score = 0.23136)
quilt, comforter, comfort, puff (score = 0.20973)
Italian greyhound (score = 0.02032)
redbone (score = 0.01729)
NOTHING IDENTIFIED
InceptionFlow\InceptionFlow/data/captures/2018-01-28/13/00-24.jpg
TOP PREDICTIONS:
quilt, comforter, comfort, puff (score = 0.28841)
studio couch, day bed (score = 0.22828)
American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier (score = 0.10877)
boxer (score = 0.05448)
whippet (score = 0.04049)
NOTHING IDENTIFIED
InceptionFlow\InceptionFlow/data/captures/2018-01-28/13/00-32.jpg
TOP PREDICTIONS:
whippet (score = 0.82170)
studio couch, day bed (score = 0.11359)
Expand All @@ -347,24 +341,13 @@ Published to Device Sensors Channel
Published: 33
```

## Preparing Training Data For Your Neural Network
## Preparing Training Data For Your Facial Recognition Neural Network

Create 1 or more folders in the model/training/Facial directory. These folders represent classes, and there should be 1 folder per class (person). Name each folder using something that will allow you to identify who the photos are of; the program uses the name of the folder / class to let you know who it has detected. You can use names, user IDs or anything you like for the folder / class names, but bear in mind privacy. We have successfully tested with 30 training images per class, but your application may need more or fewer than this. You will need at least 2 classes to begin training.

## Training Your Neural Network

Now you have added your training data, you should train your neural network. Update lines 24 - 31 of InceptionFlow.py to look like the following:
## Training Your Facial Recognition Neural Network

```
#self.Mode =""
#self.Mode = "ObjectLocal"
#self.Mode = "ObjectCam"
self.Mode = "FacialLocal"
#self.Mode = "FacialCam"
self.Train=True
self.Test=False
```
Now that you have added your training data, you should train your neural network. Update data/confs.json -> ClassifierSettings -> MODE to FacialTrain; this will set the program to training mode.

Then execute the program to begin training:

Expand Down
Loading

0 comments on commit 183d6ce

Please sign in to comment.