# Chapter07/DS_test.py

import json
import os
import math
import time
import argparse
from datetime import datetime
import deepSpeech
import numpy as np
import tensorflow as tf
from Levenshtein import distance

# Character set of the transcripts. It must stay identical to the ALPHABET
# chosen in preprocess_Librispeech.py.
ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZ' "
# Lookup table from an integer label index to its character in ALPHABET.
IX_TO_CHAR = dict(enumerate(ALPHABET))


def _str_to_bool(value):
    """ Convert a command line string such as 'true' or '0' to a bool.

    argparse's ``type=bool`` cannot be used for this: ``bool('False')`` is
    True because every non-empty string is truthy.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0', ''):
        return False
    raise argparse.ArgumentTypeError(
        'Expected a boolean value, got %r' % (value,))


def parse_args():
    """ Parses command line arguments.

    The network architecture parameters are not command line options; they
    are read from 'deepSpeech_parameters.json' inside --checkpoint_dir and
    attached to the returned namespace.

    Returns:
        argparse.Namespace holding the command line options plus
        num_hidden, num_rnn_layers, rnn_type, num_filters, use_fp16,
        temporal_stride and moving_avg_decay.
    Raises:
        IOError/OSError: if the parameter file cannot be opened.
        KeyError: if the parameter file lacks one of the expected keys.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--eval_dir', type=str,
                        default='../models/librispeech/eval',
                        help='Directory to write event logs')
    parser.add_argument('--checkpoint_dir', type=str,
                        default='../models/librispeech/train',
                        help='Directory where to read model checkpoints.')
    parser.add_argument('--eval_data', type=str, default='val',
                        help="Either 'test' or 'val' or 'train' ")
    parser.add_argument('--batch_size', type=int, default=32,
                        help='Number of feats to process in a batch')
    parser.add_argument('--eval_interval_secs', type=int, default=60 * 5,
                        help='How often to run the eval')
    parser.add_argument('--data_dir', type=str,
                        default='../data/librispeech/processed/',
                        help='Path to the deepSpeech data directory')
    # Accepts '--run_once', '--run_once True' and '--run_once False'; the
    # explicit converter makes the last form actually yield False.
    parser.add_argument('--run_once', type=_str_to_bool, nargs='?',
                        const=True, default=False,
                        help='Whether to run eval only once')
    args = parser.parse_args()

    # Read network architecture parameters from the previously saved
    # parameter file.
    param_file = os.path.join(args.checkpoint_dir,
                              'deepSpeech_parameters.json')
    with open(param_file, 'r') as file:
        params = json.load(file)
    for key in ('num_hidden', 'num_rnn_layers', 'rnn_type', 'num_filters',
                'use_fp16', 'temporal_stride', 'moving_avg_decay'):
        setattr(args, key, params[key])
    return args


def sparse_to_labels(sparse_matrix):
    """ Decode a sparse matrix of character indices into strings.

    Args:
        sparse_matrix: object exposing `dense_shape`, `indices` and
            `values`. Column 0 of `indices` selects the output row and
            each value is translated through IX_TO_CHAR.
    Returns:
        A list of `dense_shape[0]` strings, one per row.
    """
    rows = [[] for _ in range(sparse_matrix.dense_shape[0])]
    for pos, char_ix in enumerate(sparse_matrix.values.tolist()):
        row = sparse_matrix.indices[pos, 0]
        rows[row].append(IX_TO_CHAR[char_ix])
    return [''.join(chars) for chars in rows]


def initialize_from_checkpoint(sess, saver):
    """ Restore the graph variables from a checkpoint file.

    Args:
        sess: session in which the variables are restored.
        saver: saver used to perform the restore.
    Returns:
        The global step taken from the checkpoint file name (a string),
        or None when no checkpoint is found in ARGS.checkpoint_dir.
    """
    state = tf.train.get_checkpoint_state(ARGS.checkpoint_dir)
    if not (state and state.model_checkpoint_path):
        print('No checkpoint file found')
        return None

    latest_path = state.model_checkpoint_path
    saver.restore(sess, latest_path)
    # The path is assumed to look like
    #   /my-favorite-path/train/model.ckpt-0,
    # so the global step is whatever follows the last '-' of the file name.
    file_name = latest_path.rsplit('/', 1)[-1]
    return file_name.rsplit('-', 1)[-1]


def inference(predictions_op, true_labels_op, display, sess):
    """ Perform inference per batch on pre-trained model.
    This function performs inference and computes the CER per utterance.
    Args:
        predictions_op: Prediction op
        true_labels_op: True Labels op
        display: print sample predictions if True
        sess: default session to evaluate the ops.
    Returns:
        char_err_rate: list of CER per utterance. Each entry is the edit
            distance between reference and prediction divided by the
            reference length; for an empty reference the divisor is 1.
    """
    # Perform inference of batch worth of data at a time.
    predictions, true_labels = sess.run([predictions_op, true_labels_op])
    # predictions[0][0] is taken to be the decoded sparse matrix of the
    # decoder output -- TODO confirm against the op passed by the caller.
    pred_label = sparse_to_labels(predictions[0][0])
    actual_label = sparse_to_labels(true_labels)

    char_err_rate = []
    for label, pred in zip(actual_label, pred_label):
        # Guard against an empty reference transcript, which would
        # otherwise raise ZeroDivisionError.
        char_err_rate.append(distance(label, pred) / max(len(label), 1))

    if display:
        # Print sample responses. Iterate over the labels that were
        # actually returned instead of assuming ARGS.batch_size of them.
        for label, pred in zip(actual_label, pred_label):
            print(label + ' vs ' + pred)
    return char_err_rate


def eval_once(saver, summary_writer, predictions_op, summary_op,
              true_labels_op):
    """Run Eval once.

    Restores the model from the checkpoint, runs inference over the chosen
    data split batch by batch, prints the mean character error rate and
    writes it, together with the merged summaries, to the summary writer.
    If no checkpoint is found the evaluation is skipped.

    Args:
      saver: Saver.
      summary_writer: Summary writer.
      predictions_op: Op to compute predictions.
      summary_op: Summary op.
      true_labels_op: Op yielding the true labels.

    Raises:
      ValueError: if ARGS.eval_data is not 'train', 'val' or 'test'.
    """
    # Number of utterances evaluated per split. Only a subset of the
    # training data is used.
    examples_per_split = {'train': 2048, 'val': 2703, 'test': 2620}
    if ARGS.eval_data not in examples_per_split:
        raise ValueError("eval_data must be 'test', 'val' or 'train', "
                         "got %r" % (ARGS.eval_data,))
    num_examples = examples_per_split[ARGS.eval_data]
    num_iter = int(math.ceil(num_examples / ARGS.batch_size))

    with tf.Session() as sess:

        # Initialize weights from checkpoint file.
        global_step = initialize_from_checkpoint(sess, saver)
        if global_step is None:
            # Nothing was restored, so the variables are uninitialised and
            # running the ops would fail; skip this evaluation round.
            return

        # Start the queue runners.
        coord = tf.train.Coordinator()
        threads = []
        try:
            for queue_runners in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
                threads.extend(queue_runners.create_threads(sess, coord=coord,
                                                            daemon=True,
                                                            start=True))
            step = 0
            char_err_rate = []
            while step < num_iter and not coord.should_stop():
                # Flatten the per-utterance rates so the mean is taken over
                # utterances even if batches differ in size. Sample
                # predictions are displayed for the first batch only.
                char_err_rate.extend(inference(predictions_op, true_labels_op,
                                               step == 0, sess))
                step += 1

            # Compute and print mean CER
            avg_cer = np.mean(char_err_rate)*100
            print('%s: char_err_rate = %.3f %%' % (datetime.now(), avg_cer))

            # Add summary ops
            summary = tf.Summary()
            summary.ParseFromString(sess.run(summary_op))
            summary.value.add(tag='char_err_rate', simple_value=avg_cer)
            summary_writer.add_summary(summary, global_step)
        except Exception as exc:  # pylint: disable=broad-except
            # Hand the error to the coordinator so the queue threads are
            # asked to stop.
            coord.request_stop(exc)

        # Close threads
        coord.request_stop()
        coord.join(threads, stop_grace_period_secs=10)


def evaluate():
    """ Evaluate deepSpeech model for a number of steps.

    Builds the evaluation graph, then calls eval_once repeatedly, sleeping
    ARGS.eval_interval_secs between rounds, or just once when ARGS.run_once
    is set. The summary writer is closed when the loop ends for any reason.
    """

    with tf.Graph().as_default() as graph:

        # Get feats and labels for deepSpeech.
        feats, labels, seq_lens = deepSpeech.inputs(ARGS.eval_data,
                                                    data_dir=ARGS.data_dir,
                                                    batch_size=ARGS.batch_size,
                                                    use_fp16=ARGS.use_fp16,
                                                    shuffle=True)

        # Build ops that computes the logits predictions from the
        # inference model.
        ARGS.keep_prob = 1.0  # Disable dropout during testing.
        logits = deepSpeech.inference(feats, seq_lens, ARGS)

        # Calculate predictions.
        output_log_prob = tf.nn.log_softmax(logits)
        decoder = tf.nn.ctc_greedy_decoder
        # Sequence lengths are divided by the temporal stride before being
        # passed to the decoder.
        strided_seq_lens = tf.div(seq_lens, ARGS.temporal_stride)
        predictions = decoder(output_log_prob, strided_seq_lens)

        # Restore the moving average version of the learned variables for eval.
        variable_averages = tf.train.ExponentialMovingAverage(
            ARGS.moving_avg_decay)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(ARGS.eval_dir, graph)

        try:
            while True:
                eval_once(saver, summary_writer, predictions, summary_op,
                          labels)

                if ARGS.run_once:
                    break
                time.sleep(ARGS.eval_interval_secs)
        finally:
            # Flush pending events to disk and release the event file, also
            # when the loop is left through an exception or interrupt.
            summary_writer.close()


def main():
    """
    Recreate the eval directory from scratch, then evaluate the
    checkpointed model.
    """
    eval_dir = ARGS.eval_dir
    # Remove any existing eval directory before creating a fresh one.
    if tf.gfile.Exists(eval_dir):
        tf.gfile.DeleteRecursively(eval_dir)
    tf.gfile.MakeDirs(eval_dir)
    evaluate()


# Script entry point. ARGS is bound at module level because the functions
# above read the parsed options through this global.
if __name__ == '__main__':
    ARGS = parse_args()
    main()