Commit 674756d

CTC tutorial

vmazalov committed Jun 26, 2017
1 parent 5394b76 commit 674756d
Showing 2 changed files with 216 additions and 0 deletions.
1 change: 1 addition & 0 deletions Source/ComputationNetworkLib/EvaluationNodes.h
@@ -512,6 +512,7 @@ class EditDistanceErrorNode : public ComputationNodeNonLooping/*ComputationNode*
MaskMissingColumnsToZero(*m_maxIndexes0, Input(0)->GetMBLayout(), frameRange);
MaskMissingColumnsToZero(*m_maxIndexes1, Input(1)->GetMBLayout(), frameRange);
Value()(0, 0) = ComputeEditDistanceError(*m_maxIndexes0, *m_maxIndexes1, Input(0)->GetMBLayout(), m_subPen, m_delPen, m_insPen, m_squashInputs, m_tokensToIgnore);
Value().TransferToDeviceIfNotThere(Input(0)->GetDeviceId());
}

virtual void Validate(bool isFinalValidationPass) override
215 changes: 215 additions & 0 deletions Tutorials/CNTK_208_Speech_Connectionist_Temporal_Classification.ipynb
@@ -0,0 +1,215 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Training Acoustic Model with Connectionist Temporal Classification (CTC) Criteria\n",
"CNTK implementation of CTC is *parallel* and is based on the paper by A. Graves et al. *\"Connectionist temporal classification: labeling unsegmented sequence data with recurrent neural networks\"*. Readers are expected to be familiar with the content and notation from the paper.\n",
"\n",
"## Data Preparation\n",
"CNTK consumes Acoustic Model (AM) training data in HTK/MLF format and typically expects 3 input files\n",
"* [SCP file with features](https://github.com/Microsoft/CNTK/blob/master/Tests/EndToEndTests/Speech/Data/glob_0000.scp)\n",
"* [MLF file with labels](https://github.com/Microsoft/CNTK/blob/master/Tests/EndToEndTests/Speech/Data/glob_0000.mlf)\n",
"* [States list file](https://github.com/Microsoft/CNTK/blob/master/Tests/EndToEndTests/Speech/Data/state_ctc.list)\n",
"\n",
"The example state list file contains the CTC blank label \"s_blank\" as the last entry, i.e. at index 132.\n",
"\n",
"## Feature Input Definition\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Current directory D:\\CNTK\\CNTK\\Tutorials\n",
"Changing to data directory ..\\Tests\\EndToEndTests\\Speech\\Data\n"
]
}
],
"source": [
"import os\n",
"import cntk as C\n",
"import numpy as np\n",
"\n",
"\n",
"# Select the right target device\n",
"if 'TEST_DEVICE' in os.environ:\n",
" if os.environ['TEST_DEVICE'] == 'cpu':\n",
" C.device.try_set_default_device(C.device.cpu())\n",
" else:\n",
" C.device.try_set_default_device(C.device.gpu(0))\n",
"\n",
"C.device.try_set_default_device(C.device.gpu(0))\n",
"\n",
"data_dir = os.path.join(\"..\", \"Tests\", \"EndToEndTests\", \"Speech\", \"Data\")\n",
"print(\"Current directory {0}\".format(os.getcwd()))\n",
"if os.path.realpath(data_dir) != os.path.realpath(os.getcwd()):\n",
" print(\"Changing to data directory {0}\".format(data_dir))\n",
" os.chdir(data_dir)\n",
"\n",
"feature_dimension = 33\n",
"feature = C.sequence.input((feature_dimension))\n",
"\n",
"label_dimension = 133\n",
"label = C.sequence.input((label_dimension))\n",
"\n",
"train_feature_filepath = \"glob_0000.scp\"\n",
"train_label_filepath = \"glob_0000.mlf\"\n",
"mapping_filepath = \"state_ctc.list\"\n",
"train_feature_stream = C.io.HTKFeatureDeserializer(C.io.StreamDefs(amazing_feature = C.io.StreamDef(shape = feature_dimension, scp = train_feature_filepath)))\n",
"train_label_stream = C.io.HTKMLFDeserializer(mapping_filepath, C.io.StreamDefs(awesome_label = C.io.StreamDef(shape = label_dimension, mlf = train_label_filepath)), True)\n",
"train_data_reader = C.io.MinibatchSource([train_feature_stream, train_label_stream], frame_mode = False)\n",
"train_input_map = {feature: train_data_reader.streams.amazing_feature, label: train_data_reader.streams.awesome_label}\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Normalize Features and Define a Network with LSTM Layers"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"feature_mean = np.fromfile(os.path.join(\"GlobalStats\", \"mean.363\"), dtype=float, count=feature_dimension)\n",
"feature_inverse_stddev = np.fromfile(os.path.join(\"GlobalStats\", \"var.363\"), dtype=float, count=feature_dimension)\n",
"\n",
"feature_normalized = (feature - feature_mean) * feature_inverse_stddev\n",
"\n",
"with C.default_options(activation=C.sigmoid):\n",
"\tz = C.layers.Sequential([\n",
" C.layers.For(range(3), lambda: C.layers.Recurrence(C.layers.LSTM(1024))),\n",
" C.layers.Dense(label_dimension)\n",
" ])(feature_normalized)"
]
},
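{
"cell_type": "markdown",
"metadata": {},
"source": [
"As an optional sanity check (illustrative only, not part of the original recipe), the composed model should map each 33-dimensional feature frame to a 133-dimensional score vector, one score per state including the blank:\n",
"```python\n",
"# Illustrative check: per-frame output dimension and parameter tensor count.\n",
"print(z.output.shape)     # expected: (133,)\n",
"print(len(z.parameters))  # expected: 11 parameter tensors for this network\n",
"```"
]
},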
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"## Define Training Parameters, Criteria and Error\n",
"CTC criteria is implemented by combination of the **labels_to_graph** and **forward_backward** functions."
]
},
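{
"cell_type": "markdown",
"metadata": {},
"source": [
"Assuming the state list file simply lists one state name per line (with \"s_blank\" as the last entry, as noted in the data preparation section), the blank id can be derived from the file itself. This is an illustrative sketch only, not part of the original recipe:\n",
"```python\n",
"# Hypothetical snippet: look up the blank label's index in the state list.\n",
"with open(\"state_ctc.list\") as f:\n",
"    states = [line.strip() for line in f if line.strip()]\n",
"blank_id = states.index(\"s_blank\")  # expected to be 132 (the last entry)\n",
"```"
]
},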
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"mbsize = 1024\n",
"mbs_per_epoch = 10\n",
"max_epochs = 10\n",
"\n",
"criteria = C.forward_backward(C.labels_to_graph(label), z, blankTokenId=132, delayConstraint=3)\n",
"err = C.edit_distance_error(z, label, squashInputs=True, tokensToIgnore=[132])\n",
"lr = C.learning_rate_schedule(.01, C.UnitType.minibatch)\n",
"mm = C.momentum_schedule([(10, 0.9), (0, 0.99)], mbsize)\n",
"learner = C.momentum_sgd(z.parameters, lr, mm)\n",
"trainer = C.Trainer(z, (criteria, err), learner)"
]
},
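{
"cell_type": "markdown",
"metadata": {},
"source": [
"For intuition, **edit_distance_error** above takes the per-frame arg-max of both inputs, merges consecutive repeats (`squashInputs=True`) and ignores the blank (`tokensToIgnore=[132]`), which roughly amounts to comparing best-path (greedy) CTC decodings. A minimal NumPy sketch of that decoding rule (`greedy_ctc_decode` is an illustrative helper, not a CNTK function):\n",
"```python\n",
"import numpy as np\n",
"\n",
"def greedy_ctc_decode(frame_scores, blank_id=132):\n",
"    # frame_scores: (num_frames, label_dimension) array of per-frame network outputs\n",
"    best_path = np.argmax(frame_scores, axis=1)\n",
"    decoded, previous = [], None\n",
"    for token in best_path:\n",
"        # keep a token only when it starts a new run and is not the blank\n",
"        if token != previous and token != blank_id:\n",
"            decoded.append(int(token))\n",
"        previous = token\n",
"    return decoded\n",
"```"
]
},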
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Train and Save the Model"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Training 21255301 parameters in 11 parameter tensors.\n"
]
},
{
"ename": "RuntimeError",
"evalue": "SetDataLocation [CPUMatrix]: Cannot migrate the matrix between devices because it is a view.\n\n[CALL STACK]\n > Microsoft::MSR::CNTK::Matrix<char>:: SetDataLocation\n - Microsoft::MSR::CNTK::Matrix<float>:: _transferFromDeviceToDevice\n - Microsoft::MSR::CNTK::Matrix<float>:: _transferToDevice\n - Microsoft::MSR::CNTK::Matrix<float>::DecideAndMoveToRightDevice<float> \n - Microsoft::MSR::CNTK::Matrix<float>:: TensorOp\n - Microsoft::MSR::CNTK::TensorView<float>:: DoUnaryOpOf\n - CNTK::Accumulator:: Update\n - CNTK::Trainer:: UpdateTrainingProgress\n - CNTK::Trainer:: TrainMinibatch (x2)\n - _wrap_Trainer_train_minibatch_overload_for_minibatchdata__SWIG_0\n - _wrap_Trainer_train_minibatch_overload_for_minibatchdata\n - PyCFunction_Call\n - PyEval_GetFuncDesc\n - PyEval_EvalFrameEx (x2)\n\n",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mRuntimeError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-4-26e4acf630ec>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mmb\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmbs_per_epoch\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[0mminibatch\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtrain_data_reader\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnext_minibatch\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmbsize\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minput_map\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtrain_input_map\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 7\u001b[1;33m \u001b[0mtrainer\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtrain_minibatch\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mminibatch\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 8\u001b[0m \u001b[0mprogress_printer\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mupdate_with_trainer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtrainer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mwith_metric\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mD:\\CNTK\\CNTK\\bindings\\python\\cntk\\train\\trainer.py\u001b[0m in \u001b[0;36mtrain_minibatch\u001b[1;34m(self, arguments, outputs, device)\u001b[0m\n\u001b[0;32m 166\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mcontains_minibatch_data\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 167\u001b[0m updated = super(Trainer, self).train_minibatch_overload_for_minibatchdata(\n\u001b[1;32m--> 168\u001b[1;33m arguments, device)\n\u001b[0m\u001b[0;32m 169\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 170\u001b[0m updated = super(Trainer, self).train_minibatch(arguments,\n",
"\u001b[1;32mD:\\CNTK\\CNTK\\bindings\\python\\cntk\\cntk_py.py\u001b[0m in \u001b[0;36mtrain_minibatch_overload_for_minibatchdata\u001b[1;34m(self, *args)\u001b[0m\n\u001b[0;32m 2416\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2417\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mtrain_minibatch_overload_for_minibatchdata\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2418\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0m_cntk_py\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTrainer_train_minibatch_overload_for_minibatchdata\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2419\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2420\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mtrain_minibatch\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mRuntimeError\u001b[0m: SetDataLocation [CPUMatrix]: Cannot migrate the matrix between devices because it is a view.\n\n[CALL STACK]\n > Microsoft::MSR::CNTK::Matrix<char>:: SetDataLocation\n - Microsoft::MSR::CNTK::Matrix<float>:: _transferFromDeviceToDevice\n - Microsoft::MSR::CNTK::Matrix<float>:: _transferToDevice\n - Microsoft::MSR::CNTK::Matrix<float>::DecideAndMoveToRightDevice<float> \n - Microsoft::MSR::CNTK::Matrix<float>:: TensorOp\n - Microsoft::MSR::CNTK::TensorView<float>:: DoUnaryOpOf\n - CNTK::Accumulator:: Update\n - CNTK::Trainer:: UpdateTrainingProgress\n - CNTK::Trainer:: TrainMinibatch (x2)\n - _wrap_Trainer_train_minibatch_overload_for_minibatchdata__SWIG_0\n - _wrap_Trainer_train_minibatch_overload_for_minibatchdata\n - PyCFunction_Call\n - PyEval_GetFuncDesc\n - PyEval_EvalFrameEx (x2)\n\n"
]
}
],
"source": [
"C.logging.log_number_of_parameters(z)\n",
"progress_printer = C.logging.progress_print.ProgressPrinter(tag='Training', num_epochs = max_epochs)\n",
"\n",
"for epoch in range(max_epochs):\n",
"\tfor mb in range(mbs_per_epoch):\n",
"\t\tminibatch = train_data_reader.next_minibatch(mbsize, input_map = train_input_map)\n",
"\t\ttrainer.train_minibatch(minibatch)\n",
"\t\tprogress_printer.update_with_trainer(trainer, with_metric = True)\n",
"\n",
"\tprint('Trained on a total of ' + str(trainer.total_number_of_samples_seen) + ' frames')\n",
"\tprogress_printer.epoch_summary(with_metric = True)\n",
"\n",
"z.save('CTC_' + str(max_epochs) + 'epochs_' + str(mbsize) + 'mbsize_' + str(mbs_per_epoch) + 'mbs.model')"
]
},
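{
"cell_type": "markdown",
"metadata": {},
"source": [
"Once training finishes, the saved model can be loaded back for evaluation or decoding. A minimal sketch, assuming the file name built above from `max_epochs=10`, `mbsize=1024` and `mbs_per_epoch=10`:\n",
"```python\n",
"# Illustrative only: reload the saved model for later use.\n",
"z_restored = C.load_model('CTC_10epochs_1024mbsize_10mbs.model')\n",
"```"
]
},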
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [cntk-py35]",
"language": "python",
"name": "Python [cntk-py35]"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 1
}
