forked from microsoft/CNTK
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
216 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
215 changes: 215 additions & 0 deletions
215
Tutorials/CNTK_208_Speech_Connectionist_Temporal_Classification.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,215 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Training Acoustic Model with Connectionist Temporal Classification (CTC) Criteria\n", | ||
"The CNTK implementation of CTC is *parallel* and is based on the paper by A. Graves et al., *\"Connectionist temporal classification: labeling unsegmented sequence data with recurrent neural networks\"*. Readers are expected to be familiar with the content and notation from the paper.\n", | ||
"\n", | ||
"## Data Preparation\n", | ||
"CNTK consumes Acoustic Model (AM) training data in HTK/MLF format and typically expects 3 input files:\n", | ||
"* [SCP file with features](https://github.com/Microsoft/CNTK/blob/master/Tests/EndToEndTests/Speech/Data/glob_0000.scp)\n", | ||
"* [MLF file with labels](https://github.com/Microsoft/CNTK/blob/master/Tests/EndToEndTests/Speech/Data/glob_0000.mlf)\n", | ||
"* [States list file](https://github.com/Microsoft/CNTK/blob/master/Tests/EndToEndTests/Speech/Data/state_ctc.list)\n", | ||
"\n", | ||
"The example state list file contains the CTC blank label \"s_blank\" as the last entry, i.e. at index 132.\n", | ||
"\n", | ||
"## Feature Input Definition\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"source": [ | ||
"## Normalize Features and Define a Network with LSTM Layers" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Current directory D:\\CNTK\\CNTK\\Tutorials\n", | ||
"Changing to data directory ..\\Tests\\EndToEndTests\\Speech\\Data\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"import os\n", | ||
"import cntk as C\n", | ||
"import numpy as np\n", | ||
"\n", | ||
"\n", | ||
"# Select the right target device\n", | ||
"if 'TEST_DEVICE' in os.environ:\n", | ||
" if os.environ['TEST_DEVICE'] == 'cpu':\n", | ||
" C.device.try_set_default_device(C.device.cpu())\n", | ||
" else:\n", | ||
" C.device.try_set_default_device(C.device.gpu(0))\n", | ||
"\n", | ||
"C.device.try_set_default_device(C.device.gpu(0))\n", | ||
"\n", | ||
"data_dir = os.path.join(\"..\", \"Tests\", \"EndToEndTests\", \"Speech\", \"Data\")\n", | ||
"print(\"Current directory {0}\".format(os.getcwd()))\n", | ||
"if os.path.realpath(data_dir) != os.path.realpath(os.getcwd()):\n", | ||
" print(\"Changing to data directory {0}\".format(data_dir))\n", | ||
" os.chdir(data_dir)\n", | ||
"\n", | ||
"feature_dimension = 33\n", | ||
"feature = C.sequence.input((feature_dimension))\n", | ||
"\n", | ||
"label_dimension = 133\n", | ||
"label = C.sequence.input((label_dimension))\n", | ||
"\n", | ||
"train_feature_filepath = \"glob_0000.scp\"\n", | ||
"train_label_filepath = \"glob_0000.mlf\"\n", | ||
"mapping_filepath = \"state_ctc.list\"\n", | ||
"train_feature_stream = C.io.HTKFeatureDeserializer(C.io.StreamDefs(amazing_feature = C.io.StreamDef(shape = feature_dimension, scp = train_feature_filepath)))\n", | ||
"train_label_stream = C.io.HTKMLFDeserializer(mapping_filepath, C.io.StreamDefs(awesome_label = C.io.StreamDef(shape = label_dimension, mlf = train_label_filepath)), True)\n", | ||
"train_data_reader = C.io.MinibatchSource([train_feature_stream, train_label_stream], frame_mode = False)\n", | ||
"train_input_map = {feature: train_data_reader.streams.amazing_feature, label: train_data_reader.streams.awesome_label}\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"feature_mean = np.fromfile(os.path.join(\"GlobalStats\", \"mean.363\"), dtype=float, count=feature_dimension)\n", | ||
"feature_inverse_stddev = np.fromfile(os.path.join(\"GlobalStats\", \"var.363\"), dtype=float, count=feature_dimension)\n", | ||
"\n", | ||
"feature_normalized = (feature - feature_mean) * feature_inverse_stddev\n", | ||
"\n", | ||
"with C.default_options(activation=C.sigmoid):\n", | ||
"\tz = C.layers.Sequential([\n", | ||
" C.layers.For(range(3), lambda: C.layers.Recurrence(C.layers.LSTM(1024))),\n", | ||
" C.layers.Dense(label_dimension)\n", | ||
" ])(feature_normalized)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"source": [ | ||
"## Define Training Parameters, Criteria and Error\n", | ||
"The CTC criterion is implemented by combining the **labels_to_graph** and **forward_backward** functions." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"mbsize = 1024\n", | ||
"mbs_per_epoch = 10\n", | ||
"max_epochs = 10\n", | ||
"\n", | ||
"criteria = C.forward_backward(C.labels_to_graph(label), z, blankTokenId=132, delayConstraint=3)\n", | ||
"err = C.edit_distance_error(z, label, squashInputs=True, tokensToIgnore=[132])\n", | ||
"lr = C.learning_rate_schedule(.01, C.UnitType.minibatch)\n", | ||
"mm = C.momentum_schedule([(10, 0.9), (0, 0.99)], mbsize)\n", | ||
"learner = C.momentum_sgd(z.parameters, lr, mm)\n", | ||
"trainer = C.Trainer(z, (criteria, err), learner)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Train and Save the Model" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"Training 21255301 parameters in 11 parameter tensors.\n" | ||
] | ||
}, | ||
{ | ||
"ename": "RuntimeError", | ||
"evalue": "SetDataLocation [CPUMatrix]: Cannot migrate the matrix between devices because it is a view.\n\n[CALL STACK]\n > Microsoft::MSR::CNTK::Matrix<char>:: SetDataLocation\n - Microsoft::MSR::CNTK::Matrix<float>:: _transferFromDeviceToDevice\n - Microsoft::MSR::CNTK::Matrix<float>:: _transferToDevice\n - Microsoft::MSR::CNTK::Matrix<float>::DecideAndMoveToRightDevice<float> \n - Microsoft::MSR::CNTK::Matrix<float>:: TensorOp\n - Microsoft::MSR::CNTK::TensorView<float>:: DoUnaryOpOf\n - CNTK::Accumulator:: Update\n - CNTK::Trainer:: UpdateTrainingProgress\n - CNTK::Trainer:: TrainMinibatch (x2)\n - _wrap_Trainer_train_minibatch_overload_for_minibatchdata__SWIG_0\n - _wrap_Trainer_train_minibatch_overload_for_minibatchdata\n - PyCFunction_Call\n - PyEval_GetFuncDesc\n - PyEval_EvalFrameEx (x2)\n\n", | ||
"output_type": "error", | ||
"traceback": [ | ||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", | ||
"\u001b[1;31mRuntimeError\u001b[0m Traceback (most recent call last)", | ||
"\u001b[1;32m<ipython-input-4-26e4acf630ec>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mmb\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmbs_per_epoch\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[0mminibatch\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtrain_data_reader\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnext_minibatch\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmbsize\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minput_map\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtrain_input_map\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 7\u001b[1;33m \u001b[0mtrainer\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtrain_minibatch\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mminibatch\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 8\u001b[0m \u001b[0mprogress_printer\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mupdate_with_trainer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtrainer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mwith_metric\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 9\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", | ||
"\u001b[1;32mD:\\CNTK\\CNTK\\bindings\\python\\cntk\\train\\trainer.py\u001b[0m in \u001b[0;36mtrain_minibatch\u001b[1;34m(self, arguments, outputs, device)\u001b[0m\n\u001b[0;32m 166\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mcontains_minibatch_data\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 167\u001b[0m updated = super(Trainer, self).train_minibatch_overload_for_minibatchdata(\n\u001b[1;32m--> 168\u001b[1;33m arguments, device)\n\u001b[0m\u001b[0;32m 169\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 170\u001b[0m updated = super(Trainer, self).train_minibatch(arguments,\n", | ||
"\u001b[1;32mD:\\CNTK\\CNTK\\bindings\\python\\cntk\\cntk_py.py\u001b[0m in \u001b[0;36mtrain_minibatch_overload_for_minibatchdata\u001b[1;34m(self, *args)\u001b[0m\n\u001b[0;32m 2416\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2417\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mtrain_minibatch_overload_for_minibatchdata\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2418\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0m_cntk_py\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTrainer_train_minibatch_overload_for_minibatchdata\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2419\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2420\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mtrain_minibatch\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", | ||
"\u001b[1;31mRuntimeError\u001b[0m: SetDataLocation [CPUMatrix]: Cannot migrate the matrix between devices because it is a view.\n\n[CALL STACK]\n > Microsoft::MSR::CNTK::Matrix<char>:: SetDataLocation\n - Microsoft::MSR::CNTK::Matrix<float>:: _transferFromDeviceToDevice\n - Microsoft::MSR::CNTK::Matrix<float>:: _transferToDevice\n - Microsoft::MSR::CNTK::Matrix<float>::DecideAndMoveToRightDevice<float> \n - Microsoft::MSR::CNTK::Matrix<float>:: TensorOp\n - Microsoft::MSR::CNTK::TensorView<float>:: DoUnaryOpOf\n - CNTK::Accumulator:: Update\n - CNTK::Trainer:: UpdateTrainingProgress\n - CNTK::Trainer:: TrainMinibatch (x2)\n - _wrap_Trainer_train_minibatch_overload_for_minibatchdata__SWIG_0\n - _wrap_Trainer_train_minibatch_overload_for_minibatchdata\n - PyCFunction_Call\n - PyEval_GetFuncDesc\n - PyEval_EvalFrameEx (x2)\n\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"C.logging.log_number_of_parameters(z)\n", | ||
"progress_printer = C.logging.progress_print.ProgressPrinter(tag='Training', num_epochs = max_epochs)\n", | ||
"\n", | ||
"for epoch in range(max_epochs):\n", | ||
"\tfor mb in range(mbs_per_epoch):\n", | ||
"\t\tminibatch = train_data_reader.next_minibatch(mbsize, input_map = train_input_map)\n", | ||
"\t\ttrainer.train_minibatch(minibatch)\n", | ||
"\t\tprogress_printer.update_with_trainer(trainer, with_metric = True)\n", | ||
"\n", | ||
"\tprint('Trained on a total of ' + str(trainer.total_number_of_samples_seen) + ' frames')\n", | ||
"\tprogress_printer.epoch_summary(with_metric = True)\n", | ||
"\n", | ||
"z.save('CTC_' + str(max_epochs) + 'epochs_' + str(mbsize) + 'mbsize_' + str(mbs_per_epoch) + 'mbs.model')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"anaconda-cloud": {}, | ||
"kernelspec": { | ||
"display_name": "Python [cntk-py35]", | ||
"language": "python", | ||
"name": "Python [cntk-py35]" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.5.2" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 1 | ||
} |