From b0c3c131d68d322bf667d223b18f99092f3bbb5a Mon Sep 17 00:00:00 2001 From: Jon Dehdari Date: Tue, 26 Jan 2016 00:08:35 +0100 Subject: [PATCH] spellcheck CNTK-TechReport --- .../lyx/CNTKBook_ASRDecoder_Chapter.lyx | 8 ++--- .../lyx/CNTKBook_CNTK_Adv_Chapter.lyx | 10 +++--- .../lyx/CNTKBook_CNTK_Chapter.lyx | 36 +++++++++---------- .../lyx/CNTKBook_CNTK_Programmer_Chapter.lyx | 2 +- .../lyx/CNTKBook_CN_Chapter.lyx | 16 ++++----- .../lyx/CNTKBook_ExampleSetup_Chapter.lyx | 34 +++++++++--------- 6 files changed, 53 insertions(+), 53 deletions(-) diff --git a/Documentation/CNTK-TechReport/lyx/CNTKBook_ASRDecoder_Chapter.lyx b/Documentation/CNTK-TechReport/lyx/CNTKBook_ASRDecoder_Chapter.lyx index eb02b6818ca5..8c9af1f69964 100644 --- a/Documentation/CNTK-TechReport/lyx/CNTKBook_ASRDecoder_Chapter.lyx +++ b/Documentation/CNTK-TechReport/lyx/CNTKBook_ASRDecoder_Chapter.lyx @@ -743,7 +743,7 @@ To build the TIMIT graph, only three input files are needed: the model state \end_layout \begin_layout Standard -The scripts assume each context-indepenent phone is represented by a three +The scripts assume each context-independent phone is represented by a three state, left to right, hidden markov model. The names of these states should be in a \begin_inset Quotes eld @@ -756,7 +756,7 @@ model state map file that has one line for every model. The first column is the name of the model, and subsequent columns are the names of the states, in left to right order. - The transition probabilites between these states are stored in a separate + The transition probabilities between these states are stored in a separate \begin_inset Quotes eld \end_inset @@ -859,7 +859,7 @@ To decode, the following parameters to Argon should be specified: -graph, The decoder uses a Viterbi beam search algorithm, in which unlikely hypotheses are pruned at each frame. The -beam parameter prevents unlikely hypotheses from being pursued. 
- Any hypothesis that differes from the best hypothesis by more than this + Any hypothesis that differs from the best hypothesis by more than this amount will be be discarded. The -max-tokens parameter controls the number of active hypotheses. If the -beam parameter causes more than max-tokens hypotheses to be generated, @@ -872,7 +872,7 @@ The decoder uses a Viterbi beam search algorithm, in which unlikely hypotheses \begin_layout Standard The -graph parameter tells Argon which compiled decoding graph should be used. - The -lm should indicate an ARPA format ngram languag emodel. + The -lm should indicate an ARPA format ngram language model. \end_layout \begin_layout Standard diff --git a/Documentation/CNTK-TechReport/lyx/CNTKBook_CNTK_Adv_Chapter.lyx b/Documentation/CNTK-TechReport/lyx/CNTKBook_CNTK_Adv_Chapter.lyx index f5bcedbfaa35..5f783a7452aa 100644 --- a/Documentation/CNTK-TechReport/lyx/CNTKBook_CNTK_Adv_Chapter.lyx +++ b/Documentation/CNTK-TechReport/lyx/CNTKBook_CNTK_Adv_Chapter.lyx @@ -705,7 +705,7 @@ After defining the network, it’s important to let CNTK know what the special It also needs to know the default output nodes, evaluation nodes and training criteria nodes. Note here the specification of the nodes that require special handling - (NodesReqMultiSeqHandling) when the network is evalauted or trained with + (NodesReqMultiSeqHandling) when the network is evaluated or trained with multiple sequences, e.g., when the network itself is an RNN or the model is trained with the sequence-level criterion. Since in these cases multiple sequences will be stitched together to improve @@ -2233,7 +2233,7 @@ RowStack \end_layout \begin_layout Standard -Concatnate rows of input matrices to form a bigger matrix. +Concatenate rows of input matrices to form a bigger matrix. The resulting matrix is a sumof(rows) by m1.cols matrix. It supports variable-length input. The syntax is @@ -2898,11 +2898,11 @@ labels - the ground truth labels. 
The first row is the ground truth output id. The second row is the ground truth class id. The third and fourth rows are the start (inclusive) and end (exclusive) - output ids corresponding to the ground trueth class id. + output ids corresponding to the ground truth class id. \end_layout \begin_layout Itemize -mainInputInfo - contains the main information to make the classfication +mainInputInfo - contains the main information to make the classification decision. It's an inputDim by T matrix. In language model, inputDim is often the hidden layer size. @@ -4422,7 +4422,7 @@ To integrate this new layer into the model, the inputs and outputs of the After the copy any node whose connected nodes were not copied will have those connections set to an invalid value. These need to be fixed in order to have a valid model. - Before a model can be saved CNTK first checkes to see if all nodes are + Before a model can be saved CNTK first checks to see if all nodes are correctly connected. \end_layout diff --git a/Documentation/CNTK-TechReport/lyx/CNTKBook_CNTK_Chapter.lyx b/Documentation/CNTK-TechReport/lyx/CNTKBook_CNTK_Chapter.lyx index 6695f1d1360c..7ee856c27cd4 100644 --- a/Documentation/CNTK-TechReport/lyx/CNTKBook_CNTK_Chapter.lyx +++ b/Documentation/CNTK-TechReport/lyx/CNTKBook_CNTK_Chapter.lyx @@ -965,7 +965,7 @@ CLASSLSTM : the class-based long short-term memory neural network. It uses sparse input, sparse parameter and sparse output. - This is often uesd for language modeling tasks. + This is often used for language modeling tasks. \end_layout \end_deeper @@ -1768,10 +1768,10 @@ numMiniBatch4LRSearch \end_inset -: the number of minibatches used to search the minibatch size whenin adaptive +: the number of minibatches used to search the minibatch size when in adaptive minibatch size mode. Default value is 500. 
- It's typically set to 10-20% of the total minibatches in an epochthis is + It's typically set to 10-20% of the total minibatches in an epoch; this is shared with the search for learning rate in SearchBeforeEpoch mode. \end_layout @@ -1792,10 +1792,10 @@ autoAdjustMinibatch \end_inset : enable or disable whether minibatch size is adaptively adjusted. - Default value is false.Adapative minibatch sizing will begin on epochs starting - after user minbatch sizes expcitilyspecified are complete. + Default value is false. Adaptive minibatch sizing will begin on epochs starting + after the user-specified minibatch sizes are complete. For example if the userspecifed minibatchSize=256:1024, then 256 and 1024are - used in the first 2 Epochs and adaptive minibatchsizing is used aferwards + used in the first 2 epochs and adaptive minibatch sizing is used afterwards \end_layout @@ -1814,7 +1814,7 @@ minibatchSizeTuningFrequency \end_inset -: The number of epochs to skip, on a periodic basis, beforedynamically adjusting +: The number of epochs to skip, on a periodic basis, before dynamically adjusting the minibatch size. Default value is 1. @@ -1835,7 +1835,7 @@ minibatchSizeTuningMax \end_inset -: The maximum size allowed for anadaptively adjusted minibatch size. +: The maximum size allowed for an adaptively adjusted minibatch size. Default value is 1048576. \end_layout @@ -2669,10 +2669,10 @@ rollingWindow option reads in all feature files and stores them on disk in one large temporary binary file. - The data is randomized by running a large rollowing window over the data + The data is randomized by running a large rolling window over the data in this file and randomizing the data within the window. This method produces more thorough randomization of the data but requires - a large temprorary file written to disk. + a large temporary file written to disk.
The other option is \begin_inset Quotes eld \end_inset @@ -2798,14 +2798,14 @@ labels \end_inset are the default names used by the SimpleNetworkBuilder but if the network - is designed using the Network Descrition Language (NDL), then any names + is designed using the Network Description Language (NDL), then any names can be used, as long as they each have a corresponding node in the network. \end_layout \begin_layout Standard To specify continuous-valued features, e.g. MFCC's or log mel filterbank coefficients, the following parameters should - be included in the a confguration block: + be included in the configuration block: \end_layout \begin_layout Itemize @@ -3378,7 +3378,7 @@ nbruttsineachrecurrentiter The reader arranges same-length input sentences, up to the specified limit, into each minibatch. For recurrent networks, trainer resets hidden layer activities only at - the begining of sentences. + the beginning of sentences. Activities of hidden layers are carried over to the next minibatch if an end of sentence is not reached. Using multiple sentences in a minibatch can speed up training processes. @@ -3425,7 +3425,7 @@ wordclass This is used for class-based language modeling. An example of the class information is below. The first column is the word index. - The second column is the number of occurances, the third column is the + The second column is the number of occurrences, the third column is the word, and the last column is the class id of the word. \begin_inset listings @@ -3795,7 +3795,7 @@ nbrUttsInEachRecurrentIter The reader arranges same-length input sentences, up to the specified limit, into each minibatch. For recurrent networks, trainer resets hidden layer activities only at - the begining of sentences. + the beginning of sentences. Activities of hidden layers are carried over to the next minibatch if an end of sentence is not reached. Using multiple sentences in a minibatch can speed up training processes.
@@ -4999,7 +4999,7 @@ section \end_inset – the encoderReader and decoderReader are the readers for encoder and decoder. - Similary for encoderCVReader and decoderCVReader for validation set. + Similarly for encoderCVReader and decoderCVReader for validation set. \end_layout @@ -5365,7 +5365,7 @@ deviceId \begin_layout Standard CNTK supports CPU and GPU computation. - Users can determine what device to use by setting the deviceId papameter. + Users can determine what device to use by setting the deviceId parameter. The possible values are \end_layout @@ -5509,7 +5509,7 @@ traceLevel=0 # larger values mean more output The default value is 0 and specifies minimal output. The higher the number the more output can be expected. - Currently 0 (limited output), 1 (medium ouput) and 2 (verbose output) are + Currently 0 (limited output), 1 (medium output) and 2 (verbose output) are the only values supported. \end_layout diff --git a/Documentation/CNTK-TechReport/lyx/CNTKBook_CNTK_Programmer_Chapter.lyx b/Documentation/CNTK-TechReport/lyx/CNTKBook_CNTK_Programmer_Chapter.lyx index 0b3d6899daf8..fbe07a008b76 100644 --- a/Documentation/CNTK-TechReport/lyx/CNTKBook_CNTK_Programmer_Chapter.lyx +++ b/Documentation/CNTK-TechReport/lyx/CNTKBook_CNTK_Programmer_Chapter.lyx @@ -3186,7 +3186,7 @@ s().GetNumCols() != 1) \begin_layout Plain Layout - throw std::logic_error("The left value of ScaleNode must be a scarlar + throw std::logic_error("The left value of ScaleNode must be a scalar value."); \end_layout diff --git a/Documentation/CNTK-TechReport/lyx/CNTKBook_CN_Chapter.lyx b/Documentation/CNTK-TechReport/lyx/CNTKBook_CN_Chapter.lyx index a67f3cd95e0e..78fbd8367874 100644 --- a/Documentation/CNTK-TechReport/lyx/CNTKBook_CN_Chapter.lyx +++ b/Documentation/CNTK-TechReport/lyx/CNTKBook_CN_Chapter.lyx @@ -1816,11 +1816,11 @@ sly. In this algorithm, all the nodes whose children have not been computed are in the waiting set and those whose children are computed are in the ready set. 
- At the beginning, all non-leaf descendents of + At the beginning, all non-leaf descendants of \begin_inset Formula $root$ \end_inset - are in the waiting set and all leaf descendents are in the ready set. + are in the waiting set and all leaf descendants are in the ready set. The scheduler picks a node from the ready set based on some policy, removes it from the ready set, and dispatches it for computation. Popular policies include first-come/first-serve, shortest task first, and @@ -2015,7 +2015,7 @@ status open \begin_inset Formula $waiting$ \end_inset - is initialized to include all non-leaf descendents of + is initialized to include all non-leaf descendants of \begin_inset Formula $root$ \end_inset @@ -2061,7 +2061,7 @@ status open \begin_inset Formula $ready$ \end_inset - is initialized to include all leaf descendents of + is initialized to include all leaf descendants of \begin_inset Formula $root$ \end_inset @@ -3412,7 +3412,7 @@ status open \end_inset -Decide the order to compute the gradient at all descendents of +Decide the order to compute the gradient at all descendants of \begin_inset Formula $node$ \end_inset @@ -8892,7 +8892,7 @@ CRF \color none CRF stands for conditional random fields. This node does sequence-level training, using CRF criterion. - This node has three nputs. + This node has three inputs. The first is the label \family default \series bold @@ -10198,7 +10198,7 @@ reference "fig:CN-WithDelayNode" A simple way to do forward computation and backpropagation in a recurrent network is to unroll all samples in the sequence over time. Once unrolled, the graph is expanded into a DAG and the forward computation - and gradient calcalclation algorithms we just discussed can be directly + and gradient calculation algorithms we just discussed can be directly used. 
This means, however, all computation nodes in the CN need to be computed sample by sample and this significantly reduces the potential of parallelizatio @@ -10318,7 +10318,7 @@ key "StronglyConnectedComponents-Hopcroft+1983" in the CN and the CN is reduced to a DAG. All the nodes inside each loop (or composite node) can be unrolled over time and also reduced to a DAG. - For all these DAGs the forward computation and backprogation algorithms + For all these DAGs the forward computation and backpropagation algorithms we discussed in the previous sections can be applied. The detailed procedure in determining the forward computation order in the CN with arbitrary recurrent connections is described in Algorithm diff --git a/Documentation/CNTK-TechReport/lyx/CNTKBook_ExampleSetup_Chapter.lyx b/Documentation/CNTK-TechReport/lyx/CNTKBook_ExampleSetup_Chapter.lyx index d8a4cab35f41..a3aac49ce846 100644 --- a/Documentation/CNTK-TechReport/lyx/CNTKBook_ExampleSetup_Chapter.lyx +++ b/Documentation/CNTK-TechReport/lyx/CNTKBook_ExampleSetup_Chapter.lyx @@ -97,7 +97,7 @@ ns. All examples are based on the TIMIT corpus for phonetic recognition but can easily be modified for use for large vocabulary continuous speech recogniti on. - The only significant change is that context-indepenent phonetic states + The only significant change is that context-independent phonetic states used in the TIMIT example would be replaced by context-dependent senone targets for large vocabulary tasks. We note that these examples are not meant to be representative of state @@ -146,10 +146,10 @@ SimpleNetworkBuilder will also be monitored during training using the evalCriterion parameter. The input data will be mean and variance normalized since applyMeanVarNorm has been set to true. 
- In addtion, if needPrior is set to true, the prior probablities of the + In addition, if needPrior is set to true, the prior probabilities of the labels will be computed and a ScaledLogLikelihood node in the network will be automatically created. - This is important if this netwok will be used to generate acoustic scores + This is important if this network will be used to generate acoustic scores in a speech recognition decoder. \end_layout @@ -838,7 +838,7 @@ SquareError Below is a snippet from the NDL file for this example. This autoencoder has three hidden layers including a middle bottleneck layer of 64 neurons. - A macro is defined to peform mean and variance normalization and it is + A macro is defined to perform mean and variance normalization and it is applied to both the input and target features. Also, \end_layout @@ -1123,7 +1123,7 @@ discriminative pre-training \begin_layout Standard It is well known that deep networks can be difficult to optimize, especially when a limited amount of training data is available. - As a result, a number of aproaches to initializing the parameters of these + As a result, a number of approaches to initializing the parameters of these networks have been proposed. One of these methods is known as discriminative pre-training. In this approach, a network with a single hidden layer is trained starting @@ -1526,7 +1526,7 @@ multi-task learning \begin_layout Standard One interesting approach to network training is multi-task learning, where - the network is trained to optmize two objective functions simultaneously. + the network is trained to optimize two objective functions simultaneously. This can be done in CNTK through the appropriate use of NDL. Let's assume that we have a network specified in NDL that has three hidden layers and output of the third hidden layer is defined as L3. 
@@ -1856,7 +1856,7 @@ TIMIT.statelist" \begin_layout Standard The NDL for constructing a network with these inputs and outputs can be done in a number of ways. - One way is to contruct a macro that constructs a layer that takes two inputs, + One way is to construct a macro that constructs a layer that takes two inputs, as follows: \end_layout @@ -1984,7 +1984,7 @@ L1 = SBFF2(featInput1, HiddenDim, FeatDim1, featInput2, FeatDim2) The rest of the hidden layers and the output layer with a cross entropy objective function would be the same as previous examples. - Notice that the names and dimensionality of the input adn output data have + Notice that the names and dimensionality of the input and output data have to the same in both the NDL model description and the reader configuration. \end_layout @@ -2623,7 +2623,7 @@ layerSizes \end_inset =10000:200:10000. - Sizes of input, hidden and ouput layers. + Sizes of input, hidden and output layers. Input layer size is equal to vocabulary size, hidden layer is normally in the range of 50 to 500, output layer size is the vocabulary size. \end_layout @@ -2640,7 +2640,7 @@ uniformInit \end_inset =true. - Whether to use uniformly randomizied values for initial paramter weights. + Whether to use uniformly randomized values for initial parameter weights. \end_layout \begin_layout Itemize @@ -2898,7 +2898,7 @@ learnRateDecreaseFactor \end_inset =0.5. - Learning rate decrese factor. + Learning rate decrease factor. \end_layout \end_deeper @@ -2963,7 +2963,7 @@ t word_class \end_layout \begin_layout Standard -word_id is a unique non-negative interger, frequency is the frequency of +word_id is a unique non-negative integer, frequency is the frequency of word (optional), word_string is the word string (low frequent words may be mapped to ), and word_class is the class id of word. 
Word class can be derived using frequency based heuristics @@ -4254,7 +4254,7 @@ wordContext \end_inset =0:1:2 : this specifies the time indices for forming a context window. - In this example, this setup coresponds to using the current input, the + In this example, this setup corresponds to using the current input, the next input, and the input after the next input for a context window of size 3. User can also use other cases such as wordcontext=0:-1:1 to form a context @@ -4344,7 +4344,7 @@ BOS \begin_inset Quotes erd \end_inset - : this specifies the symbol of sequence begining. + : this specifies the symbol of sequence beginning. \end_layout \begin_layout Itemize @@ -4634,7 +4634,7 @@ outputs:labels \end_inset : this specifies which nodes to output results. - These node names are pre-spefied in CNTK's simple network builder. + These node names are pre-specified in CNTK's simple network builder. The node \begin_inset Quotes eld \end_inset @@ -4643,7 +4643,7 @@ outputs \begin_inset Quotes erd \end_inset - is the node that output activies before softmax. + is the node that outputs activations before softmax. The node \begin_inset Quotes eld \end_inset @@ -4836,7 +4836,7 @@ output.rec.txt \begin_inset Quotes erd \end_inset - : the file name for writting decode results from LUSequenceWriter. + : the file name for writing decode results from LUSequenceWriter. \end_layout
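A quick sanity check for a spellcheck patch like this one is to grep for the corrected misspellings after applying it. The sketch below is an assumption, not part of the patch: the directory path is taken from the diff headers above, and the typo list is a sample drawn from the hunks in this commit; run it from the repository root after `git apply`.

```shell
#!/bin/sh
# Verify that none of the misspellings corrected by this patch remain
# in the LyX sources of the tech report.
LYX_DIR="Documentation/CNTK-TechReport/lyx"
status=ok
for typo in differes probabilites Concatnate begining occurances aferwards; do
  # grep -rq exits 0 only when the typo is still present somewhere under LYX_DIR
  if grep -rq "$typo" "$LYX_DIR" 2>/dev/null; then
    echo "still present: $typo"
    status=fail
  fi
done
echo "spellcheck verification: $status"
```

The same loop can be pointed at any other directory touched by a follow-up spelling pass; only whole misspelled tokens should go in the list, since substrings of correct words would produce false positives.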