

Merge branch 'master' of https://github.com/Microsoft/CNTK into fseide/unusedCodeCleanup
U-FAREAST\fseide committed Jan 22, 2016
2 parents 6c74cc5 + 3e651e0 commit d11963d
Showing 7 changed files with 162 additions and 20 deletions.
Binary file not shown.
11 changes: 10 additions & 1 deletion Documentation/CNTK-TechReport/lyx/CNTKBook-master.lyx
Original file line number Diff line number Diff line change
@@ -139,7 +139,7 @@ Yu Zhang, Geoffrey Zweig
\end_layout

\begin_layout Date
MSR-TR-2014-112 (DRAFT v0.9: Nov 30, 2015)
MSR-TR-2014-112 (DRAFT v1.0: Jan 21, 2016)
\end_layout

\begin_layout Standard
@@ -202,13 +202,22 @@ filename "CNTKBook_CNTK_Adv_Chapter.lyx"
\end_layout

\begin_layout Standard
\begin_inset Note Comment
status open

\begin_layout Plain Layout
\begin_inset CommandInset include
LatexCommand include
filename "CNTKBook_CNTK_Programmer_Chapter.lyx"

\end_inset


\end_layout

\end_inset


\end_layout

\begin_layout Standard
73 changes: 62 additions & 11 deletions Documentation/CNTK-TechReport/lyx/CNTKBook_CNTK_Adv_Chapter.lyx
@@ -669,7 +669,7 @@ status open

\begin_layout Plain Layout

FeatureNodes=(features)
FeatureNodes=(features1, features2)
\end_layout

\begin_layout Plain Layout
@@ -733,7 +733,7 @@ Comments

\end_inset

:

\end_layout

\begin_layout Standard
@@ -2569,6 +2569,10 @@ m1 and m2 must have same dimension in ElementTimes.
vcol must have same number of rows in ColumnElementTimes.
\end_layout

\begin_layout Standard
\begin_inset Note Comment
status open

\begin_layout Subsubsection
KhatriRaoProduct
\begin_inset Index idx
@@ -2593,15 +2597,15 @@ ColumnwiseCrossProduct

\end_layout

\begin_layout Standard
\begin_layout Plain Layout
Compute the cross product of each column of two input matrices.
These two names refer to the same operation, but ColumnwiseCrossProduct
is easier to understand for most people.
The resulting matrix is a (m1.rows times m2.rows) by m1.cols matrix.
The syntax is
\end_layout

\begin_layout Standard
\begin_layout Plain Layout
\begin_inset listings
inline false
status open
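As a sanity check on the shapes described above, here is a minimal NumPy sketch of the column-wise cross product (the column-wise Kronecker, or Khatri-Rao, product). This is illustrative only: the operation and the (m1.rows times m2.rows) by m1.cols result shape follow the description, but the Python function name is mine, not part of CNTK.

```python
import numpy as np

def columnwise_cross_product(m1, m2):
    """For each column j, stack the outer product of m1[:, j] and m2[:, j]
    into one column. Result shape: (m1.rows * m2.rows, m1.cols)."""
    assert m1.shape[1] == m2.shape[1], "inputs need the same number of columns"
    # prod[i, j, c] = m1[i, c] * m2[j, c]; then merge the first two axes
    prod = np.einsum('ic,jc->ijc', m1, m2)
    return prod.reshape(m1.shape[0] * m2.shape[0], m1.shape[1])

m1 = np.array([[1., 2.],
               [3., 4.]])        # 2 x 2
m2 = np.array([[5., 6.],
               [7., 8.],
               [9., 0.]])        # 3 x 2
out = columnwise_cross_product(m1, m2)   # (2*3) x 2
```

Each output column is the Kronecker product of the corresponding input columns, matching the stated result dimensions.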
@@ -2650,7 +2654,7 @@ GMMLL

\end_layout

\begin_layout Standard
\begin_layout Plain Layout
Compute the Gaussian mixture model log likelihood of feature values given
the unnormalized Gaussian mixture weights (i.e., priors), Gaussian means,
and log standard deviations.
@@ -2660,7 +2664,7 @@ Compute the Gaussian mixture model log likelihood of feature values given
The syntax is
\end_layout

\begin_layout Standard
\begin_layout Plain Layout
\begin_inset listings
inline false
status open
@@ -2706,11 +2710,16 @@ FeatureValues - the feature values on which to compute the log likelihood.

\end_layout

\begin_layout Standard
\begin_layout Plain Layout
Note that UnnormedPrior, Means, and LogStddev should have the same number
of columns, which should be either 1 or equal to the number of columns in
FeatureValues.
\end_layout

\end_inset


\end_layout
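The computation behind GMMLL can be sketched for a single feature vector with a diagonal-covariance mixture. This is a hedged reference sketch, not CNTK's implementation: the parameter names mirror UnnormedPrior, Means, and LogStddev from the description above, and the math is the standard Gaussian mixture log likelihood.

```python
import numpy as np

def gmm_loglikelihood(unnormed_prior, means, log_stddev, x):
    """Log likelihood of vector x (shape (D,)) under a diagonal GMM.
    unnormed_prior: (K,) unnormalized mixture weights
    means, log_stddev: (D, K) per-component means / log standard deviations."""
    D, K = means.shape
    # normalize the priors in log space
    log_prior = np.log(unnormed_prior) - np.log(unnormed_prior.sum())
    var = np.exp(2.0 * log_stddev)                      # (D, K) variances
    diff = x[:, None] - means                           # (D, K)
    log_norm = -0.5 * D * np.log(2.0 * np.pi) - log_stddev.sum(axis=0)
    comp_ll = log_norm - 0.5 * (diff ** 2 / var).sum(axis=0)   # (K,)
    # log-sum-exp over mixture components
    scores = log_prior + comp_ll
    m = scores.max()
    return m + np.log(np.exp(scores - m).sum())

priors = np.array([1.0, 3.0])        # unnormalized weights
means = np.zeros((2, 2))             # D=2, K=2
log_stddev = np.zeros((2, 2))        # stddev 1 everywhere
ll = gmm_loglikelihood(priors, means, log_stddev, np.zeros(2))
```

With both components identical, the result reduces to the single standard-normal log density, which is a convenient self-check.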

\begin_layout Subsubsection
SquareError
\begin_inset Index idx
@@ -3517,7 +3526,7 @@ Convolution(w, image, kernelWidth, kernelHeight,

\begin_layout Plain Layout

maxTempMemSizeInSamples=0])
maxTempMemSizeInSamples=0, imageLayout="HWC"|"cudnn"])
\end_layout

\begin_layout Plain Layout
@@ -3537,7 +3546,7 @@ Convolve(w, image, kernelWidth, kernelHeight,

\begin_layout Plain Layout

maxTempMemSizeInSamples=0]) #deprecated
maxTempMemSizeInSamples=0, imageLayout="HWC"|"cudnn"]) #deprecated
\end_layout

\end_inset
@@ -3592,6 +3601,22 @@ maxTempMemSizeInSamples - [named optional] maximum amount of memory (in
Default is 0 which means the same as the input samples.
\end_layout

\begin_layout Itemize
imageLayout - [named optional] the storage format of each image.
By default it's
\begin_inset Quotes eld
\end_inset

HWC
\begin_inset Quotes erd
\end_inset

, which means each image is stored as [channel, width, height] in
column-major order.
If you use cuDNN to speed up training, you should set it to cudnn, which
means each image is stored as [width, height, channel].
\end_layout
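The difference between the two layouts is just the order in which pixel values sit in memory. A rough NumPy illustration, under the stated assumption that "HWC" puts the channel index fastest and "cudnn" puts the width index fastest (the variable names are mine):

```python
import numpy as np

H, W, C = 2, 3, 2
# A tiny image, indexed img[h, w, c]; values 0..11 mark memory positions.
img = np.arange(H * W * C).reshape(H, W, C)

# "HWC" layout: channel varies fastest, then width, then height.
hwc_flat = img.ravel()                      # (H, W, C) row-major = c fastest

# "cudnn" layout: width varies fastest, then height, then channel.
cudnn_flat = img.transpose(2, 0, 1).ravel() # (C, H, W) row-major = w fastest
```

Converting between the two is a transpose plus a copy, which is why picking the layout that matches the compute path (cuDNN or not) avoids reshuffling every minibatch.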

\begin_layout Subsubsection
MaxPooling
\begin_inset Index idx
@@ -3631,12 +3656,13 @@ status open

\begin_layout Plain Layout

MaxPooling(m, windowWidth, windowHeight, stepW, stepH)
MaxPooling(m, windowWidth, windowHeight, stepW, stepH, imageLayout="HWC"|"cudnn")
\end_layout

\begin_layout Plain Layout

AveragePooling(m, windowWidth, windowHeight, stepW, stepH)
AveragePooling(m, windowWidth, windowHeight, stepW, stepH, imageLayout="HWC"|"cudnn")
\end_layout

\end_inset
@@ -3664,6 +3690,22 @@ stepW - step (or stride) used in the width direction
stepH - step (or stride) used in the height direction
\end_layout

\begin_layout Itemize
imageLayout - [named optional] the storage format of each image.
By default it's
\begin_inset Quotes eld
\end_inset

HWC
\begin_inset Quotes erd
\end_inset

, which means each image is stored as [channel, width, height] in
column-major order.
If you use cuDNN to speed up training, you should set it to cudnn, which
means each image is stored as [width, height, channel].
\end_layout
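The window and step parameters above can be made concrete with a naive single-channel reference sketch of max pooling. This is only an illustration of the semantics (window size plus stride), not CNTK's implementation, and the function name is mine:

```python
import numpy as np

def max_pooling(m, window_w, window_h, step_w, step_h):
    """Naive 2-D max pooling over a single-channel image m[h, w]."""
    H, W = m.shape
    out_h = (H - window_h) // step_h + 1
    out_w = (W - window_w) // step_w + 1
    out = np.empty((out_h, out_w), dtype=m.dtype)
    for i in range(out_h):
        for j in range(out_w):
            patch = m[i * step_h: i * step_h + window_h,
                      j * step_w: j * step_w + window_w]
            out[i, j] = patch.max()       # AveragePooling would use patch.mean()
    return out

m = np.array([[1, 2, 3, 4],
              [5, 6, 7, 8]])
pooled = max_pooling(m, window_w=2, window_h=2, step_w=2, step_h=2)
```

Here two non-overlapping 2x2 windows each keep their maximum, so a 2x4 input shrinks to 1x2.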

\begin_layout Subsubsection
PastValue (or Delay
\begin_inset Index idx
@@ -3806,6 +3848,15 @@ key "DNN-SWB-seide+2011,FeatEngInDNN-Seide+2011"

on DNNs by building shallow networks first and then inserting new layers
one on top of another.
To use MEL, you need to use the
\begin_inset Quotes eld
\end_inset

edit
\begin_inset Quotes erd
\end_inset

command in the config file.
\end_layout

\begin_layout Standard
54 changes: 52 additions & 2 deletions Documentation/CNTK-TechReport/lyx/CNTKBook_CNTK_Chapter.lyx
@@ -4960,6 +4960,10 @@ minibatchSize
– the minibatch size to use when creating the label mapping file.
\end_layout

\begin_layout Standard
\begin_inset Note Comment
status open

\begin_layout Subsection
DoEncoderDecoder Command
\begin_inset Index idx
@@ -4974,7 +4978,7 @@ DoEncoderDecoder Command

\end_layout

\begin_layout Standard
\begin_layout Plain Layout
Neural networks can be used to form a chain of networks.
The first several networks can work as encoders and the following networks
can serve as decoders.
@@ -5003,6 +5007,11 @@ section
encoderNetworkBuilder and decoderNetworkBuilder: These specify the simple
network builder to be used.

\end_layout

\end_inset


\end_layout

\begin_layout Section
@@ -5482,7 +5491,7 @@ traceLevel
\end_layout

\begin_layout Standard
The traceLevel parameter is uniformly used by the code in CNTK to specify
The traceLevel parameter is usually used by the code in CNTK to specify
how much extra output (verbosity) is desired as in
\end_layout

@@ -5504,6 +5513,47 @@ The default value is 0 and specifies minimal output.
the only values supported.
\end_layout

\begin_layout Subsection
ShareNodeValueMatrices
\begin_inset Index idx
status open

\begin_layout Plain Layout
ShareNodeValueMatrices
\end_layout

\end_inset


\end_layout

\begin_layout Standard
The ShareNodeValueMatrices parameter indicates whether to also share the
matrices that hold forward-computation results, to further reduce memory
usage.
To turn it on, use
\end_layout

\begin_layout Standard
\begin_inset listings
inline false
status open

\begin_layout Plain Layout

ShareNodeValueMatrices=true
\end_layout

\end_inset

The default value is false, which means memory sharing only happens during
the backward phase.
You should decide whether to turn it on by running the same setup with this
flag on and off; if the results are the same, it is safe to turn it on.
In the future, we will enable it by default after more extensive testing.
\end_layout

\begin_layout Section
Advanced Command Line Parsing Rules
\end_layout
30 changes: 26 additions & 4 deletions Documentation/CNTK-TechReport/lyx/CNTKBook_CN_Chapter.lyx
@@ -6775,6 +6775,12 @@ and

\begin_layout Itemize

\emph on
\begin_inset Note Comment
status open

\begin_layout Itemize

\emph on
TimeReverse
\emph default
@@ -6795,7 +6801,7 @@ TimeReverse
.
\end_layout

\begin_layout Standard
\begin_layout Plain Layout
\begin_inset Formula
\begin{eqnarray}
\mathbf{\boldsymbol{\mathit{\upsilon}}}\left(\mathbf{X}(:,1:T)\right) & \leftarrow & \mathbf{\boldsymbol{\mathit{\upsilon}}}\left(\mathbf{X}(:,T:-1:1)\right)\\
@@ -6807,7 +6813,7 @@ TimeReverse

\end_layout

\begin_layout Standard
\begin_layout Plain Layout

\family roman
\series medium
Expand All @@ -6826,6 +6832,11 @@ The time reverse node is usually used for bi-directional model.
Then, process the reversed input.
Finally, use another TimeReverse node on the output of the processed data.

\end_layout

\end_inset


\end_layout
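The operation itself is just a flip along the time axis. A minimal NumPy sketch of the update shown in the formula above, assuming (as in the rest of this chapter) that each column of X is one time step:

```python
import numpy as np

# Columns are time steps t = 1..T; rows are feature dimensions.
X = np.array([[1, 2, 3],
              [4, 5, 6]])

# TimeReverse: X(:, 1:T) <- X(:, T:-1:1), i.e. reverse the column order.
X_rev = X[:, ::-1]
```

In the bi-directional pattern described above, one such flip is applied before the recurrent processing and a second flip restores the original time order on the output.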

\begin_layout Subsection
@@ -8838,6 +8849,12 @@ v\left(\mathbf{X},\mathbf{\mathbf{Y}}\right) & += & \mathbf{C_{t}}\mathbf{\circ

\begin_layout Itemize

\emph on
\begin_inset Note Comment
status open

\begin_layout Itemize

\emph on
CRF
\emph default
@@ -9151,7 +9168,7 @@ L
:
\end_layout

\begin_layout Standard
\begin_layout Plain Layout
\begin_inset Formula
\begin{eqnarray}
\alpha_{t}\left(i\right) & \leftarrow & h_{it}+\mathrm{LogAdd}_{k}\left(\delta_{t-1}(k)+\eta a_{ki}\right)\\
@@ -9166,10 +9183,15 @@ L

\end_layout

\begin_layout Standard
\begin_layout Plain Layout
Notice that the gradient to the transition weights needs to be summed over
the whole observation sequence, which is the current minibatch.

\end_layout

\end_inset


\end_layout
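One step of the CRF forward recursion shown in the hunk above can be sketched with log-sum-exp arithmetic. This is a hedged illustration: I read LogAdd as log-sum-exp over k and assume delta_{t-1} denotes the previous step's forward scores; the function and argument names are mine.

```python
import numpy as np

def crf_forward_step(alpha_prev, h_t, a, eta=1.0):
    """alpha_t(i) = h_t(i) + LogAdd_k( alpha_prev(k) + eta * a[k, i] ),
    where a[k, i] is the transition score from state k to state i."""
    scores = alpha_prev[:, None] + eta * a        # scores[k, i]
    # numerically stable log-sum-exp over the previous states k
    m = scores.max(axis=0)
    return h_t + m + np.log(np.exp(scores - m).sum(axis=0))

# Uniform toy case: 2 states, zero emissions and transitions.
alpha = crf_forward_step(np.zeros(2), np.zeros(2), np.zeros((2, 2)))
```

With all scores zero, each state accumulates log 2 from summing over the two predecessors, which is a quick sanity check of the recursion.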

\begin_layout Subsection
