Commit e0284fb

factored out the decoder stack in the seq-2-seq example

frankseide committed May 4, 2016
1 parent 636bb52 commit e0284fb

Showing 4 changed files with 76 additions and 130 deletions.
122 changes: 54 additions & 68 deletions Examples/SequenceToSequence/Miscellaneous/G2P/G2P.cntk
@@ -23,52 +23,27 @@ command = train
makeMode = false

# experiment id

# 30-0: switching back to LMSequenceReader, trying to recreate the old settings that worked
# 30-1: dumping all sequences with a small model, to test reader
# 30-2: fixed #samples for momentum calculation
# 30-3: after 'aux' input to LSTMP, in prep for more correct stabilization
# 30-4: same as 30-5 but moved to new folder and re-enabled memsharing
# 30-5: same as 29-5 but rerun with logging of stabilizers
# 29-3: same as 29-5, repro test
# 29-5: same as 29-4 but right-to-left encoder
# 29-4: trying once more with new reader, MB size 70, LR changed to 0.0035, then 0.002; shared stab weights in LSTMP
# 29-2: switched to new reader again, emulating 69-dim outputs --gives comparable 'ce', but not comparable convergence; SOME att weights are totally flat
# 29-1: same as 29-0 but also switched back to 128 MBSize --also GLITCH??
# 29-0: switched back to old reader --not quite the same :( what am I missing?
# 28-5: like 28-4 but using default axis for labels --minor glitch, got worse
# 28-4: like 28-3 but with momentum changed from 2500 to 1250 (since applied to different #samples) --GLITCH
# 28-3: like 28-2 but with randomization enabled
# 28-2: like 28-0 but after yet another reader fix --GLITCH
# 28-1: like 28-0 but halving the MB size --CNTKTextFormatReader interprets the length differently
# 28-0: CNTK reader after data-format fix
# 27-5: trying CNTK reader again after Ryan's bug fix --data format bad
# 27-4: back to LMSequenceReader (regression test)
# 27-3: used </s> for sent end
# 27-2: some refactoring, went back to 26-4 from LMSequenceReader
# 27-1: fixed slicing
# 27-0: incorrect slicing, dropped first input
deviceId = 0
ExpId = 30-$deviceId$-g2p
-#ExpId = 22-3-g2p # for decoding a different model
+#ExpId = 22-3-g2p # change to a different id when decoding a different model

# directories
ExpDir = "$ExpRootDir$/$ExpId$"
ModelDir = "$ExpDir$/Models"

-stderr = $ExpDir$/G2P
+stderr = $ExpDir$/G2P-debug

precision = "float"
traceLevel = 1
modelPath = "$ModelDir$/G2P.dnn"

-# decoding config
+# decoding config --used by the "write" command ("write" decodes and writes the result)
beamDepth = 1 # 0=predict; 1=greedy; >1=beam
decodeModel = 21
-decodeModelPath = "$modelPath$.$decodeModel$" # note: epoch to decode is appended
+decodeModelPath = "$modelPath$.$decodeModel$" # note: epoch to decode is appended to the model path
decodeOutputPath = "$decodeModelPath$.$beamDepth$" # results are written next to the model, with beamDepth appended

-# dump config
+# dump config --used by the "dump" command, for inspecting the model parameters
dumpModelPath = "$modelPath$.2" # put the epoch id here

# top-level model configuration
@@ -115,13 +90,6 @@ lmSequenceReaderInputLabelsDef = [ dim = 1 ; labelType = "category" ; labelDim

BrainScriptNetworkBuilder = (new ComputationNetwork [

-# TODO: remove these
-enableTracing = true
-traceFrequency = 100
-tracingLabelMappingFile = "$DataDir$/$vocabFile$"
-beamDepth=3 // for above Trace macros only, need to clean that up
-include "S2SLib.bs"

# import general config options from outside config values
useCNTKTextFormatReader = $useCNTKTextFormatReader$

@@ -204,7 +172,6 @@ BrainScriptNetworkBuilder = (new ComputationNetwork [

inputEmbedded = EmbedInput (inputSequence)
labelsEmbedded = EmbedLabels (labelSequence)
-#labelSentenceStartEmbedded = EmbedLabels (labelSentenceStart)
labelSentenceStartEmbedded = Pass (EmbedLabels (labelSentenceStart)) # TODO: remove Pass() if not actually needed in decoder
labelSentenceStartEmbeddedScattered = BS.Sequences.Scatter (isFirstLabel, labelSentenceStartEmbedded) # unfortunately needed presently

@@ -253,19 +220,20 @@ BrainScriptNetworkBuilder = (new ComputationNetwork [

# helper functions to delay h and c that apply beam-search reordering, if so configured

-PreviousHCWithReorderingHook (lstmState) = [
+PreviousHCWithReorderingHook (lstmState, layerIndex=0) = [
h = BS.Loop.Previous (lstmState.h * beamSearchReorderHook) // hidden state(t-1)
c = BS.Loop.Previous (lstmState.c * beamSearchReorderHook) // cell(t-1)
dim = lstmState.dim
]

-PreviousHCFromThoughtVectorWithReorderingHook (lstmState) = [ # with both thought vector and beam-search hook
-isFirst = BS.Loop.IsFirst (initialState.h)
-# BUGBUG: Should be thoughtVector, but Scatter() can't expand from inside a loop
-h = BS.Boolean.If (isFirst, thoughtVectorBroadcast.h, BS.Loop.Previous (lstmState.h * beamSearchReorderHook)) # hidden state(t-1)
-c = BS.Boolean.If (isFirst, thoughtVectorBroadcast.c, BS.Loop.Previous (lstmState.c * beamSearchReorderHook)) # cell(t-1)
-dim = lstmState.dim
-]
+PreviousHCFromThoughtVectorWithReorderingHook (lstmState, layerIndex=0) =
+if layerIndex > 0 then PreviousHCWithReorderingHook (lstmState, layerIndex=1)
+else [ # with both thought vector and beam-search hook
+isFirst = BS.Loop.IsFirst (initialState.h)
+h = BS.Boolean.If (isFirst, thoughtVectorBroadcast.h, BS.Loop.Previous (lstmState.h * beamSearchReorderHook))
+c = BS.Boolean.If (isFirst, thoughtVectorBroadcast.c, BS.Loop.Previous (lstmState.c * beamSearchReorderHook))
+dim = lstmState.dim
+]
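The layerIndex dispatch above means only the first decoder layer is seeded from the thought vector; deeper layers fall back to the plain delayed state. A minimal Python sketch of that behavior (illustrative names only, not CNTK/BrainScript code):

    import numpy as np

    def previous_state(h_seq, c_seq, t, thought_h, thought_c, layer_index):
        # Recurrent input (h, c) an LSTM layer should see at time step t.
        if t == 0:
            if layer_index == 0:
                # layer 0: the encoder's thought vector acts as the initial state
                return thought_h, thought_c
            # deeper layers: ordinary zero initial state
            return np.zeros_like(thought_h), np.zeros_like(thought_c)
        # t > 0: plain one-step delay, as in BS.Loop.Previous
        return h_seq[t - 1], c_seq[t - 1]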

#############################################################
# decoder history hook: LM history, from ground truth vs. output
@@ -288,28 +256,35 @@ BrainScriptNetworkBuilder = (new ComputationNetwork [
decoderDynamicAxis = labelsEmbedded
FixedWindowAttentionHook = BS.Seq2Seq.CreateAugmentWithFixedWindowAttentionHook (attentionDim, attentionSpan, decoderDynamicAxis, encoderOutput, enableSelfStabilization=useStabilizer)

-# TODO: collapse this into a single first-layer function; factor to lib; then merge with RecurrentLSTMPStack()
-# NYU style: The decoder starts with hidden state 0 and takes as input [thoughtVectorBroadcast.h; previous word].
-decoderOutputLayer = Length (decoderDims)-1
-decoder[i:0..decoderOutputLayer] =
-if i == 0
-then if useEncoder && useNYUStyle then BS.RNNs.RecurrentLSTMP (decoderDims[i], cellDim=decoderDims[i],
-RowStack (S(thoughtVectorBroadcast.h) : S(decoderInput)), inputDim=(thoughtVector.dim + decoderInputDim),
-previousHook=PreviousHCWithReorderingHook,
-enableSelfStabilization=useStabilizer)
-else if useEncoder && attentionSpan > 0 then BS.RNNs.RecurrentLSTMP (decoderDims[i], cellDim=decoderDims[i],
-S(decoderInput), inputDim=decoderInputDim,
-augmentInputHook=FixedWindowAttentionHook, augmentInputDim=encoderOutput.dim,
-previousHook=PreviousHCWithReorderingHook,
-enableSelfStabilization=useStabilizer)
-else BS.RNNs.RecurrentLSTMP (decoderDims[i], cellDim=decoderDims[i],
-S(decoderInput), inputDim=decoderInputDim,
-previousHook=PreviousHCFromThoughtVectorWithReorderingHook, # Previous() function with thought vector as initial state
-enableSelfStabilization=useStabilizer)
-else BS.RNNs.RecurrentLSTMP (decoderDims[i], cellDim=decoderDims[i],
-S(decoder[i-1].h), inputDim=/*decoderDims[i-1]*/ decoder[i-1].dim,
-previousHook=PreviousHCWithReorderingHook,
-enableSelfStabilization=useStabilizer)
+# some parameters to the decoder stack depend on the mode
+decoderParams =
+# with attention
+if useEncoder && attentionSpan > 0 then [
+previousHook = PreviousHCWithReorderingHook # add reordering for beam search
+augmentInputHook = FixedWindowAttentionHook # input gets augmented by the attention window
+augmentInputDim = encoderOutput.dim
+]
+# with thought vector appended to every frame
+else if useEncoder && useNYUStyle then [
+previousHook = PreviousHCWithReorderingHook
+augmentInputHook (input, lstmState) = S(thoughtVectorBroadcast.h) # each input frame gets augmented by the thought vector
+augmentInputDim = thoughtVector.dim
+]
+# thought vector as initial state for decoder
+else [
+previousHook = PreviousHCFromThoughtVectorWithReorderingHook # Previous() function with thought vector as initial state
+augmentInputHook = BS.RNNs.NoAuxInputHook
+augmentInputDim = 0
+]
+
+# this is the decoder LSTM stack
+decoder = BS.RNNs.RecurrentLSTMPStack (decoderDims, cellDims=decoderDims,
+S(decoderInput), inputDim=decoderInputDim,
+augmentInputHook=decoderParams.augmentInputHook, augmentInputDim=decoderParams.augmentInputDim,
+previousHook=decoderParams.previousHook,
+enableSelfStabilization=useStabilizer)
+
+decoderOutputLayer = Length (decoder)-1
decoderOutput = decoder[decoderOutputLayer].h
decoderDim = decoderDims[decoderOutputLayer]
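The three branches of decoderParams above choose what the decoder stack receives as auxiliary input and as its initial state. A rough Python paraphrase of that selection (a sketch with assumed flag names mirroring useEncoder, attentionSpan, and useNYUStyle; not BrainScript semantics):

    def decoder_params(use_encoder, attention_span, use_nyu_style,
                       encoder_output_dim, thought_vector_dim):
        # Mirrors the three cases of decoderParams.
        if use_encoder and attention_span > 0:
            # attention: each step's input is augmented by an attention window
            return {"aux": "attention-window", "aux_dim": encoder_output_dim,
                    "initial_state": "zero"}
        if use_encoder and use_nyu_style:
            # NYU style: the thought vector is appended to every input frame
            return {"aux": "thought-vector", "aux_dim": thought_vector_dim,
                    "initial_state": "zero"}
        # classic seq2seq: the thought vector becomes layer 0's initial state
        return {"aux": None, "aux_dim": 0, "initial_state": "thought-vector"}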

@@ -335,6 +310,17 @@ BrainScriptNetworkBuilder = (new ComputationNetwork [

# score output for decoding
scoreSequence = Pass (z)

+#############################################################
+# some helper functions
+#############################################################
+
+# these trace functions log their parameter's value
+TraceState (h, what) = Transpose (Trace (Transpose (h), say=what, logFirst=10, logFrequency=100, logGradientToo=false, onlyUpToRow=9, onlyUpToT=3, format=[ type = "real" ; transpose = false ; precisionFormat = ".4" ]))
+TraceDense (h, what) = Trace (h, say=what, logFirst=10, logFrequency=100, logGradientToo=false, onlyUpToRow=21, onlyUpToT=25, format=[ type = "real" ; transpose = false ; precisionFormat = ".4" ])
+TraceDenseTransposed (h, what) = Trace (h, say=what, logFirst=10, logFrequency=100, logGradientToo=false, onlyUpToRow=9, onlyUpToT=25, format=[ type = "real" ; transpose = true ; precisionFormat = ".4" ])
+TraceOneHot (h, what) = Trace (h, say=what, logFirst=10, logFrequency=100, logGradientToo=false, format=[ type = "category" ; transpose = false ])
+TraceSparse (h, what) = Trace (h, say=what, logFirst=10, logFrequency=100, logGradientToo=false, format=[ type = "sparse" ; transpose = false ])
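# (editorial sketch, not part of this commit: these Trace helpers pass their
# input through unchanged, so a node can be instrumented in place, e.g.
#   z = TraceDense (z, "z")
# which would log the first rows/steps of z at the configured logFrequency)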
])

#######################################
2 changes: 1 addition & 1 deletion Examples/SequenceToSequence/Miscellaneous/G2P/README.txt
@@ -2,7 +2,7 @@
This example demonstrates the use of CNTK for letter-to-sound conversion using a
sequence-to-sequence model with attention.

-Unfortunately, the data is not public. This shall be addressed in a future update.
+This example uses the CMUDict corpus. The data or a conversion script will be included soon.

To Use:
=======
52 changes: 0 additions & 52 deletions Examples/SequenceToSequence/Miscellaneous/G2P/S2SLib.bs

This file was deleted.

30 changes: 21 additions & 9 deletions Source/CNTK/BrainScript/CNTKCoreLib/CNTK.core.bs
@@ -586,44 +586,54 @@ RNNs =
# helper function to delay h and c
# Callers can provide their own, e.g. useful for beam decoding.
-PreviousHC (lstmState) = [
+PreviousHC (lstmState, layerIndex=0) = [
h = Loop.Previous (lstmState.h) // hidden state(t-1)
c = Loop.Previous (lstmState.c) // cell(t-1)
dim = lstmState.dim
]
# pass previousHook=BS.RNNs.NextHC instead of PreviousHC to get a right-to-left recurrence
-NextHC (lstmState) = [
+NextHC (lstmState, layerIndex=0) = [
h = Loop.Next (lstmState.h) // hidden state(t+1)
c = Loop.Next (lstmState.c) // cell(t+1)
dim = lstmState.dim
]
+NoAuxInputHook (input, lstmState) = Constants.None
# this implements a recurrent (stateful) LSTM with projection and self-stabilization
# It returns a record (h,c). To use its output, say .h
# By default, this is left-to-right. Pass previousHook=BS.RNNs.NextHC for a right-to-left model.
RecurrentLSTMP (outputDim/*h.dim*/, cellDim=outputDim,
x, inputDim=x.dim,
previousHook=BS.RNNs.PreviousHC,
-augmentInputHook=[NoAuxInputHook (input, lstmState) = Constants.None].NoAuxInputHook, augmentInputDim=0,
+augmentInputHook=NoAuxInputHook, augmentInputDim=0,
+layerIndex=0,
enableSelfStabilization=false) =
[
-prevState = previousHook (lstmState) # recurrent memory. E.g. Previous or Next, with or without initial state, beam reordering etc.
+enableSelfStabilization1 = enableSelfStabilization ; cellDim1 = cellDim ; inputDim1 = inputDim ; layerIndex1 = layerIndex # workaround: BS syntax cannot yet say ^.enableSelfStabilization
+prevState = previousHook (lstmState, layerIndex=layerIndex1) # recurrent memory. E.g. Previous or Next, with or without initial state, beam reordering etc.
auxInput = augmentInputHook(x, prevState) # optionally augment input. Constants.None if none.
-enableSelfStabilization1 = enableSelfStabilization ; cellDim1 = cellDim ; inputDim1 = inputDim # TODO: BS syntax needs to allow to say ^.enableSelfStabilization
lstmState = BS.RNNs.LSTMP (outputDim, cellDim=cellDim1, x, inputDim=inputDim1, aux=auxInput, auxDim=augmentInputDim, prevState, enableSelfStabilization=enableSelfStabilization1)
].lstmState // that's the value we return
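For reference, one step of an LSTM with a projected output (the "P" in LSTMP) can be sketched in numpy as below; the aux argument stands in for the auxInput concatenation above, weight names and shapes are illustrative assumptions, and the self-stabilizers are omitted:

    import numpy as np

    def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))

    def lstmp_step(x, aux, h_prev, c_prev, W, R, P, b):
        # W: (4*cell_dim, input_dim + aux_dim)  input weights
        # R: (4*cell_dim, output_dim)           recurrent weights on projected h
        # P: (output_dim, cell_dim)             output projection
        # b: (4*cell_dim,)                      bias; gate order i,f,g,o assumed
        xa = np.concatenate([x, aux]) if aux is not None else x
        z = W @ xa + R @ h_prev + b
        i, f, g, o = np.split(z, 4)
        c = sigmoid(f) * c_prev + sigmoid(i) * np.tanh(g)   # new cell state
        h = P @ (sigmoid(o) * np.tanh(c))                   # projected output
        return h, c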
# a stack of recurrent LSTMs (unidirectional)
-RecurrentLSTMPStack (layerDims, cellDims=layerDims, input, inputDim=input.dim, previousHook=PreviousHC, enableSelfStabilization=false) = [
-previousHook1 = previousHook ; useStabilizer = enableSelfStabilization
+RecurrentLSTMPStack (layerDims, cellDims=layerDims,
+input, inputDim=input.dim,
+previousHook=PreviousHC,
+augmentInputHook=NoAuxInputHook, augmentInputDim=0,
+enableSelfStabilization=false) =
+[
+previousHook1 = previousHook ; useStabilizer = enableSelfStabilization ; augmentInputHook1 = augmentInputHook ; augmentInputDim1 = augmentInputDim
layers[i:0..Length (layerDims)-1] =
RecurrentLSTMP (layerDims[i], cellDim=cellDims[i],
-if i == 0 then input else Parameters.Stabilize (layers[i-1].h, enabled=useStabilizer), inputDim=if i == 0 then inputDim else layerDims[i-1] /*TODO: layers[i-1].dim*/,
+if i == 0 then input else Parameters.Stabilize (layers[i-1].h, enabled=useStabilizer), inputDim=if i == 0 then inputDim else layers[i-1].dim,
previousHook=previousHook1,
+augmentInputHook=if i == 0 then augmentInputHook1 else NoAuxInputHook, augmentInputDim=if i == 0 then augmentInputDim1 else 0,
+layerIndex=i,
enableSelfStabilization=useStabilizer)
].layers
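The wiring above: layer 0 consumes the external input plus the optional auxiliary input, while each later layer consumes the stabilized output of the layer below and gets NoAuxInputHook. A hypothetical Python sketch of that wiring (stabilization omitted):

    def run_lstm_stack(layers, x_seq, aux_seq=None):
        # layers: list of callables (h_seq, aux_seq, layer_index) -> h_seq
        h_seq = x_seq
        for i, layer in enumerate(layers):
            # only layer 0 receives the auxiliary input; cf. NoAuxInputHook above
            aux = aux_seq if i == 0 else None
            h_seq = layer(h_seq, aux, i)
        return h_seq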
@@ -638,10 +648,12 @@ RNNs =
fwd = RecurrentLSTMP (layerDims[i], cellDim=cellDims[i],
v, inputDim=vDim,
previousHook=previousHook1,
+layerIndex=i,
enableSelfStabilization=useStabilizer)
bwd = RecurrentLSTMP (layerDims[i], cellDim=cellDims[i],
v, inputDim=vDim,
previousHook=nextHook1,
+layerIndex=i,
enableSelfStabilization=useStabilizer)
h = Splice ((fwd.h : bwd.h), axis=1)
c = Splice ((fwd.c : bwd.c), axis=1)
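For intuition, the bidirectional pairing above runs the same input through a forward and a backward recurrence and splices the results along the feature axis. A simplified numpy sketch (LSTM cell details elided, names hypothetical):

    import numpy as np

    def bidirectional_layer(step_fwd, step_bwd, x_seq, h0):
        # step(x, h) -> h; x_seq: (T, d_in); h0: (d_out,)
        def scan(step, xs):
            h, out = h0, []
            for x in xs:
                h = step(x, h)
                out.append(h)
            return np.stack(out)
        fwd = scan(step_fwd, x_seq)              # left-to-right (PreviousHC)
        bwd = scan(step_bwd, x_seq[::-1])[::-1]  # right-to-left (NextHC)
        # the Splice along axis=1: concatenate along the feature axis
        return np.concatenate([fwd, bwd], axis=1)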
