Skip to content

Commit

Permalink
Code changes for 30 ms frame-shift training and sMBR decoding
Browse files Browse the repository at this point in the history
  • Loading branch information
fmetze committed May 12, 2016
1 parent 2a6f224 commit bc80076
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 9 deletions.
2 changes: 1 addition & 1 deletion asr_egs/tedlium/v1/local/score_sclite.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ mkdir -p $dir/scoring/log

# We are not using lattice-align-words, which may result in minor degradation
if [ $stage -le 0 ]; then
if false; then
if true; then
# This leads to slightly lower WERs on some tasks
$cmd ACWT=$min_acwt:$max_acwt $dir/scoring/log/get_ctm.ACWT.log \
mkdir -p $dir/score_ACWT/ '&&' \
Expand Down
3 changes: 2 additions & 1 deletion asr_egs/wsj/steps/decode_ctc_lat.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ max_active=7000 # max-active
beam=15.0 # beam used
lattice_beam=8.0
max_mem=50000000 # approx. limit to memory consumption during minimization in bytes
model=final.nnet

skip_scoring=false # whether to skip WER scoring
scoring_opts="--min-acwt 5 --max-acwt 10 --acwt-factor 0.1"
Expand Down Expand Up @@ -79,7 +80,7 @@ $subsample_feats && feats="$feats subsample-feats --n=3 --offset=0 ark:- ark:- |

# Decode for each of the acoustic scales
$cmd JOB=1:$nj $dir/log/decode.JOB.log \
net-output-extract --class-frame-counts=$srcdir/label.counts --apply-log=true $srcdir/final.nnet "$feats" ark:- \| \
net-output-extract --class-frame-counts=$srcdir/label.counts --apply-log=true $srcdir/$model "$feats" ark:- \| \
latgen-faster --max-active=$max_active --max-mem=$max_mem --beam=$beam --lattice-beam=$lattice_beam \
--acoustic-scale=$acwt --allow-partial=true --word-symbol-table=$graphdir/words.txt \
$graphdir/TLG.fst ark:- "ark:|gzip -c > $dir/lat.JOB.gz" || exit 1;
Expand Down
42 changes: 35 additions & 7 deletions asr_egs/wsj/utils/model_topo.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@ def parse_arguments(arg_elements):
--fgate-bias-init : float
Initial value of the forget-gate bias. Not specifying this option means the forget-gate bias
will be initialized randomly, in the same way as the other parameters.
--input-dim : int
    Reduce the input features to a given dimensionality before passing them to the LSTM.
    Optional.
--projection-dim : int
Project the feature vector down to a given dimensionality between LSTM layers.
Optional.
"""

Expand All @@ -74,26 +80,48 @@ def parse_arguments(arg_elements):
if arguments.has_key('param_range'):
param_range = arguments['param_range']

actual_cell_dim = 2*lstm_cell_dim
model_type = '<BiLstmParallel>' # by default
if arguments.has_key('lstm_type') and arguments['lstm_type'] == 'uni':
actual_cell_dim = lstm_cell_dim
model_type = '<LstmParallel>'

print '<Nnet>'
lstm_comm = ' <ParamRange> ' + param_range + ' <LearnRateCoef> 1.0 <MaxGrad> 50.0'

# add the option to set the initial value of the forget-gate bias
lstm_comm = ' <ParamRange> ' + param_range + ' <LearnRateCoef> 1.0 <MaxGrad> 50.0'
if arguments.has_key('fgate_bias_init'):
lstm_comm = lstm_comm + ' <FgateBias> ' + arguments['fgate_bias_init']

actual_cell_dim = 2*lstm_cell_dim
if model_type == '<LstmParallel>':
actual_cell_dim = lstm_cell_dim
# add the option to specify projection layers
if arguments.has_key('projection_dim'):
proj_dim = arguments['projection_dim']
else:
proj_dim = 0

# add the option to reduce the dimensionality of the input features
if arguments.has_key('input_dim'):
input_dim = arguments['input_dim']
else:
input_dim = 0


# pre-amble
print '<Nnet>'

# optional dimensionality reduction layer
if input_dim > 0:
print '<AffineTransform> <InputDim> ' + str(input_feat_dim) + ' <OutputDim> ' + str(input_dim) + ' <ParamRange> ' + param_range
input_feat_dim = input_dim

# the first layer takes input features
print model_type + ' <InputDim> ' + str(input_feat_dim) + ' <CellDim> ' + str(actual_cell_dim) + lstm_comm
# the following bidirectional LSTM layers
for n in range(1, lstm_layer_num):
print model_type + ' <InputDim> ' + str(actual_cell_dim) + ' <CellDim> ' + str(actual_cell_dim) + lstm_comm
if proj_dim > 0:
print '<AffineTransform> <InputDim> ' + str(actual_cell_dim) + ' <OutputDim> ' + str(proj_dim) + ' <ParamRange> ' + param_range
print model_type + ' <InputDim> ' + str(proj_dim) + ' <CellDim> ' + str(actual_cell_dim) + lstm_comm
else:
print model_type + ' <InputDim> ' + str(actual_cell_dim) + ' <CellDim> ' + str(actual_cell_dim) + lstm_comm

# the final affine-transform and softmax layer
print '<AffineTransform> <InputDim> ' + str(actual_cell_dim) + ' <OutputDim> ' + str(target_num) + ' <ParamRange> ' + param_range
print '<Softmax> <InputDim> ' + str(target_num) + ' <OutputDim> ' + str(target_num)
Expand Down

0 comments on commit bc80076

Please sign in to comment.