
Commit

sandbox/language_id: Merging in trunk
git-svn-id: https://svn.code.sf.net/p/kaldi/code/sandbox/language_id@4569 5e6a8d80-dfce-4ca6-a32a-6e07a63d50c8
david-ryan-snyder committed Oct 30, 2014
2 parents 463b062 + 2420985 commit 62087d0
Showing 655 changed files with 37,267 additions and 7,020 deletions.
8 changes: 8 additions & 0 deletions .gitignore
@@ -381,6 +381,7 @@

# /src/latbin/
/src/latbin/.depend.mk
+/src/latbin/lattice-lmrescore-const-arpa
/src/latbin/lattice-prune
/src/latbin/lattice-rmali
/src/latbin/lattice-compose
@@ -434,6 +435,9 @@
/src/lm/.depend.mk
/src/lm/lm-lib-test

+# /src/lmbin/
+src/lmbin/arpa-to-const-arpa

# /src/matrix/
/src/matrix/.depend.mk
/src/matrix/Matrix.vcxproj
@@ -821,6 +825,7 @@
/src/nnet2bin/nnet1-to-raw-nnet
/src/nnet2bin/raw-nnet-copy
/src/online2bin/apply-cmvn-online
+/src/online2bin/online2-wav-nnet2-am-compute
/src/online2bin/compress-uncompress-speex
/src/online2bin/extend-wav-with-silence
/src/online2bin/ivector-extract-online2
@@ -849,3 +854,6 @@
/src/online2bin/.depend.mk
/src/online2/.depend.mk
/src/ivector/.depend.mk
+/egs/librispeech/s5/data
+/egs/librispeech/s5/mfcc
+/egs/librispeech/s5/exp
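
The new ignore entries above track the const-ARPA language-model tools this merge pulls in from trunk (src/lmbin/arpa-to-const-arpa and src/latbin/lattice-lmrescore-const-arpa). A minimal usage sketch follows; the paths, symbol IDs, and option names mirror the later Kaldi wrapper scripts and should be treated as assumptions, not as part of this commit:

# 1) Build a const-ARPA LM from an ARPA file whose words have already been
#    mapped to integer IDs (the bos/eos/unk symbol IDs below are placeholders):
arpa-to-const-arpa --bos-symbol=1 --eos-symbol=2 --unk-symbol=3 \
  data/local/lm_big_mapped.arpa data/lang_test/G.carpa

# 2) Rescore existing lattices: first subtract the scores of the small G.fst
#    used for decoding, then add the const-ARPA scores (some setups project
#    G.fst to its output labels first):
lattice-lmrescore --lm-scale=-1.0 \
  "ark:gunzip -c exp/tri4/decode/lat.1.gz |" data/lang_test/G.fst ark:- | \
lattice-lmrescore-const-arpa --lm-scale=1.0 \
  ark:- data/lang_test/G.carpa "ark,t:|gzip -c > exp/tri4/decode_rescore/lat.1.gz"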
2 changes: 1 addition & 1 deletion egs/ami/s5/local/ami_format_data.sh
@@ -29,7 +29,7 @@ gunzip -c "$arpa_lm" | \
utils/remove_oovs.pl /dev/null | \
utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=data/lang_test/words.txt \
--osymbols=data/lang_test/words.txt --keep_isymbols=false --keep_osymbols=false | \
-fstrmepsilon > data/lang_test/G.fst
+fstrmepsilon | fstarcsort --sort_type=ilabel > data/lang_test/G.fst
fstisstochastic data/lang_test/G.fst

echo "Checking how stochastic G is (the first of these numbers should be small):"
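
This one-line change recurs throughout the commit: an fstarcsort --sort_type=ilabel stage is inserted just before G.fst is written, so the grammar ends up sorted on its input labels, which is what OpenFst composition typically expects of the right-hand argument (G.fst is normally the right-hand side, as in L o G). For reference, the complete pipeline as these scripts now build it -- a condensed sketch with illustrative paths, not a verbatim copy of any one script:

gunzip -c data/local/lm.arpa.gz | \
  arpa2fst - | fstprint | \
  utils/remove_oovs.pl /dev/null | \
  utils/eps2disambig.pl | utils/s2eps.pl | \
  fstcompile --isymbols=data/lang_test/words.txt --osymbols=data/lang_test/words.txt \
    --keep_isymbols=false --keep_osymbols=false | \
  fstrmepsilon | fstarcsort --sort_type=ilabel > data/lang_test/G.fst
fstisstochastic data/lang_test/G.fst   # sanity check: the first number printed should be small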
2 changes: 1 addition & 1 deletion egs/ami/s5/path.sh
@@ -8,7 +8,7 @@ KALDI_ROOT=/gpfs/scratch/s1136550/kaldi-code
KALDISRC=$KALDI_ROOT/src
KALDIBIN=$KALDISRC/bin:$KALDISRC/featbin:$KALDISRC/fgmmbin:$KALDISRC/fstbin
KALDIBIN=$KALDIBIN:$KALDISRC/gmmbin:$KALDISRC/latbin:$KALDISRC/nnetbin
-KALDIBIN=$KALDIBIN:$KALDISRC/sgmmbin:$KALDISRC/tiedbin
+KALDIBIN=$KALDIBIN:$KALDISRC/sgmmbin

FSTBIN=$KALDI_ROOT/tools/openfst/bin
LMBIN=$KALDI_ROOT/tools/irstlm/bin
2 changes: 1 addition & 1 deletion egs/aurora4/s5/local/aurora4_format_data.sh
@@ -59,7 +59,7 @@ for lm_suffix in bg tgpr tg bg_5k tgpr_5k tg_5k; do
utils/remove_oovs.pl $tmpdir/oovs_${lm_suffix}.txt | \
utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$test/words.txt \
--osymbols=$test/words.txt --keep_isymbols=false --keep_osymbols=false | \
-fstrmepsilon > $test/G.fst
+fstrmepsilon | fstarcsort --sort_type=ilabel > $test/G.fst
fstisstochastic $test/G.fst
# The output is like:
# 9.14233e-05 -0.259833
2 changes: 1 addition & 1 deletion egs/aurora4/s5/local/wsj_format_data.sh
@@ -59,7 +59,7 @@ for lm_suffix in bg tgpr tg bg_5k tgpr_5k tg_5k; do
utils/remove_oovs.pl $tmpdir/oovs_${lm_suffix}.txt | \
utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$test/words.txt \
--osymbols=$test/words.txt --keep_isymbols=false --keep_osymbols=false | \
-fstrmepsilon > $test/G.fst
+fstrmepsilon | fstarcsort --sort_type=ilabel > $test/G.fst
fstisstochastic $test/G.fst
# The output is like:
# 9.14233e-05 -0.259833
8 changes: 4 additions & 4 deletions egs/aurora4/s5/local/wsj_format_local_lms.sh
@@ -25,28 +25,28 @@ gunzip -c $lm_srcdir_3g/lm_pr6.0.gz | \
arpa2fst - | fstprint | \
utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$lang/words.txt \
--osymbols=$lang/words.txt --keep_isymbols=false --keep_osymbols=false | \
-fstrmepsilon > data/lang_test_bd_tgpr/G.fst || exit 1;
+fstrmepsilon | fstarcsort --sort_type=ilabel > data/lang_test_bd_tgpr/G.fst || exit 1;
fstisstochastic data/lang_test_bd_tgpr/G.fst

gunzip -c $lm_srcdir_3g/lm_unpruned.gz | \
arpa2fst - | fstprint | \
utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$lang/words.txt \
--osymbols=$lang/words.txt --keep_isymbols=false --keep_osymbols=false | \
-fstrmepsilon > data/lang_test_bd_tg/G.fst || exit 1;
+fstrmepsilon | fstarcsort --sort_type=ilabel > data/lang_test_bd_tg/G.fst || exit 1;
fstisstochastic data/lang_test_bd_tg/G.fst

gunzip -c $lm_srcdir_4g/lm_unpruned.gz | \
arpa2fst - | fstprint | \
utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$lang/words.txt \
--osymbols=$lang/words.txt --keep_isymbols=false --keep_osymbols=false | \
-fstrmepsilon > data/lang_test_bd_fg/G.fst || exit 1;
+fstrmepsilon | fstarcsort --sort_type=ilabel > data/lang_test_bd_fg/G.fst || exit 1;
fstisstochastic data/lang_test_bd_fg/G.fst

gunzip -c $lm_srcdir_4g/lm_pr7.0.gz | \
arpa2fst - | fstprint | \
utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$lang/words.txt \
--osymbols=$lang/words.txt --keep_isymbols=false --keep_osymbols=false | \
-fstrmepsilon > data/lang_test_bd_fgpr/G.fst || exit 1;
+fstrmepsilon | fstarcsort --sort_type=ilabel > data/lang_test_bd_fgpr/G.fst || exit 1;
fstisstochastic data/lang_test_bd_fgpr/G.fst

exit 0;
2 changes: 1 addition & 1 deletion egs/babel/s5/local/arpa2G.sh
@@ -46,7 +46,7 @@ gunzip -c $lmfile | \
utils/s2eps.pl | \
fstcompile --isymbols=$langdir/words.txt \
--osymbols=$langdir/words.txt --keep_isymbols=false --keep_osymbols=false | \
-fstrmepsilon > $destdir/G.fst || exit 1
+fstrmepsilon | fstarcsort --sort_type=ilabel > $destdir/G.fst || exit 1
fstisstochastic $destdir/G.fst || true

exit 0
2 changes: 1 addition & 1 deletion egs/babel/s5/local/arpa2G_syllables.sh
@@ -44,7 +44,7 @@ gunzip -c $lmfile | \
fstcompile --isymbols=$langdir/words.txt \
--osymbols=$langdir/words.txt --keep_isymbols=false --keep_osymbols=false | \
fstrhocompose "$rho" - $destdir/rho.fst | \
-fstrmepsilon > $destdir/G.fst || exit 1
+fstrmepsilon | fstarcsort --sort_type=ilabel > $destdir/G.fst || exit 1

fstisstochastic $destdir/G.fst || true

2 changes: 1 addition & 1 deletion egs/babel/s5/local/prepare_kaldi_lm_from_training_text.sh
@@ -211,7 +211,7 @@ gunzip -c $gzipped_ARPA_LM | \
arpa2fst - | fstprint | \
utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$lang/words.txt \
--osymbols=$lang/words.txt --keep_isymbols=false --keep_osymbols=false | \
-fstrmepsilon > $lang/G.fst || exit 1;
+fstrmepsilon | fstarcsort --sort_type=ilabel > $lang/G.fst || exit 1;
fstisstochastic $lang/G.fst

##################################################################
2 changes: 1 addition & 1 deletion egs/babel/s5/local/wsj_format_data.sh
@@ -59,7 +59,7 @@ for lm_suffix in bg tgpr tg bg_5k tgpr_5k tg_5k; do
utils/remove_oovs.pl $tmpdir/oovs_${lm_suffix}.txt | \
utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$test/words.txt \
--osymbols=$test/words.txt --keep_isymbols=false --keep_osymbols=false | \
-fstrmepsilon > $test/G.fst
+fstrmepsilon | fstarcsort --sort_type=ilabel > $test/G.fst
fstisstochastic $test/G.fst
# The output is like:
# 9.14233e-05 -0.259833
8 changes: 4 additions & 4 deletions egs/babel/s5/local/wsj_format_local_lms.sh
@@ -27,28 +27,28 @@ gunzip -c $lm_srcdir_3g/lm_pr6.0.gz | \
arpa2fst - | fstprint | \
utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$lang/words.txt \
--osymbols=$lang/words.txt --keep_isymbols=false --keep_osymbols=false | \
-fstrmepsilon > data/lang_test_bd_tgpr/G.fst || exit 1;
+fstrmepsilon | fstarcsort --sort_type=ilabel > data/lang_test_bd_tgpr/G.fst || exit 1;
fstisstochastic data/lang_test_bd_tgpr/G.fst

gunzip -c $lm_srcdir_3g/lm_unpruned.gz | \
arpa2fst - | fstprint | \
utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$lang/words.txt \
--osymbols=$lang/words.txt --keep_isymbols=false --keep_osymbols=false | \
-fstrmepsilon > data/lang_test_bd_tg/G.fst || exit 1;
+fstrmepsilon | fstarcsort --sort_type=ilabel > data/lang_test_bd_tg/G.fst || exit 1;
fstisstochastic data/lang_test_bd_tg/G.fst

gunzip -c $lm_srcdir_4g/lm_unpruned.gz | \
arpa2fst - | fstprint | \
utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$lang/words.txt \
--osymbols=$lang/words.txt --keep_isymbols=false --keep_osymbols=false | \
-fstrmepsilon > data/lang_test_bd_fg/G.fst || exit 1;
+fstrmepsilon | fstarcsort --sort_type=ilabel > data/lang_test_bd_fg/G.fst || exit 1;
fstisstochastic data/lang_test_bd_fg/G.fst

gunzip -c $lm_srcdir_4g/lm_pr7.0.gz | \
arpa2fst - | fstprint | \
utils/eps2disambig.pl | utils/s2eps.pl | fstcompile --isymbols=$lang/words.txt \
--osymbols=$lang/words.txt --keep_isymbols=false --keep_osymbols=false | \
-fstrmepsilon > data/lang_test_bd_fgpr/G.fst || exit 1;
+fstrmepsilon | fstarcsort --sort_type=ilabel > data/lang_test_bd_fgpr/G.fst || exit 1;
fstisstochastic data/lang_test_bd_fgpr/G.fst

exit 0;
116 changes: 116 additions & 0 deletions egs/babel/s5b/EXAMPLE.vietnamese
@@ -0,0 +1,116 @@
#!/bin/bash

#This is an example sequence of commands for running the default Kaldi Babel OP1 system.
#It is not really meant to be run as a script, even though you can try :)

./run-1-main.sh
./run-2a-nnet-ensemble-gpu.sh
./run-2b-bnf.sh --semisupervised false --ali-dir exp/tri5_ali/
./run-3b-bnf-sgmm.sh --semisupervised false
./run-3b-bnf-nnet.sh --semisupervised false

##Training of the automatic segmenter
./run-2-segmentation.sh

##Decoding the automatic segmentation of the dev2h subset. dev2h.pem would mean decoding
##the dev2h subset using the officially provided segmentation.
##It is also possible to run dev10h.pem, dev10h.uem, dev10h.seg and so on...
./run-4-anydecode.sh --dir dev2h.seg
./run-4b-anydecode-bnf.sh --dir dev2h.seg --semisupervised false --extra-kws true

##Decoding of the unsupervised data
./run-4-anydecode.sh --dir unsup.seg --skip-kws true --skip-stt true
./run-4b-anydecode-bnf.sh --dir unsup.seg --skip-kws true --skip-stt true --semisupervised false

##Get the one-best path and the weights for frame-weighting of posteriors
./local/best_path_weights.sh --cmd "$train_cmd" data/unsup.seg/ data/lang \
exp/tri6b_nnet/decode_unsup.seg/ \
exp/sgmm5_mmi_b0.1/decode_fmllr_unsup.seg_it1/ \
exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_unsup.seg_it1 \
exp_bnf/tri7_nnet/decode_unsup.seg \
exp_bnf_semisup/best_path_weights/unsup.seg

##Semisupervised bottleneck system training (initial setup)
./run-2b-bnf.sh --semisupervised true --ali-model exp/tri6b_nnet/ \
--weights-dir exp/best_path_weights/unsup.seg/decode_unsup.seg/

##Semisup training, SGMM+bMMI on top of the BN features
./run-3b-bnf-sgmm.sh --semisupervised true
##Semisup training, pNorm DNN on top of the BN features
./run-3b-bnf-nnet.sh --semisupervised true

##And decoding again. We decode the unsup.seg again to do the second run of the
##semisupervised training
./run-4b-anydecode-bnf.sh --dir dev2h.seg --semisupervised true --extra-kws true
./run-4b-anydecode-bnf.sh --dir unsup.seg --skip-kws true --skip-stt true --semisupervised true

##One-best output and frame weights for the second run of the semisup training
./local/best_path_weights.sh --cmd "$train_cmd" data/unsup.seg/ data/lang \
exp_bnf_semisup/sgmm7_mmi_b0.1/decode_fmllr_unsup.seg_it1 \
exp_bnf_semisup/tri7_nnet/decode_unsup.seg \
exp/tri6b_nnet/decode_unsup.seg/ \
exp/sgmm5_mmi_b0.1/decode_fmllr_unsup.seg_it1/ \
exp_bnf/sgmm7_mmi_b0.1/decode_fmllr_unsup.seg_it1 \
exp_bnf/tri7_nnet/decode_unsup.seg \
exp_bnf_semisup2/best_path_weights/unsup.seg

##Second run of the semisup training
./run-2b-bnf.sh --unsup-string "_semisup2" --semisupervised true --ali-model exp/tri6b_nnet/ \
--weights-dir exp_bnf_semisup2/best_path_weights/unsup.seg/decode_fmllr_unsup.seg_it1/

./run-3b-bnf-sgmm.sh --semisupervised true --unsup_string "_semisup2"
./run-3b-bnf-nnet.sh --semisupervised true --unsup_string "_semisup2"

##Decode again to see if we got an improvement
./run-4b-anydecode-bnf.sh --dir dev2h.seg --semisupervised true --unsup_string "_semisup2" --extra-kws true


##Decoding of the dev10h (all systems, all stages)
./run-4-anydecode.sh --dir dev10h.seg --extra-kws true
./run-4b-anydecode-bnf.sh --dir dev10h.seg --semisupervised false --extra-kws true
./run-4b-anydecode-bnf.sh --dir dev10h.seg --semisupervised true --extra-kws true
./run-4b-anydecode-bnf.sh --dir dev10h.seg --semisupervised true --extra-kws true --unsup_string "_semisup2"

##Decoding of the shadow.seg (combination of dev10h.seg and eval.seg)
##We did this for the eval run as a kind of "sanity check" -- we compare the shadow.seg/dev10h.seg subset
##performance against the standalone dev10h.seg performance to catch possible problems (hopefully)
./run-4-anydecode.sh --dir shadow.seg --extra-kws true
./run-4b-anydecode-bnf.sh --dir shadow.seg --semisupervised false --extra-kws true
./run-4b-anydecode-bnf.sh --dir shadow.seg --semisupervised true --extra-kws true
./run-4b-anydecode-bnf.sh --dir shadow.seg --semisupervised true --extra-kws true --unsup_string "_semisup2"



#This prepares for the separation/split of the shadow dataset into the dev set, which we can evaluate,
# and the eval set, which we will submit
#Note: we do this only once, for ./data, as we do not really need anything else,
#just the file lists...
#NB: there was an oversight in one of the scripts that caused the ctm files to contain
#NB: incorrect channel info (A instead of 1)
#NB: To fix that, you can run something like this:
#NB: find exp/ -name "shadow.seg.ctm" | xargs -t -n 1 sed -i'.bakx' 's/ A / 1 /g'
./local/nist_eval/create_compound_set.sh --evlset eval.seg --devset dev10h.seg --tgtdir data/shadow.seg

./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg dev10h.seg exp/tri6b_nnet/decode_shadow.seg
./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg eval.seg exp/tri6b_nnet/decode_shadow.seg

./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg dev10h.seg exp/sgmm5_mmi_b0.1/decode_*shadow.seg*
./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg eval.seg exp/sgmm5_mmi_b0.1/decode_*shadow.seg*

./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg dev10h.seg exp_bnf/sgmm7_mmi_b0.1/decode_*shadow.seg*
./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg eval.seg exp_bnf/sgmm7_mmi_b0.1/decode_*shadow.seg*

./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg dev10h.seg exp_bnf_semisup/sgmm7_mmi_b0.1/decode_*shadow.seg*
./local/nist_eval/filter_data.sh --cmd "$decode_cmd" data/shadow.seg eval.seg exp_bnf_semisup/sgmm7_mmi_b0.1/decode_*shadow.seg*

#The following commands will actually do two things:
#a) by looking at the performance of the dataset given as --master <dataset>, they figure out the correct LMW
#b) they symlink the appropriate evaluation result file under the correct EXPID into the ./release directory
#Warning: it's a lot of files, so it's easy to get confused!
./local/nist_eval/make_release.sh --dryrun false --dir exp/sgmm5_mmi_b0.1 --data data/shadow.seg --master dev10h.seg lang.conf ./release
./local/nist_eval/make_release.sh --dryrun false --dir exp/tri6b_nnet --data data/shadow.seg --master dev10h.seg lang.conf ./release
./local/nist_eval/make_release.sh --dryrun false --dir exp_bnf/sgmm7_mmi_b0.1 --data data/shadow.seg --master dev10h.seg lang.conf ./release
./local/nist_eval/make_release.sh --dryrun false --dir exp_bnf_semisup/sgmm7_mmi_b0.1 --extrasys SEMISUPX --data data/shadow.seg --master dev10h.seg lang.conf ./release

#Combine results (what we call 4way-combo)

21 changes: 8 additions & 13 deletions egs/babel/s5b/conf/common.fullLP
@@ -52,9 +52,9 @@ if [[ `hostname` == *.tacc.utexas.edu ]] ; then
sgmm_mmi_extra_opts=(--cmd "local/lonestar.py -pe smp 2")
dnn_denlats_extra_opts=( --num-threads 2 )

-dnn_cpu_parallel_opts=(--minibatch-size 128 --max-change 10 --num-jobs-nnet 8 --num-threads 16 \
+dnn_cpu_parallel_opts=(--minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \
--parallel-opts "-pe smp 16" )
-dnn_gpu_parallel_opts=(--minibatch-size 512 --max-change 40 --num-jobs-nnet 8 --num-threads 1)
+dnn_gpu_parallel_opts=(--minibatch-size 512 --num-jobs-nnet 8 --num-threads 1)

dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 8 --num-threads 1)
dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 8 --num-threads 1)
@@ -67,9 +67,9 @@ else
sgmm_mmi_extra_opts=(--cmd "queue.pl -l arch=*64 -l mem_free=3.2G,ram_free=3.2G")
dnn_denlats_extra_opts=(--num-threads 4 --parallel-opts "-pe smp 4" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=0.8G")

-dnn_cpu_parallel_opts=(--minibatch-size 128 --max-change 10 --num-jobs-nnet 8 --num-threads 16 \
+dnn_cpu_parallel_opts=(--minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \
--parallel-opts "-pe smp 16" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=1G")
-dnn_gpu_parallel_opts=(--minibatch-size 512 --max-change 40 --num-jobs-nnet 8 --num-threads 1 \
+dnn_gpu_parallel_opts=(--minibatch-size 512 --num-jobs-nnet 8 --num-threads 1 \
--parallel-opts "-l gpu=1" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=1G")
dnn_parallel_opts="-l gpu=1"
dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 8 --num-threads 1 \
@@ -86,19 +86,14 @@ wip=0.5

phoneme_mapping=

extend_lexicon=true
unk_fraction_boost=1.0
num_sent_gen=12000000
num_prons=1000000

minimize=true

proxy_phone_beam=-1
proxy_phone_nbest=-1
proxy_beam=5
proxy_nbest=500

proxy_extlex_phone_beam=-1
proxy_extlex_phone_nbest=300
proxy_extlex_beam=-1
proxy_extlex_nbest=-1
extlex_proxy_phone_beam=5
extlex_proxy_phone_nbest=300
extlex_proxy_beam=-1
extlex_proxy_nbest=-1
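
These conf files are sourced by the recipe's run scripts (via lang.conf in the standard setup), and each *_opts variable is a bash array whose elements are expanded as separate arguments into the corresponding training command, so quoted values such as --parallel-opts "-l gpu=1" survive word splitting. A hypothetical illustration of the mechanism only; the script name and positional arguments below are placeholders, not taken from this diff:

. conf/common.fullLP   # defines dnn_gpu_parallel_opts, etc. (path is an assumption)

# "${array[@]}" passes each option string intact to the training script:
steps/nnet2/train_pnorm.sh "${dnn_gpu_parallel_opts[@]}" \
  data/train data/lang exp/tri5_ali exp/tri6b_nnet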
19 changes: 7 additions & 12 deletions egs/babel/s5b/conf/common.limitedLP
@@ -56,7 +56,7 @@ if [[ `hostname` == *.tacc.utexas.edu ]] ; then

dnn_cpu_parallel_opts=(--minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \
--parallel-opts "-pe smp 16" )
-dnn_gpu_parallel_opts=(--minibatch-size 512 --max-change 40 --num-jobs-nnet 4 --num-threads 1
+dnn_gpu_parallel_opts=(--minibatch-size 512 --num-jobs-nnet 4 --num-threads 1
--parallel-opts "-pe smp 16" )

dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 4 --num-threads 1)
@@ -70,9 +70,9 @@ else
sgmm_mmi_extra_opts=(--cmd "queue.pl -l arch=*64 -l mem_free=1.5G,ram_free=1.5G")
dnn_denlats_extra_opts=(--num-threads 4 --parallel-opts "-pe smp 4" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=0.8G")

-dnn_cpu_parallel_opts=(--minibatch-size 128 --max-change 10 --num-jobs-nnet 8 --num-threads 16 \
+dnn_cpu_parallel_opts=(--minibatch-size 128 --num-jobs-nnet 8 --num-threads 16 \
--parallel-opts "-pe smp 16" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=1G")
-dnn_gpu_parallel_opts=(--minibatch-size 512 --max-change 40 --num-jobs-nnet 4 --num-threads 1 \
+dnn_gpu_parallel_opts=(--minibatch-size 512 --num-jobs-nnet 4 --num-threads 1 \
--parallel-opts "-l gpu=1" --cmd "queue.pl -l arch=*64 -l mem_free=2G,ram_free=1G")
dnn_parallel_opts="-l gpu=1"
dnn_gpu_mpe_parallel_opts=(--num-jobs-nnet 4 --num-threads 1 \
@@ -91,19 +91,14 @@ wip=0.5

phoneme_mapping=

extend_lexicon=true
unk_fraction_boost=1.0
num_sent_gen=12000000
num_prons=1000000

minimize=true

proxy_phone_beam=-1
proxy_phone_nbest=-1
proxy_beam=5
proxy_nbest=500

proxy_extlex_phone_beam=-1
proxy_extlex_phone_nbest=300
proxy_extlex_beam=-1
proxy_extlex_nbest=-1
extlex_proxy_phone_beam=5
extlex_proxy_phone_nbest=300
extlex_proxy_beam=-1
extlex_proxy_nbest=-1
@@ -47,7 +47,7 @@ eval_nj=64
#Shadow data files
shadow_data_dir=(
/export/babel/data/107-vietnamese/release-current/conversational/dev/
-/export/babel/data/IARPA-BABEL_OP1_dev_eval/BABEL_OP1_201/conversational/eval
+/export/babel/data/107-vietnamese/release-current/conversational/eval/
)
shadow_data_cmudb=/export/babel/data/splits/Vietnamese_Babel107/uem/conv-eval/db-v8-utt.dat
shadow_data_list=(
2 changes: 1 addition & 1 deletion egs/babel/s5b/local/augment_original_stm.pl
@@ -53,7 +53,7 @@
chop;
(my $filename, my $line, my $aggregated, my $seg_start, my $seg_end, my $text) = split(/\s+/, $_, 6);
#print "$filename, $seg_start, $seg_end, $text\n";
$line="A";
$line="1";
if (( $prev_filename ne $filename ) && ( ";;$prev_filename" ne $filename)){
my $_filename = $filename;
$_filename =~ s/^;;//g;