Update ASpIRE setup

wenzhu888 · Feb 20, 2019 · da78aa2 · da78aa2
1 parent 3f19b02
commit da78aa2
Show file tree

Hide file tree

Showing 15 changed files with 37 additions and 34 deletions.
diff --git a/examples/setups/aspire/README.md b/examples/setups/aspire/README.md
@@ -26,12 +26,13 @@ There is an example data setup inside `data` directory. You can skip the rest
 of this section if you simply want to run the example setup.
 
 If you want to decode or align your own recordings, you can edit the files in
-`data` directory. For decoding, you need to provide `data/wav.scp` and
-`data/spk2utt` files. For alignment, you also need to provide `data/text`.
+`data/test` directory. For decoding, you need to provide `data/test/wav.scp` and
+`data/test/spk2utt` files. For alignment, you also need to provide
+`data/test/text`.
 
 ## List of Recordings
 
-The list of utterances `data/wav.scp` has the format:
+The list of utterances `data/test/wav.scp` has the format:
 
     utt1 /path/to/utt1.wav
     utt2 /path/to/utt2.wav
@@ -49,7 +50,7 @@ them to the required format.
 
 ## Speaker to Utterance Map
 
-The speaker to utterance map `data/spk2utt` has the format:
+The speaker to utterance map `data/test/spk2utt` has the format:
 
     spk1 utt1 utt4 ...
     spk2 utt2 utt3 ...
@@ -65,7 +66,7 @@ If no speaker information is available, make it an identity mapping:
 
 ## Transcripts (needed for alignment)
 
-The list of transcripts in `data/text` has the format:
+The list of transcripts in `data/test/text` has the format:
 
     utt1 transcript of first utterance
     utt2 transcript of second utterance
@@ -74,30 +75,30 @@ The list of transcripts in `data/text` has the format:
     ...
 
 Note that these should be tokenized transcripts and all of the tokens should be
-in system vocabulary `exp/langdir/words.txt`.
+in system vocabulary `data/lang/words.txt`.
 
 # ASR
 
-You can decode the utterances listed in `data/wav.scp` with the following
+You can decode the utterances listed in `data/test/wav.scp` with the following
 command.
 
     ./decode.py
 
-Note that the models used in this setup are fairly large so it takes some time
-to load models from disk. To decode with these models, you will need around 2GB
-memory. The decoding script will print a bunch of logs to stderr.
+The decoding script will print a bunch of logs to stderr.
 
-Decoding outputs are written to `out/decode.out`.
+Decoding outputs are written to `out/test/decode.out`.
 
 # Alignment
 
-Align the utterances listed in `data/wav.scp` with the transcripts listed in
-`data/text`:
+You can align the utterances listed in `data/test/wav.scp` with the transcripts
+listed in `data/test/text` using the following command.
 
     ./align.py
 
-Frame-level alignments are written to `out/align.out`.
+The alignment script will print a bunch of logs to stderr.
 
-Phone-level alignments are written to `out/phone_align.out`.
+Frame-level alignments are written to `out/test/align.out`.
 
-Word-level alignments are written to `out/word_align.out`.
+Phone-level alignments are written to `out/test/phone_align.out`.
+
+Word-level alignments are written to `out/test/word_align.out`.
diff --git a/examples/setups/aspire/align.py b/examples/setups/aspire/align.py
@@ -16,30 +16,30 @@
 aligner = NnetAligner.from_files(
     "exp/tdnn_7b_chain_online/final.mdl",
     "exp/tdnn_7b_chain_online/tree",
-    "exp/langdir/L.fst",
-    "exp/langdir/words.txt",
-    "exp/langdir/phones/disambig.int",
+    "data/lang/L.fst",
+    "data/lang/words.txt",
+    "data/lang/phones/disambig.int",
     decodable_opts=decodable_opts)
-phones = SymbolTable.read_text("exp/langdir/phones.txt")
+phones = SymbolTable.read_text("data/lang/phones.txt")
 wb_info = WordBoundaryInfo.from_file(WordBoundaryInfoNewOpts(),
-                                     "exp/langdir/phones/word_boundary.int")
+                                     "data/lang/phones/word_boundary.int")
 
 # Define feature pipelines as Kaldi rspecifiers
 feats_rspec = (
-    "ark:compute-mfcc-feats --config=conf/mfcc.conf scp:data/wav.scp ark:- |"
+    "ark:compute-mfcc-feats --config=conf/mfcc_hires.conf scp:data/test/wav.scp ark:- |"
 )
 ivectors_rspec = (
-    "ark:compute-mfcc-feats --config=conf/mfcc.conf scp:data/wav.scp ark:- |"
-    "ivector-extract-online2 --config=conf/ivector.conf ark:data/spk2utt ark:- ark:- |"
+    "ark:compute-mfcc-feats --config=conf/mfcc_hires.conf scp:data/test/wav.scp ark:- |"
+    "ivector-extract-online2 --config=conf/ivector_extractor.conf ark:data/test/spk2utt ark:- ark:- |"
 )
 
 # Align wav files
 with SequentialMatrixReader(feats_rspec) as f, \
      SequentialMatrixReader(ivectors_rspec) as i, \
-     open("data/text") as t, \
-     open("out/align.out", "w") as a, \
-     open("out/phone_align.out", "w") as p, \
-     open("out/word_align.out", "w") as w:
+     open("data/test/text") as t, \
+     open("out/test/align.out", "w") as a, \
+     open("out/test/phone_align.out", "w") as p, \
+     open("out/test/word_align.out", "w") as w:
     for (fkey, feats), (ikey, ivectors), line in zip(f, i, t):
         tkey, text = line.strip().split(None, 1)
         assert(fkey == ikey == tkey)

diff --git a/examples/setups/aspire/conf/ivector.conf → ...setups/aspire/conf/ivector_extractor.conf b/examples/setups/aspire/conf/ivector.conf → ...setups/aspire/conf/ivector_extractor.conf
diff --git a/examples/setups/aspire/conf/mfcc.conf → examples/setups/aspire/conf/mfcc_hires.conf b/examples/setups/aspire/conf/mfcc.conf → examples/setups/aspire/conf/mfcc_hires.conf
diff --git a/examples/setups/aspire/data/spk2utt → examples/setups/aspire/data/test/spk2utt b/examples/setups/aspire/data/spk2utt → examples/setups/aspire/data/test/spk2utt
diff --git a/examples/setups/aspire/data/text → examples/setups/aspire/data/test/text b/examples/setups/aspire/data/text → examples/setups/aspire/data/test/text
diff --git a/examples/setups/aspire/data/utt1.wav → examples/setups/aspire/data/test/utt1.wav b/examples/setups/aspire/data/utt1.wav → examples/setups/aspire/data/test/utt1.wav
diff --git a/examples/setups/aspire/data/test/wav.scp b/examples/setups/aspire/data/test/wav.scp
@@ -0,0 +1 @@
+utt1 data/test/utt1.wav
diff --git a/examples/setups/aspire/data/wav.scp b/examples/setups/aspire/data/wav.scp
diff --git a/examples/setups/aspire/decode.py b/examples/setups/aspire/decode.py
@@ -18,23 +18,23 @@
 asr = NnetLatticeFasterRecognizer.from_files(
     "exp/tdnn_7b_chain_online/final.mdl",
     "exp/tdnn_7b_chain_online/graph_pp/HCLG.fst",
-    "exp/tdnn_7b_chain_online/graph_pp/words.txt",
+    "data/lang/words.txt",
     decoder_opts=decoder_opts,
     decodable_opts=decodable_opts)
 
 # Define feature pipelines as Kaldi rspecifiers
 feats_rspec = (
-    "ark:compute-mfcc-feats --config=conf/mfcc.conf scp:data/wav.scp ark:- |"
+    "ark:compute-mfcc-feats --config=conf/mfcc_hires.conf scp:data/test/wav.scp ark:- |"
 )
 ivectors_rspec = (
-    "ark:compute-mfcc-feats --config=conf/mfcc.conf scp:data/wav.scp ark:- |"
-    "ivector-extract-online2 --config=conf/ivector.conf ark:data/spk2utt ark:- ark:- |"
+    "ark:compute-mfcc-feats --config=conf/mfcc_hires.conf scp:data/test/wav.scp ark:- |"
+    "ivector-extract-online2 --config=conf/ivector_extractor.conf ark:data/test/spk2utt ark:- ark:- |"
 )
 
 # Decode wav files
 with SequentialMatrixReader(feats_rspec) as f, \
      SequentialMatrixReader(ivectors_rspec) as i, \
-     open("out/decode.out", "w") as o:
+     open("out/test/decode.out", "w") as o:
     for (key, feats), (_, ivectors) in zip(f, i):
         out = asr.decode((feats, ivectors))
         print(key, out["text"], file=o)
diff --git a/examples/setups/aspire/models.sh b/examples/setups/aspire/models.sh
@@ -7,3 +7,5 @@ rm -f kaldi-models-0.03.zip
 wget https://lowerquality.com/gentle/aspire-hclg.tar.gz
 tar -xzf aspire-hclg.tar.gz
 rm -f aspire-hclg.tar.gz
+
+mv exp/langdir data/lang
diff --git a/examples/setups/aspire/out/align.out → examples/setups/aspire/out/test/align.out b/examples/setups/aspire/out/align.out → examples/setups/aspire/out/test/align.out
diff --git a/examples/setups/aspire/out/decode.out → examples/setups/aspire/out/test/decode.out b/examples/setups/aspire/out/decode.out → examples/setups/aspire/out/test/decode.out
diff --git a/examples/setups/aspire/out/phone_align.out → ...es/setups/aspire/out/test/phone_align.out b/examples/setups/aspire/out/phone_align.out → ...es/setups/aspire/out/test/phone_align.out
diff --git a/examples/setups/aspire/out/word_align.out → ...les/setups/aspire/out/test/word_align.out b/examples/setups/aspire/out/word_align.out → ...les/setups/aspire/out/test/word_align.out