Skip to content

Commit

Permalink
fix bugs for recognizing upper or lower case
Browse files Browse the repository at this point in the history
  • Loading branch information
a2d8a4v committed Jul 12, 2022
1 parent f3715c3 commit aafa2b6
Showing 1 changed file with 1 addition and 19 deletions.
20 changes: 1 addition & 19 deletions utils/in_place_oov_tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import sys
import argparse
from utilities import (
opentext,
process_tltchool_gigaspeech_interregnum_tokens
)

Expand All @@ -19,27 +20,8 @@
default="Librispeech-model-mct-tdnnf/data/lang/words.txt",
type=str)

# Register the string-valued --conf command-line option.
# NOTE(review): the default here is the same words.txt path used as the
# default of the preceding option — looks copy-pasted; confirm the intended
# default (or whether this option is needed at all).
parser.add_argument("--conf",
default="Librispeech-model-mct-tdnnf/data/lang/words.txt",
type=str)

# Parse the command line once at import time; the result is kept in `args`.
args = parser.parse_args()


def opentext(file):
    """Read a transcript file and collect its vocabulary and utterances.

    Each non-blank line is split on whitespace; the first token is taken as
    the utterance id and the remaining tokens as that utterance's words.

    Args:
        file: Path of the text file to read.

    Returns:
        A ``(vocab, texts)`` tuple. ``vocab`` is a list of the distinct
        lower-cased words (in no particular order, since it is built from a
        set). ``texts`` maps each utterance id to its words re-joined with
        single spaces, original casing kept; an id with no words maps to
        ``""``. If an utterance id repeats, the last occurrence wins.
    """
    vocab = set()
    texts = {}
    with open(file, "r") as f:
        # Iterate the file lazily instead of materializing every line first.
        for line in f:
            tokens = line.split()
            if not tokens:
                # Blank or whitespace-only line: there is no utterance id,
                # so skip it rather than fail on tokens[0].
                continue
            uttid, *words = tokens
            texts[uttid] = " ".join(words)
            # The vocabulary is case-insensitive; the stored text is not.
            vocab.update(word.lower() for word in words)
    return list(vocab), texts

def openwords(file):
s = dict()
with open(file, "r") as f:
Expand Down

0 comments on commit aafa2b6

Please sign in to comment.