Skip to content

Commit

Permalink
more consistent command line names for get-vocab
Browse files (browse the repository at this point in the history)
  • Loading branch information
rsennrich committed May 16, 2018
1 parent 7483773 commit f61c957
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 12 deletions.
15 changes: 8 additions & 7 deletions subword_nmt/get_vocab.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,12 @@ def create_parser(subparsers=None):
description="Generates vocabulary")

parser.add_argument(
- '--train_file', type=argparse.FileType('r'), default=sys.stdin,
+ '--input', '-i', type=argparse.FileType('r'), default=sys.stdin,
metavar='PATH',
help="Input file (default: standard input).")

parser.add_argument(
- '--vocab_file', type=argparse.FileType('w'), default=sys.stdout,
+ '--output', '-o', type=argparse.FileType('w'), default=sys.stdout,
metavar='PATH',
help="Output file (default: standard output)")

Expand Down Expand Up @@ -73,9 +73,10 @@ def get_vocab(train_file, vocab_file):
parser = create_parser()
args = parser.parse_args()

- if args.train_file.name != '<stdin>':
- args.train_file = codecs.open(args.train_file.name, encoding='utf-8')
- if args.vocab_file.name != '<stdout>':
- args.vocab_file = codecs.open(args.vocab_file.name, 'w', encoding='utf-8')
+ # read/write files as UTF-8
+ if args.input.name != '<stdin>':
+ args.input = codecs.open(args.input.name, encoding='utf-8')
+ if args.output.name != '<stdout>':
+ args.output = codecs.open(args.output.name, 'w', encoding='utf-8')

- get_vocab(args.train_file, args.vocab_file)
+ get_vocab(args.input, args.output)
10 changes: 5 additions & 5 deletions subword_nmt/subword_nmt.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,11 @@ def main():
args.output.write(bpe.process_line(line))

elif args.command == 'get-vocab':
- if args.train_file.name != '<stdin>':
- args.train_file = codecs.open(args.train_file.name, encoding='utf-8')
- if args.vocab_file.name != '<stdout>':
- args.vocab_file = codecs.open(args.vocab_file.name, 'w', encoding='utf-8')
- get_vocab(args.train_file, args.vocab_file)
+ if args.input.name != '<stdin>':
+ args.input = codecs.open(args.input.name, encoding='utf-8')
+ if args.output.name != '<stdout>':
+ args.output = codecs.open(args.output.name, 'w', encoding='utf-8')
+ get_vocab(args.input, args.output)
elif args.command == 'learn-joint-bpe-and-vocab':
learn_joint_bpe_and_vocab(args)
else:
Expand Down

0 comments on commit f61c957

Please sign in to comment.