Skip to content

Commit

Permalink
add defined_scale and add id column
Browse files: browse the repository at this point in the history
  • Loading branch information
a2d8a4v committed Jul 18, 2022
1 parent eaff8c4 commit ec6df38
Showing 1 changed file with 10 additions and 14 deletions.
24 changes: 10 additions & 14 deletions utils/prepare_auto_grader_feats.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,9 @@
remove_partial_words_call,
remove_tltschool_interregnum_tokens
)

mapping_dict = {
0: 0,
'a1': 1,
'a1+': 2,
'a2': 3,
'a2+': 4,
'b1': 5,
'b1+': 6,
'b2': 7
}
from defined_scales import (
mapping_dict
)

def nullable_string(val):
if val.lower() == 'none':
Expand Down Expand Up @@ -100,6 +92,8 @@ def xstr(s):
# argparse
args = argparse_function()

print(mapping_dict)

# variables
s2t = args.s2t
combine_same_speakerids = args.combine_same_speakerids
Expand Down Expand Up @@ -153,15 +147,16 @@ def xstr(s):
sst = 0
max_seq_len = 0
with open(args.output_text_file_path, 'w') as f:
f.write("{}\t{}\t{}\t{}\n".format('score', 'sst', 'l1', 'text'))
f.write("{}\t{}\t{}\t{}\t{}\n".format('id', 'score', 'sst', 'l1', 'text'))
for utt_or_spk_id, text in utt_text_dict.items():

if len(text.split()) > max_seq_len:
max_seq_len = len(text.split())

if get_specific_labels is not None:
if get_specific_labels.lower() == utt_cefr_file_path_dict[utt_or_spk_id].lower():
f.write("{}\t{}\t{}\t{}\n".format(
f.write("{}\t{}\t{}\t{}\t{}\n".format(
utt_or_spk_id,
mapping_cefr2num(
utt_cefr_file_path_dict[utt_or_spk_id]
),
Expand All @@ -172,7 +167,8 @@ def xstr(s):
)
count_cefr_labels+=1
else:
f.write("{}\t{}\t{}\t{}\n".format(
f.write("{}\t{}\t{}\t{}\t{}\n".format(
utt_or_spk_id,
mapping_cefr2num(
utt_cefr_file_path_dict[utt_or_spk_id]
),
Expand Down

0 comments on commit ec6df38

Please sign in to comment.