forked from OpenDriveLab/ELM
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchange_vocab.py
31 lines (25 loc) · 944 Bytes
/
change_vocab.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
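# Replace the numeric labels in a JSON file with tokens taken from vocab.txt.
# Each number is looked up as a 1-based line number in vocab.txt,
# e.g. (illustrative) {Q: 1, A: 2} becomes {Q: 'apple', A: 'banana'}.
# Usage: python change_vocab.py old_text_label.json new_text_label.json
# Adapt the script if your data structure is different.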
import sys
import json
def main(argv=None):
    """Rewrite the numeric labels of a JSON file as vocabulary tokens.

    Reads ``vocab.txt`` from the current working directory and maps every
    1-based line number from ``num_threshold - 1000`` (29000) onwards to the
    stripped text of that line. The input JSON object is then copied with
    each value replaced by the token stored under that number, and the
    result is written to the output path with 4-space indentation.

    Args:
        argv: Optional argument list laid out like ``sys.argv``
            (program name, input JSON path, output JSON path). Defaults to
            ``sys.argv`` so the script behaves as before when run directly.

    Raises:
        SystemExit: If the input or output path is missing.
        KeyError: If a label value has no mapped line in ``vocab.txt``.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        # Fail with a usage hint instead of a bare IndexError.
        sys.exit(
            "Usage: python change_vocab.py "
            "old_text_label.json new_text_label.json"
        )
    old_text_path, new_text_path = argv[1], argv[2]

    num_threshold = 30000
    first_line = num_threshold - 1000

    # Explicit UTF-8 so non-ASCII tokens decode the same on every platform.
    # NOTE(review): ids are matched against 1-based line numbers; confirm the
    # labels in the JSON file follow the same convention.
    with open('vocab.txt', 'r', encoding='utf-8') as file:
        num_to_vocab = {
            line_number: line_content.strip()
            for line_number, line_content in enumerate(file, 1)
            if line_number >= first_line
        }

    with open(old_text_path, 'r', encoding='utf-8') as file:
        old_text = json.load(file)

    new_text = {}
    for key, value in old_text.items():
        try:
            new_text[key] = num_to_vocab[value]
        except KeyError:
            # Same exception type as before, but say which entry is at fault.
            raise KeyError(
                "label {!r} for key {!r} has no entry in vocab.txt "
                "(mapped line numbers start at {})".format(
                    value, key, first_line
                )
            ) from None

    with open(new_text_path, 'w', encoding='utf-8') as file:
        json.dump(new_text, file, indent=4)
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()