Skip to content

Commit

Permalink
del unnecessary generate_chat_bminf.py
Browse files Browse the repository at this point in the history
  • Loading branch information
ftgreat authored Jun 10, 2023
1 parent a718101 commit 2e5bf29
Showing 1 changed file with 1 addition and 59 deletions.
60 changes: 1 addition & 59 deletions examples/Aquila/Aquila-chat/generate_chat_bminf.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,64 +32,6 @@
"为什么湘菜那么甜?",
"东三省和海南岛的区别?",
]
##
def pack_obj(text):
    """Wrap a raw user prompt into the conversation-dict format.

    The returned object has the shape expected by convo_tokenize: an
    ``id``, an empty ``instruction``, and a ``conversations`` list that
    alternates human/gpt turns.

    Args:
        text: the raw user prompt for the single human turn.

    Returns:
        dict with keys ``id``, ``conversations`` and ``instruction``.
    """
    human = {'from': 'human', 'value': text}
    # Dummy empty bot turn: keeps ``conversations`` at an even length
    # (human/gpt pairs), which downstream code asserts on.
    bot = {'from': 'gpt', 'value': ''}
    return {
        'id': 'demo',
        'conversations': [human, bot],
        'instruction': '',
    }

def delete_last_bot_end_singal(convo_obj):
    """Strip the trailing end-of-turn signal from the last bot reply, in place.

    ``_add_speaker_and_signal`` appends ``"\n"`` after every turn; before
    generation the trailing signal of the final gpt turn must be removed so
    the model continues the reply instead of starting a new turn.

    Args:
        convo_obj: conversation dict with an even-length ``conversations``
            list alternating human/gpt turns (as built by pack_obj).

    Returns:
        None; ``convo_obj`` is mutated.
    """
    conversations = convo_obj['conversations']
    assert len(conversations) > 0 and len(conversations) % 2 == 0
    assert conversations[0]['from'] == 'human'

    last_bot = conversations[-1]
    assert last_bot['from'] == 'gpt'

    ## from _add_speaker_and_signal
    END_SIGNAL = "\n"
    # Only strip when the signal is actually present; the previous
    # unconditional slice chopped a real character off values that did
    # not end with END_SIGNAL.
    if last_bot['value'].endswith(END_SIGNAL):
        last_bot['value'] = last_bot['value'][:-len(END_SIGNAL)]
    return

def convo_tokenize(convo_obj, tokenizer):
    """Flatten a conversation object into a single list of token ids.

    Layout of the result: chat_desc (BOS kept, EOS dropped), then the
    instruction (BOS and EOS dropped), then each conversation turn in
    order (BOS and EOS dropped).

    Args:
        convo_obj: dict with ``chat_desc``, ``instruction`` and
            ``conversations`` keys.
        tokenizer: object exposing HuggingFace-style ``encode_plus``.

    Returns:
        list of token ids for the whole prompt.
    """
    def _token_ids(text):
        # Single place that calls the tokenizer; keeps call shape uniform.
        return tokenizer.encode_plus(f"{text}", None, max_length=None)['input_ids']

    desc_ids = _token_ids(convo_obj['chat_desc'])
    EOS_TOKEN = desc_ids[-1]  # noted but unused below, kept from original
    example = desc_ids[:-1]  # remove eos

    # instruction contributes its interior tokens only
    example += _token_ids(convo_obj['instruction'])[1:-1]  # remove bos & eos

    for conversation in convo_obj['conversations']:
        role = conversation['from']
        content = conversation['value']
        print(f"role {role}, raw content {content}")
        content = _token_ids(content)[1:-1]  # remove bos & eos
        print(f"role {role}, content {content}")
        example += content

    return example

for text in texts:
print('-'*80)
Expand All @@ -106,4 +48,4 @@ def convo_tokenize(convo_obj, tokenizer):

with torch.no_grad():
out = aquila_generate(tokenizer, model, [text], max_gen_len:=200, top_p=0.95, prompts_tokens=[tokens])
print(f"pred is {out}")
print(f"pred is {out}")

0 comments on commit 2e5bf29

Please sign in to comment.