-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathtry_minicpm_v.py
63 lines (53 loc) · 1.39 KB
/
try_minicpm_v.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from transformers import AutoTokenizer
from PIL import Image
from vllm import LLM, SamplingParams
MODEL_NAME = "/mnt/public/algm/models/MiniCPM-V-2_6/"
image = Image.open("../data/sample_image.jpg").convert("RGB")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
llm = LLM(
model=MODEL_NAME,
trust_remote_code=True,
gpu_memory_utilization=1,
max_model_len=2048
)
messages = [{
"role":
"user",
"content":
# Number of images
"(<image>./</image>)" + \
"\nWhat is the content of this image?"
}]
prompt = tokenizer.apply_chat_template(
messages,
tokenize=False,
add_generation_prompt=True
)
# Single Inference
inputs = {
"prompt": prompt,
"multi_modal_data": {
"image": image
# Multi images, the number of images should be equal to that of `(<image>./</image>)`
# "image": [image, image]
},
}
# Batch Inference
# inputs = [{
# "prompt": prompt,
# "multi_modal_data": {
# "image": image
# },
# } for _ in 2]
# 2.6
stop_tokens = ['<|im_end|>', '<|endoftext|>']
stop_token_ids = [tokenizer.convert_tokens_to_ids(i) for i in stop_tokens]
sampling_params = SamplingParams(
stop_token_ids=stop_token_ids,
use_beam_search=True,
temperature=0,
best_of=3,
max_tokens=1024
)
outputs = llm.generate(inputs, sampling_params=sampling_params)
print(outputs[0].outputs[0].text)