Skip to content

Commit

Permalink
Add Qwen1.5-72B-chat (OpenCSGs#44)
Browse files Browse the repository at this point in the history
* push image to opencsg registry

* fix bug pad token

* remove cpu_nums and add time log

* add deep code model

* update

* update OpenCSG model name

* update deepseek parameters

* add Qwen1.5

* add Qwen 1.5 72B

---------

Co-authored-by: haihwang <[email protected]>
  • Loading branch information
SeanHH86 and haihwang authored Mar 25, 2024
1 parent ada574d commit 600bdc1
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 1 deletion.
3 changes: 2 additions & 1 deletion llmserve/backend/server/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,8 @@
"opencsg/opencsg-starcoder-v0.1": "./models/text-generation--opencsg--opencsg-starcoder-15B-v0.1-pipeline.yaml",
"OpenCSG/opencsg-starcoder-v0.1": "./models/text-generation--opencsg--opencsg-starcoder-15B-v0.1-pipeline.yaml",
"opencsg/opencsg-deepseek-coder-1.3b-v0.1": "./models/text-generation--opencsg--opencsg-deepseek-coder-1.3b-v0.1.yaml",
"OpenCSG/opencsg-deepseek-coder-1.3b-v0.1": "./models/text-generation--opencsg--opencsg-deepseek-coder-1.3b-v0.1.yaml"
"OpenCSG/opencsg-deepseek-coder-1.3b-v0.1": "./models/text-generation--opencsg--opencsg-deepseek-coder-1.3b-v0.1.yaml",
"Qwen/Qwen1.5-72B-Chat": "./models/text-generation--Qwen--Qwen1.5-72B-Chat.yaml"
}

SERVE_RUN_HOST = "0.0.0.0"
Expand Down
49 changes: 49 additions & 0 deletions models/text-generation--Qwen--Qwen1.5-72B-Chat.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
---
# Serving config for Qwen/Qwen1.5-72B-Chat (text-generation).
deployment_config:
  autoscaling_config:
    min_replicas: 1
    initial_replicas: 1
    max_replicas: 1
    target_num_ongoing_requests_per_replica: 1.0
    metrics_interval_s: 10.0
    look_back_period_s: 30.0
    smoothing_factor: 1.0
    downscale_delay_s: 300.0
    upscale_delay_s: 90.0
  ray_actor_options:
    # For a model deployment we have 3 actors created: actors 1 and 2 cost
    # 0.1 CPU each, and model inference costs the CPUs set in scaling_config
    # at the end of this file.
    num_cpus: 2
model_config:
  warmup: false  # canonical lowercase boolean (was `False`)
  model_task: text-generation
  model_id: Qwen/Qwen1.5-72B-Chat
  max_input_words: 800
  initialization:
    s3_mirror_config:
      # Local path mirror of the model weights.
      bucket_uri: /data/models/Qwen1.5-72B-Chat/
    initializer:
      type: DeviceMap
      dtype: float16
      from_pretrained_kwargs:
        use_cache: true
        trust_remote_code: true
      # use_kernel: true  # for deepspeed type only
      # max_tokens: 1536  # for deepspeed type only
    pipeline: defaulttransformers
    # pipeline: default
  generation:
    max_batch_size: 1
    generate_kwargs:
      # NOTE: no trailing comma — `151643,` would parse as the string
      # "151643," instead of the integer 151643.
      bos_token_id: 151643
      # pad_token_id: 151643
      # eos_token_id: [151645, 151643]
      do_sample: false
      max_new_tokens: 512
      repetition_penalty: 1.05
      temperature: 0.7
      top_p: 0.8
      top_k: 20
    prompt_format: "'role': 'user', 'content': {instruction}"
    # stopping_sequences: ["### Response:", "### End"]
scaling_config:
  num_workers: 1
  num_gpus_per_worker: 7
  num_cpus_per_worker: 32  # for inference

0 comments on commit 600bdc1

Please sign in to comment.