forked from OpenCSGs/llm-inference
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaviary-cluster.yaml
46 lines (38 loc) · 1.8 KB
/
aviary-cluster.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# A unique identifier for the head node and workers of this cluster.
cluster_name: deploy
# Cloud-provider specific configuration.
# The machines of the cluster are listed explicitly by address; the settings
# must be nested under `provider` to be read as part of it.
provider:
  type: local
  # Address of the head node. The same address is hard-coded as RAY_HEAD_IP
  # in worker_start_ray_commands; keep the two in sync.
  head_ip: 172.16.251.127
  # Addresses of the worker nodes (one entry, matching max_workers: 1).
  worker_ips: [172.16.248.191]
# Container settings: the image to use and the name given to the container.
# Both keys must be nested under `docker` to be read as part of it.
# NOTE(review): presumably the setup/start commands in this file run inside
# this container -- confirm against the Ray docs.
docker:
  image: "docker.io/vincentpli/aviary:0.0.1-651cb061e4b912927ee33a4e690c54f0c389beaa"
  container_name: "llmserve"
# SSH credentials used to reach the cluster machines. Both keys must be
# nested under `auth` to be read as part of it.
auth:
  ssh_user: root
  # You can comment out `ssh_private_key` if the following machines don't need
  # a private key for SSH access to the Ray cluster:
  #   (1) The machine on which `ray up` is executed.
  #   (2) The head node of the Ray cluster.
  #
  # The machine that runs `ray up` executes SSH commands to set up the Ray
  # head node. The Ray head node subsequently executes SSH commands to set up
  # the Ray worker nodes. When you run `ray up`, SSH credentials sitting on
  # the `ray up` machine are copied to the head node -- internally, the SSH
  # key is added to the list of file mounts to rsync to the head node.
  ssh_private_key: ~/.ssh/id_rsa
# Lower and upper bounds on the number of worker nodes. Both are 1, which
# matches the single address listed in worker_ips.
min_workers: 1
max_workers: 1
# All the 'conda activate' are necessary to ensure we are in the
# python 3.10 conda env.
setup_commands:
  # Persist `conda activate` in ~/.bashrc. The line is appended only when it
  # is not already present (grep -x: whole-line match, -F: fixed string,
  # -q: quiet), so re-running setup does not pile up duplicate entries.
  - grep -qxF 'conda activate' ~/.bashrc || echo "conda activate" >> ~/.bashrc
# Additional setup for the head node: activate the conda env.
head_setup_commands:
  - conda activate
# No additional setup commands for the worker nodes (explicit empty list).
worker_setup_commands: []
# Commands that (re)start Ray on the head node.
head_start_ray_commands:
  # Stop any Ray processes left over from a previous run.
  - conda activate && ray stop
  # Raise the open-file limit, then start the head process. `ray start`
  # follows a `;`, so it runs regardless of the exit status of the commands
  # before it. The folded scalar below yields a single-line command.
  - >-
    conda activate && ulimit -n 65536;
    ray start --head --port=6379 --object-manager-port=8076
    --autoscaling-config=~/ray_bootstrap_config.yaml --dashboard-host=0.0.0.0
# Commands that (re)start Ray on each worker node.
worker_start_ray_commands:
  # Stop any Ray processes left over from a previous run.
  - conda activate && ray stop
  # We need to make sure the RAY_HEAD_IP env var is accessible after
  # `conda activate`, so it is exported here and also persisted to ~/.bashrc.
  # The address must match head_ip in the provider section.
  # The export line is appended only when the exact line is not already in
  # ~/.bashrc, so repeated worker restarts do not grow the file.
  # `ray start` follows a `;`, so it runs regardless of the exit status of
  # the commands before it. The folded scalar yields a single-line command.
  - >-
    export RAY_HEAD_IP=172.16.251.127 &&
    { grep -qxF "export RAY_HEAD_IP=$RAY_HEAD_IP" ~/.bashrc ||
    echo "export RAY_HEAD_IP=$RAY_HEAD_IP" >> ~/.bashrc; } &&
    conda activate && ulimit -n 65536;
    ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076