Skip to content

Commit

Permalink
Merge branch 'main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
helenzusa1 authored May 20, 2024
2 parents 7fb5771 + 5f95c8a commit aa0ba55
Showing 1 changed file with 27 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,9 @@
"Optimization parameters help in optimizing the GPU memory and effectively using the compute resources. Below are a few of the parameters that belong to this category. _The optimization parameters differ for each model and are packaged with the model to handle these variations._\n",
"1. enable the deepspeed and LoRA\n",
"2. enable mixed precision training\n",
"2. enable multi-node training "
"3. enable multi-node training \n",
"\n",
"Note: Supervised finetuning may result in losing alignment or catastrophic forgetting. We recommend checking for this issue and running an alignment stage after you finetune."
]
},
{
Expand Down Expand Up @@ -625,12 +627,35 @@
"metadata": {},
"outputs": [],
"source": [
"import ast\n",
"\n",
"instance_type = \"Standard_NC6s_v3\"\n",
"\n",
"# Inference compute allow list that supports deployment\n",
"if \"inference_compute_allow_list\" in foundation_model.tags:\n",
" inference_computes_allow_list = ast.literal_eval(\n",
" foundation_model.tags[\"inference_compute_allow_list\"]\n",
" ) # convert string to python list\n",
" print(f\"Please create a compute from the above list - {inference_computes_allow_list}\")\n",
"else:\n",
" inference_computes_allow_list = None\n",
" print(\"`inference_compute_allow_list` is not part of model tags\")\n",
"\n",
"# Check if the compute is in the allow listed computes\n",
"if (\n",
" inference_computes_allow_list is not None\n",
" and instance_type not in inference_computes_allow_list\n",
"):\n",
" print(\n",
" f\"`instance_type` is not in the allow listed compute. Please select a value from {inference_computes_allow_list}\"\n",
" )\n",
"\n",
"# create a deployment\n",
"demo_deployment = ManagedOnlineDeployment(\n",
" name=\"demo\",\n",
" endpoint_name=online_endpoint_name,\n",
" model=registered_model.id,\n",
" instance_type=\"Standard_E8s_v3\",\n",
" instance_type=instance_type,\n",
" instance_count=1,\n",
" liveness_probe=ProbeSettings(initial_delay=600),\n",
" request_settings=OnlineRequestSettings(request_timeout_ms=90000),\n",
Expand Down

0 comments on commit aa0ba55

Please sign in to comment.