Skip to content

Commit

Permalink
[serve] warning about changing target ongoing requests default (ray-p…
Browse files Browse the repository at this point in the history
…roject#43813)

Add warning that default value for `target_ongoing_requests` will change.

Signed-off-by: Cindy Zhang <[email protected]>
  • Loading branch information
zcin authored Mar 8, 2024
1 parent 4854d2f commit 5f11624
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 0 deletions.
24 changes: 24 additions & 0 deletions dashboard/modules/serve/serve_rest_api_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,8 @@ def validate_http_options(self, client, http_options):
)

def log_config_change_default_warning(self, config):
from ray.serve.config import AutoscalingConfig

for deployment in [
d for app in config.applications for d in app.deployments
]:
Expand All @@ -226,6 +228,28 @@ def log_config_change_default_warning(self, config):
)
break

for deployment in [
d for app in config.applications for d in app.deployments
]:
if isinstance(deployment.autoscaling_config, dict):
autoscaling_config = deployment.autoscaling_config
elif isinstance(deployment.autoscaling_config, AutoscalingConfig):
autoscaling_config = deployment.autoscaling_config.dict(
exclude_unset=True
)
else:
continue

if (
"target_num_ongoing_requests_per_replica" not in autoscaling_config
and "target_ongoing_requests" not in autoscaling_config
):
logger.warning(
"The default value for `target_ongoing_requests` will "
"change from 1.0 to 2.0 in an upcoming release."
)
break

async def get_serve_controller(self):
"""Gets the ServeController to the this cluster's Serve app.
Expand Down
3 changes: 3 additions & 0 deletions doc/source/serve/advanced-guides/advanced-autoscaling.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ To define what the steady state of your deployments should be, set values for `t
This parameter has been renamed to `target_ongoing_requests`. `target_num_ongoing_requests_per_replica` will be removed in a future release.

#### **target_ongoing_requests [default=1]**
:::{note}
The default for `target_ongoing_requests` will change from 1.0 to 2.0 in an upcoming Ray release. You can still set it explicitly to override the default.
:::
Serve scales the number of replicas for a deployment up or down based on the average number of ongoing requests per replica. Specifically, Serve compares the *actual* number of ongoing requests per replica with the target value you set in the autoscaling config and makes upscale or downscale decisions from that. Set the target value with `target_ongoing_requests`, and Serve attempts to ensure that each replica has roughly that number
of requests being processed and waiting in the queue.

Expand Down
16 changes: 16 additions & 0 deletions python/ray/serve/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,22 @@ class MyDeployment:
"version."
)

if (
isinstance(autoscaling_config, dict)
and "target_num_ongoing_requests_per_replica" not in autoscaling_config
and "target_ongoing_requests" not in autoscaling_config
) or (
isinstance(autoscaling_config, AutoscalingConfig)
and "target_num_ongoing_requests_per_replica"
not in autoscaling_config.dict(exclude_unset=True)
and "target_ongoing_requests"
not in autoscaling_config.dict(exclude_unset=True)
):
logger.warning(
"The default value for `target_ongoing_requests` is currently 1.0, "
"but will change to 2.0 in an upcoming release."
)

max_ongoing_requests = (
max_ongoing_requests
if max_ongoing_requests is not DEFAULT.VALUE
Expand Down

0 comments on commit 5f11624

Please sign in to comment.