[serve] warning about changing target ongoing requests default (ray-project#43813)

Add warning that default value for `target_ongoing_requests` will change. Signed-off-by: Cindy Zhang <[email protected]>
Mark2000 · Mar 8, 2024 · 5f11624 · 5f11624
1 parent 4854d2f
commit 5f11624
Show file tree

Hide file tree

Showing 3 changed files with 43 additions and 0 deletions.
diff --git a/dashboard/modules/serve/serve_rest_api_impl.py b/dashboard/modules/serve/serve_rest_api_impl.py
@@ -216,6 +216,8 @@ def validate_http_options(self, client, http_options):
                 )
 
         def log_config_change_default_warning(self, config):
+            from ray.serve.config import AutoscalingConfig
+
             for deployment in [
                 d for app in config.applications for d in app.deployments
             ]:
@@ -226,6 +228,28 @@ def log_config_change_default_warning(self, config):
                     )
                     break
 
+            for deployment in [
+                d for app in config.applications for d in app.deployments
+            ]:
+                if isinstance(deployment.autoscaling_config, dict):
+                    autoscaling_config = deployment.autoscaling_config
+                elif isinstance(deployment.autoscaling_config, AutoscalingConfig):
+                    autoscaling_config = deployment.autoscaling_config.dict(
+                        exclude_unset=True
+                    )
+                else:
+                    continue
+
+                if (
+                    "target_num_ongoing_requests_per_replica" not in autoscaling_config
+                    and "target_ongoing_requests" not in autoscaling_config
+                ):
+                    logger.warning(
+                        "The default value for `target_ongoing_requests` will "
+                        "change from 1.0 to 2.0 in an upcoming release."
+                    )
+                    break
+
         async def get_serve_controller(self):
             """Gets the ServeController to the this cluster's Serve app.
 

diff --git a/doc/source/serve/advanced-guides/advanced-autoscaling.md b/doc/source/serve/advanced-guides/advanced-autoscaling.md
@@ -18,6 +18,9 @@ To define what the steady state of your deployments should be, set values for `t
 This parameter is renamed to `target_ongoing_requests`. `target_num_ongoing_requests_per_replica` will be removed in a future release.
 
 #### **target_ongoing_requests [default=1]**
+:::{note}
+The default for `target_ongoing_requests` will be changed to 2.0 in an upcoming Ray release. You can continue to set it manually to override the default.
+:::
 Serve scales the number of replicas for a deployment up or down based on the average number of ongoing requests per replica. Specifically, Serve compares the *actual* number of ongoing requests per replica with the target value you set in the autoscaling config and makes upscale or downscale decisions from that. Set the target value with `target_ongoing_requests`, and Serve attempts to ensure that each replica has roughly that number
 of requests being processed and waiting in the queue. 
 

diff --git a/python/ray/serve/api.py b/python/ray/serve/api.py
@@ -350,6 +350,22 @@ class MyDeployment:
                 "version."
             )
 
+        if (
+            isinstance(autoscaling_config, dict)
+            and "target_num_ongoing_requests_per_replica" not in autoscaling_config
+            and "target_ongoing_requests" not in autoscaling_config
+        ) or (
+            isinstance(autoscaling_config, AutoscalingConfig)
+            and "target_num_ongoing_requests_per_replica"
+            not in autoscaling_config.dict(exclude_unset=True)
+            and "target_ongoing_requests"
+            not in autoscaling_config.dict(exclude_unset=True)
+        ):
+            logger.warning(
+                "The default value for `target_ongoing_requests` is currently 1.0, "
+                "but will change to 2.0 in an upcoming release."
+            )
+
     max_ongoing_requests = (
         max_ongoing_requests
         if max_ongoing_requests is not DEFAULT.VALUE