[RLlib] Fix test case test_envs_that_crash. (ray-project#44989)

Mark2000 · Apr 26, 2024 · 0e8ef33
1 parent 5650610
commit 0e8ef33
Show file tree

Hide file tree

Showing 2 changed files with 20 additions and 24 deletions.
diff --git a/rllib/algorithms/algorithm_config.py b/rllib/algorithms/algorithm_config.py
@@ -2695,6 +2695,7 @@ def debugging(
 
     def fault_tolerance(
         self,
+        ignore_env_runner_failures: Optional[bool] = NotProvided,
         recreate_failed_env_runners: Optional[bool] = NotProvided,
         max_num_env_runner_restarts: Optional[int] = NotProvided,
         delay_between_env_runner_restarts_s: Optional[float] = NotProvided,
@@ -2703,6 +2704,7 @@ def fault_tolerance(
         env_runner_health_probe_timeout_s: int = NotProvided,
         env_runner_restore_timeout_s: int = NotProvided,
         # Deprecated args.
+        ignore_worker_failures=DEPRECATED_VALUE,
         recreate_failed_workers=DEPRECATED_VALUE,
         max_num_worker_restarts=DEPRECATED_VALUE,
         delay_between_worker_restarts_s=DEPRECATED_VALUE,
@@ -2713,6 +2715,9 @@ def fault_tolerance(
         """Sets the config's fault tolerance settings.
 
         Args:
+            ignore_env_runner_failures: Whether to ignore any EnvRunner failures
+                and continue running with the remaining EnvRunners. This setting will
+                be ignored, if `recreate_failed_env_runners=True`.
             recreate_failed_env_runners: Whether - upon an EnvRunner failure - RLlib
                 will try to recreate the lost EnvRunner as an identical copy of the
                 failed one. The new EnvRunner will only differ from the failed one in
@@ -2746,6 +2751,12 @@ def fault_tolerance(
         Returns:
             This updated AlgorithmConfig object.
         """
+        if ignore_worker_failures != DEPRECATED_VALUE:
+            deprecation_warning(
+                old="AlgorithmConfig.fault_tolerance(ignore_worker_failures)",
+                new="AlgorithmConfig.fault_tolerance(ignore_env_runner_failures)",
+                error=True,
+            )
         if recreate_failed_workers != DEPRECATED_VALUE:
             deprecation_warning(
                 old="AlgorithmConfig.fault_tolerance(recreate_failed_workers)",
@@ -2787,6 +2798,8 @@ def fault_tolerance(
                 error=True,
             )
 
+        if ignore_env_runner_failures is not NotProvided:
+            self.ignore_env_runner_failures = ignore_env_runner_failures
         if recreate_failed_env_runners is not NotProvided:
             self.recreate_failed_env_runners = recreate_failed_env_runners
         if max_num_env_runner_restarts is not NotProvided:

diff --git a/rllib/evaluation/tests/test_envs_that_crash.py b/rllib/evaluation/tests/test_envs_that_crash.py
@@ -25,29 +25,6 @@ def setUpClass(cls) -> None:
     def tearDownClass(cls) -> None:
         ray.shutdown()
 
-    def test_env_crash_during_pre_checking(self):
-        """Expect the env pre-checking to fail on each worker."""
-        config = (
-            PPOConfig()
-            .env_runners(num_env_runners=2, num_envs_per_env_runner=4)
-            .environment(
-                env=CartPoleCrashing,
-                env_config={
-                    # Crash prob=100% (during pre-checking's `step()` test calls).
-                    "p_crash": 1.0,
-                    "init_time_s": 0.5,
-                },
-            )
-        )
-
-        # Expect ValueError due to pre-checking failing (our pre-checker module
-        # raises a ValueError if `step()` fails).
-        self.assertRaisesRegex(
-            ValueError,
-            "Simulated env crash",
-            lambda: config.build(),
-        )
-
     def test_env_crash_during_sampling(self):
         """Expect some sub-envs to fail (and not recover)."""
         config = (
@@ -87,6 +64,8 @@ def test_env_crash_on_one_worker_during_sampling_but_ignore(self):
             .env_runners(
                 num_env_runners=2,
                 num_envs_per_env_runner=3,
+            )
+            .fault_tolerance(
                 # Ignore worker failures (continue with worker #2).
                 ignore_env_runner_failures=True,
             )
@@ -125,8 +104,11 @@ def test_env_crash_on_one_worker_during_sampling_but_recreate_worker(self):
                 num_env_runners=2,
                 rollout_fragment_length=10,
                 num_envs_per_env_runner=3,
+            )
+            .fault_tolerance(
                 # Re-create failed workers (then continue).
                 recreate_failed_env_runners=True,
+                delay_between_env_runner_restarts_s=0,
             )
             .training(train_batch_size=60, sgd_minibatch_size=60)
             .environment(
@@ -139,7 +121,6 @@ def test_env_crash_on_one_worker_during_sampling_but_recreate_worker(self):
                     "crash_on_worker_indices": [2],
                 },
             )
-            .fault_tolerance(delay_between_env_runner_restarts_s=0)
         )
         for multi_agent in [True, False]:
             if multi_agent:
@@ -167,6 +148,8 @@ def test_env_crash_during_sampling_but_restart_only_crashed_sub_env(self):
             .env_runners(
                 num_env_runners=2,
                 num_envs_per_env_runner=3,
+            )
+            .fault_tolerance(
                 # Re-start failed individual sub-envs (then continue).
                 # This means no workers will ever fail due to individual env errors
                 # (only maybe for reasons other than the env).