Skip to content

Commit

Permalink
[RLlib] Fix test case test_envs_that_crash. (ray-project#44989)
Browse files Browse the repository at this point in the history
  • Loading branch information
sven1977 authored Apr 26, 2024
1 parent 5650610 commit 0e8ef33
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 24 deletions.
13 changes: 13 additions & 0 deletions rllib/algorithms/algorithm_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2695,6 +2695,7 @@ def debugging(

def fault_tolerance(
self,
ignore_env_runner_failures: Optional[bool] = NotProvided,
recreate_failed_env_runners: Optional[bool] = NotProvided,
max_num_env_runner_restarts: Optional[int] = NotProvided,
delay_between_env_runner_restarts_s: Optional[float] = NotProvided,
Expand All @@ -2703,6 +2704,7 @@ def fault_tolerance(
env_runner_health_probe_timeout_s: int = NotProvided,
env_runner_restore_timeout_s: int = NotProvided,
# Deprecated args.
ignore_worker_failures=DEPRECATED_VALUE,
recreate_failed_workers=DEPRECATED_VALUE,
max_num_worker_restarts=DEPRECATED_VALUE,
delay_between_worker_restarts_s=DEPRECATED_VALUE,
Expand All @@ -2713,6 +2715,9 @@ def fault_tolerance(
"""Sets the config's fault tolerance settings.
Args:
ignore_env_runner_failures: Whether to ignore any EnvRunner failures
and continue running with the remaining EnvRunners. This setting is
ignored if `recreate_failed_env_runners=True`.
recreate_failed_env_runners: Whether - upon an EnvRunner failure - RLlib
will try to recreate the lost EnvRunner as an identical copy of the
failed one. The new EnvRunner will only differ from the failed one in
Expand Down Expand Up @@ -2746,6 +2751,12 @@ def fault_tolerance(
Returns:
This updated AlgorithmConfig object.
"""
if ignore_worker_failures != DEPRECATED_VALUE:
deprecation_warning(
old="AlgorithmConfig.fault_tolerance(ignore_worker_failures)",
new="AlgorithmConfig.fault_tolerance(ignore_env_runner_failures)",
error=True,
)
if recreate_failed_workers != DEPRECATED_VALUE:
deprecation_warning(
old="AlgorithmConfig.fault_tolerance(recreate_failed_workers)",
Expand Down Expand Up @@ -2787,6 +2798,8 @@ def fault_tolerance(
error=True,
)

if ignore_env_runner_failures is not NotProvided:
self.ignore_env_runner_failures = ignore_env_runner_failures
if recreate_failed_env_runners is not NotProvided:
self.recreate_failed_env_runners = recreate_failed_env_runners
if max_num_env_runner_restarts is not NotProvided:
Expand Down
31 changes: 7 additions & 24 deletions rllib/evaluation/tests/test_envs_that_crash.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,29 +25,6 @@ def setUpClass(cls) -> None:
def tearDownClass(cls) -> None:
ray.shutdown()

def test_env_crash_during_pre_checking(self):
    """Expect env pre-checking to raise while the algorithm is built."""
    # Crash prob=100% (during pre-checking's `step()` test calls).
    crashing_env_config = {
        "p_crash": 1.0,
        "init_time_s": 0.5,
    }
    config = (
        PPOConfig()
        .env_runners(num_env_runners=2, num_envs_per_env_runner=4)
        .environment(env=CartPoleCrashing, env_config=crashing_env_config)
    )

    # The pre-checker module raises a ValueError if `step()` fails, so the
    # build itself is expected to surface the simulated crash.
    with self.assertRaisesRegex(ValueError, "Simulated env crash"):
        config.build()

def test_env_crash_during_sampling(self):
"""Expect some sub-envs to fail (and not recover)."""
config = (
Expand Down Expand Up @@ -87,6 +64,8 @@ def test_env_crash_on_one_worker_during_sampling_but_ignore(self):
.env_runners(
num_env_runners=2,
num_envs_per_env_runner=3,
)
.fault_tolerance(
# Ignore worker failures (continue with worker #2).
ignore_env_runner_failures=True,
)
Expand Down Expand Up @@ -125,8 +104,11 @@ def test_env_crash_on_one_worker_during_sampling_but_recreate_worker(self):
num_env_runners=2,
rollout_fragment_length=10,
num_envs_per_env_runner=3,
)
.fault_tolerance(
# Re-create failed workers (then continue).
recreate_failed_env_runners=True,
delay_between_env_runner_restarts_s=0,
)
.training(train_batch_size=60, sgd_minibatch_size=60)
.environment(
Expand All @@ -139,7 +121,6 @@ def test_env_crash_on_one_worker_during_sampling_but_recreate_worker(self):
"crash_on_worker_indices": [2],
},
)
.fault_tolerance(delay_between_env_runner_restarts_s=0)
)
for multi_agent in [True, False]:
if multi_agent:
Expand Down Expand Up @@ -167,6 +148,8 @@ def test_env_crash_during_sampling_but_restart_only_crashed_sub_env(self):
.env_runners(
num_env_runners=2,
num_envs_per_env_runner=3,
)
.fault_tolerance(
# Re-start failed individual sub-envs (then continue).
# This means no workers will ever fail due to individual env errors
# (only maybe for reasons other than the env).
Expand Down

0 comments on commit 0e8ef33

Please sign in to comment.