From bb7c4ce9c4807bb0344d11d9bd4fe656ae020014 Mon Sep 17 00:00:00 2001
From: Richard Liaw
Date: Fri, 15 Feb 2019 01:04:17 -0800
Subject: [PATCH] [tune] Improve error message when Ray crashes (#3795)

---
 python/ray/tune/ray_trial_executor.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/python/ray/tune/ray_trial_executor.py b/python/ray/tune/ray_trial_executor.py
index 85fa86afd4649..b2d97cee62719 100644
--- a/python/ray/tune/ray_trial_executor.py
+++ b/python/ray/tune/ray_trial_executor.py
@@ -9,6 +9,7 @@
 import traceback
 
 import ray
+from ray.tune.error import TuneError
 from ray.tune.logger import NoopLogger
 from ray.tune.trial import Trial, Resources, Checkpoint
 from ray.tune.trial_executor import TrialExecutor
@@ -270,6 +271,11 @@ def _update_avail_resources(self, num_retries=5):
                 logger.warning("Cluster resources not detected. Retrying...")
                 time.sleep(0.5)
 
+        if not resources or "CPU" not in resources:
+            raise TuneError("Cluster resources cannot be detected. "
+                            "You can resume this experiment by passing in "
+                            "`resume=True` to `run_experiments`.")
+
         resources = resources.copy()
         num_cpus = resources.pop("CPU")
         num_gpus = resources.pop("GPU")