[tune] Directional metrics for components (ray-project#4120) (ray-pro…
hershg authored and richardliaw committed Jun 3, 2019
1 parent 084b221 commit 89722ff
Showing 31 changed files with 354 additions and 131 deletions.
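
In short, this commit replaces the maximize-only `reward_attr` argument on Tune's schedulers and search algorithms with an explicit `metric`/`mode` pair, so minimizing a loss no longer requires reporting a negated copy of it. A minimal before/after sketch (scheduler name and arguments as they appear in this diff; the side-by-side comparison itself is illustrative):

    from ray.tune.schedulers import AsyncHyperBandScheduler

    # Before: the attribute was always maximized, so losses had to be
    # reported negated (e.g. as "neg_mean_loss").
    old_sched = AsyncHyperBandScheduler(reward_attr="neg_mean_loss")

    # After: the direction is explicit, so the raw loss is usable directly.
    new_sched = AsyncHyperBandScheduler(metric="mean_loss", mode="min")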
3 changes: 2 additions & 1 deletion ci/long_running_tests/workloads/pbt.py
@@ -37,7 +37,8 @@

 pbt = PopulationBasedTraining(
     time_attr="training_iteration",
-    reward_attr="episode_reward_mean",
+    metric="episode_reward_mean",
+    mode="max",
     perturbation_interval=10,
     hyperparam_mutations={
         "lr": [0.1, 0.01, 0.001, 0.0001],
8 changes: 5 additions & 3 deletions doc/source/tune-schedulers.rst
@@ -7,7 +7,7 @@ By default, Tune schedules trials in serial order with the ``FIFOScheduler`` cla
     tune.run( ... , scheduler=AsyncHyperBandScheduler())
-Tune includes distributed implementations of early stopping algorithms such as `Median Stopping Rule <https://research.google.com/pubs/pub46180.html>`__, `HyperBand <https://arxiv.org/abs/1603.06560>`__, and an `asynchronous version of HyperBand <https://openreview.net/forum?id=S1Y7OOlRZ>`__. These algorithms are very resource efficient and can outperform Bayesian Optimization methods in `many cases <https://people.eecs.berkeley.edu/~kjamieson/hyperband.html>`__. Currently, all schedulers take in a ``reward_attr``, which is assumed to be maximized.
+Tune includes distributed implementations of early stopping algorithms such as `Median Stopping Rule <https://research.google.com/pubs/pub46180.html>`__, `HyperBand <https://arxiv.org/abs/1603.06560>`__, and an `asynchronous version of HyperBand <https://openreview.net/forum?id=S1Y7OOlRZ>`__. These algorithms are very resource efficient and can outperform Bayesian Optimization methods in `many cases <https://people.eecs.berkeley.edu/~kjamieson/hyperband.html>`__. All schedulers take in a ``metric``, which is a value returned in the result dict of your Trainable and is maximized or minimized according to ``mode``.

 Current Available Trial Schedulers:

@@ -25,7 +25,8 @@ Tune includes a distributed implementation of `Population Based Training (PBT) <
     pbt_scheduler = PopulationBasedTraining(
         time_attr='time_total_s',
-        reward_attr='mean_accuracy',
+        metric='mean_accuracy',
+        mode='max',
         perturbation_interval=600.0,
         hyperparam_mutations={
             "lr": [1e-3, 5e-4, 1e-4, 5e-5, 1e-5],
@@ -52,7 +53,8 @@ The `asynchronous version of HyperBand <https://openreview.net/forum?id=S1Y7OOlR
     async_hb_scheduler = AsyncHyperBandScheduler(
         time_attr='training_iteration',
-        reward_attr='episode_reward_mean',
+        metric='episode_reward_mean',
+        mode='max',
         max_t=100,
         grace_period=10,
         reduction_factor=3,
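
As the updated paragraph above states, ``metric`` must be a key of the result dict your Trainable reports. A short sketch of that contract in the function-based API used throughout this commit (the objective and config values are illustrative, not from the diff):

    import time

    import ray
    from ray.tune import run
    from ray.tune.schedulers import AsyncHyperBandScheduler

    def my_trainable(config, reporter):
        for i in range(100):
            loss = (config["width"] - 3) ** 2  # toy objective
            # The reported key below is what the scheduler's `metric` refers to.
            reporter(timesteps_total=i, mean_loss=loss)
            time.sleep(0.01)

    ray.init()
    sched = AsyncHyperBandScheduler(
        time_attr="training_iteration",  # automatically filled by Tune
        metric="mean_loss",
        mode="min")
    run(my_trainable, name="metric_demo", config={"width": 10}, scheduler=sched)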
3 changes: 2 additions & 1 deletion python/ray/tune/examples/async_hyperband_example.py
@@ -59,7 +59,8 @@ def _restore(self, checkpoint_path):
     # which is automatically filled by Tune.
     ahb = AsyncHyperBandScheduler(
         time_attr="training_iteration",
-        reward_attr="episode_reward_mean",
+        metric="episode_reward_mean",
+        mode="max",
         grace_period=5,
         max_t=100)

2 changes: 1 addition & 1 deletion python/ray/tune/examples/ax_example.py
@@ -112,5 +112,5 @@ def easy_objective(config, reporter):
         outcome_constraints=["l2norm <= 1.25"],  # Optional.
     )
     algo = AxSearch(client, max_concurrent=4)
-    scheduler = AsyncHyperBandScheduler(reward_attr="hartmann6")
+    scheduler = AsyncHyperBandScheduler(metric="hartmann6", mode="max")
     run(easy_objective, name="ax", search_alg=algo, **config)
8 changes: 4 additions & 4 deletions python/ray/tune/examples/bayesopt_example.py
@@ -18,8 +18,7 @@ def easy_objective(config, reporter):
     for i in range(config["iterations"]):
         reporter(
             timesteps_total=i,
-            neg_mean_loss=-(config["height"] - 14)**2 +
-            abs(config["width"] - 3))
+            mean_loss=(config["height"] - 14)**2 - abs(config["width"] - 3))
         time.sleep(0.02)


@@ -46,13 +45,14 @@ def easy_objective(config, reporter):
     algo = BayesOptSearch(
         space,
         max_concurrent=4,
-        reward_attr="neg_mean_loss",
+        metric="mean_loss",
+        mode="min",
         utility_kwargs={
             "kind": "ucb",
             "kappa": 2.5,
             "xi": 0.0
         })
-    scheduler = AsyncHyperBandScheduler(reward_attr="neg_mean_loss")
+    scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
     run(easy_objective,
         name="my_exp",
         search_alg=algo,
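
The objective rewrite above is the pattern repeated across these examples: under the old maximize-only API the loss was reported negated as `neg_mean_loss`; now the raw `mean_loss` is reported and the direction declared via `mode="min"`. The two are equivalent up to sign, e.g.:

    height, width = 20, 5
    mean_loss = (height - 14) ** 2 - abs(width - 3)       # new style
    neg_mean_loss = -(height - 14) ** 2 + abs(width - 3)  # old style
    assert neg_mean_loss == -mean_loss  # minimizing one maximizes the other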
2 changes: 1 addition & 1 deletion python/ray/tune/examples/genetic_example.py
@@ -50,7 +50,7 @@ def michalewicz_function(config, reporter):
         reward_attr="neg_mean_loss",
         max_generation=2 if args.smoke_test else 10,
         population_size=10 if args.smoke_test else 50)
-    scheduler = AsyncHyperBandScheduler(reward_attr="neg_mean_loss")
+    scheduler = AsyncHyperBandScheduler(metric="neg_mean_loss", mode="max")
    run(michalewicz_function,
         name="my_exp",
         search_alg=algo,
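
Note that only the scheduler call changes in this file: `GeneticSearch` itself still takes `reward_attr="neg_mean_loss"` at this point in the diff, so the scheduler keeps the already-negated metric with `mode="max"` rather than switching to `mean_loss`/`min`.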
3 changes: 2 additions & 1 deletion python/ray/tune/examples/hyperband_example.py
@@ -58,7 +58,8 @@ def _restore(self, checkpoint_path):
     # which is automatically filled by Tune.
     hyperband = HyperBandScheduler(
         time_attr="training_iteration",
-        reward_attr="episode_reward_mean",
+        metric="episode_reward_mean",
+        mode="max",
         max_t=100)

     exp = Experiment(
8 changes: 4 additions & 4 deletions python/ray/tune/examples/hyperopt_example.py
@@ -20,8 +20,7 @@ def easy_objective(config, reporter):
     for i in range(config["iterations"]):
         reporter(
             timesteps_total=i,
-            neg_mean_loss=-(config["height"] - 14)**2 +
-            abs(config["width"] - 3))
+            mean_loss=(config["height"] - 14)**2 - abs(config["width"] - 3))
         time.sleep(0.02)


@@ -66,7 +65,8 @@ def easy_objective(config, reporter):
     algo = HyperOptSearch(
         space,
         max_concurrent=4,
-        reward_attr="neg_mean_loss",
+        metric="mean_loss",
+        mode="min",
         points_to_evaluate=current_best_params)
-    scheduler = AsyncHyperBandScheduler(reward_attr="neg_mean_loss")
+    scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
     run(easy_objective, search_alg=algo, scheduler=scheduler, **config)
3 changes: 2 additions & 1 deletion python/ray/tune/examples/mnist_pytorch.py
@@ -161,7 +161,8 @@ def test():
     ray.init()
     sched = AsyncHyperBandScheduler(
         time_attr="training_iteration",
-        reward_attr="neg_mean_loss",
+        metric="mean_loss",
+        mode="min",
         max_t=400,
         grace_period=20)
     tune.register_trainable(
2 changes: 1 addition & 1 deletion python/ray/tune/examples/mnist_pytorch_trainable.py
@@ -180,7 +180,7 @@ def _restore(self, checkpoint_path):

     ray.init(redis_address=args.redis_address)
     sched = HyperBandScheduler(
-        time_attr="training_iteration", reward_attr="neg_mean_loss")
+        time_attr="training_iteration", metric="mean_loss", mode="min")
     tune.run(
         TrainMNIST,
         scheduler=sched,
8 changes: 4 additions & 4 deletions python/ray/tune/examples/nevergrad_example.py
@@ -18,8 +18,7 @@ def easy_objective(config, reporter):
     for i in range(config["iterations"]):
         reporter(
             timesteps_total=i,
-            neg_mean_loss=-(config["height"] - 14)**2 +
-            abs(config["width"] - 3))
+            mean_loss=(config["height"] - 14)**2 - abs(config["width"] - 3))
         time.sleep(0.02)


@@ -55,8 +54,9 @@ def easy_objective(config, reporter):
         optimizer,
         parameter_names,
         max_concurrent=4,
-        reward_attr="neg_mean_loss")
-    scheduler = AsyncHyperBandScheduler(reward_attr="neg_mean_loss")
+        metric="mean_loss",
+        mode="min")
+    scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
     run(easy_objective,
         name="nevergrad",
         search_alg=algo,
3 changes: 2 additions & 1 deletion python/ray/tune/examples/pbt_example.py
@@ -96,7 +96,8 @@ def reset_config(self, new_config):

     pbt = PopulationBasedTraining(
         time_attr="training_iteration",
-        reward_attr="mean_accuracy",
+        metric="mean_accuracy",
+        mode="max",
         perturbation_interval=20,
         hyperparam_mutations={
             # distribution for resampling
3 changes: 2 additions & 1 deletion python/ray/tune/examples/pbt_ppo_example.py
@@ -30,7 +30,8 @@ def explore(config):

     pbt = PopulationBasedTraining(
         time_attr="time_total_s",
-        reward_attr="episode_reward_mean",
+        metric="episode_reward_mean",
+        mode="max",
         perturbation_interval=120,
         resample_probability=0.25,
         # Specifies the mutations of these hyperparams
3 changes: 2 additions & 1 deletion python/ray/tune/examples/pbt_tune_cifar10_with_keras.py
@@ -206,7 +206,8 @@ def _stop(self):

     pbt = PopulationBasedTraining(
         time_attr="training_iteration",
-        reward_attr="mean_accuracy",
+        metric="mean_accuracy",
+        mode="max",
         perturbation_interval=10,
         hyperparam_mutations={
             "dropout": lambda _: np.random.uniform(0, 1),
8 changes: 4 additions & 4 deletions python/ray/tune/examples/sigopt_example.py
@@ -18,8 +18,7 @@ def easy_objective(config, reporter):
     for i in range(config["iterations"]):
         reporter(
             timesteps_total=i,
-            neg_mean_loss=-(config["height"] - 14)**2 +
-            abs(config["width"] - 3))
+            mean_loss=(config["height"] - 14)**2 - abs(config["width"] - 3))
         time.sleep(0.02)


@@ -68,8 +67,9 @@ def easy_objective(config, reporter):
         space,
         name="SigOpt Example Experiment",
         max_concurrent=1,
-        reward_attr="neg_mean_loss")
-    scheduler = AsyncHyperBandScheduler(reward_attr="neg_mean_loss")
+        metric="mean_loss",
+        mode="min")
+    scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
     run(easy_objective,
         name="my_exp",
         search_alg=algo,
13 changes: 7 additions & 6 deletions python/ray/tune/examples/skopt_example.py
@@ -18,8 +18,7 @@ def easy_objective(config, reporter):
     for i in range(config["iterations"]):
         reporter(
             timesteps_total=i,
-            neg_mean_loss=-(config["height"] - 14)**2 +
-            abs(config["width"] - 3))
+            mean_loss=(config["height"] - 14)**2 - abs(config["width"] - 3))
         time.sleep(0.02)


@@ -48,10 +47,11 @@ def easy_objective(config, reporter):
     algo = SkOptSearch(
         optimizer, ["width", "height"],
         max_concurrent=4,
-        reward_attr="neg_mean_loss",
+        metric="mean_loss",
+        mode="min",
         points_to_evaluate=previously_run_params,
         evaluated_rewards=known_rewards)
-    scheduler = AsyncHyperBandScheduler(reward_attr="neg_mean_loss")
+    scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
     run(easy_objective,
         name="skopt_exp_with_warmstart",
         search_alg=algo,
@@ -63,9 +63,10 @@ def easy_objective(config, reporter):
     algo = SkOptSearch(
         optimizer, ["width", "height"],
         max_concurrent=4,
-        reward_attr="neg_mean_loss",
+        metric="mean_loss",
+        mode="min",
         points_to_evaluate=previously_run_params)
-    scheduler = AsyncHyperBandScheduler(reward_attr="neg_mean_loss")
+    scheduler = AsyncHyperBandScheduler(metric="mean_loss", mode="min")
     run(easy_objective,
         name="skopt_exp",
         search_alg=algo,
3 changes: 2 additions & 1 deletion python/ray/tune/examples/tune_cifar10_gluon.py
@@ -192,7 +192,8 @@ def test():
     elif args.scheduler == "asynchyperband":
         sched = AsyncHyperBandScheduler(
             time_attr="training_iteration",
-            reward_attr="neg_mean_loss",
+            metric="mean_loss",
+            mode="min",
             max_t=400,
             grace_period=60)
     else:
3 changes: 2 additions & 1 deletion python/ray/tune/examples/tune_mnist_async_hyperband.py
@@ -240,7 +240,8 @@ def train(config={"activation": "relu"}, reporter=None):
         name="tune_mnist_test",
         scheduler=AsyncHyperBandScheduler(
             time_attr="timesteps_total",
-            reward_attr="mean_accuracy",
+            metric="mean_accuracy",
+            mode="max",
             max_t=600,
         ),
         **mnist_spec)
3 changes: 2 additions & 1 deletion python/ray/tune/examples/tune_mnist_keras.py
@@ -64,7 +64,8 @@ def train_mnist(config, reporter):
     ray.init()
     sched = AsyncHyperBandScheduler(
         time_attr="timesteps_total",
-        reward_attr="mean_accuracy",
+        metric="mean_accuracy",
+        mode="max",
         max_t=400,
         grace_period=20)

5 changes: 4 additions & 1 deletion python/ray/tune/examples/tune_mnist_ray_hyperband.py
@@ -233,7 +233,10 @@ def _restore(self, ckpt_data):

     ray.init()
     hyperband = HyperBandScheduler(
-        time_attr="training_iteration", reward_attr="mean_accuracy", max_t=10)
+        time_attr="training_iteration",
+        metric="mean_accuracy",
+        mode="max",
+        max_t=10)

     tune.run(
         TrainMNIST,
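
Taken together, the example updates follow one convention: accuracy- and reward-style metrics (`mean_accuracy`, `episode_reward_mean`) are declared with `mode="max"`, while loss-style metrics drop the negated `neg_mean_loss` workaround and report the raw `mean_loss` with `mode="min"`.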
29 changes: 23 additions & 6 deletions python/ray/tune/schedulers/async_hyperband.py
@@ -25,9 +25,10 @@ class AsyncHyperBandScheduler(FIFOScheduler):
             Note that you can pass in something non-temporal such as
             `training_iteration` as a measure of progress, the only requirement
             is that the attribute should increase monotonically.
-        reward_attr (str): The training result objective value attribute. As
-            with `time_attr`, this may refer to any objective value. Stopping
+        metric (str): The training result objective value attribute. Stopping
             procedures will use this attribute.
+        mode (str): One of {min, max}. Determines whether objective is
+            minimizing or maximizing the metric attribute.
         max_t (float): max time units per trial. Trials will be stopped after
             max_t time units (determined by time_attr) have passed.
         grace_period (float): Only stop trials at least this old in time.
@@ -40,7 +41,9 @@ class AsyncHyperBandScheduler(FIFOScheduler):

     def __init__(self,
                  time_attr="training_iteration",
-                 reward_attr="episode_reward_mean",
+                 reward_attr=None,
+                 metric="episode_reward_mean",
+                 mode="max",
                  max_t=100,
                  grace_period=10,
                  reduction_factor=4,
@@ -50,6 +53,16 @@ def __init__(self,
         assert grace_period > 0, "grace_period must be positive!"
         assert reduction_factor > 1, "Reduction Factor not valid!"
         assert brackets > 0, "brackets must be positive!"
+        assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!"
+
+        if reward_attr is not None:
+            mode = "max"
+            metric = reward_attr
+            logger.warning(
+                "`reward_attr` is deprecated and will be removed in a future "
+                "version of Tune. "
+                "Setting `metric={}` and `mode=max`.".format(reward_attr))
+
         FIFOScheduler.__init__(self)
         self._reduction_factor = reduction_factor
         self._max_t = max_t
@@ -63,7 +76,11 @@ def __init__(self,
         ]
         self._counter = 0  # for
         self._num_stopped = 0
-        self._reward_attr = reward_attr
+        self._metric = metric
+        if mode == "max":
+            self._metric_op = 1.
+        elif mode == "min":
+            self._metric_op = -1.
         self._time_attr = time_attr

     def on_trial_add(self, trial_runner, trial):
@@ -80,15 +97,15 @@ def on_trial_result(self, trial_runner, trial, result):
         else:
             bracket = self._trial_info[trial.trial_id]
         action = bracket.on_result(trial, result[self._time_attr],
-                                   result[self._reward_attr])
+                                   self._metric_op * result[self._metric])
         if action == TrialScheduler.STOP:
             self._num_stopped += 1
         return action

     def on_trial_complete(self, trial_runner, trial, result):
         bracket = self._trial_info[trial.trial_id]
         bracket.on_result(trial, result[self._time_attr],
-                          result[self._reward_attr])
+                          self._metric_op * result[self._metric])
         del self._trial_info[trial.trial_id]

     def on_trial_remove(self, trial_runner, trial):
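
The scheduler implementation shows how `mode` is realized: the reported metric is multiplied by `_metric_op` (+1. for max, -1. for min) before reaching the bracket logic, so the successive-halving comparison itself always maximizes; minimization is simply maximization of the negated metric. A small sketch of the resulting constructor behavior (arguments as defined in the `__init__` above):

    from ray.tune.schedulers import AsyncHyperBandScheduler

    # New API: stopping decisions internally use -result["mean_loss"].
    sched = AsyncHyperBandScheduler(metric="mean_loss", mode="min")

    # Deprecated path: logs a warning and is rewritten to
    # metric="episode_reward_mean", mode="max".
    legacy = AsyncHyperBandScheduler(reward_attr="episode_reward_mean")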