[rllib][asv] Support ASV for RLlib (ray-project#2304)

jhpenger · Jun 29, 2018 · 3cc27d2 · 3cc27d2
1 parent 92ab7e5
commit 3cc27d2
Show file tree

Hide file tree

Showing 5 changed files with 283 additions and 3 deletions.
diff --git a/python/README-benchmarks.rst b/python/README-benchmarks.rst
@@ -8,22 +8,31 @@ You can run the benchmark suite by doing the following:
 
 To run ASV inside docker, you can use the following commands:
 ``docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA bash -c '/ray/test/jenkins_tests/run_asv.sh'``
+``docker run --rm --shm-size=10G --memory=10G $DOCKER_SHA bash -c '/ray/test/jenkins_tests/run_rllib_asv.sh'``
+
 
 Visualizing Benchmarks
 ======================
 
-To visualize benchmarks, you must copy the S3 bucket down to `$RAY_DIR/python`. Assuming asv is installed,
+To visualize the regular Ray benchmarks, you must copy the S3 bucket down to ``$RAY_DIR/python``.
 
 .. code-block::
 
   cd $RAY_DIR/python
   aws s3 sync s3://$BUCKET/ASV/ .
 
-Then, you can run:
+For RLlib, you must sync a *particular* folder down to ``$RLLIB_DIR`` (``ray/python/ray/rllib``).
+
+.. code-block::
+
+  cd $RAY_DIR/python/ray/rllib
+  aws s3 sync s3://$BUCKET/ASV/RLLIB_RESULTS/ ./RLLIB_RESULTS
+
+Then, from the directory you synced the results into, you can run:
 
 .. code-block::
 
   asv publish --no-pull
   asv preview
 
-This creates the directory and then launches a server.
+This creates the HTML output directory and then launches a local server where you can view the results.
diff --git a/python/ray/rllib/asv.conf.json b/python/ray/rllib/asv.conf.json
@@ -0,0 +1,141 @@
+{
+    // The version of the config file format.  Do not change, unless
+    // you know what you are doing.
+    "version": 1,
+
+    // The name of the project being benchmarked
+    "project": "rllib",
+
+    // The project's homepage
+    "project_url": "http://rllib.io",
+
+    // The URL or local path of the source code repository for the
+    // project being benchmarked
+    "repo": "../../../",
+
+    // List of branches to benchmark. If not provided, defaults to "master"
+    // (for git) or "default" (for mercurial).
+    "branches": ["master"], // for git
+    // "branches": ["default"],    // for mercurial
+
+    // The DVCS being used.  If not set, it will be automatically
+    // determined from "repo" by looking at the protocol in the URL
+    // (if remote), or by looking for special directories, such as
+    // ".git" (if local).
+    "dvcs": "git",
+
+    // The tool to use to create environments.  May be "conda",
+    // "virtualenv" or other value depending on the plugins in use.
+    // If missing or the empty string, the tool will be automatically
+    // determined by looking for tools on the PATH environment
+    // variable.
+    "environment_type": "conda",
+
+    // timeout in seconds for installing any dependencies in environment
+    // defaults to 10 min
+    //"install_timeout": 600,
+
+    // the base URL to show a commit for the project.
+    "show_commit_url": "http://github.com/ray-project/ray/commit/",
+
+    // The Pythons you'd like to test against.  If not provided, defaults
+    // to the current version of Python used to run `asv`.
+    "pythons": ["3.6"],
+
+    // The matrix of dependencies to test.  Each key is the name of a
+    // package (in PyPI) and the values are version numbers.  An empty
+    // list or empty string indicates to just test against the default
+    // (latest) version. null indicates that the package is to not be
+    // installed. If the package to be tested is only available from
+    // PyPI, and the 'environment_type' is conda, then you can prefix
+    // the package name with 'pip+', and the package will be installed via
+    // pip (with all the conda available packages installed first,
+    // followed by the pip installed packages).
+    //
+    // "matrix": {
+    //     "numpy": ["1.6", "1.7"],
+    //     "six": ["", null],        // test with and without six installed
+    //     "pip+emcee": [""],   // emcee is only available for install with pip.
+    // },
+
+    // Combinations of libraries/python versions can be excluded/included
+    // from the set to test. Each entry is a dictionary containing additional
+    // key-value pairs to include/exclude.
+    //
+    // An exclude entry excludes entries where all values match. The
+    // values are regexps that should match the whole string.
+    //
+    // An include entry adds an environment. Only the packages listed
+    // are installed. The 'python' key is required. The exclude rules
+    // do not apply to includes.
+    //
+    // In addition to package names, the following keys are available:
+    //
+    // - python
+    //     Python version, as in the *pythons* variable above.
+    // - environment_type
+    //     Environment type, as above.
+    // - sys_platform
+    //     Platform, as in sys.platform. Possible values for the common
+    //     cases: 'linux2', 'win32', 'cygwin', 'darwin'.
+    //
+    // "exclude": [
+    //     {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
+    //     {"environment_type": "conda", "six": null}, // don't run without six on conda
+    // ],
+    //
+    // "include": [
+    //     // additional env for python2.7
+    //     {"python": "2.7", "numpy": "1.8"},
+    //     // additional env if run on windows+conda
+    //     {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""},
+    // ],
+
+    // The directory (relative to the current directory) that benchmarks are
+    // stored in.  If not provided, defaults to "benchmarks"
+    "benchmark_dir": "tuned_examples/regression_tests",
+
+    // The directory (relative to the current directory) to cache the Python
+    // environments in.  If not provided, defaults to "env"
+    // "env_dir": "env",
+
+    // The directory (relative to the current directory) that raw benchmark
+    // results are stored in.  If not provided, defaults to "results".
+    "results_dir": "RLLIB_RESULTS",
+
+    // The directory (relative to the current directory) that the html tree
+    // should be written to.  If not provided, defaults to "html".
+    // "html_dir": "html",
+
+    // The number of characters to retain in the commit hashes.
+    // "hash_length": 8,
+
+    // `asv` will cache wheels of the recent builds in each
+    // environment, making them faster to install next time.  This is the
+    // number of builds to keep, per environment.
+    // "wheel_cache_size": 0
+
+    // The commits after which the regression search in `asv publish`
+    // should start looking for regressions. Dictionary whose keys are
+    // regexps matching to benchmark names, and values corresponding to
+    // the commit (exclusive) after which to start looking for
+    // regressions.  The default is to start from the first commit
+    // with results. If the commit is `null`, regression detection is
+    // skipped for the matching benchmark.
+    //
+    // "regressions_first_commits": {
+    //    "some_benchmark": "352cdf",  // Consider regressions only after this commit
+    //    "another_benchmark": null,   // Skip regression detection altogether
+    // }
+
+    // The thresholds for relative change in results, after which `asv
+    // publish` starts reporting regressions. Dictionary of the same
+    // form as in ``regressions_first_commits``, with values
+    // indicating the thresholds.  If multiple entries match, the
+    // maximum is taken. If no entry matches, the default is 5%.
+    //
+    // "regressions_thresholds": {
+    //    "some_benchmark": 0.01,     // Threshold of 1%
+    //    "another_benchmark": 0.5,   // Threshold of 50%
+    // }
+}
diff --git a/python/ray/rllib/tuned_examples/regression_tests/__init__.py b/python/ray/rllib/tuned_examples/regression_tests/__init__.py
diff --git a/python/ray/rllib/tuned_examples/regression_tests/regression_test.py b/python/ray/rllib/tuned_examples/regression_tests/regression_test.py
@@ -0,0 +1,102 @@
+#!/usr/bin/env python
+"""
+This module runs the regression YAMLs as ASV benchmarks.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from collections import defaultdict
+import numpy as np
+import os
+import yaml
+
+import ray
+from ray import tune
+
+
+CONFIG_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+def _evaulate_config(filename):
+    with open(os.path.join(CONFIG_DIR, filename)) as f:
+        experiments = yaml.load(f)
+    ray.init()
+    trials = tune.run_experiments(experiments)
+    results = defaultdict(list)
+    for t in trials:
+        results["time_total_s"] += [t.last_result.time_total_s]
+        results["episode_reward_mean"] += [t.last_result.episode_reward_mean]
+        results["training_iteration"] += [t.last_result.training_iteration]
+
+    return {k: np.median(v) for k, v in results.items()}
+
+
+class Regression():
+    """Base class for the regression benchmarks defined in this module.
+
+    Subclasses provide ``setup_cache``. The ``track_*`` methods read
+    entries from the ``result`` mapping they are given; ASV is expected to
+    pass in the value returned by ``setup_cache`` (see the ASV docs).
+    """
+
+    def setup_cache(self):
+        # We need to implement this in separate classes
+        # below so that ASV will register the setup/class
+        # as a separate test.
+        raise NotImplementedError
+
+    def teardown(self, *args):
+        """Call ``ray.worker.cleanup()``; any arguments are ignored."""
+        ray.worker.cleanup()
+
+    def track_time(self, result):
+        """Return the "time_total_s" entry of ``result``."""
+        return result["time_total_s"]
+
+    def track_reward(self, result):
+        """Return the "episode_reward_mean" entry of ``result``."""
+        return result["episode_reward_mean"]
+
+    def track_iterations(self, result):
+        """Return the "training_iteration" entry of ``result``."""
+        return result["training_iteration"]
+
+
+class TestCartPolePPO(Regression):
+    """Regression benchmark driven by ``cartpole-ppo.yaml``."""
+    _file = "cartpole-ppo.yaml"
+
+    def setup_cache(self):
+        # Run the experiments in the YAML file and return their summary.
+        return _evaulate_config(self._file)
+
+
+class TestCartPolePG(Regression):
+    """Regression benchmark driven by ``cartpole-pg.yaml``."""
+    _file = "cartpole-pg.yaml"
+
+    def setup_cache(self):
+        # Run the experiments in the YAML file and return their summary.
+        return _evaulate_config(self._file)
+
+
+class TestPendulumDDPG(Regression):
+    """Regression benchmark driven by ``pendulum-ddpg.yaml``."""
+    _file = "pendulum-ddpg.yaml"
+
+    def setup_cache(self):
+        # Run the experiments in the YAML file and return their summary.
+        return _evaulate_config(self._file)
+
+
+class TestCartPoleES(Regression):
+    """Regression benchmark driven by ``cartpole-es.yaml``."""
+    _file = "cartpole-es.yaml"
+
+    def setup_cache(self):
+        # Run the experiments in the YAML file and return their summary.
+        return _evaulate_config(self._file)
+
+
+class TestCartPoleDQN(Regression):
+    """Regression benchmark driven by ``cartpole-dqn.yaml``."""
+    _file = "cartpole-dqn.yaml"
+
+    def setup_cache(self):
+        # Run the experiments in the YAML file and return their summary.
+        return _evaulate_config(self._file)
+
+
+class TestCartPoleA3C(Regression):
+    """Regression benchmark driven by ``cartpole-a3c.yaml``."""
+    _file = "cartpole-a3c.yaml"
+
+    def setup_cache(self):
+        # Run the experiments in the YAML file and return their summary.
+        return _evaulate_config(self._file)
+
+
+class TestCartPoleA3CPyTorch(Regression):
+    """Regression benchmark driven by ``cartpole-a3c-pytorch.yaml``."""
+    _file = "cartpole-a3c-pytorch.yaml"
+
+    def setup_cache(self):
+        # Run the experiments in the YAML file and return their summary.
+        return _evaulate_config(self._file)
diff --git a/test/jenkins_tests/run_rllib_asv.sh b/test/jenkins_tests/run_rllib_asv.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+
+# Cause the script to exit if a single command fails.
+set -e
+
+# Show explicitly which commands are currently running.
+set -x
+
+BUCKET_NAME=ray-integration-testing/ASV
+COMMIT=$(cat /ray/git-rev)
+RLLIB_RESULTS=RLLIB_RESULTS
+RLLIB_RESULTS_DIR=/ray/python/ray/rllib/RLLIB_RESULTS
+pip install awscli
+
+# Install Ray fork of ASV
+git clone https://github.com/ray-project/asv.git /tmp/asv/ || true
+cd /tmp/asv/
+pip install -e .
+
+cd /ray/python/ray/rllib/
+asv machine --machine jenkins
+mkdir $RLLIB_RESULTS_DIR || true
+aws s3 cp s3://$BUCKET_NAME/RLLIB_RESULTS/benchmarks.json $RLLIB_RESULTS_DIR/benchmarks.json || true
+
+asv run --show-stderr --python=same --force-record-commit=$COMMIT
+
+aws s3 cp $RLLIB_RESULTS_DIR/benchmarks.json s3://$BUCKET_NAME/RLLIB_RESULTS/benchmarks_$COMMIT.json
+aws s3 sync $RLLIB_RESULTS_DIR/ s3://$BUCKET_NAME/RLLIB_RESULTS/