[RLlib] Benchmark and regression test yaml cleanup and restructuring.
sven1977 authored May 26, 2020
1 parent ae2e1f0 commit baa0534
Showing 89 changed files with 574 additions and 544 deletions.
290 changes: 171 additions & 119 deletions rllib/BUILD

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion rllib/agents/ddpg/ddpg_torch_policy.py
@@ -200,7 +200,8 @@ def build_ddpg_stats(policy, batch):
         "mean_q": torch.mean(policy.q_t),
         "max_q": torch.max(policy.q_t),
         "min_q": torch.min(policy.q_t),
-        "td_error": policy.td_error
+        "mean_td_error": torch.mean(policy.td_error),
+        "td_error": policy.td_error,
     }
     return stats

4 changes: 3 additions & 1 deletion rllib/agents/ddpg/tests/test_ddpg.py
@@ -23,7 +23,9 @@ def test_ddpg_compilation(self):
         """Test whether a DDPGTrainer can be built with both frameworks."""
         config = ddpg.DEFAULT_CONFIG.copy()
         config["num_workers"] = 0  # Run locally.
-        config["num_envs_per_worker"] = 2  # Run locally.
+        config["num_envs_per_worker"] = 2
+        config["learning_starts"] = 0
+        config["exploration_config"]["random_timesteps"] = 100

         num_iterations = 2

49 changes: 36 additions & 13 deletions rllib/tests/run_regression_tests.py
@@ -9,13 +9,15 @@
 #     name = "run_regression_tests",
 #     main = "tests/run_regression_tests.py",
 #     tags = ["learning_tests"],
-#     size = "enormous",  # = 60min timeout
+#     size = "medium",  # 5min timeout
 #     srcs = ["tests/run_regression_tests.py"],
 #     data = glob(["tuned_examples/regression_tests/*.yaml"]),
-#     Pass `BAZEL` option and the path to look for yaml regression files.
+#     # Pass `BAZEL` option and the path to look for yaml regression files.
 #     args = ["BAZEL", "tuned_examples/regression_tests"]
 # )

+import argparse
+import os
 from pathlib import Path
 import sys
 import yaml
@@ -24,30 +26,51 @@
 from ray.tune import run_experiments
 from ray.rllib import _register_all

+parser = argparse.ArgumentParser()
+parser.add_argument(
+    "--torch",
+    action="store_true",
+    help="Runs all tests with PyTorch enabled.")
+parser.add_argument(
+    "--yaml-dir",
+    type=str,
+    help="The directory in which to find all yamls to test.")

 if __name__ == "__main__":
+    args = parser.parse_args()

-    # Bazel regression test mode: Get path to look for yaml files from argv[2].
-    if sys.argv[1] == "BAZEL":
-        # Get the path to use.
-        rllib_dir = Path(__file__).parent.parent
-        print("rllib dir={}".format(rllib_dir))
-        yaml_files = rllib_dir.rglob(sys.argv[2] + "/*.yaml")
-        yaml_files = sorted(
-            map(lambda path: str(path.absolute()), yaml_files), reverse=True)
-    # Normal mode: Get yaml files to run from command line.
-    else:
-        yaml_files = sys.argv[1:]
+    # Get the path or single file to use.
+    rllib_dir = Path(__file__).parent.parent
+    print("rllib dir={}".format(rllib_dir))
+
+    if not os.path.isdir(os.path.join(rllib_dir, args.yaml_dir)):
+        raise ValueError("yaml-dir ({}) not found!".format(args.yaml_dir))

-    print("Will run the following regression files:")
+    yaml_files = rllib_dir.rglob(args.yaml_dir + "/*.yaml")
+    yaml_files = sorted(
+        map(lambda path: str(path.absolute()), yaml_files), reverse=True)
+
+    print("Will run the following regression tests:")
     for yaml_file in yaml_files:
         print("->", yaml_file)

     # Loop through all collected files.
     for yaml_file in yaml_files:
         experiments = yaml.load(open(yaml_file).read())
         assert len(experiments) == 1,\
             "Error, can only run a single experiment per yaml file!"

         print("== Test config ==")
         print(yaml.dump(experiments))

+        # Add torch option to exp configs.
+        for exp in experiments.values():
+            if args.torch:
+                exp["config"]["use_pytorch"] = True

         # Try running each test 3 times and make sure it reaches the given
         # reward.
         passed = False
         for i in range(3):
             try:
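The refactored runner is thus driven by the two argparse flags above instead of raw sys.argv parsing; an illustrative invocation (paths assumed; --yaml-dir is resolved relative to the rllib dir):

    $ python rllib/tests/run_regression_tests.py \
        --yaml-dir=tuned_examples/regression_tests --torch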
@@ -9,6 +9,7 @@ atari-a2c:
         - SpaceInvadersNoFrameskip-v4
     run: A2C
     config:
+        use_pytorch: false  # <- switch on/off torch
         rollout_fragment_length: 20
         clip_rewards: True
         num_workers: 5
@@ -1,10 +1,11 @@
-cartpole-a2c-microbatch-tf:
+cartpole-a2c-microbatch:
     env: CartPole-v0
     run: A2C
     stop:
-        episode_reward_mean: 100
+        episode_reward_mean: 150
         timesteps_total: 100000
     config:
+        # Works for both torch and tf.
+        use_pytorch: false
         num_workers: 1
         gamma: 0.95
11 changes: 11 additions & 0 deletions rllib/tuned_examples/a3c/cartpole-a2c.yaml
@@ -0,0 +1,11 @@
cartpole-a2c:
    env: CartPole-v0
    run: A2C
    stop:
        episode_reward_mean: 150
        timesteps_total: 500000
    config:
        # Works for both torch and tf.
        use_pytorch: false
        num_workers: 0
        lr: 0.001
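Such a per-algorithm yaml can also be launched on its own; a sketch assuming the standard rllib CLI entrypoint (file path as added by this commit):

    $ rllib train -f rllib/tuned_examples/a3c/cartpole-a2c.yaml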
@@ -1,10 +1,11 @@
-cartpole-a3c-tf:
+cartpole-a3c:
     env: CartPole-v0
     run: A3C
     stop:
-        episode_reward_mean: 100
-        timesteps_total: 100000
+        episode_reward_mean: 150
+        timesteps_total: 200000
     config:
+        # Works for both torch and tf.
+        use_pytorch: false
         num_workers: 1
         gamma: 0.95
@@ -4,9 +4,10 @@ pong-a3c:
     env: PongDeterministic-v4
     run: A3C
     config:
+        # Works for both torch and tf.
+        use_pytorch: false
         num_workers: 16
         rollout_fragment_length: 20
-        use_pytorch: false
         vf_loss_coeff: 0.5
         entropy_coeff: 0.01
         gamma: 0.99
@@ -1,17 +1,16 @@
-cartpole-ars-torch:
+cartpole-ars:
     env: CartPole-v0
     run: ARS
     stop:
         episode_reward_mean: 150
         timesteps_total: 500000
     config:
-        use_pytorch: true
+        # Works for both torch and tf.
+        use_pytorch: false
         noise_stdev: 0.02
         num_rollouts: 50
         rollouts_used: 25
         num_workers: 2
         sgd_stepsize: 0.01
         noise_size: 25000000
         eval_prob: 0.5
-        model:
-            fcnet_hiddens: [64, 64]
@@ -3,6 +3,8 @@ swimmer-ars:
     env: Swimmer-v2
     run: ARS
     config:
+        # Works for both torch and tf.
+        use_pytorch: false
         noise_stdev: 0.01
         num_rollouts: 1
         rollouts_used: 1
13 changes: 0 additions & 13 deletions rllib/tuned_examples/cartpole-marwil-torch.yaml

This file was deleted.

169 changes: 169 additions & 0 deletions rllib/tuned_examples/cleanup_experiment.py
@@ -0,0 +1,169 @@
"""
This script automates cleaning up a benchmark/experiment run of some algo
against some config (with possibly more than one tune trial,
e.g. torch=grid_search([True, False])).
Run `python cleanup_experiment.py --help` for more information.
Use on an input directory with trial contents e.g.:
..
IMPALA_BreakoutNoFrameskip-v4_0_use_pytorch=False_2020-05-11_10-17-54topr3h9k
IMPALA_BreakoutNoFrameskip-v4_0_use_pytorch=False_2020-05-11_13-59-35dqaetxnf
IMPALA_BreakoutNoFrameskip-v4_0_use_pytorch=False_2020-05-11_17-21-28tbhedw72
IMPALA_BreakoutNoFrameskip-v4_2_use_pytorch=True_2020-05-11_10-17-54lv20cgn_
IMPALA_BreakoutNoFrameskip-v4_2_use_pytorch=True_2020-05-11_13-59-35kwzhax_y
IMPALA_BreakoutNoFrameskip-v4_2_use_pytorch=True_2020-05-11_17-21-28a5j0s7za
Then run:
>> python cleanup_experiment.py --experiment-dir [parent dir w/ trial sub-dirs]
>> --output-dir [your out dir] --results-filter dumb_col_2,superfluous_col3
>> --results-max-size [max results file size in kb before(!) zipping]
The script will create one output sub-dir for each trial and only copy
the configuration and the csv results (filtered and every nth row removed
based on the given args).
"""

import argparse
import json
import os
import re
import shutil
import yaml

parser = argparse.ArgumentParser()
parser.add_argument(
    "--experiment-dir",
    type=str,
    help="Experiment dir in which all sub-runs (seeds) are "
    "located (as sub-dirs). Each sub-run dir must contain the files: "
    "params.json and progress.csv.")
parser.add_argument(
    "--output-dir",
    type=str,
    help="The output dir, in which the cleaned up output will be placed.")
parser.add_argument(
    "--results-filter",
    type=str,
    help="Comma-separated list of csv fields to exclude.",
    default="experiment_id,pid,hostname,node_ip,trial_id,hist_stats/episode_"
    "reward,hist_stats/episode_lengths,experiment_tag")
parser.add_argument(
    "--results-max-size",
    type=int,
    help="The max. size of the final results.csv file (in kb). Will erase "
    "every nth line in the original input to reach that goal. "
    "Use 0 for no limit (default=100).",
    default=100)


def process_single_run(in_dir, out_dir):
exp_dir = os.listdir(in_dir)

# Make sure trials dir is ok.
assert "params.json" in exp_dir and "progress.csv" in exp_dir, \
"params.json or progress.csv not found in {}!".format(in_dir)

os.makedirs(out_dir, exist_ok=True)

for file in exp_dir:
absfile = os.path.join(in_dir, file)
# Config file -> Convert to yaml and move to output dir.
if file == "params.json":
assert os.path.isfile(absfile), "{} not a file!".format(file)
with open(absfile) as fp:
contents = json.load(fp)
with open(os.path.join(out_dir, "config.yaml"), "w") as fp:
yaml.dump(contents, fp)
# Progress csv file -> Filter out some columns, cut, and write to
# output_dir.
elif file == "progress.csv":
assert os.path.isfile(absfile), "{} not a file!".format(file)
col_idx_to_filter = []
with open(absfile) as fp:
# Get column names.
col_names_orig = fp.readline().strip().split(",")
# Split by comma (abiding to quotes), filter out
# unwanted columns, then write to disk.
cols_to_filter = args.results_filter.split(",")
for i, c in enumerate(col_names_orig):
if c in cols_to_filter:
col_idx_to_filter.insert(0, i)
col_names = col_names_orig.copy()
for idx in col_idx_to_filter:
col_names.pop(idx)
absfile_out = os.path.join(out_dir, "progress.csv")
with open(absfile_out, "w") as out_fp:
print(",".join(col_names), file=out_fp)
while True:
line = fp.readline().strip()
if not line:
break
line = re.sub(
"(,{2,})",
lambda m: ",None" * (len(m.group()) - 1) + ",",
line)
                        cols = re.findall('".+?"|[^,]+', line)
                        if len(cols) != len(col_names_orig):
                            continue
                        for idx in col_idx_to_filter:
                            cols.pop(idx)
                        print(",".join(cols), file=out_fp)

            # Reduce the size of the output file if necessary.
            out_size = os.path.getsize(absfile_out)
            max_size = args.results_max_size * 1024
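            # Worked example (illustrative numbers): out_size=300kb and
            # max_size=100kb -> ratio=3.0, so keep only every
            # nth = 300 // 100 = 3rd line; out_size=150kb -> ratio=1.5,
            # so instead drop every nth = 150 // (150 - 100) = 3rd line.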
            if 0 < max_size < out_size:
                # Figure out roughly every which line we have to drop.
                ratio = out_size / max_size
                # If ratio > 2.0, we'll have to keep only every nth line.
                if ratio > 2.0:
                    nth = out_size // max_size
                    os.system("awk 'NR==1||NR%{}==0' {} > {}.new".format(
                        nth, absfile_out, absfile_out))
                # If ratio < 2.0 (>1.0), we'll have to drop every nth line.
                else:
                    nth = out_size // (out_size - max_size)
                    os.system("awk 'NR==1||NR%{}!=0' {} > {}.new".format(
                        nth, absfile_out, absfile_out))
                os.remove(absfile_out)
                os.rename(absfile_out + ".new", absfile_out)

            # Zip progress.csv into results.zip.
            zip_file = os.path.join(out_dir, "results.zip")
            try:
                os.remove(zip_file)
            except FileNotFoundError:
                pass
            os.system("zip -j {} {}".format(
                zip_file, os.path.join(out_dir, "progress.csv")))
            os.remove(os.path.join(out_dir, "progress.csv"))

        # TBX events file -> Move as is.
        elif re.search("^(events\\.out\\.|params\\.pkl)", file):
            assert os.path.isfile(absfile), "{} not a file!".format(file)
            shutil.copyfile(absfile, os.path.join(out_dir, file))


if __name__ == "__main__":
    args = parser.parse_args()
    exp_dir = os.listdir(args.experiment_dir)
    # Loop through all sub-directories.
    for i, sub_run in enumerate(sorted(exp_dir)):
        abspath = os.path.join(args.experiment_dir, sub_run)
        # This is a seed run.
        if os.path.isdir(abspath) and \
                re.search("^(\\w+?)_(\\w+?-v\\d+)(_\\d+)", sub_run):
            # Create meaningful output dir name:
            # [algo]_[env]_[trial #]_[trial-config]_[date YYYY-MM-DD].
            cleaned_up_out = re.sub(
                "^(\\w+?)_(\\w+?-v\\d+)(_\\d+)(_.+)?(_\\d{4}-\\d{2}-\\d{2})"
                "_\\d{2}-\\d{2}-\\w+", "{:02}_\\1_\\2\\4\\5".format(i),
                sub_run)
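            # E.g. (first sample dir from the docstring, i=0):
            # "IMPALA_BreakoutNoFrameskip-v4_0_use_pytorch=False_2020-05-11_10-17-54topr3h9k"
            # -> "00_IMPALA_BreakoutNoFrameskip-v4_use_pytorch=False_2020-05-11".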
            # Remove superfluous `env=` specifier (env is always included
            # in the name anyway).
            cleaned_up_out = re.sub("^(.+)env=\\w+?-v\\d+,?(.+)", "\\1\\2",
                                    cleaned_up_out)
            out_path = os.path.join(args.output_dir, cleaned_up_out)
            process_single_run(abspath, out_path)
    # Done.
    print("done")
5 changes: 5 additions & 0 deletions rllib/tuned_examples/create_plots.py
@@ -0,0 +1,5 @@
# TODO(sven):
# Add a simple script that takes n csv input files and generates plot(s)
# from these with: x-axis=ts OR wall-time; y-axis=any metric(s) (up to 2).
# ability to merge any m csv files (e.g. tf vs torch; or n seeds) together
# in one plot.
@@ -6,6 +6,7 @@ halfcheetah-ddpg:
         episode_reward_mean: 2000
         time_total_s: 5400  # 90 minutes
     config:
+        use_pytorch: false  # <- switch on/off torch
         # === Model ===
         actor_hiddens: [64, 64]
         critic_hiddens: [64, 64]
@@ -9,6 +9,8 @@ invertedpendulum-td3:
         time_total_s: 900  # 15 minutes
         timesteps_total: 1000000
     config:
+        # Works for both torch and tf.
+        use_pytorch: false
         # === Model ===
         actor_hiddens: [32, 32]
         critic_hiddens: [32, 32]