[RLlib] Benchmark and regression test yaml cleanup and restructuring.
sven1977 authored May 26, 2020
1 parent ae2e1f0 commit baa0534
Showing 89 changed files with 574 additions and 544 deletions.
290 changes: 171 additions & 119 deletions rllib/BUILD

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion rllib/agents/ddpg/ddpg_torch_policy.py
@@ -200,7 +200,8 @@ def build_ddpg_stats(policy, batch):
         "mean_q": torch.mean(policy.q_t),
         "max_q": torch.max(policy.q_t),
         "min_q": torch.min(policy.q_t),
-        "td_error": policy.td_error
+        "mean_td_error": torch.mean(policy.td_error),
+        "td_error": policy.td_error,
     }
     return stats

4 changes: 3 additions & 1 deletion rllib/agents/ddpg/tests/test_ddpg.py
@@ -23,7 +23,9 @@ def test_ddpg_compilation(self):
         """Test whether a DDPGTrainer can be built with both frameworks."""
         config = ddpg.DEFAULT_CONFIG.copy()
         config["num_workers"] = 0  # Run locally.
-        config["num_envs_per_worker"] = 2  # Run locally.
+        config["num_envs_per_worker"] = 2
+        config["learning_starts"] = 0
+        config["exploration_config"]["random_timesteps"] = 100

         num_iterations = 2

49 changes: 36 additions & 13 deletions rllib/tests/run_regression_tests.py
@@ -9,13 +9,15 @@
 #     name = "run_regression_tests",
 #     main = "tests/run_regression_tests.py",
 #     tags = ["learning_tests"],
-#     size = "enormous",  # = 60min timeout
+#     size = "medium",  # 5min timeout
 #     srcs = ["tests/run_regression_tests.py"],
 #     data = glob(["tuned_examples/regression_tests/*.yaml"]),
-#     Pass `BAZEL` option and the path to look for yaml regression files.
+#     # Pass `BAZEL` option and the path to look for yaml regression files.
 #     args = ["BAZEL", "tuned_examples/regression_tests"]
 # )

+import argparse
+import os
 from pathlib import Path
 import sys
 import yaml
@@ -24,30 +26,51 @@
 from ray.tune import run_experiments
 from ray.rllib import _register_all

+parser = argparse.ArgumentParser()
+parser.add_argument(
+    "--torch",
+    action="store_true",
+    help="Runs all tests with PyTorch enabled.")
+parser.add_argument(
+    "--yaml-dir",
+    type=str,
+    help="The directory in which to find all yamls to test.")

 if __name__ == "__main__":
+    args = parser.parse_args()

-    # Bazel regression test mode: Get path to look for yaml files from argv[2].
-    if sys.argv[1] == "BAZEL":
-        # Get the path to use.
-        rllib_dir = Path(__file__).parent.parent
-        print("rllib dir={}".format(rllib_dir))
-        yaml_files = rllib_dir.rglob(sys.argv[2] + "/*.yaml")
-        yaml_files = sorted(
-            map(lambda path: str(path.absolute()), yaml_files), reverse=True)
-    # Normal mode: Get yaml files to run from command line.
-    else:
-        yaml_files = sys.argv[1:]
+    # Get the path or single file to use.
+    rllib_dir = Path(__file__).parent.parent
+    print("rllib dir={}".format(rllib_dir))
+
+    if not os.path.isdir(os.path.join(rllib_dir, args.yaml_dir)):
+        raise ValueError("yaml-dir ({}) not found!".format(args.yaml_dir))

-    print("Will run the following regression files:")
+    yaml_files = rllib_dir.rglob(args.yaml_dir + "/*.yaml")
+    yaml_files = sorted(
+        map(lambda path: str(path.absolute()), yaml_files), reverse=True)
+
+    print("Will run the following regression tests:")
     for yaml_file in yaml_files:
         print("->", yaml_file)

     # Loop through all collected files.
     for yaml_file in yaml_files:
         experiments = yaml.load(open(yaml_file).read())
         assert len(experiments) == 1,\
             "Error, can only run a single experiment per yaml file!"

         print("== Test config ==")
         print(yaml.dump(experiments))

+        # Add torch option to exp configs.
+        for exp in experiments.values():
+            if args.torch:
+                exp["config"]["use_pytorch"] = True

         # Try running each test 3 times and make sure it reaches the given
         # reward.
         passed = False
         for i in range(3):
             try:
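The refactored runner is thus driven by the two argparse flags above instead of raw sys.argv parsing; an illustrative invocation (paths assumed; --yaml-dir is resolved relative to the rllib dir):

    $ python rllib/tests/run_regression_tests.py \
        --yaml-dir=tuned_examples/regression_tests --torch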
@@ -9,6 +9,7 @@ atari-a2c:
         - SpaceInvadersNoFrameskip-v4
     run: A2C
     config:
+        use_pytorch: false  # <- switch on/off torch
         rollout_fragment_length: 20
         clip_rewards: True
         num_workers: 5
@@ -1,10 +1,11 @@
-cartpole-a2c-microbatch-tf:
+cartpole-a2c-microbatch:
     env: CartPole-v0
     run: A2C
     stop:
-        episode_reward_mean: 100
+        episode_reward_mean: 150
         timesteps_total: 100000
     config:
+        # Works for both torch and tf.
+        use_pytorch: false
         num_workers: 1
         gamma: 0.95
11 changes: 11 additions & 0 deletions rllib/tuned_examples/a3c/cartpole-a2c.yaml
@@ -0,0 +1,11 @@
cartpole-a2c:
    env: CartPole-v0
    run: A2C
    stop:
        episode_reward_mean: 150
        timesteps_total: 500000
    config:
        # Works for both torch and tf.
        use_pytorch: false
        num_workers: 0
        lr: 0.001
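Such a per-algorithm yaml can also be launched on its own; a sketch assuming the standard rllib CLI entrypoint (file path as added by this commit):

    $ rllib train -f rllib/tuned_examples/a3c/cartpole-a2c.yaml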
@@ -1,10 +1,11 @@
-cartpole-a3c-tf:
+cartpole-a3c:
     env: CartPole-v0
     run: A3C
     stop:
-        episode_reward_mean: 100
-        timesteps_total: 100000
+        episode_reward_mean: 150
+        timesteps_total: 200000
     config:
+        # Works for both torch and tf.
+        use_pytorch: false
         num_workers: 1
         gamma: 0.95
@@ -4,9 +4,10 @@ pong-a3c:
     env: PongDeterministic-v4
     run: A3C
     config:
+        # Works for both torch and tf.
+        use_pytorch: false
         num_workers: 16
         rollout_fragment_length: 20
-        use_pytorch: false
         vf_loss_coeff: 0.5
         entropy_coeff: 0.01
         gamma: 0.99
@@ -1,17 +1,16 @@
-cartpole-ars-torch:
+cartpole-ars:
     env: CartPole-v0
     run: ARS
     stop:
         episode_reward_mean: 150
         timesteps_total: 500000
     config:
-        use_pytorch: true
+        # Works for both torch and tf.
+        use_pytorch: false
         noise_stdev: 0.02
         num_rollouts: 50
         rollouts_used: 25
         num_workers: 2
         sgd_stepsize: 0.01
         noise_size: 25000000
         eval_prob: 0.5
-        model:
-            fcnet_hiddens: [64, 64]
@@ -3,6 +3,8 @@ swimmer-ars:
     env: Swimmer-v2
     run: ARS
     config:
+        # Works for both torch and tf.
+        use_pytorch: false
         noise_stdev: 0.01
         num_rollouts: 1
         rollouts_used: 1
13 changes: 0 additions & 13 deletions rllib/tuned_examples/cartpole-marwil-torch.yaml

This file was deleted.

169 changes: 169 additions & 0 deletions rllib/tuned_examples/cleanup_experiment.py
@@ -0,0 +1,169 @@
"""
This script automates cleaning up a benchmark/experiment run of some algo
against some config (with possibly more than one tune trial,
e.g. torch=grid_search([True, False])).
Run `python cleanup_experiment.py --help` for more information.
Use on an input directory with trial contents e.g.:
..
IMPALA_BreakoutNoFrameskip-v4_0_use_pytorch=False_2020-05-11_10-17-54topr3h9k
IMPALA_BreakoutNoFrameskip-v4_0_use_pytorch=False_2020-05-11_13-59-35dqaetxnf
IMPALA_BreakoutNoFrameskip-v4_0_use_pytorch=False_2020-05-11_17-21-28tbhedw72
IMPALA_BreakoutNoFrameskip-v4_2_use_pytorch=True_2020-05-11_10-17-54lv20cgn_
IMPALA_BreakoutNoFrameskip-v4_2_use_pytorch=True_2020-05-11_13-59-35kwzhax_y
IMPALA_BreakoutNoFrameskip-v4_2_use_pytorch=True_2020-05-11_17-21-28a5j0s7za
Then run:
>> python cleanup_experiment.py --experiment-dir [parent dir w/ trial sub-dirs]
>> --output-dir [your out dir] --results-filter dumb_col_2,superfluous_col3
>> --results-max-size [max results file size in kb before(!) zipping]
The script will create one output sub-dir for each trial and only copy
the configuration and the csv results (filtered and every nth row removed
based on the given args).
"""

import argparse
import json
import os
import re
import shutil
import yaml

parser = argparse.ArgumentParser()
parser.add_argument(
    "--experiment-dir",
    type=str,
    help="Experiment dir in which all sub-runs (seeds) are "
    "located (as sub-dirs). Each sub-run dir must contain the files: "
    "params.json and progress.csv.")
parser.add_argument(
    "--output-dir",
    type=str,
    help="The output dir, in which the cleaned up output will be placed.")
parser.add_argument(
    "--results-filter",
    type=str,
    help="Comma-separated list of csv fields to exclude.",
    default="experiment_id,pid,hostname,node_ip,trial_id,hist_stats/episode_"
    "reward,hist_stats/episode_lengths,experiment_tag")
parser.add_argument(
    "--results-max-size",
    type=int,
    help="The max. size of the final results.csv file (in kb). Will erase "
    "every nth line in the original input to reach that goal. "
    "Use 0 for no limit (default=100).",
    default=100)


def process_single_run(in_dir, out_dir):
exp_dir = os.listdir(in_dir)

# Make sure trials dir is ok.
assert "params.json" in exp_dir and "progress.csv" in exp_dir, \
"params.json or progress.csv not found in {}!".format(in_dir)

os.makedirs(out_dir, exist_ok=True)

for file in exp_dir:
absfile = os.path.join(in_dir, file)
# Config file -> Convert to yaml and move to output dir.
if file == "params.json":
assert os.path.isfile(absfile), "{} not a file!".format(file)
with open(absfile) as fp:
contents = json.load(fp)
with open(os.path.join(out_dir, "config.yaml"), "w") as fp:
yaml.dump(contents, fp)
# Progress csv file -> Filter out some columns, cut, and write to
# output_dir.
elif file == "progress.csv":
assert os.path.isfile(absfile), "{} not a file!".format(file)
col_idx_to_filter = []
with open(absfile) as fp:
# Get column names.
col_names_orig = fp.readline().strip().split(",")
# Split by comma (abiding to quotes), filter out
# unwanted columns, then write to disk.
cols_to_filter = args.results_filter.split(",")
for i, c in enumerate(col_names_orig):
if c in cols_to_filter:
col_idx_to_filter.insert(0, i)
col_names = col_names_orig.copy()
for idx in col_idx_to_filter:
col_names.pop(idx)
absfile_out = os.path.join(out_dir, "progress.csv")
with open(absfile_out, "w") as out_fp:
print(",".join(col_names), file=out_fp)
while True:
line = fp.readline().strip()
if not line:
break
line = re.sub(
"(,{2,})",
lambda m: ",None" * (len(m.group()) - 1) + ",",
line)
                        cols = re.findall('".+?"|[^,]+', line)
                        if len(cols) != len(col_names_orig):
                            continue
                        for idx in col_idx_to_filter:
                            cols.pop(idx)
                        print(",".join(cols), file=out_fp)

            # Reduce the size of the output file if necessary.
            out_size = os.path.getsize(absfile_out)
            max_size = args.results_max_size * 1024
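            # Worked example (illustrative numbers): out_size=300kb and
            # max_size=100kb -> ratio=3.0, so keep only every
            # nth = 300 // 100 = 3rd line; out_size=150kb -> ratio=1.5,
            # so instead drop every nth = 150 // (150 - 100) = 3rd line.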
            if 0 < max_size < out_size:
                # Figure out roughly every which line we have to drop.
                ratio = out_size / max_size
                # If ratio > 2.0, we'll have to keep only every nth line.
                if ratio > 2.0:
                    nth = out_size // max_size
                    os.system("awk 'NR==1||NR%{}==0' {} > {}.new".format(
                        nth, absfile_out, absfile_out))
                # If ratio < 2.0 (>1.0), we'll have to drop every nth line.
                else:
                    nth = out_size // (out_size - max_size)
                    os.system("awk 'NR==1||NR%{}!=0' {} > {}.new".format(
                        nth, absfile_out, absfile_out))
                os.remove(absfile_out)
                os.rename(absfile_out + ".new", absfile_out)

            # Zip progress.csv into results.zip.
            zip_file = os.path.join(out_dir, "results.zip")
            try:
                os.remove(zip_file)
            except FileNotFoundError:
                pass
            os.system("zip -j {} {}".format(
                zip_file, os.path.join(out_dir, "progress.csv")))
            os.remove(os.path.join(out_dir, "progress.csv"))

        # TBX events file -> Move as is.
        elif re.search("^(events\\.out\\.|params\\.pkl)", file):
            assert os.path.isfile(absfile), "{} not a file!".format(file)
            shutil.copyfile(absfile, os.path.join(out_dir, file))


if __name__ == "__main__":
    args = parser.parse_args()
    exp_dir = os.listdir(args.experiment_dir)
    # Loop through all sub-directories.
    for i, sub_run in enumerate(sorted(exp_dir)):
        abspath = os.path.join(args.experiment_dir, sub_run)
        # This is a seed run.
        if os.path.isdir(abspath) and \
                re.search("^(\\w+?)_(\\w+?-v\\d+)(_\\d+)", sub_run):
            # Create meaningful output dir name:
            # [algo]_[env]_[trial #]_[trial-config]_[date YYYY-MM-DD].
            cleaned_up_out = re.sub(
                "^(\\w+?)_(\\w+?-v\\d+)(_\\d+)(_.+)?(_\\d{4}-\\d{2}-\\d{2})"
                "_\\d{2}-\\d{2}-\\w+", "{:02}_\\1_\\2\\4\\5".format(i),
                sub_run)
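            # E.g. (first sample dir from the docstring, i=0):
            # "IMPALA_BreakoutNoFrameskip-v4_0_use_pytorch=False_2020-05-11_10-17-54topr3h9k"
            # -> "00_IMPALA_BreakoutNoFrameskip-v4_use_pytorch=False_2020-05-11".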
            # Remove superfluous `env=` specifier (env is always included
            # in the name anyway).
            cleaned_up_out = re.sub("^(.+)env=\\w+?-v\\d+,?(.+)", "\\1\\2",
                                    cleaned_up_out)
            out_path = os.path.join(args.output_dir, cleaned_up_out)
            process_single_run(abspath, out_path)
    # Done.
    print("done")
5 changes: 5 additions & 0 deletions rllib/tuned_examples/create_plots.py
@@ -0,0 +1,5 @@
# TODO(sven):
# Add a simple script that takes n csv input files and generates plot(s)
# from these with: x-axis=ts OR wall-time; y-axis=any metric(s) (up to 2).
# ability to merge any m csv files (e.g. tf vs torch; or n seeds) together
# in one plot.
@@ -6,6 +6,7 @@ halfcheetah-ddpg:
         episode_reward_mean: 2000
         time_total_s: 5400  # 90 minutes
     config:
+        use_pytorch: false  # <- switch on/off torch
         # === Model ===
         actor_hiddens: [64, 64]
         critic_hiddens: [64, 64]
@@ -9,6 +9,8 @@ invertedpendulum-td3:
         time_total_s: 900  # 15 minutes
         timesteps_total: 1000000
     config:
+        # Works for both torch and tf.
+        use_pytorch: false
         # === Model ===
         actor_hiddens: [32, 32]
         critic_hiddens: [32, 32]