Skip to content

Commit

Permalink
[rllib] Expose algorithm parameters and tune policy gradient parameters for humanoid (ray-project#753)
Browse files Browse the repository at this point in the history

* parameters for humanoid

* fix
  • Loading branch information
pcmoritz authored and robertnishihara committed Jul 19, 2017
1 parent ade6d80 commit d356dd3
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 4 deletions.
1 change: 1 addition & 0 deletions python/ray/rllib/test.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/bin/bash

python train.py --env Walker2d-v1 --alg PolicyGradient --upload-dir s3://bucketname/
python train.py --env Humanoid-v1 --alg PolicyGradient --config '{"kl_coeff": 1.0, "num_sgd_iter": 20, "sgd_stepsize": 1e-4, "sgd_batchsize": 32768, "devices": ["/gpu:0", "/gpu:1", "/gpu:2", "/gpu:3"], "tf_session_args": {"device_count": {"GPU": 4}, "log_device_placement": false, "allow_soft_placement": true}, "timesteps_per_batch": 320000, "num_agents": 64}' --upload-dir s3://bucketname/
python train.py --env PongNoFrameskip-v0 --alg DQN --upload-dir s3://bucketname/
python train.py --env PongDeterministic-v0 --alg A3C --upload-dir s3://bucketname/
python train.py --env Humanoid-v1 --alg EvolutionStrategies --upload-dir s3://bucketname/
17 changes: 13 additions & 4 deletions python/ray/rllib/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
description=("Train a reinforcement learning agent."))
parser.add_argument("--env", required=True, type=str)
parser.add_argument("--alg", required=True, type=str)
parser.add_argument("--config", default="{}", type=str)
parser.add_argument("--upload-dir", default="file:///tmp/ray", type=str)


Expand All @@ -28,17 +29,25 @@

env_name = args.env
if args.alg == "PolicyGradient":
config = pg.DEFAULT_CONFIG.copy()
config.update(json.loads(args.config))
alg = pg.PolicyGradient(
env_name, pg.DEFAULT_CONFIG, upload_dir=args.upload_dir)
env_name, config, upload_dir=args.upload_dir)
elif args.alg == "EvolutionStrategies":
config = es.DEFAULT_CONFIG.copy()
config.update(json.loads(args.config))
alg = es.EvolutionStrategies(
env_name, es.DEFAULT_CONFIG, upload_dir=args.upload_dir)
env_name, config, upload_dir=args.upload_dir)
elif args.alg == "DQN":
config = dqn.DEFAULT_CONFIG.copy()
config.update(json.loads(args.config))
alg = dqn.DQN(
env_name, dqn.DEFAULT_CONFIG, upload_dir=args.upload_dir)
env_name, config, upload_dir=args.upload_dir)
elif args.alg == "A3C":
config = a3c.DEFAULT_CONFIG.copy()
config.update(json.loads(args.config))
alg = a3c.A3C(
env_name, a3c.DEFAULT_CONFIG, upload_dir=args.upload_dir)
env_name, config, upload_dir=args.upload_dir)
else:
assert False, ("Unknown algorithm, check --alg argument. Valid "
"choices are PolicyGradientPolicyGradient, "
Expand Down

0 comments on commit d356dd3

Please sign in to comment.