[rllib] Add downscale and frameskip options for Montezumas (ray-project#908)

* up

* update

* fix

* update

* update

* update

* api break

* Update run_multi_node_tests.sh

* fix
ericl authored and pcmoritz committed Sep 3, 2017
1 parent 7a36430 commit 1ebfe96
Showing 12 changed files with 88 additions and 22 deletions.
4 changes: 4 additions & 0 deletions .gitignore
@@ -6,6 +6,10 @@
/src/common/thirdparty/redis
/src/numbuf/thirdparty/arrow
/src/thirdparty/arrow
+/flatbuffers-1.7.1/
+/src/thirdparty/boost/
+/src/thirdparty/boost_1_60_0/
+/src/thirdparty/flatbuffers/

# Files generated by flatc should be ignored
/src/common/format/*.py
4 changes: 2 additions & 2 deletions python/ray/rllib/dqn/dqn.py
@@ -26,7 +26,7 @@
whether to use double dqn
hiddens: array<int>
hidden layer sizes of the state and action value networks
-model_config: dict
+model: dict
config options to pass to the model constructor
lr: float
learning rate for adam optimizer
@@ -79,7 +79,7 @@
dueling=True,
double_q=True,
hiddens=[256],
-model_config={},
+model={},
lr=5e-4,
schedule_max_timesteps=100000,
timesteps_per_iteration=1000,
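For reference, a hedged sketch of the rename on the caller side (the import path mirrors train.py; the option values are illustrative, not taken from this diff):

    from ray.rllib import dqn

    config = dqn.DEFAULT_CONFIG.copy()
    # The old "model_config" key is no longer read; model options live under "model".
    config["model"] = {"fcnet_hiddens": [64], "fcnet_activation": "relu"}

For JSON configs passed to train.py, the new _check_and_update helper further down rejects the old key outright.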
2 changes: 1 addition & 1 deletion python/ray/rllib/dqn/models.py
@@ -11,7 +11,7 @@
def _build_q_network(inputs, num_actions, config):
dueling = config["dueling"]
hiddens = config["hiddens"]
frontend = ModelCatalog.get_model(inputs, 1, config["model_config"])
frontend = ModelCatalog.get_model(inputs, 1, config["model"])
frontend_out = frontend.last_layer

with tf.variable_scope("action_value"):
24 changes: 20 additions & 4 deletions python/ray/rllib/models/catalog.py
@@ -12,6 +12,16 @@
from ray.rllib.models.visionnet import VisionNetwork


+MODEL_CONFIGS = [
+    "conv_filters",
+    "downscale_factor",
+    "extra_frameskip",
+    "fcnet_activation",
+    "fcnet_hiddens",
+    "free_log_std"
+]


class ModelCatalog(object):
"""Registry of default models and action distributions for envs.
@@ -67,7 +77,7 @@ def get_model(inputs, num_outputs, options=dict()):
return FullyConnectedNetwork(inputs, num_outputs, options)

@staticmethod
-def get_preprocessor(env_name, obs_shape):
+def get_preprocessor(env_name, obs_shape, options=dict()):
"""Returns a suitable processor for the given environment.
Args:
@@ -81,12 +91,18 @@ def get_preprocessor(env_name, obs_shape):
ATARI_OBS_SHAPE = (210, 160, 3)
ATARI_RAM_OBS_SHAPE = (128,)

+for k in options.keys():
+    if k not in MODEL_CONFIGS:
+        raise Exception(
+            "Unknown config key `{}`, all keys: {}".format(
+                k, MODEL_CONFIGS))

if obs_shape == ATARI_OBS_SHAPE:
print("Assuming Atari pixel env, using AtariPixelPreprocessor.")
-return AtariPixelPreprocessor()
+return AtariPixelPreprocessor(options)
elif obs_shape == ATARI_RAM_OBS_SHAPE:
print("Assuming Atari ram env, using AtariRamPreprocessor.")
-return AtariRamPreprocessor()
+return AtariRamPreprocessor(options)

print("Non-atari env, not using any observation preprocessor.")
-return NoPreprocessor()
+return NoPreprocessor(options)
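The whitelist exists so that misspelled model options fail fast instead of silently falling back to defaults. A standalone sketch of the check (the helper name check_model_options is made up; in the diff the loop lives inline in get_preprocessor):

    MODEL_CONFIGS = [
        "conv_filters", "downscale_factor", "extra_frameskip",
        "fcnet_activation", "fcnet_hiddens", "free_log_std"]

    def check_model_options(options):
        for k in options.keys():
            if k not in MODEL_CONFIGS:
                raise Exception(
                    "Unknown config key `{}`, all keys: {}".format(
                        k, MODEL_CONFIGS))

    check_model_options({"downscale_factor": 4})     # passes
    # check_model_options({"downscale_fact": 4})     # would raise Exception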
19 changes: 16 additions & 3 deletions python/ray/rllib/models/preprocessors.py
@@ -6,6 +6,13 @@
class Preprocessor(object):
"""Defines an abstract observation preprocessor function."""

+def __init__(self, options):
+    self.options = options
+    self._init()
+
+def _init(self):
+    pass

def transform_shape(self, obs_shape):
"""Returns the preprocessed observation shape."""
raise NotImplementedError
@@ -16,13 +23,19 @@ def transform(self, observation):


class AtariPixelPreprocessor(Preprocessor):
+def _init(self):
+    self.downscale_factor = self.options.get("downscale_factor", 2)
+    self.dim = int(160 / self.downscale_factor)

def transform_shape(self, obs_shape):
-return (80, 80, 3)
+return (self.dim, self.dim, 3)

# TODO(ekl) why does this need to return an extra size-1 dim (the [None])
def transform(self, observation):
"""Downsamples images from (210, 160, 3) to (80, 80, 3)."""
return (observation[25:-25:2, ::2, :][None] - 128) / 128
"""Downsamples images from (210, 160, 3) by the configured factor."""
scaled = observation[
25:-25:self.downscale_factor, ::self.downscale_factor, :][None]
return (scaled - 128) / 128


class AtariRamPreprocessor(Preprocessor):
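A quick numpy check of what downscale_factor does to the observation shape, assuming the same crop-and-stride slicing as the transform above:

    import numpy as np

    frame = np.zeros((210, 160, 3))      # raw Atari frame
    for factor in (2, 4):
        cropped = frame[25:-25:factor, ::factor, :]
        print(factor, cropped.shape)     # 2 -> (80, 80, 3), 4 -> (40, 40, 3)

The default of 2 reproduces the old hard-coded (80, 80, 3); the Montezuma test below uses 4.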
14 changes: 11 additions & 3 deletions python/ray/rllib/models/visionnet.py
@@ -12,11 +12,19 @@ class VisionNetwork(Model):
"""Generic vision network."""

def _init(self, inputs, num_outputs, options):
filters = options.get("conv_filters", [
[16, [8, 8], 4],
[32, [4, 4], 2],
[512, [10, 10], 1],
])
with tf.name_scope("vision_net"):
-conv1 = slim.conv2d(inputs, 16, [8, 8], 4, scope="conv1")
-conv2 = slim.conv2d(conv1, 32, [4, 4], 2, scope="conv2")
+for i, (out_size, kernel, stride) in enumerate(filters[:-1], 1):
+    inputs = slim.conv2d(
+        inputs, out_size, kernel, stride,
+        scope="conv{}".format(i))
+out_size, kernel, stride = filters[-1]
fc1 = slim.conv2d(
-conv2, 512, [10, 10], padding="VALID", scope="fc1")
+inputs, out_size, kernel, stride, padding="VALID", scope="fc1")
fc2 = slim.conv2d(fc1, num_outputs, [1, 1], activation_fn=None,
normalizer_fn=None, scope="fc2")
return tf.squeeze(fc2, [1, 2]), tf.squeeze(fc1, [1, 2])
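Each conv_filters entry is [out_channels, kernel, stride]. All entries but the last become SAME-padded conv layers; the last becomes the VALID-padded "fc1" layer, so its kernel should match the spatial size that remains. A hedged example sized for the 40x40 frames produced by downscale_factor 4 (the same spec appears in the Montezuma test below):

    conv_filters = [
        [16, [8, 8], 4],   # 40x40 -> 10x10
        [32, [4, 4], 2],   # 10x10 -> 5x5
        [512, [5, 5], 1],  # 5x5 -> 1x1, the VALID-padded "fc1"
    ]
    model_options = {"downscale_factor": 4, "conv_filters": conv_filters}

The default spec above plays the same role for the default 80x80 input (20x20, then 10x10, then a 10x10 VALID kernel).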
13 changes: 10 additions & 3 deletions python/ray/rllib/ppo/env.py
@@ -10,13 +10,15 @@

class BatchedEnv(object):
"""This holds multiple gym envs and performs steps on all of them."""
-def __init__(self, name, batchsize):
+def __init__(self, name, batchsize, options):
self.envs = [gym.make(name) for _ in range(batchsize)]
self.observation_space = self.envs[0].observation_space
self.action_space = self.envs[0].action_space
self.batchsize = batchsize
self.preprocessor = ModelCatalog.get_preprocessor(
-    name, self.envs[0].observation_space.shape)
+    name, self.envs[0].observation_space.shape, options["model"])
+self.extra_frameskip = options.get("extra_frameskip", 1)
+assert self.extra_frameskip >= 1

def reset(self):
observations = [
@@ -33,7 +35,12 @@ def step(self, actions, render=False):
observations.append(np.zeros(self.shape))
rewards.append(0.0)
continue
-observation, reward, done, info = self.envs[i].step(action)
+reward = 0.0
+for j in range(self.extra_frameskip):
+    observation, r, done, info = self.envs[i].step(action)
+    reward += r
+    if done:
+        break
if render:
self.envs[0].render()
observations.append(self.preprocessor.transform(observation))
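A minimal usage sketch of the new constructor arguments (module path follows the file location; batch size and option values are illustrative):

    from ray.rllib.ppo.env import BatchedEnv

    options = {"extra_frameskip": 4, "model": {"downscale_factor": 4}}
    env = BatchedEnv("MontezumaRevenge-v0", 2, options)
    observations = env.reset()

With "extra_frameskip": 4, step() repeats each chosen action four times on the underlying gym env, sums the rewards, and stops early if the episode terminates mid-repeat.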
3 changes: 3 additions & 0 deletions python/ray/rllib/ppo/ppo.py
@@ -53,7 +53,10 @@
"clip_param": 0.3,
# Target value for KL divergence
"kl_target": 0.01,
# Config params to pass to the model
"model": {"free_log_std": False},
+# If >1, adds frameskip
+"extra_frameskip": 1,
# Number of timesteps collected in each outer loop
"timesteps_per_batch": 40000,
# Each tasks performs rollouts until at least this
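Putting the new PPO keys together, a hedged sketch of overriding the defaults from Python (import path and env choice are illustrative; the same settings appear as JSON in the Jenkins test at the end):

    import ray
    from ray.rllib import ppo

    ray.init()
    config = ppo.DEFAULT_CONFIG.copy()
    config["extra_frameskip"] = 4               # repeat each action 4 times
    config["model"] = {"downscale_factor": 4}   # 40x40 observations
    agent = ppo.PPOAgent("MontezumaRevenge-v0", config)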
2 changes: 1 addition & 1 deletion python/ray/rllib/ppo/runner.py
@@ -46,7 +46,7 @@ def __init__(self, name, batchsize, config, logdir, is_remote):
self.devices = devices
self.config = config
self.logdir = logdir
-self.env = BatchedEnv(name, batchsize)
+self.env = BatchedEnv(name, batchsize, config)
if is_remote:
config_proto = tf.ConfigProto()
else:
File renamed without changes.
16 changes: 12 additions & 4 deletions python/ray/rllib/train.py
@@ -41,25 +41,33 @@

ray.init(redis_address=args.redis_address)

+def _check_and_update(config, json):
+    for k in json.keys():
+        if k not in config:
+            raise Exception(
+                "Unknown model config `{}`, all model configs: {}".format(
+                    k, config.keys()))
+    config.update(json)

env_name = args.env
if args.alg == "PPO":
config = ppo.DEFAULT_CONFIG.copy()
-config.update(json_config)
+_check_and_update(config, json_config)
alg = ppo.PPOAgent(
env_name, config, upload_dir=args.upload_dir)
elif args.alg == "ES":
config = es.DEFAULT_CONFIG.copy()
-config.update(json_config)
+_check_and_update(config, json_config)
alg = es.ESAgent(
env_name, config, upload_dir=args.upload_dir)
elif args.alg == "DQN":
config = dqn.DEFAULT_CONFIG.copy()
-config.update(json_config)
+_check_and_update(config, json_config)
alg = dqn.DQNAgent(
env_name, config, upload_dir=args.upload_dir)
elif args.alg == "A3C":
config = a3c.DEFAULT_CONFIG.copy()
-config.update(json_config)
+_check_and_update(config, json_config)
alg = a3c.A3CAgent(
env_name, config, upload_dir=args.upload_dir)
else:
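The effect of _check_and_update is that a --config JSON key which does not already exist in the algorithm's DEFAULT_CONFIG now raises instead of being silently merged and ignored. A standalone sketch with a toy config (the helper matches the diff; the keys are illustrative):

    config = {"extra_frameskip": 1, "timesteps_per_batch": 40000}

    def _check_and_update(config, json):
        for k in json.keys():
            if k not in config:
                raise Exception(
                    "Unknown model config `{}`, all model configs: {}".format(
                        k, config.keys()))
        config.update(json)

    _check_and_update(config, {"extra_frameskip": 4})   # ok: key exists
    # _check_and_update(config, {"frameskip": 4})       # would raise Exception

In particular, DQN users still passing the old "model_config" key now get an error listing the valid keys, which include the renamed "model".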
9 changes: 8 additions & 1 deletion test/jenkins_tests/run_multi_node_tests.sh
@@ -89,11 +89,18 @@ docker run --shm-size=10G --memory=10G $DOCKER_SHA \
--env CartPole-v0 \
--alg DQN \
--num-iterations 2 \
--config '{"lr": 1e-3, "schedule_max_timesteps": 100000, "exploration_fraction": 0.1, "exploration_final_eps": 0.02, "dueling": false, "hiddens": [], "model_config": {"fcnet_hiddens": [64], "fcnet_activation": "relu"}}'
--config '{"lr": 1e-3, "schedule_max_timesteps": 100000, "exploration_fraction": 0.1, "exploration_final_eps": 0.02, "dueling": false, "hiddens": [], "model": {"fcnet_hiddens": [64], "fcnet_activation": "relu"}}'

docker run --shm-size=10G --memory=10G $DOCKER_SHA \
python /ray/python/ray/rllib/train.py \
--env PongNoFrameskip-v4 \
--alg DQN \
--num-iterations 2 \
--config '{"lr": 1e-4, "schedule_max_timesteps": 2000000, "buffer_size": 10000, "exploration_fraction": 0.1, "exploration_final_eps": 0.01, "train_freq": 4, "learning_starts": 10000, "target_network_update_freq": 1000, "gamma": 0.99, "prioritized_replay": true}'

+docker run --shm-size=10G --memory=10G $DOCKER_SHA \
+    python /ray/python/ray/rllib/train.py \
+    --env MontezumaRevenge-v0 \
+    --alg PPO \
+    --num-iterations 2 \
+    --config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "sgd_stepsize": 1e-4, "sgd_batchsize": 64, "timesteps_per_batch": 2000, "num_workers": 1, "model": {"downscale_factor": 4, "conv_filters": [[16, [8, 8], 4], [32, [4, 4], 2], [512, [5, 5], 1]]}, "extra_frameskip": 4}'
