Skip to content

Commit

Permalink
added inverted pendulum and inverted double pendulum
Browse files Browse the repository at this point in the history
  • Loading branch information
janner committed Apr 7, 2020
1 parent 7ffb5e4 commit 16e9302
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 0 deletions.
37 changes: 37 additions & 0 deletions examples/config/inverted_double_pendulum/0.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Experiment configuration for MBPO on the gym InvertedDoublePendulum-v2 task.
params = {
    # Algorithm and environment identifiers.
    'type': 'MBPO',
    'universe': 'gym',
    'domain': 'InvertedDoublePendulum',
    'task': 'v2',

    # Logging destination and experiment label.
    'log_dir': '~/ray_mbpo/',
    'exp_name': 'defaults',

    # Keyword arguments grouped under 'kwargs'; their consumer is not
    # visible in this file.
    'kwargs': {
        # n_epochs * epoch_length = 80 * 250 = 20k steps in total.
        'n_epochs': 80,
        'epoch_length': 250,
        'train_every_n_steps': 1,
        'n_train_repeat': 20,

        # Evaluation settings.
        'eval_render_mode': None,
        'eval_n_episodes': 1,
        'eval_deterministic': True,

        'discount': 0.99,
        'tau': 5e-3,
        'reward_scale': 1.0,

        # Model / rollout settings.
        'model_train_freq': 250,
        'model_retain_epochs': 1,
        # NOTE(review): this is a float literal (100000.0), kept as written.
        'rollout_batch_size': 100e3,
        'deterministic': False,
        'num_networks': 7,
        'num_elites': 5,
        'real_ratio': 0.05,
        'target_entropy': -0.5,
        'max_model_t': None,
        'rollout_schedule': [1, 15, 1, 1],
        'hidden_dim': 200,
        'n_initial_exploration_steps': 500,
    },
}

36 changes: 36 additions & 0 deletions examples/config/inverted_pendulum/0.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Experiment configuration for MBPO on the gym InvertedPendulum-v2 task.
params = {
    # Algorithm and environment identifiers.
    'type': 'MBPO',
    'universe': 'gym',
    'domain': 'InvertedPendulum',
    'task': 'v2',

    # Logging destination and experiment label.
    'log_dir': '~/ray_mbpo/',
    'exp_name': 'defaults',

    # Keyword arguments grouped under 'kwargs'; their consumer is not
    # visible in this file.
    'kwargs': {
        # n_epochs * epoch_length = 80 * 250 = 20k steps in total.
        'n_epochs': 80,
        'epoch_length': 250,
        'train_every_n_steps': 1,
        'n_train_repeat': 10,

        # Evaluation settings.
        'eval_render_mode': None,
        'eval_n_episodes': 1,
        'eval_deterministic': True,

        'discount': 0.99,
        'tau': 5e-3,
        'reward_scale': 1.0,

        # Model / rollout settings.
        'model_train_freq': 250,
        'model_retain_epochs': 1,
        # NOTE(review): this is a float literal (100000.0), kept as written.
        'rollout_batch_size': 100e3,
        'deterministic': False,
        'num_networks': 7,
        'num_elites': 5,
        'real_ratio': 0.05,
        'target_entropy': -0.05,
        'max_model_t': None,
        'rollout_schedule': [1, 15, 1, 1],
        'hidden_dim': 200,
        'n_initial_exploration_steps': 500,
    },
}
20 changes: 20 additions & 0 deletions mbpo/static/inverted_double_pendulum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import sys
import numpy as np
import pdb

class StaticFns:
    """Namespace of static functions for the inverted double pendulum."""

    @staticmethod
    def termination_fn(obs, act, next_obs):
        """Return a per-row termination flag computed from ``next_obs``.

        All three arguments must be rank-2 arrays. Only ``next_obs`` is
        actually read: columns 1 and 2 are treated as the sines and columns
        3 and 4 as the cosines of the first and second angle.

        Returns:
            Boolean array of shape ``(rows, 1)``, True where
            ``0.6 * (cos1 + cos(theta_1 + theta_2)) <= 1``.
            NOTE(review): that quantity is presumably the height of the
            pendulum tip with 0.6-long links -- confirm against the
            environment definition.
        """
        assert all(len(arr.shape) == 2 for arr in (obs, next_obs, act))

        # Columns 1..4, in order: sin1, sin2, cos1, cos2.
        sin_a, sin_b, cos_a, cos_b = (next_obs[:, col] for col in range(1, 5))

        # Recover each angle from its (sin, cos) pair, then sum them.
        angle_sum = np.arctan2(sin_a, cos_a) + np.arctan2(sin_b, cos_b)
        height = 0.6 * (cos_a + np.cos(angle_sum))

        # Add a trailing axis so the result is a column vector.
        return (height <= 1)[:, None]
17 changes: 17 additions & 0 deletions mbpo/static/inverted_pendulum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import sys
import numpy as np
import pdb

class StaticFns:
    """Namespace of static functions for the inverted pendulum."""

    @staticmethod
    def termination_fn(obs, act, next_obs):
        """Return a per-row termination flag computed from ``next_obs``.

        All three arguments must be rank-2 arrays. Only ``next_obs`` is
        actually read. A row is flagged as done when it contains any
        non-finite entry, or when the absolute value of its column 1
        exceeds 0.2.
        NOTE(review): column 1 is presumably the pole angle -- confirm
        against the environment definition.

        Returns:
            Boolean array of shape ``(rows, 1)``.
        """
        assert all(len(arr.shape) == 2 for arr in (obs, next_obs, act))

        all_finite = np.all(np.isfinite(next_obs), axis=-1)
        within_limit = np.abs(next_obs[:, 1]) <= .2

        # De Morgan: not (finite and within limit) == (not finite) or (not within limit).
        terminated = ~all_finite | ~within_limit

        # Add a trailing axis so the result is a column vector.
        return terminated[:, None]

0 comments on commit 16e9302

Please sign in to comment.