Skip to content

Commit

Permalink
style(on-policy): adjust algorithm parameter condition_sigma
Browse files (browse the repository at this point in the history)
  • Loading branch information
StepNeverStop committed Jan 11, 2021
1 parent 15bbe65 commit d8698ca
Show file tree
Hide file tree
Showing 7 changed files with 10 additions and 20 deletions.
11 changes: 6 additions & 5 deletions rls/algos/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -341,18 +341,17 @@ pg:
lr: 5.0e-4
gamma: 0.99
batch_size: 64
-condition_sigma: false
epoch: 1 # very important
network_settings:
actor_continuous:
hidden_units: [64, 64]
+condition_sigma: false
log_std_bound: [-20, 2]
actor_discrete: [64, 64]

ac:
actor_lr: 5.0e-4
critic_lr: 1.0e-3
-condition_sigma: false
gamma: 0.99
batch_size: 256
buffer_size: 100000
Expand All @@ -361,21 +360,22 @@ ac:
network_settings:
actor_continuous:
hidden_units: [64, 64]
+condition_sigma: false
log_std_bound: [-20, 2]
actor_discrete: [64, 64]
critic: [64, 64]

a2c:
actor_lr: 5.0e-4
critic_lr: 1.0e-3
-condition_sigma: false
gamma: 0.99
beta: 1.0e-3
batch_size: 64
epoch: 4 # very important
network_settings:
actor_continuous:
hidden_units: [64, 64]
+condition_sigma: false
log_std_bound: [-20, 2]
actor_discrete: [64, 64]
critic: [64, 64]
Expand All @@ -394,7 +394,6 @@ ppo:
actor_lr: 3.0e-4
critic_lr: 1.0e-3
max_grad_norm: null
-condition_sigma: false # not recommended

# duel clip
use_duel_clip: false
Expand Down Expand Up @@ -424,6 +423,7 @@ ppo:
network_settings:
share:
continuous:
+condition_sigma: false # not recommended
log_std_bound: [-20, 2]
share: [64, 64]
mu: [64, 64]
Expand All @@ -434,6 +434,7 @@ ppo:
v: [64, 64]
actor_continuous:
hidden_units: [64, 64]
+condition_sigma: false # not recommended
log_std_bound: [-20, 2]
actor_discrete: [64, 64]
critic: [64, 64]
Expand All @@ -452,10 +453,10 @@ trpo:
train_v_iters: 10
batch_size: 64
critic_lr: 1.0e-3
-condition_sigma: false
network_settings:
actor_continuous:
hidden_units: [64, 64]
+condition_sigma: false
log_std_bound: [-20, 2]
actor_discrete: [64, 64]
critic: [64, 64]
Expand Down
2 changes: 0 additions & 2 deletions rls/algos/single/a2c.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ def __init__(self,
beta=1.0e-3,
actor_lr=5.0e-4,
critic_lr=1.0e-3,
-condition_sigma: bool = False,
network_settings={
'actor_continuous': [32, 32],
'actor_discrete': [32, 32],
Expand All @@ -42,7 +41,6 @@ def __init__(self,
representation_net=self._representation_net,
policy_net_type=OutputNetworkType.ACTOR_MU_LOGSTD,
policy_net_kwargs=dict(output_shape=self.a_dim,
-condition_sigma=condition_sigma,
network_settings=network_settings['actor_continuous']),
value_net_type=OutputNetworkType.CRITIC_VALUE,
value_net_kwargs=dict(network_settings=network_settings['critic'])
Expand Down
2 changes: 0 additions & 2 deletions rls/algos/single/ac.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ def __init__(self,

actor_lr=5.0e-4,
critic_lr=1.0e-3,
-condition_sigma: bool = False,
network_settings={
'actor_continuous': [32, 32],
'actor_discrete': [32, 32],
Expand All @@ -40,7 +39,6 @@ def __init__(self,
representation_net=self._representation_net,
policy_net_type=OutputNetworkType.ACTOR_MU_LOGSTD,
policy_net_kwargs=dict(output_shape=self.a_dim,
-condition_sigma=condition_sigma,
network_settings=network_settings['actor_continuous']),
value_net_type=OutputNetworkType.CRITIC_QVALUE_ONE,
value_net_kwargs=dict(action_dim=self.a_dim,
Expand Down
2 changes: 0 additions & 2 deletions rls/algos/single/pg.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ def __init__(self,

lr=5.0e-4,
epoch=5,
-condition_sigma: bool = False,
network_settings={
'actor_continuous': [32, 32],
'actor_discrete': [32, 32]
Expand All @@ -37,7 +36,6 @@ def __init__(self,
representation_net=self._representation_net,
value_net_type=OutputNetworkType.ACTOR_MU_LOGSTD,
value_net_kwargs=dict(output_shape=self.a_dim,
-condition_sigma=condition_sigma,
network_settings=network_settings['actor_continuous'])
)
else:
Expand Down
3 changes: 0 additions & 3 deletions rls/algos/single/ppo.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ def __init__(self,
actor_lr: float = 3e-4,
critic_lr: float = 1e-3,
max_grad_norm: float = 0.5,
-condition_sigma: bool = False,
kl_reverse: bool = False,
kl_target: float = 0.02,
kl_target_cutoff: float = 2,
Expand Down Expand Up @@ -118,7 +117,6 @@ def __init__(self,
representation_net=self._representation_net,
value_net_type=OutputNetworkType.ACTOR_CRITIC_VALUE_CTS,
value_net_kwargs=dict(output_shape=self.a_dim,
-condition_sigma=condition_sigma,
network_settings=network_settings['share']['continuous'])
)
else:
Expand All @@ -142,7 +140,6 @@ def __init__(self,
representation_net=self._representation_net,
policy_net_type=OutputNetworkType.ACTOR_MU_LOGSTD,
policy_net_kwargs=dict(output_shape=self.a_dim,
-condition_sigma=condition_sigma,
network_settings=network_settings['actor_continuous']),
value_net_type=OutputNetworkType.CRITIC_VALUE,
value_net_kwargs=dict(network_settings=network_settings['critic'])
Expand Down
2 changes: 0 additions & 2 deletions rls/algos/single/trpo.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ def __init__(self,
backtrack_coeff=0.8,
epsilon=0.2,
critic_lr=1e-3,
-condition_sigma: bool = False,
network_settings={
'actor_continuous': [32, 32],
'actor_discrete': [32, 32],
Expand All @@ -80,7 +79,6 @@ def __init__(self,
representation_net=self._representation_net,
policy_net_type=OutputNetworkType.ACTOR_MU_LOGSTD,
policy_net_kwargs=dict(output_shape=self.a_dim,
-condition_sigma=condition_sigma,
network_settings=network_settings['actor_continuous']),
value_net_type=OutputNetworkType.CRITIC_VALUE,
value_net_kwargs=dict(network_settings=network_settings['critic'])
Expand Down
8 changes: 4 additions & 4 deletions rls/nn/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,9 @@ class ActorMuLogstd(M):
output: [stochastic action(mu), log of std]
'''

-def __init__(self, vector_dim, output_shape, condition_sigma, network_settings):
+def __init__(self, vector_dim, output_shape, network_settings):
super().__init__()
-self.condition_sigma = condition_sigma
+self.condition_sigma = network_settings['condition_sigma']
self.log_std_min, self.log_std_max = network_settings['log_std_bound']

self.share = mlp(network_settings['hidden_units'], out_layer=False)
Expand Down Expand Up @@ -356,9 +356,9 @@ class ActorCriticValueCts(M):
output: mean(mu) of Gaussian Distribution of actions given a state, v(s)
'''

-def __init__(self, vector_dim, output_shape, condition_sigma, network_settings):
+def __init__(self, vector_dim, output_shape, network_settings):
super().__init__()
-self.condition_sigma = condition_sigma
+self.condition_sigma = network_settings['condition_sigma']
self.log_std_min, self.log_std_max = network_settings['log_std_bound']

self.share = mlp(network_settings['share'], out_layer=False)
Expand Down

0 comments on commit d8698ca

Please sign in to comment.