# configs.yaml
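# This file defines a base `defaults` config plus named override configs
# (atari, crafter, dmc_vision, dmc_proprio, debug). It is assumed to be read
# by a DreamerV2-style config loader that applies `defaults` first and then
# any named configs selected on the command line.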
defaults:
  # Train Script
  logdir: logs/
  seed: 0
  task: crafter_reward
  envs: 1
  envs_parallel: none
  render_size: [64, 64]
  dmc_camera: -1
  atari_grayscale: True
  time_limit: 0
  action_repeat: 1
  steps: 1.2e6
  log_every: 1e4
  eval_every: 1e5
  eval_eps: 1
  prefill: 1e4
  pretrain: 1
  train_every: 6
  train_steps: 1
  expl_until: 0
  replay: {capacity: 2e6, ongoing: False, minlen: 50, maxlen: 50, prioritize_ends: True}
  dataset: {batch: 8, length: 50}
  log_keys_video: ['image']
  log_keys_sum: '^$'
  log_keys_mean: '^$'
  log_keys_max: '^$'
  precision: 16
  jit: True
  # Agent
  clip_rewards: tanh
  expl_behavior: greedy
  expl_noise: 0.0
  eval_noise: 0.0
  eval_state_mean: False
  # World Model
  grad_heads: [decoder, reward, discount]
  pred_discount: True
  rssm: {ensemble: 1, hidden: 1024, deter: 1024, stoch: 32, discrete: 32, act: elu, norm: none, std_act: sigmoid2, min_std: 0.1}
  encoder: {mlp_keys: '.*', cnn_keys: '.*', act: elu, norm: none, cnn_depth: 48, cnn_kernels: [4, 4, 4, 4], mlp_layers: [400, 400, 400, 400]}
  decoder: {mlp_keys: '.*', cnn_keys: '.*', act: elu, norm: none, cnn_depth: 48, cnn_kernels: [5, 5, 6, 6], mlp_layers: [400, 400, 400, 400]}
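  # Note (assumption): mlp_keys and cnn_keys are regex patterns selecting which
  # observation keys are routed through the MLP and CNN branches; '.*' matches
  # every key, while '$^' matches nothing and effectively disables that branch.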
  reward_head: {layers: 4, units: 400, act: elu, norm: none, dist: mse}
  discount_head: {layers: 4, units: 400, act: elu, norm: none, dist: binary}
  loss_scales: {kl: 1.0, reward: 1.0, discount: 0.99, proprio: 1.0}
  kl: {free: 0.0, forward: False, balance: 0.8, free_avg: True}
  model_opt: {opt: adam, lr: 1e-4, eps: 1e-5, clip: 100, wd: 1e-6}
  # Actor Critic
  actor: {layers: 4, units: 400, act: elu, norm: none, dist: auto, min_std: 0.1}
  critic: {layers: 4, units: 400, act: elu, norm: none, dist: mse}
  actor_opt: {opt: adam, lr: 8e-5, eps: 1e-5, clip: 100, wd: 1e-6}
  critic_opt: {opt: adam, lr: 2e-4, eps: 1e-5, clip: 100, wd: 1e-6}
  discount: 0.99
  discount_lambda: 0.95
  imag_horizon: 15
  actor_grad: auto
  actor_grad_mix: 0.1
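  # Note (assumption): with actor_grad set to auto, a DreamerV2-style agent
  # typically uses reinforce gradients for discrete action spaces and dynamics
  # backpropagation for continuous ones; actor_grad_mix blends the two.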
  actor_ent: 3e-3
  slow_target: True
  slow_target_update: 100
  slow_target_fraction: 1
  slow_baseline: True
  reward_norm: {momentum: 1.0, scale: 1.0, eps: 1e-8}
  # Modification Flags
  itervaml: False
  wm_backpropvalue: False
  multistep: False
  # Exploration
  expl_intr_scale: 1.0
  expl_extr_scale: 0.0
  expl_opt: {opt: adam, lr: 3e-4, eps: 1e-5, clip: 100, wd: 1e-6}
  expl_head: {layers: 4, units: 400, act: elu, norm: none, dist: mse}
  expl_reward_norm: {momentum: 1.0, scale: 1.0, eps: 1e-8}
  disag_target: stoch
  disag_log: False
  disag_models: 10
  disag_offset: 1
  disag_action_cond: True
  expl_model_loss: kl
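# The named configs below are applied on top of defaults. Dotted keys such as
# model_opt.lr are assumed to override a single field inside the corresponding
# nested dict rather than replacing the whole dict.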
atari:
  task: atari_pong
  encoder: {mlp_keys: '$^', cnn_keys: 'image'}
  decoder: {mlp_keys: '$^', cnn_keys: 'image'}
  time_limit: 27000
  action_repeat: 4
  steps: 5e7
  eval_every: 2.5e5
  log_every: 1e4
  prefill: 50000
  train_every: 16
  clip_rewards: tanh
  rssm: {hidden: 600, deter: 600}
  model_opt.lr: 2e-4
  actor_opt.lr: 4e-5
  critic_opt.lr: 1e-4
  actor_ent: 1e-3
  discount: 0.999
  loss_scales.kl: 0.1
  loss_scales.discount: 5.0
crafter:
  task: crafter_reward
  encoder: {mlp_keys: '$^', cnn_keys: 'image'}
  decoder: {mlp_keys: '$^', cnn_keys: 'image'}
  log_keys_max: '^log_achievement_.*'
  log_keys_sum: '^log_reward$'
  rssm: {hidden: 1024, deter: 1024}
  discount: 0.999
  model_opt.lr: 1e-4
  actor_opt.lr: 1e-4
  critic_opt.lr: 1e-4
  actor_ent: 3e-3
  .*\.norm: layer
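  # Note (assumption): the regex key above is expected to match every nested
  # 'norm' field (rssm, encoder, decoder, heads, actor, critic) and switch it
  # from 'none' to layer normalization for Crafter.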
dmc_vision:
  task: dmc_walker_walk
  encoder: {mlp_keys: '$^', cnn_keys: 'image'}
  decoder: {mlp_keys: '$^', cnn_keys: 'image'}
  action_repeat: 2
  eval_every: 1e4
  prefill: 1000
  pretrain: 100
  clip_rewards: identity
  pred_discount: False
  replay.prioritize_ends: False
  grad_heads: [decoder, reward]
  rssm: {hidden: 200, deter: 200}
  model_opt.lr: 3e-4
  actor_opt.lr: 8e-5
  critic_opt.lr: 8e-5
  actor_ent: 1e-4
  kl.free: 1.0
dmc_proprio:
  task: dmc_walker_walk
  encoder: {mlp_keys: '.*', cnn_keys: '$^'}
  decoder: {mlp_keys: '.*', cnn_keys: '$^'}
  action_repeat: 2
  eval_every: 1e4
  prefill: 1000
  pretrain: 100
  clip_rewards: identity
  pred_discount: False
  replay.prioritize_ends: False
  grad_heads: [decoder, reward]
  rssm: {hidden: 200, deter: 200}
  model_opt.lr: 3e-4
  actor_opt.lr: 8e-5
  critic_opt.lr: 8e-5
  actor_ent: 1e-4
  kl.free: 1.0
debug:
  jit: False
  time_limit: 100
  eval_every: 300
  log_every: 300
  prefill: 100
  pretrain: 1
  train_steps: 1
  replay: {minlen: 10, maxlen: 30}
  dataset: {batch: 10, length: 10}
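# Usage sketch (assumption): with a DreamerV2-style train script, configs are
# selected by name and merged left to right, e.g.
#   python train.py --configs defaults crafter --logdir logs/crafter_run
# The debug config is intended to be stacked on top of a task config for quick
# smoke tests, e.g. --configs defaults atari debug.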