# This file runs on a single g3.16xl or p3.16xl node. It is suggested
# to run these in a DLAMI / tensorflow_p36 env. Note that RL runs are
# inherently high variance, so you'll have to check whether the rewards
# reached are reasonably in line with previous results.
#
# You can find the reference results here:
# https://github.com/ray-project/ray/tree/master/release/release_logs
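
# A rough sketch (an assumption, not part of the release tooling) of how the
# experiments in this file could be launched with Ray Tune, assuming
# `ray[rllib]`, PyYAML, and an Atari-capable ALE/Gym installation:
#
#     import yaml
#     import ray
#     from ray import tune
#
#     # Load the experiment specs below and hand them to Tune, which runs
#     # each top-level key as its own experiment.
#     with open("compact-regression-test.yaml") as f:
#         experiments = yaml.safe_load(f)
#
#     ray.init()
#     tune.run_experiments(experiments)

# IMPALA on Breakout: 10 rollout workers, reward clipping, and a linearly
# decaying learning rate, each trial capped at one hour of wall-clock time.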
atari-impala:
    env: ALE/Breakout-v5
    run: IMPALA
    num_samples: 4
    stop:
        time_total_s: 3600
    config:
        env_config:
            frameskip: 1 # no frameskip
        rollout_fragment_length: 50
        train_batch_size: 500
        num_workers: 10
        num_envs_per_worker: 5
        clip_rewards: True
        lr_schedule: [
            [0, 0.0005],
            [20000000, 0.000000000001],
        ]
        num_gpus: 1
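
# PPO with the default TF framework on Breakout: reward clipping, shared
# value-function layers, and 10 SGD iterations per 5000-timestep train batch.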
atari-ppo-tf:
    env: ALE/Breakout-v5
    run: PPO
    num_samples: 4
    stop:
        time_total_s: 3600
    config:
        env_config:
            frameskip: 1 # no frameskip
        lambda: 0.95
        kl_coeff: 0.5
        clip_rewards: True
        clip_param: 0.1
        vf_clip_param: 10.0
        entropy_coeff: 0.01
        train_batch_size: 5000
        rollout_fragment_length: 100
        sgd_minibatch_size: 500
        num_sgd_iter: 10
        num_workers: 10
        num_envs_per_worker: 5
        batch_mode: truncate_episodes
        observation_filter: NoFilter
        model:
            vf_share_layers: true
        num_gpus: 1
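
# Same PPO setup as above, but on the PyTorch framework.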
atari-ppo-torch:
    env: ALE/Breakout-v5
    run: PPO
    num_samples: 4
    stop:
        time_total_s: 3600
    config:
        framework: torch
        env_config:
            frameskip: 1 # no frameskip
        lambda: 0.95
        kl_coeff: 0.5
        clip_rewards: True
        clip_param: 0.1
        vf_clip_param: 10.0
        entropy_coeff: 0.01
        train_batch_size: 5000
        rollout_fragment_length: 100
        sgd_minibatch_size: 500
        num_sgd_iter: 10
        num_workers: 10
        num_envs_per_worker: 5
        batch_mode: truncate_episodes
        observation_filter: NoFilter
        model:
            vf_share_layers: true
        num_gpus: 1
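
# Ape-X DQN on Breakout: 8 workers with 8 envs each, 3-step returns, and a
# 1M-capacity prioritized replay buffer; double/dueling/noisy/distributional
# extensions are all disabled.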
apex:
    env: ALE/Breakout-v5
    run: APEX
    num_samples: 4
    stop:
        time_total_s: 3600
    config:
        env_config:
            frameskip: 1 # no frameskip
        double_q: false
        dueling: false
        num_atoms: 1
        noisy: false
        n_step: 3
        lr: .0001
        adam_epsilon: .00015
        hiddens: [512]
        exploration_config:
            epsilon_timesteps: 200000
            final_epsilon: 0.01
        replay_buffer_config:
            type: MultiAgentPrioritizedReplayBuffer
            prioritized_replay_alpha: 0.5
            capacity: 1000000
        num_gpus: 1
        num_workers: 8
        num_envs_per_worker: 8
        rollout_fragment_length: 20
        train_batch_size: 512
        target_network_update_freq: 50000
        min_sample_timesteps_per_iteration: 25000
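
# A2C on Breakout: 5 workers with 5 envs each, reward clipping, and a
# linearly decaying learning rate.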
atari-a2c:
    env: ALE/Breakout-v5
    run: A2C
    num_samples: 4
    stop:
        time_total_s: 3600
    config:
        env_config:
            frameskip: 1 # no frameskip
        rollout_fragment_length: 20
        clip_rewards: True
        num_workers: 5
        num_envs_per_worker: 5
        num_gpus: 1
        lr_schedule: [
            [0, 0.0007],
            [20000000, 0.000000000001],
        ]
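
# Basic (non-rainbow) DQN on Breakout: uniform 1M-capacity replay buffer,
# 1-step returns, epsilon-greedy exploration annealed over 200k timesteps,
# and a fractional (0.2) GPU.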
atari-basic-dqn:
    env: ALE/Breakout-v5
    run: DQN
    num_samples: 4
    stop:
        time_total_s: 3600
    config:
        env_config:
            frameskip: 1 # no frameskip
        double_q: false
        dueling: false
        num_atoms: 1
        noisy: false
        replay_buffer_config:
            type: MultiAgentReplayBuffer
            capacity: 1000000
        num_steps_sampled_before_learning_starts: 20000
        n_step: 1
        target_network_update_freq: 8000
        lr: .0000625
        adam_epsilon: .00015
        hiddens: [512]
        rollout_fragment_length: 4
        train_batch_size: 32
        exploration_config:
            epsilon_timesteps: 200000
            final_epsilon: 0.01
        num_gpus: 0.2
        min_sample_timesteps_per_iteration: 10000