forked from ray-project/ray
-
Notifications
You must be signed in to change notification settings - Fork 0
/
halfcheetah-sac.yaml
36 lines (35 loc) · 1.08 KB
/
halfcheetah-sac.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# Our implementation of SAC can reach 9k reward in 400k timesteps
#
# Single experiment definition: the top-level key is the experiment name, and
# `env`, `run`, `stop`, and `config` are nested beneath it.
halfcheetah_sac:
  env: HalfCheetah-v3
  run: SAC
  # Stopping criterion: matches the 9k reward target mentioned above.
  stop:
    episode_reward_mean: 9000
  config:
    # Works for both torch and tf.
    framework: torch
    # The Q-network and the policy network use the same architecture:
    # two hidden layers of 256 units with ReLU activations.
    q_model_config:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256]
    policy_model_config:
      fcnet_activation: relu
      fcnet_hiddens: [256, 256]
    tau: 0.005
    target_entropy: auto
    n_step: 1
    rollout_fragment_length: 1
    train_batch_size: 256
    target_network_update_freq: 1
    min_sample_timesteps_per_iteration: 1000
    replay_buffer_config:
      type: MultiAgentPrioritizedReplayBuffer
    # NOTE(review): placed at `config` level, not inside
    # `replay_buffer_config`; the original nesting was lost, so confirm this
    # against the RLlib version in use.
    num_steps_sampled_before_learning_starts: 10000
    # Actor, critic, and entropy coefficient all share one learning rate.
    optimization:
      actor_learning_rate: 0.0003
      critic_learning_rate: 0.0003
      entropy_learning_rate: 0.0003
    num_workers: 0
    num_gpus: 0
    clip_actions: false
    normalize_actions: true
    evaluation_interval: 1
    metrics_num_episodes_for_smoothing: 5