Skip to content

Commit f075f1c

Browse files
committed
Chapter 7: Policy-based methods
1 parent 7dad02c commit f075f1c

11 files changed

+337
-0
lines changed

Chapter07/a2c_agent.py

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
"""Train an A2C agent (without GAE) on a Gym environment via RLlib/Tune."""
import argparse
import pprint

import ray
from ray import tune
from ray.rllib.agents.a3c.a2c import (
    A2C_DEFAULT_CONFIG as DEFAULT_CONFIG,
    A2CTrainer as trainer)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # required=True: fail fast with a clear CLI error instead of passing
    # env=None into RLlib and crashing deep inside trainer setup.
    parser.add_argument('--env', required=True,
                        help='Gym env name.')
    args = parser.parse_args()
    config = DEFAULT_CONFIG.copy()
    config_update = {
        "env": args.env,
        "num_gpus": 1,
        "num_workers": 50,
        "evaluation_num_workers": 10,
        "evaluation_interval": 1,
        # This variant uses plain n-step advantage estimates (no GAE).
        "use_gae": False
    }
    config.update(config_update)
    # Print the fully-resolved config so the run is reproducible from logs.
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(config)
    ray.init()
    tune.run(trainer,
             stop={"timesteps_total": 2000000},
             config=config
             )

Chapter07/a2c_agent_gae.py

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
"""Train an A2C agent with GAE on a Gym environment via RLlib/Tune."""
import argparse
import pprint

import ray
from ray import tune
from ray.rllib.agents.a3c.a2c import (
    A2C_DEFAULT_CONFIG as DEFAULT_CONFIG,
    A2CTrainer as trainer)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # required=True: fail fast with a clear CLI error instead of passing
    # env=None into RLlib and crashing deep inside trainer setup.
    parser.add_argument('--env', required=True,
                        help='Gym env name.')
    args = parser.parse_args()
    config = DEFAULT_CONFIG.copy()
    config_update = {
        "env": args.env,
        "num_gpus": 1,
        "num_workers": 50,
        "evaluation_num_workers": 10,
        "evaluation_interval": 1,
        # This variant enables Generalized Advantage Estimation.
        "use_gae": True
    }
    config.update(config_update)
    # Print the fully-resolved config so the run is reproducible from logs.
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(config)
    ray.init()
    tune.run(trainer,
             stop={"timesteps_total": 2000000},
             config=config
             )

Chapter07/a3c_agent.py

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
"""Train an A3C agent (without GAE) on a Gym environment via RLlib/Tune."""
import argparse
import pprint

import ray
from ray import tune
from ray.rllib.agents.a3c.a3c import (
    DEFAULT_CONFIG,
    A3CTrainer as trainer)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # required=True: fail fast with a clear CLI error instead of passing
    # env=None into RLlib and crashing deep inside trainer setup.
    parser.add_argument('--env', required=True,
                        help='Gym env name.')
    args = parser.parse_args()
    config = DEFAULT_CONFIG.copy()
    config_update = {
        "env": args.env,
        "num_gpus": 1,
        "num_workers": 50,
        "evaluation_num_workers": 10,
        "evaluation_interval": 1,
        # This variant uses plain n-step advantage estimates (no GAE).
        "use_gae": False
    }
    config.update(config_update)
    # Print the fully-resolved config so the run is reproducible from logs.
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(config)
    ray.init()
    tune.run(trainer,
             stop={"timesteps_total": 2000000},
             config=config
             )

Chapter07/a3c_agent_gae.py

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
"""Train an A3C agent with GAE on a Gym environment via RLlib/Tune."""
import argparse
import pprint

import ray
from ray import tune
from ray.rllib.agents.a3c.a3c import (
    DEFAULT_CONFIG,
    A3CTrainer as trainer)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # required=True: fail fast with a clear CLI error instead of passing
    # env=None into RLlib and crashing deep inside trainer setup.
    parser.add_argument('--env', required=True,
                        help='Gym env name.')
    args = parser.parse_args()
    config = DEFAULT_CONFIG.copy()
    config_update = {
        "env": args.env,
        "num_gpus": 1,
        "num_workers": 50,
        "evaluation_num_workers": 10,
        "evaluation_interval": 1,
        # This variant enables Generalized Advantage Estimation.
        "use_gae": True
    }
    config.update(config_update)
    # Print the fully-resolved config so the run is reproducible from logs.
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(config)
    ray.init()
    tune.run(trainer,
             stop={"timesteps_total": 2000000},
             config=config
             )

Chapter07/apex_ddpg_agent.py

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
"""Train an Ape-X DDPG agent on a Gym environment via RLlib/Tune."""
import argparse
import pprint

import ray
from ray import tune
from ray.rllib.agents.ddpg.apex import (
    APEX_DDPG_DEFAULT_CONFIG as DEFAULT_CONFIG,
    ApexDDPGTrainer as trainer)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # required=True: fail fast with a clear CLI error instead of passing
    # env=None into RLlib and crashing deep inside trainer setup.
    parser.add_argument('--env', required=True,
                        help='Gym env name.')
    args = parser.parse_args()
    config = DEFAULT_CONFIG.copy()
    config_update = {
        "env": args.env,
        "num_gpus": 1,
        "num_workers": 50,
        "evaluation_num_workers": 10,
        "evaluation_interval": 1
    }
    config.update(config_update)
    # Print the fully-resolved config so the run is reproducible from logs.
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(config)
    ray.init()
    tune.run(trainer,
             stop={"timesteps_total": 2000000},
             config=config
             )

Chapter07/ddpg_agent.py

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
"""Train a DDPG agent on a Gym environment via RLlib/Tune."""
import argparse
import pprint

import ray
from ray import tune
from ray.rllib.agents.ddpg.ddpg import (
    DEFAULT_CONFIG,
    DDPGTrainer as trainer)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # required=True: fail fast with a clear CLI error instead of passing
    # env=None into RLlib and crashing deep inside trainer setup.
    parser.add_argument('--env', required=True,
                        help='Gym env name.')
    args = parser.parse_args()
    config = DEFAULT_CONFIG.copy()
    config_update = {
        "env": args.env,
        "num_gpus": 1,
        "num_workers": 50,
        "evaluation_num_workers": 10,
        "evaluation_interval": 1
    }
    config.update(config_update)
    # Print the fully-resolved config so the run is reproducible from logs.
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(config)
    ray.init()
    tune.run(trainer,
             stop={"timesteps_total": 2000000},
             config=config
             )

Chapter07/impala_agent.py

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
"""Train an IMPALA agent on a Gym environment via RLlib/Tune."""
import argparse
import pprint

import ray
from ray import tune
from ray.rllib.agents.impala.impala import (
    DEFAULT_CONFIG,
    ImpalaTrainer as trainer)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # required=True: fail fast with a clear CLI error instead of passing
    # env=None into RLlib and crashing deep inside trainer setup.
    parser.add_argument('--env', required=True,
                        help='Gym env name.')
    args = parser.parse_args()
    config = DEFAULT_CONFIG.copy()
    config_update = {
        "env": args.env,
        "num_gpus": 1,
        "num_workers": 50,
        "evaluation_num_workers": 10,
        "evaluation_interval": 1
    }
    config.update(config_update)
    # Print the fully-resolved config so the run is reproducible from logs.
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(config)
    ray.init()
    tune.run(trainer,
             stop={"timesteps_total": 2000000},
             config=config
             )

Chapter07/pg_agent.py

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
"""Train a vanilla Policy Gradient agent on a Gym environment via RLlib/Tune."""
import argparse
import pprint

import ray
from ray import tune
from ray.rllib.agents.pg.pg import (
    DEFAULT_CONFIG,
    PGTrainer as trainer)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # required=True: fail fast with a clear CLI error instead of passing
    # env=None into RLlib and crashing deep inside trainer setup.
    parser.add_argument('--env', required=True,
                        help='Gym env name.')
    args = parser.parse_args()
    config = DEFAULT_CONFIG.copy()
    config_update = {
        "env": args.env,
        "num_gpus": 1,
        "num_workers": 50,
        "evaluation_num_workers": 10,
        "evaluation_interval": 1
    }
    config.update(config_update)
    # Print the fully-resolved config so the run is reproducible from logs.
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(config)
    ray.init()
    tune.run(trainer,
             stop={"timesteps_total": 2000000},
             config=config
             )

Chapter07/ppo_agent.py

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
"""Train a PPO agent on a Gym environment via RLlib/Tune."""
import argparse
import pprint

import ray
from ray import tune
from ray.rllib.agents.ppo.ppo import (
    DEFAULT_CONFIG,
    PPOTrainer as trainer)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # required=True: fail fast with a clear CLI error instead of passing
    # env=None into RLlib and crashing deep inside trainer setup.
    parser.add_argument('--env', required=True,
                        help='Gym env name.')
    args = parser.parse_args()
    config = DEFAULT_CONFIG.copy()
    config_update = {
        "env": args.env,
        "num_gpus": 1,
        "num_workers": 50,
        "evaluation_num_workers": 10,
        "evaluation_interval": 1
    }
    config.update(config_update)
    # Print the fully-resolved config so the run is reproducible from logs.
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(config)
    ray.init()
    tune.run(trainer,
             stop={"timesteps_total": 2000000},
             config=config
             )

Chapter07/sac_agent.py

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
"""Train a SAC agent on a Gym environment via RLlib/Tune."""
import argparse
import pprint

import ray
from ray import tune
from ray.rllib.agents.sac.sac import (
    DEFAULT_CONFIG,
    SACTrainer as trainer)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # required=True: fail fast with a clear CLI error instead of passing
    # env=None into RLlib and crashing deep inside trainer setup.
    parser.add_argument('--env', required=True,
                        help='Gym env name.')
    args = parser.parse_args()
    config = DEFAULT_CONFIG.copy()
    config_update = {
        "env": args.env,
        "num_gpus": 1,
        "num_workers": 50,
        "evaluation_num_workers": 10,
        "evaluation_interval": 1
    }
    config.update(config_update)
    # Print the fully-resolved config so the run is reproducible from logs.
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(config)
    ray.init()
    tune.run(trainer,
             stop={"timesteps_total": 2000000},
             config=config
             )

Chapter07/td3_agent.py

+30
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
"""Train a TD3 agent on a Gym environment via RLlib/Tune."""
import argparse
import pprint

import ray
from ray import tune
from ray.rllib.agents.ddpg.td3 import (
    TD3_DEFAULT_CONFIG as DEFAULT_CONFIG,
    TD3Trainer as trainer)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    # required=True: fail fast with a clear CLI error instead of passing
    # env=None into RLlib and crashing deep inside trainer setup.
    parser.add_argument('--env', required=True,
                        help='Gym env name.')
    args = parser.parse_args()
    config = DEFAULT_CONFIG.copy()
    config_update = {
        "env": args.env,
        "num_gpus": 1,
        "num_workers": 50,
        "evaluation_num_workers": 10,
        "evaluation_interval": 1
    }
    config.update(config_update)
    # Print the fully-resolved config so the run is reproducible from logs.
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(config)
    ray.init()
    tune.run(trainer,
             stop={"timesteps_total": 2000000},
             config=config
             )

0 commit comments

Comments
 (0)