Commit

add MPE environment
liber145 committed Oct 16, 2022
1 parent b8d579d commit b962612
Showing 8 changed files with 43 additions and 7 deletions.
2 changes: 1 addition & 1 deletion 04_dqn.py
@@ -1,4 +1,4 @@
-"""Section 7.3: DQN algorithm implementation.
+"""Section 4.3: DQN algorithm implementation.
"""
import argparse
from collections import defaultdict
2 changes: 1 addition & 1 deletion 05_sarsa.py
@@ -1,4 +1,4 @@
-"""Section 8.3: Multi-step SARSA algorithm implementation.
+"""Section 5.3: Multi-step SARSA algorithm implementation.
"""
import argparse
import os
2 changes: 1 addition & 1 deletion 08_a2c.py
@@ -1,4 +1,4 @@
-"""Section 11.3: A2C algorithm implementation."""
+"""Section 8.3: A2C algorithm implementation."""
import argparse
import os
from collections import defaultdict
2 changes: 1 addition & 1 deletion 08_reinforce_with_baseline.py
@@ -1,4 +1,4 @@
-"""Section 11.3: REINFORCE with baseline algorithm implementation."""
+"""Section 8.3: REINFORCE with baseline algorithm implementation."""
import argparse
import os
from collections import defaultdict
2 changes: 1 addition & 1 deletion 10_td3.py
@@ -1,4 +1,4 @@
-"""Section 13.4: TD3 algorithm implementation.
+"""Section 10.4: TD3 algorithm implementation.
"""
import argparse
from collections import defaultdict
2 changes: 1 addition & 1 deletion 13_a3c.py
@@ -1,4 +1,4 @@
-"""Section 16.3: A3C algorithm implementation."""
+"""Section 13.3: A3C algorithm implementation."""
import argparse
import os
import gym
36 changes: 36 additions & 0 deletions 14_mpe.py
@@ -0,0 +1,36 @@
"""Section 14.3: MPE environment.
Install the dependency: pip install "pettingzoo[mpe]"
"""
import time

from pettingzoo.mpe import simple_crypto_v2

# render_mode must be set at construction time for env.render() to display anything.
env = simple_crypto_v2.env(render_mode="human")

num_agents = len(env.possible_agents)

# Print each agent's action and observation spaces.
print(f"{num_agents} agents")
for i in range(num_agents):
    num_actions = env.action_space(env.possible_agents[i]).n
    observation_size = env.observation_space(env.possible_agents[i]).shape
    print(i, env.possible_agents[i], "num_actions:", num_actions, "observation_size:", observation_size)


# Step through the environment with random actions via the agent-iteration (AEC) API.
env.reset()
for i, agent in enumerate(env.agent_iter()):
    observation, reward, termination, truncation, info = env.last()

    # A terminated or truncated agent must be stepped with a None action.
    if termination or truncation:
        action = None
    else:
        action = env.action_space(agent).sample()
    env.step(action)

    print(i, agent)
    print(f"action={action}, observation={observation}, reward={reward}, "
          f"termination={termination}, truncation={truncation}, info={info}")

    env.render()
    time.sleep(0.1)

    if i == 50:
        break
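
For comparison with the agent_iter loop in 14_mpe.py above, the same MPE environment can also be driven through PettingZoo's parallel API, where every live agent submits an action at once and step() returns per-agent dictionaries. The sketch below is not part of this commit; it assumes the same pettingzoo[mpe] install and the termination/truncation step signature used above.

from pettingzoo.mpe import simple_crypto_v2

# Parallel API: all agents act simultaneously; step() returns dicts keyed by agent name.
env = simple_crypto_v2.parallel_env()
out = env.reset(seed=0)
# Newer PettingZoo releases return (observations, infos) from reset(); older ones return observations only.
observations = out[0] if isinstance(out, tuple) else out

# env.agents shrinks as agents terminate or truncate; the loop ends when the episode is over.
while env.agents:
    actions = {agent: env.action_space(agent).sample() for agent in env.agents}
    observations, rewards, terminations, truncations, infos = env.step(actions)
env.close()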
2 changes: 1 addition & 1 deletion ReadMe.md
@@ -28,7 +28,7 @@ python -u 04_dqn.py --do_train --output_dir output 2>&1 | tee output/log.txt
| 11 Incomplete observation of the state | |
| 12 Imitation learning | GAIL |
| 13 Parallel computing | A3C |
-| 14 Multi-agent systems | |
+| 14 Multi-agent systems | MPE |
| 15 Multi-agent RL in the cooperative setting | |
| 16 Multi-agent RL in the non-cooperative setting | |
| 17 Attention mechanisms and multi-agent RL | |
