forked from xinyutan/study-ReinforcementLearning
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrl_glue.py
216 lines (155 loc) · 5.73 KB
/
rl_glue.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
#!/usr/bin/env python
"""Glues together an experiment, agent, and environment.
"""
from __future__ import print_function
class RLGlue:
    """Glue object that connects one environment and one agent.

    The constructor instantiates both classes with no arguments; call
    ``rl_init`` before running any steps or episodes so that the counters
    (total reward, step count, episode count) are numeric.

    Args:
        env_class: class (or zero-argument callable) that builds the
            environment object. The glue calls its ``env_init``,
            ``env_start``, ``env_step``, ``env_cleanup`` and ``env_message``
            methods.
        agent_class: class (or zero-argument callable) that builds the agent
            object. The glue calls its ``agent_init``, ``agent_start``,
            ``agent_step``, ``agent_end``, ``agent_cleanup`` and
            ``agent_message`` methods.
    """

    def __init__(self, env_class, agent_class):
        self.environment = env_class()
        self.agent = agent_class()

        # Counters stay None until rl_init() is called.
        self.total_reward = None
        self.last_action = None
        self.num_steps = None
        self.num_episodes = None

    def rl_init(self, agent_init_info=None, env_init_info=None):
        """Initialize the environment and the agent and zero all counters.

        Args:
            agent_init_info (dict): passed to ``agent.agent_init``. A fresh
                empty dict is used when omitted.
            env_init_info (dict): passed to ``environment.env_init``. A fresh
                empty dict is used when omitted.
        """
        # Build new dicts per call: a shared default dict would carry any
        # mutation made by the agent/environment into later calls.
        if agent_init_info is None:
            agent_init_info = {}
        if env_init_info is None:
            env_init_info = {}

        self.environment.env_init(env_init_info)
        self.agent.agent_init(agent_init_info)

        self.total_reward = 0.0
        self.num_steps = 0
        self.num_episodes = 0

    def rl_start(self, agent_start_info=None, env_start_info=None):
        """Start an episode: get the first state and the agent's first action.

        Args:
            agent_start_info: accepted for interface compatibility; not used.
            env_start_info: accepted for interface compatibility; not used.

        Returns:
            tuple: (state, action)
        """
        last_state = self.environment.env_start()
        self.last_action = self.agent.agent_start(last_state)

        observation = (last_state, self.last_action)
        return observation

    def rl_agent_start(self, observation):
        """Start the agent.

        Args:
            observation: the first observation from the environment.

        Returns:
            The action returned by ``agent.agent_start``.
        """
        return self.agent.agent_start(observation)

    def rl_agent_step(self, reward, observation):
        """Let the agent take one step.

        Args:
            reward (float): the reward the agent received for its last
                action.
            observation: the state observation the agent receives from the
                environment.

        Returns:
            The action returned by ``agent.agent_step``.
        """
        return self.agent.agent_step(reward, observation)

    def rl_agent_end(self, reward):
        """Notify the agent that the episode terminated.

        Args:
            reward (float): the reward the agent received when terminating.
        """
        self.agent.agent_end(reward)

    def rl_env_start(self):
        """Start the environment and reset the reward and step counters.

        Sets ``total_reward`` to 0.0 and ``num_steps`` to 1 before starting
        the environment.

        Returns:
            Whatever ``environment.env_start`` returns (the first
            observation).
        """
        self.total_reward = 0.0
        self.num_steps = 1

        this_observation = self.environment.env_start()
        return this_observation

    def rl_env_step(self, action):
        """Step the environment with the given action and update counters.

        A terminal step increments the episode count; a non-terminal step
        increments the step count.

        Args:
            action: action taken by the agent.

        Returns:
            (float, state, Boolean): reward, state observation, boolean
                indicating termination.
        """
        ro = self.environment.env_step(action)
        (this_reward, _, terminal) = ro

        self.total_reward += this_reward

        if terminal:
            self.num_episodes += 1
        else:
            self.num_steps += 1

        return ro

    def rl_step(self):
        """Take one glue step: step the environment, then step or end the agent.

        The environment is stepped with the last action chosen by the agent.
        On a terminal transition the agent is ended and no new action is
        chosen; otherwise the agent picks the next action.

        Returns:
            (float, state, action, Boolean): reward, last state observation,
                last action (None on termination), boolean indicating
                termination.
        """
        (reward, last_state, term) = self.environment.env_step(self.last_action)

        self.total_reward += reward

        if term:
            self.num_episodes += 1
            self.agent.agent_end(reward)
            roat = (reward, last_state, None, term)
        else:
            self.num_steps += 1
            self.last_action = self.agent.agent_step(reward, last_state)
            roat = (reward, last_state, self.last_action, term)

        return roat

    def rl_cleanup(self):
        """Clean up the environment and the agent at the end of an experiment."""
        self.environment.env_cleanup()
        self.agent.agent_cleanup()

    def rl_agent_message(self, message):
        """Send a message to the agent.

        Args:
            message: the message (or question) to send to the agent.

        Returns:
            The message back (or answer) from the agent.
        """
        return self.agent.agent_message(message)

    def rl_env_message(self, message):
        """Send a message to the environment.

        Args:
            message: the message (or question) to send to the environment.

        Returns:
            The message back (or answer) from the environment.
        """
        return self.environment.env_message(message)

    def rl_episode(self, max_steps_this_episode):
        """Run one episode until termination or until the step limit is hit.

        Args:
            max_steps_this_episode (Int): the maximum number of glue steps to
                take in this episode; 0 means no limit.

        Returns:
            Boolean: True if the episode ended in a terminal state, False if
                it was cut off by the step limit.
        """
        is_terminal = False

        self.rl_start()

        # Count steps locally: self.num_steps accumulates across episodes, so
        # using it for the limit would starve every episode after the first.
        steps_this_episode = 0
        while (not is_terminal) and (
                max_steps_this_episode == 0
                or steps_this_episode < max_steps_this_episode):
            rl_step_result = self.rl_step()
            is_terminal = rl_step_result[3]
            steps_this_episode += 1

        return is_terminal

    def rl_return(self):
        """Return the accumulated reward.

        Returns:
            float: the total reward.
        """
        return self.total_reward

    def rl_num_steps(self):
        """Return the step counter.

        Returns:
            Int: the total number of steps taken.
        """
        return self.num_steps

    def rl_num_episodes(self):
        """Return the episode counter.

        Returns:
            Int: the total number of episodes.
        """
        return self.num_episodes