Skip to content

Commit 6b7e92b

Browse files
committed
edit
1 parent 0205c4e commit 6b7e92b

File tree

2 files changed

+14
-10
lines changed

2 files changed

+14
-10
lines changed

Reinforcement_learning_TUT/11_Dyna_Q/RL_brain.py

+10-9
Original file line numberDiff line numberDiff line change
@@ -52,27 +52,28 @@ def check_state_exist(self, state):
5252

5353

5454
class EnvModel:
55-
"""Similar to the memory buffer of DQN, you can store past experiences in here"""
55+
"""Similar to the memory buffer in DQN, you can store past experiences in here.
56+
Alternatively, the model can generate next state and reward signal accurately."""
5657
def __init__(self, actions):
5758
# the simplest case is to think of the model as a memory that holds all past transition information
5859
self.actions = actions
59-
self.memory = pd.DataFrame(columns=actions, dtype=np.object)
60+
self.database = pd.DataFrame(columns=actions, dtype=np.object)
6061

6162
def store_transition(self, s, a, r, s_):
62-
if s not in self.memory.index:
63-
self.memory = self.memory.append(
63+
if s not in self.database.index:
64+
self.database = self.database.append(
6465
pd.Series(
6566
[None] * len(self.actions),
66-
index=self.memory.columns,
67+
index=self.database.columns,
6768
name=s,
6869
))
69-
self.memory.set_value(s, a, (r, s_))
70+
self.database.set_value(s, a, (r, s_))
7071

7172
def sample_s_a(self):
72-
s = np.random.choice(self.memory.index)
73-
a = np.random.choice(self.memory.ix[s].dropna().index) # filter out the None value
73+
s = np.random.choice(self.database.index)
74+
a = np.random.choice(self.database.ix[s].dropna().index) # filter out the None value
7475
return s, a
7576

7677
def get_r_s_(self, s, a):
77-
r, s_ = self.memory.ix[s, a]
78+
r, s_ = self.database.ix[s, a]
7879
return r, s_

Reinforcement_learning_TUT/README.md

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1-
# Reinforcement Methods and Tutorials
1+
# Reinforcement Learning Methods and Tutorials
2+
3+
These reinforcement learning tutorials cover everything from the basic RL algorithms to advanced algorithms developed in recent years.
24

35
All methods mentioned below have their video and text tutorial in Chinese. Visit [莫烦 Python](https://morvanzhou.github.io/tutorials/) for more.
6+
If you speak Chinese, you can watch my [YouTube channel](https://www.youtube.com/channel/UCdyjiB5H8Pu7aDTNVXTTpcg) as well.
47

58

69
* [Simple entry example](https://github.com/MorvanZhou/tutorials/tree/master/Reinforcement_learning_TUT/1_command_line_reinforcement_learning)

0 commit comments

Comments
 (0)