Skip to content

Commit 59bb69a

Browse files
committed
change maze to gain more positive reward
1 parent 49940ee commit 59bb69a

File tree

2 files changed

+9
-9
lines changed

2 files changed

+9
-9
lines changed

Reinforcement_learning_TUT/5_Deep_Q_Network/maze_env.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,11 @@ def _build_maze(self):
5656
hell1_center[0] + 15, hell1_center[1] + 15,
5757
fill='black')
5858
# hell
59-
hell2_center = origin + np.array([UNIT, UNIT * 2])
60-
self.hell2 = self.canvas.create_rectangle(
61-
hell2_center[0] - 15, hell2_center[1] - 15,
62-
hell2_center[0] + 15, hell2_center[1] + 15,
63-
fill='black')
59+
# hell2_center = origin + np.array([UNIT, UNIT * 2])
60+
# self.hell2 = self.canvas.create_rectangle(
61+
# hell2_center[0] - 15, hell2_center[1] - 15,
62+
# hell2_center[0] + 15, hell2_center[1] + 15,
63+
# fill='black')
6464

6565
# create oval
6666
oval_center = origin + UNIT * 2
@@ -114,7 +114,7 @@ def step(self, action):
114114
if next_coords == self.canvas.coords(self.oval):
115115
reward = 1
116116
done = True
117-
elif next_coords in [self.canvas.coords(self.hell1), self.canvas.coords(self.hell2)]:
117+
elif next_coords in [self.canvas.coords(self.hell1)]:
118118
reward = -1
119119
done = True
120120
else:

Reinforcement_learning_TUT/5_Deep_Q_Network/run_this.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
def run_maze():
1515
step = 0
16-
for episode in range(200):
16+
for episode in range(100):
1717
# initial observation
1818
observation = env.reset()
1919

@@ -53,8 +53,8 @@ def run_maze():
5353
reward_decay=0.9,
5454
e_greedy=0.9,
5555
hidden_layers=[10, 10],
56-
replace_target_iter=100,
57-
memory_size=1000,
56+
replace_target_iter=200,
57+
memory_size=3000,
5858
# output_graph=True
5959
)
6060
env.after(100, run_maze)

0 commit comments

Comments
 (0)