File tree: 2 files changed, +9 -9 lines changed
Reinforcement_learning_TUT/5_Deep_Q_Network
2 files changed, +9 -9 lines changed
Original file line number | Diff line number | Diff line change
@@ -56,11 +56,11 @@ def _build_maze(self):
56
56
hell1_center [0 ] + 15 , hell1_center [1 ] + 15 ,
57
57
fill = 'black' )
58
58
# hell
59
- hell2_center = origin + np .array ([UNIT , UNIT * 2 ])
60
- self .hell2 = self .canvas .create_rectangle (
61
- hell2_center [0 ] - 15 , hell2_center [1 ] - 15 ,
62
- hell2_center [0 ] + 15 , hell2_center [1 ] + 15 ,
63
- fill = 'black' )
59
+ # hell2_center = origin + np.array([UNIT, UNIT * 2])
60
+ # self.hell2 = self.canvas.create_rectangle(
61
+ # hell2_center[0] - 15, hell2_center[1] - 15,
62
+ # hell2_center[0] + 15, hell2_center[1] + 15,
63
+ # fill='black')
64
64
65
65
# create oval
66
66
oval_center = origin + UNIT * 2
@@ -114,7 +114,7 @@ def step(self, action):
114
114
if next_coords == self .canvas .coords (self .oval ):
115
115
reward = 1
116
116
done = True
117
- elif next_coords in [self .canvas .coords (self .hell1 ), self . canvas . coords ( self . hell2 ) ]:
117
+ elif next_coords in [self .canvas .coords (self .hell1 )]:
118
118
reward = - 1
119
119
done = True
120
120
else :
Original file line number | Diff line number | Diff line change
13
13
14
14
def run_maze ():
15
15
step = 0
16
- for episode in range (200 ):
16
+ for episode in range (100 ):
17
17
# initial observation
18
18
observation = env .reset ()
19
19
@@ -53,8 +53,8 @@ def run_maze():
53
53
reward_decay = 0.9 ,
54
54
e_greedy = 0.9 ,
55
55
hidden_layers = [10 , 10 ],
56
- replace_target_iter = 100 ,
57
- memory_size = 1000 ,
56
+ replace_target_iter = 200 ,
57
+ memory_size = 3000 ,
58
58
# output_graph=True
59
59
)
60
60
env .after (100 , run_maze )
You can’t perform that action at this time.
0 commit comments