# hunter_prey.py
# Forked from MorvanZhou/tutorials.
import tkinter as tk
import numpy as np
import pandas as pd
class TableLUQ(object):
    """Tabular Q-learning agent with an epsilon-greedy policy.

    Action values live in a pandas DataFrame holding one row per known state
    and one column per action.  Rows are created lazily, filled with zeros,
    the first time a state is needed.

    Args:
        actions: action labels; they become the table's column labels.
        epsilon: probability of choosing a uniformly random action.
        alpha: learning rate applied to the temporal-difference error.
        gamma: discount factor applied to the next state's best value.
    """

    def __init__(self, actions, epsilon=0.1, alpha=0.2, gamma=0.9):
        self._actions = list(actions)
        self._epsilon = epsilon
        self._alpha = alpha
        self._gamma = gamma
        # State-action table: index = state label, columns = actions.
        # A float dtype keeps max()/idxmax() numeric from the first row on.
        self._q_table = pd.DataFrame(columns=self._actions, dtype=float)
        self._q_table.index.name = 'state'

    def _ensure_state(self, state):
        """Add an all-zero row for *state* if the table has none yet."""
        if state not in self._q_table.index:
            # Setting with enlargement; DataFrame.append no longer exists
            # in pandas >= 2.0.
            self._q_table.loc[state] = [0.0] * len(self._actions)

    def learn(self, s0, a0, r, s1):
        """Apply one Q-learning update for the transition (s0, a0, r, s1).

        Args:
            s0: label of the state the action was taken in.
            a0: the action that was taken (one of the table's columns).
            r: reward received for the transition.
            s1: label of the resulting state.  A state with no row yet
                contributes a best value of 0.
        """
        # s0 has no row when the action came from the exploration branch of
        # choose_action, so create it here instead of raising KeyError.
        self._ensure_state(s0)
        q_s0_a0 = self._q_table.loc[s0, a0]
        if s1 in self._q_table.index:
            q_s1_a_max = self._q_table.loc[s1, :].max()
        else:
            q_s1_a_max = 0
        self._q_table.loc[s0, a0] = q_s0_a0 + self._alpha * (
            r + self._gamma * q_s1_a_max - q_s0_a0
        )

    def choose_action(self, state):
        """Pick an action for *state* using the epsilon-greedy rule.

        With probability ``epsilon`` a uniformly random action is returned.
        Otherwise an unseen state is registered and also gets a random
        action, and a known state gets its highest-valued action.

        Returns:
            One of the action labels passed to the constructor.
        """
        if np.random.random() < self._epsilon:
            return np.random.choice(self._actions)
        if state not in self._q_table.index:
            self._ensure_state(state)
            return np.random.choice(self._actions)
        state_actions = self._q_table.loc[state, :]
        # Shuffle the columns first so that ties are broken at random rather
        # than always in favour of the first column.
        shuffled = state_actions.reindex(
            np.random.permutation(state_actions.index))
        # idxmax() returns the action label; argmax() would return an
        # integer position in current pandas.
        return shuffled.idxmax()

    @property
    def epsilon(self):
        """Exploration probability."""
        return self._epsilon

    @property
    def alpha(self):
        """Learning rate."""
        return self._alpha

    @property
    def gamma(self):
        """Discount factor."""
        return self._gamma
def get_state(h_loc, p_loc):
    """Describe the prey's position relative to the hunter as a string.

    Args:
        h_loc: hunter location, indexable as ``[x, y]``.
        p_loc: prey location, indexable as ``[x, y]``.

    Returns:
        str: the text form of the tuple ``(dx, dy)``, where each component is
        the prey coordinate minus the hunter coordinate, e.g. ``'(4, 4)'``.
    """
    dx = p_loc[0] - h_loc[0]
    dy = p_loc[1] - h_loc[1]
    return str((dx, dy))
def get_reward(h_loc, p_loc, caught_reward=2.0):
    """Return a reward that grows as the hunter gets closer to the prey.

    Args:
        h_loc: hunter location, indexable as ``[x, y]``.
        p_loc: prey location, indexable as ``[x, y]``.
        caught_reward: value returned when both locations coincide.  On an
            integer grid every other reward is at most 1.0, so the default
            ranks catching the prey above any other outcome.

    Returns:
        float: ``1 / distance`` using the Euclidean distance between the two
        locations, or ``caught_reward`` when that distance is zero.
    """
    distance = ((p_loc[0] - h_loc[0])**2 + (p_loc[1] - h_loc[1])**2) ** 0.5
    if distance == 0:
        # Hunter is on the prey's cell; 1/distance would raise
        # ZeroDivisionError.
        return caught_reward
    return 1 / distance
def move(h_loc, action):
    """Move the hunter one cell and shift its icon on the canvas.

    The target cell is clamped to the range 0..4 on both axes, so an action
    that would leave the grid keeps the hunter where it is.  Relies on the
    module-level ``canvas`` and ``hunter_icon``.

    Args:
        h_loc: current hunter location, indexable as ``[x, y]``.  It is not
            modified.
        action: ``'u'`` (y - 1), ``'d'`` (y + 1) or ``'l'`` (x - 1); any
            other value is treated as a move to the right (x + 1).

    Returns:
        list: the hunter's new ``[x, y]`` location.  The caller must store
        it, because rebinding ``h_loc`` inside this function cannot update
        the caller's variable.
    """
    h_x, h_y = h_loc[0], h_loc[1]
    if action == 'u':
        h_y = max(h_y - 1, 0)
    elif action == 'd':
        h_y = min(h_y + 1, 4)
    elif action == 'l':
        h_x = max(h_x - 1, 0)
    else:
        h_x = min(h_x + 1, 4)
    # One grid cell corresponds to 100 canvas units.
    x_amount = (h_x - h_loc[0]) * 100
    y_amount = (h_y - h_loc[1]) * 100
    canvas.move(hunter_icon, x_amount, y_amount)
    return [h_x, h_y]
# Build a 500x500 window holding a canvas of the same size.
window = tk.Tk()
window.geometry('500x500')
canvas = tk.Canvas(window, height=500, width=500)
canvas.pack()

# Locations are [x, y] grid cells; each cell is drawn as a 100x100 square.
hunter_loc = [0, 0]  # can move
prey_loc = [4, 4]  # fixed

# The hunter is a black square, the prey a red circle.
hunter_icon = canvas.create_rectangle(
    (
        hunter_loc[0]*100,
        hunter_loc[1]*100,
        hunter_loc[0]*100+100,
        hunter_loc[1]*100+100),
    fill='black'
)
prey_icon = canvas.create_oval(
    (
        prey_loc[0]*100,
        prey_loc[1]*100,
        prey_loc[0]*100+100,
        prey_loc[1]*100+100),
    fill='red'
)

# Per-action [dx, dy] offsets, following the convention move() implements:
# y grows downwards on the canvas, so 'u' decreases y and 'l' decreases x.
move_up = [0, -1]
move_down = [0, +1]
move_left = [-1, 0]
move_right = [+1, 0]
hunter_actions = {
    'u': move_up,
    'd': move_down,
    'l': move_left,
    'r': move_right,
}

# Hand control to tkinter; this call blocks until the window is closed.
window.mainloop()