-
Notifications
You must be signed in to change notification settings - Fork 0
/
CMazeExperience.py
59 lines (50 loc) · 1.58 KB
/
CMazeExperience.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import random
import numpy as np
import math
class CMazeExperience:
    """Bounded store of the best-scoring episodes, with weighted sampling.

    An episode ("replay") is a sized sequence of 4-item steps
    ``(state, action, reward, next_state)``. The score of an episode is the
    sum of its step rewards. Episodes are stored in ``self.episodes`` as
    ``(replay, score)`` pairs.

    Attributes:
        maxSize: number of episodes kept after a pruning pass.
        sizeLimit: pruning is triggered once the number of stored episodes
            exceeds this value (``maxSize * 1.1``).
        episodes: list of ``(replay, score)`` pairs.
        gamma: set to 0.5; not read anywhere inside this class.
        minScore: lowest score kept by the latest pruning pass; episodes
            scoring below it are rejected by ``addEpisode``.
    """

    def __init__(self, maxSize):
        """Create an empty buffer that keeps about ``maxSize`` episodes."""
        self.maxSize = maxSize
        self.sizeLimit = (maxSize * 1.1)
        self.episodes = []
        self.gamma = 0.5
        # Until the first pruning pass every episode is accepted.
        self.minScore = -math.inf

    def addEpisode(self, replay):
        """Store ``replay`` unless it is empty or scores below ``minScore``.

        Args:
            replay: sized sequence of ``(state, action, reward, next_state)``
                steps.
        """
        # An empty replay has nothing to sample from and would make
        # take_batch fail later, so it is never stored.
        if len(replay) == 0:
            return
        score = sum(step[2] for step in replay)
        if score < self.minScore:
            return
        self.episodes.append((replay, score))
        if self.sizeLimit < len(self.episodes):
            self.update()

    def update(self):
        """Keep only the ``maxSize`` best episodes and refresh ``minScore``."""
        self.episodes = sorted(
            self.episodes, key=lambda entry: entry[1], reverse=True
        )[:self.maxSize]
        # With maxSize == 0 nothing survives; there is no last entry to read.
        if not self.episodes:
            return
        self.minScore = self.episodes[-1][1]
        print('Min score: %.6f' % self.minScore)

    def __len__(self):
        """Return the number of stored episodes."""
        return len(self.episodes)

    @staticmethod
    def _sampling_weights(values):
        """Turn raw values into weights that ``random.choices`` accepts.

        Values that are already non-negative with a positive, finite total
        are returned unchanged. If any value is negative, all values are
        shifted so the smallest becomes zero. ``None`` (meaning "sample
        uniformly") is returned when no usable total remains.
        """
        values = list(values)
        if not values:
            return None
        lowest = min(values)
        if lowest < 0:
            values = [value - lowest for value in values]
        total = sum(values)
        if not (math.isfinite(total) and total > 0):
            return None
        return values

    def take_batch(self, batch_size):
        """Sample ``batch_size`` steps, favouring high-scoring episodes.

        An episode is drawn with probability proportional to its score, then
        up to five distinct steps are drawn from it with probability
        proportional to ``abs(reward)``; this repeats until the batch is
        full. When the weights are unusable (non-positive total), sampling
        falls back to uniform; negative episode scores are shifted first, so
        the lowest-scoring episode gets weight zero.

        Args:
            batch_size: number of steps to return.

        Returns:
            Tuple of five ``numpy`` arrays of equal length: states, actions,
            rewards, next states, and a 0/1 flag that is 0 only for the last
            step of its episode.

        Raises:
            IndexError: if ``batch_size`` is positive and there is no
                non-empty episode to sample from.
        """
        # Empty replays cannot contribute steps; skipping them also rules out
        # a loop that never makes progress.
        candidates = [entry for entry in self.episodes if len(entry[0]) > 0]
        if batch_size > 0 and not candidates:
            raise IndexError('cannot sample from an empty experience buffer')

        episode_weights = self._sampling_weights(
            [score for _, score in candidates]
        )

        batch = []
        while len(batch) < batch_size:
            episode, _ = random.choices(
                candidates,
                weights=episode_weights,
                k=1,
            )[0]
            step_weights = self._sampling_weights(
                [abs(step[2]) for step in episode]
            )
            # Duplicates collapse in the set, so an iteration may add fewer
            # than k steps; the loop simply continues until the batch is full.
            picked = set(random.choices(
                range(len(episode)),
                weights=step_weights,
                k=min(5, batch_size - len(batch), len(episode)),
            ))
            last_index = len(episode) - 1
            for ind in picked:
                state, act, score, nextState = episode[ind]
                nextStateWeight = 1 if ind < last_index else 0
                batch.append((state, act, score, nextState, nextStateWeight))

        return (
            np.array([item[0] for item in batch]),
            np.array([item[1] for item in batch]),
            np.array([item[2] for item in batch]),
            np.array([item[3] for item in batch]),
            np.array([item[4] for item in batch]),
        )