-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
4389744
commit 3a2d4c4
Showing 5 changed files with 384 additions and 19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
import random | ||
import numpy as np | ||
import math | ||
|
||
class CMazeExperience:
    """Bounded replay memory that keeps the best-scoring episodes.

    Every stored entry is a ``(replay, score)`` pair.  ``replay`` is a
    sequence of ``(state, action, reward, nextState)`` steps and ``score`` is
    the plain (undiscounted) sum of the step rewards.
    """

    # Floor used when episode scores have to be shifted into the positive
    # range before they can serve as sampling weights.
    _MIN_WEIGHT = 1e-3

    def __init__(self, maxSize):
        """Create an empty memory that is pruned back to ``maxSize`` episodes."""
        self.maxSize = maxSize
        # Pruning is deferred until the memory exceeds maxSize by 10 %, so the
        # sort in update() does not run on every single insertion.
        self.sizeLimit = (maxSize * 1.1)
        self.episodes = []
        self.gamma = 0.5  # not read by any method of this class
        self.minScore = -math.inf

    def addEpisode(self, replay):
        """Store ``replay`` unless it is empty or scores below ``minScore``.

        Triggers :meth:`update` once the memory grows past ``sizeLimit``.
        """
        if len(replay) == 0:
            # An empty episode has no step to sample and would break take_batch.
            return

        score = sum(step[2] for step in replay)
        if score < self.minScore:
            return

        self.episodes.append((replay, score))
        if self.sizeLimit < len(self.episodes):
            self.update()

    def update(self):
        """Keep the ``maxSize`` best episodes and refresh ``minScore``.

        Does nothing when no episode is left to rank.
        """
        ranked = sorted(self.episodes, key=lambda x: x[1], reverse=True)
        self.episodes = ranked[:self.maxSize]
        if not self.episodes:
            return
        self.minScore = self.episodes[-1][1]
        print('Min score: %.6f' % self.minScore)

    def __len__(self):
        """Return the number of stored episodes."""
        return len(self.episodes)

    @classmethod
    def _positiveWeights(cls, values):
        """Return ``values`` as strictly positive sampling weights.

        Already-positive values are returned unchanged.  Otherwise all values
        are shifted by the same amount so that the smallest one becomes
        ``_MIN_WEIGHT``; ``random.choices`` requires non-negative weights with
        a positive total.
        """
        lowest = min(values)
        if 0 < lowest:
            return list(values)
        shift = cls._MIN_WEIGHT - lowest
        return [value + shift for value in values]

    def take_batch(self, batch_size):
        """Sample ``batch_size`` transitions from the stored episodes.

        Episodes are drawn with a probability that grows with their score;
        inside an episode, steps are drawn proportionally to ``abs(reward)``
        (uniformly when every reward is zero).  Up to five distinct steps are
        taken from each drawn episode.

        Returns a 5-tuple of numpy arrays: states, actions, rewards, next
        states and a 0/1 flag that is 0 for the last step of its episode.

        Raises:
            ValueError: if a non-empty batch is requested from an empty memory.
        """
        if 0 < batch_size and not self.episodes:
            raise ValueError('cannot sample a batch from an empty memory')

        batch = []
        if self.episodes:
            episodeWeights = self._positiveWeights(
                [score for _, score in self.episodes]
            )
        while len(batch) < batch_size:
            episode, _ = random.choices(
                self.episodes,
                weights=episodeWeights,
                k=1
            )[0]

            stepWeights = [abs(step[2]) for step in episode]
            if not any(stepWeights):
                stepWeights = None  # all rewards are zero: sample uniformly

            # choices() samples with replacement; the set drops duplicates, so
            # fewer than k steps may be added in one pass of the loop.
            picked = set(random.choices(
                range(len(episode)),
                weights=stepWeights,
                k=min(5, batch_size - len(batch), len(episode))
            ))

            lastIndex = len(episode) - 1
            for ind in picked:
                state, act, score, nextState = episode[ind]
                nextStateWeight = 1 if ind < lastIndex else 0
                batch.append((state, act, score, nextState, nextStateWeight))

        return tuple(
            np.array([row[column] for row in batch])
            for column in range(5)
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
import tensorflow.keras as keras | ||
import tensorflow.keras.layers as layers | ||
|
||
def convBlock(prev, sz, filters):
    """Apply a convolution -> dropout -> batch-normalisation stack to ``prev``.

    The convolution uses a square ``sz`` x ``sz`` kernel, ``filters`` output
    channels, "same" padding and a ReLU activation; the dropout rate is 0.1.
    Returns the output tensor of the batch-normalisation layer.
    """
    stages = (
        layers.Convolution2D(filters, (sz, sz), padding="same", activation="relu"),
        layers.Dropout(0.1),
        layers.BatchNormalization(),
    )
    out = prev
    for stage in stages:
        out = stage(out)
    return out
|
||
def createModel(shape):
    """Build the (uncompiled) convolutional network for inputs of ``shape``.

    Architecture: three 3x3 convolution blocks with 32 filters, a flatten
    layer, then three dense+dropout pairs at each of the widths 256, 64 and
    16, and finally a linear dense layer with 4 outputs (presumably one value
    per maze action -- confirm against the caller).
    """
    inputs = layers.Input(shape=shape)

    res = inputs
    for _ in range(3):
        res = convBlock(res, 3, filters=32)

    res = layers.Flatten()(res)

    # Fully connected trunk: widths 16**2, 8**2 and 4**2, three layers each,
    # every dense layer followed by a dropout of 0.2.
    for side in (16, 8, 4):
        for _ in range(3):
            res = layers.Dense(side ** 2, activation='relu')(res)
            res = layers.Dropout(.2)(res)

    outputs = layers.Dense(4, activation='linear')(res)
    return keras.Model(inputs=inputs, outputs=outputs)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
# -*- coding: utf-8 -*- | ||
import sys | ||
import os | ||
import tensorflow as tf | ||
from CMazeExperience import CMazeExperience | ||
|
||
# Environment-specific setup, executed at import time.
if 'COLAB_GPU' in os.environ:
    # On Colab: make the directory three levels above this file importable so
    # that the project packages can be resolved.
    from os.path import dirname
    sys.path.append(dirname(dirname(dirname(__file__))))
else:  # local GPU
    gpus = tf.config.experimental.list_physical_devices('GPU')
    # Only restrict memory when a GPU is actually visible; indexing an empty
    # device list would crash the script on CPU-only machines.
    if gpus:
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1 * 1024)]
        )
|
||
import random | ||
import numpy as np | ||
|
||
from keras.optimizers import Adam | ||
|
||
from Core.CMazeEnviroment import CMazeEnviroment, MAZE_ACTIONS | ||
from model import createModel | ||
|
||
def emulate(env, model, exploreRate, exploreDecay, steps, stopOnInvalid=False):
    """Play up to ``steps`` moves in ``env`` and return the recorded replay.

    On every move a random valid action is taken with probability
    ``exploreRate``; otherwise the action with the highest value predicted by
    ``model`` is taken.  ``exploreRate`` is multiplied by ``exploreDecay``
    after each move and never drops below 0.01.

    Args:
        env: maze environment; this function uses ``validActionsIndex()``,
            ``invalidActions()``, ``state2input()``, ``apply()``, ``score``
            and ``done``.
        model: object whose ``predict`` returns one row of action values per
            input state.
        exploreRate: initial probability of picking a random valid action.
        exploreDecay: per-move multiplier applied to ``exploreRate``.
        steps: maximum number of moves to play.
        stopOnInvalid: when true, the model's choice is not masked; choosing
            an action outside the valid set records a step with reward -1 and
            ends the episode.  When false, invalid actions are masked out
            before the best action is chosen.

    Returns:
        A list of ``[state, action, reward, nextState]`` entries.  The reward
        is 1 when the move increased ``env.score`` and -0.1 otherwise.
    """
    episodeReplay = []
    done = False
    N = 0
    while (N < steps) and not done:
        N += 1
        valid = env.validActionsIndex()
        if not valid: break

        state = env.state2input()
        if random.random() < exploreRate:
            act = random.choice(valid)
        else:
            probe = model.predict(np.array([state]))[0]
            if not stopOnInvalid:
                # Mask with -inf rather than a finite constant: predicted
                # values of valid actions can be arbitrarily low, and a finite
                # mask would let argmax select an invalid action.
                # NOTE(review): assumes invalidActions() yields indices into
                # the prediction vector -- confirm against the environment.
                for i in env.invalidActions():
                    probe[i] = -np.inf
            act = np.argmax(probe)

        if stopOnInvalid and act not in valid:
            # The environment is not advanced, so the state read here is the
            # one the invalid action was chosen in.
            episodeReplay.append([state, act, -1, env.state2input()])
            break

        prevScore = env.score
        env.apply(MAZE_ACTIONS[act])
        normedScore = 1 if 0 < (env.score - prevScore) else -.1
        episodeReplay.append([state, act, normedScore, env.state2input()])

        done = env.done
        exploreRate = max((.01, exploreRate * exploreDecay))
    return episodeReplay
|
||
if __name__ == "__main__":
    # Training script: fill the replay memory with exploratory episodes, then
    # alternate between fitting the online model on sampled transitions and
    # evaluating it (the evaluation episodes are fed back into the memory).
    sz = 32
    env = CMazeEnviroment(
        maze=(0.8 < np.random.rand(sz, sz)).astype(np.float32),
        pos=(0, 0),
        FOV=3,
        minimapSize=8
    )
    memory = CMazeExperience(maxSize=100)
    batch_size = 64
    playSteps = 64
    discount = .9  # weight of the next state's value in the training target

    bestModelScore = 0
    model = createModel(shape=env.input_size)
    model.compile(
        # `learning_rate` is the current name of the deprecated `lr` argument.
        optimizer=Adam(learning_rate=1e-3),
        loss='mean_squared_error'
    )
    # To resume from a checkpoint: model.load_weights('model.h5')

    # Frozen copy of the model, re-synchronised at the start of every epoch
    # and used to evaluate next states while the online model is trained.
    targetModel = createModel(shape=env.input_size)
    np.set_printoptions(precision=3)

    # collect data
    while len(memory) < 50:
        env.respawn()
        episodeReplay = emulate(
            env, model,
            exploreRate=0.9,
            exploreDecay=0.9,
            steps=playSteps,
            stopOnInvalid=False
        )
        # single-step episodes are not stored
        if 1 < len(episodeReplay):
            memory.addEpisode(episodeReplay)
            print(len(memory), env.score)
    memory.update()

    train_episodes = 500
    test_episodes = 10
    exploreRate = 1
    exploreDecayPerEpoch = .9
    exploreDecay = .9
    for epoch in range(5000):
        print('Epoch %d' % epoch)
        # train
        targetModel.set_weights(model.get_weights())
        lossSum = 0
        for n in range(train_episodes):
            states, actions, rewards, nextStates, nextReward = memory.take_batch(batch_size)
            # Start from the online model's own estimates for the sampled
            # states so that actions which were not taken produce (almost) no
            # error; only the taken action is moved towards its target
            # reward + discount * max(next-state values).  nextReward is 0 for
            # the last step of an episode, which drops the bootstrap term.
            targets = model.predict(states)
            nextValues = targetModel.predict(nextStates)
            targets[np.arange(len(targets)), actions] = (
                rewards + np.max(nextValues, axis=1) * discount * nextReward
            )

            lossSum += model.fit(
                states, targets,
                epochs=1,
                verbose=0
            ).history['loss'][0]
        print('Avg. train loss: %.4f' % (lossSum / train_episodes))
        print(targets[0])

        # test
        print('Epoch %d testing' % epoch)
        bestScore = scoreSum = movesSum = 0
        n = 0
        while n < test_episodes:
            env.respawn()
            episodeReplay = emulate(
                env, model,
                exploreRate=exploreRate,
                exploreDecay=exploreDecay,
                steps=playSteps*2,
                stopOnInvalid=True
            )
            # only episodes longer than one step are stored and counted
            if 1 < len(episodeReplay):
                memory.addEpisode(episodeReplay)
                n += 1
                bestScore = max((bestScore, env.score))
                scoreSum += env.score
                movesSum += len(episodeReplay)

        print('Best score: %.3f, avg. score: %.3f, avg. moves: %.1f' % (bestScore, scoreSum / n, movesSum / n))
        if bestModelScore < scoreSum:
            bestModelScore = scoreSum
            print('save best model')
            model.save_weights('model.h5')
        model.save_weights('latest.h5')
        exploreRate *= exploreDecayPerEpoch
Oops, something went wrong.