Skip to content

Commit

Permalink
fix bugs and make it work :) fat model, but it's avg. okay
Browse files Browse the repository at this point in the history
  • Loading branch information
GreenWizard2015 committed Dec 19, 2020
1 parent 3a2d4c4 commit 18e9dde
Show file tree
Hide file tree
Showing 7 changed files with 87 additions and 77 deletions.
8 changes: 0 additions & 8 deletions CMazeExperience.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,6 @@ def __init__(self, maxSize):
def addEpisode(self, replay):
score = sum(x[2] for x in replay)
if score < self.minScore: return

# for i in range(len(replay)):
# state, act, score, nextState = replay[i]
# gamma = self.gamma
# for j in range(i + 1, len(replay)):
# score += gamma * replay[j][2]
# gamma *= self.gamma
self.episodes.append((replay, score))

if self.sizeLimit < len(self.episodes):
Expand Down Expand Up @@ -57,7 +50,6 @@ def take_batch(self, batch_size):
nextStateWeight = 1 if ind < len(episode) - 1 else 0
batch.append((state, act, score, nextState, nextStateWeight))


return (
np.array([x[0] for x in batch]),
np.array([x[1] for x in batch]),
Expand Down
7 changes: 3 additions & 4 deletions Core/CMazeEnviroment.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,8 @@ def _takeShot(self):
return (data[x - d:x + d + 1, y - d:y + d + 1] for data in (maze, fog, moves))

def minimap(self):
# Returns a 2-tuple: the maze combined with the fog array, and the moves array.
# NOTE(review): this span is a diff rendering whose +/- markers were lost. Going by
# the hunk header (-77,9 +77,8), the next three lines appear to be the REMOVED
# version (whole maze/fog/moves arrays) and the last two lines the ADDED version
# (arrays obtained from _takeShot()) -- confirm against the repository.
#maze, fog, moves = self._takeShot()
maze, fog, moves = self.maze, self.fog, self.moves
return (maze * fog, moves)
# Presumably the added version: unpack the three arrays yielded by _takeShot().
maze, fog, moves = self._takeShot()
# maze * fog zeroes cells where fog == 0; subtracting (1 - fog) then makes those
# cells -1, assuming fog is a 0/1 mask -- TODO confirm the value range of fog.
return ((maze * fog) - (1 - fog), moves)

@property
def state(self):
Expand Down Expand Up @@ -120,7 +119,7 @@ def invalidActions(self):

def state2input(self):
# Builds the array returned as `state` by depth-stacking (np.dstack) the arrays
# returned by minimap().
maze, moves = self.minimap()
# NOTE(review): diff rendering without +/- markers. Going by the hunk header
# (-120,7 +119,7), the first assignment below appears to be the REMOVED line
# (maze only) and the second the ADDED line (maze and moves) -- confirm.
# Read literally, the second assignment overwrites the first.
state = np.dstack((maze, ))
state = np.dstack((maze, moves))
return state

@property
Expand Down
42 changes: 18 additions & 24 deletions model.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import tensorflow.keras as keras
import tensorflow.keras.layers as layers
import tensorflow as tf

def convBlock(prev, sz, filters):
conv_1 = layers.Convolution2D(filters, (sz, sz), padding="same", activation="relu")(prev)
Expand All @@ -14,28 +15,21 @@ def createModel(shape):
res = convBlock(res, 3, filters=32)

res = layers.Flatten()(res)

res = layers.Dense(16 ** 2, activation='relu')(res)
res = layers.Dropout(.2)(res)
res = layers.Dense(16 ** 2, activation='relu')(res)
res = layers.Dropout(.2)(res)
res = layers.Dense(16 ** 2, activation='relu')(res)
res = layers.Dropout(.2)(res)
res = layers.Dense(8 ** 2, activation='relu')(res)
res = layers.Dropout(.2)(res)
res = layers.Dense(8 ** 2, activation='relu')(res)
res = layers.Dropout(.2)(res)
res = layers.Dense(8 ** 2, activation='relu')(res)
res = layers.Dropout(.2)(res)
res = layers.Dense(4 ** 2, activation='relu')(res)
res = layers.Dropout(.2)(res)
res = layers.Dense(4 ** 2, activation='relu')(res)
res = layers.Dropout(.2)(res)
res = layers.Dense(4 ** 2, activation='relu')(res)
res = layers.Dropout(.2)(res)

res = layers.Dense(4, activation='linear')(res)
return keras.Model(
inputs=inputs,
outputs=res
)
# dueling dqn
valueBranch = layers.Dense(32, activation='relu')(res)
valueBranch = layers.Dense(32, activation='relu')(valueBranch)
valueBranch = layers.Dense(32, activation='relu')(valueBranch)
valueBranch = layers.Dense(1, activation='linear')(valueBranch)

actionsBranch = layers.Dense(128, activation='relu')(res)
actionsBranch = layers.Dense(64, activation='relu')(actionsBranch)
actionsBranch = layers.Dense(64, activation='relu')(actionsBranch)
actionsBranch = layers.Dense(64, activation='relu')(actionsBranch)
actionsBranch = layers.Dense(4, activation='linear')(actionsBranch)

res = layers.Lambda(
lambda x: x[1] + (x[0] - tf.reduce_mean(x[0], axis=-1, keepdims=True))
)([actionsBranch, valueBranch])

return keras.Model(inputs=inputs, outputs=res)
49 changes: 24 additions & 25 deletions train.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,83 +39,82 @@ def emulate(env, model, exploreRate, exploreDecay, steps, stopOnInvalid=False):
probe = model.predict(np.array([state]))[0]
if not stopOnInvalid:
for i in env.invalidActions():
probe[i] = -1
probe[i] = -float('inf')
act = np.argmax(probe)

if stopOnInvalid and not (act in valid):
episodeReplay.append([state, act, -1, env.state2input()])
episodeReplay.append([state, act, -10, env.state2input()])
break

prevScore = env.score
env.apply(MAZE_ACTIONS[act])
normedScore = 1 if 0 < (env.score - prevScore) else -.1
normedScore = 1 if 0 < (env.score - prevScore) else -0.1
episodeReplay.append([state, act, normedScore, env.state2input()])

done = env.done
exploreRate = max((.01, exploreRate * exploreDecay))
exploreRate = max((.001, exploreRate * exploreDecay))
return episodeReplay

if __name__ == "__main__":
sz = 32
sz = 64
env = CMazeEnviroment(
maze=(0.8 < np.random.rand(sz, sz)).astype(np.float32),
pos=(0, 0),
FOV=3,
minimapSize=8
)
memory = CMazeExperience(maxSize=100)
memory = CMazeExperience(maxSize=1000)
done = False
batch_size = 64
playSteps = 64
batch_size = 256
playSteps = 96

bestModelScore = 0
bestModelScore = -float('inf')
model = createModel(shape=env.input_size)
model.compile(
optimizer=Adam(lr=1e-3),
loss='mean_squared_error'
)
# model.load_weights('model.h5')
#model.load_weights('weights/best.h5')

targetModel = createModel(shape=env.input_size)
np.set_printoptions(precision=3)
# collect data
while len(memory) < 50:
while len(memory) < 100:
env.respawn()
episodeReplay = emulate(
env, model,
exploreRate=0.9,
exploreDecay=0.9,
exploreRate=1,
exploreDecay=1,
steps=playSteps,
stopOnInvalid=False
)
#################
if 1 < len(episodeReplay):
memory.addEpisode(episodeReplay)
print(len(memory), env.score)
memory.update()

train_episodes = 500
test_episodes = 10
exploreRate = 1
exploreDecayPerEpoch = .9
exploreDecay = .9
train_episodes = 100
test_episodes = 20
exploreRate = .5
exploreDecayPerEpoch = .95
exploreDecay = .95
for epoch in range(5000):
print('Epoch %d' % epoch)
# train
targetModel.set_weights(model.get_weights())
lossSum = 0
for n in range(train_episodes):
states, actions, rewards, nextStates, nextReward = memory.take_batch(batch_size)
targets = targetModel.predict(nextStates)
targets[np.arange(len(targets)), actions] = rewards + np.max(targets, axis=1) * .9 * nextReward
nextScores = targetModel.predict(nextStates)
targets = targetModel.predict(states)
targets[np.arange(len(targets)), actions] = rewards + np.max(nextScores, axis=1) * .95 * nextReward

lossSum += model.fit(
states, targets,
epochs=1,
verbose=0
).history['loss'][0]

print('Avg. train loss: %.4f' % (lossSum / train_episodes))
print(targets[0])

# test
print('Epoch %d testing' % epoch)
Expand All @@ -141,6 +140,6 @@ def emulate(env, model, exploreRate, exploreDecay, steps, stopOnInvalid=False):
if bestModelScore < scoreSum:
bestModelScore = scoreSum
print('save best model')
model.save_weights('model.h5')
model.save_weights('latest.h5')
model.save_weights('weights/best.h5')
model.save_weights('weights/latest.h5')
exploreRate *= exploreDecayPerEpoch
58 changes: 42 additions & 16 deletions view_maze.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,13 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import os
import tensorflow as tf
import os

if 'COLAB_GPU' in os.environ:
# fix resolve modules
from os.path import dirname
sys.path.append(dirname(dirname(dirname(__file__))))
else: # local GPU
gpus = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_virtual_device_configuration(
gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1 * 1024)]
)
# limit GPU usage
gpus = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_virtual_device_configuration(
gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1 * 1024)]
)

from Core.CMazeEnviroment import CMazeEnviroment, MazeActions
import numpy as np
Expand Down Expand Up @@ -44,6 +39,7 @@ class Colors:

class App:
MODES = ['manual', 'random', 'agent']
NETWORKS = ['best', 'latest']

def __init__(self):
self._running = True
Expand All @@ -52,6 +48,8 @@ def __init__(self):
self._mode = 'manual'
self._paused = True
self._speed = 20
self._usedNetwork = self.NETWORKS[0]
return

def _createMaze(self):
self._maze = createMaze()
Expand All @@ -65,7 +63,18 @@ def on_init(self):
pygame.display.set_caption('Deep maze')
self._font = pygame.font.Font(pygame.font.get_default_font(), 16)
self._running = True


def _createNewAgent(self):
    """Create a fresh agent model and load the weights of the selected network.

    The weights are read from ``weights/<network>.h5``. When that file does
    not exist for the currently selected network, the selection is reset to
    the first entry of ``NETWORKS`` and its file is used instead. After the
    weights are loaded the app is put into the paused state.
    """
    pathTemplate = 'weights/%s.h5'
    weightsPath = pathTemplate % self._usedNetwork
    if not os.path.exists(weightsPath):
        # No saved weights for the selected network: fall back to the default one.
        self._usedNetwork = self.NETWORKS[0]
        weightsPath = pathTemplate % self._usedNetwork

    # Store the model before loading, so the attribute is set even if loading fails.
    agent = createModel(shape=self._maze.input_size)
    self._agent = agent
    agent.load_weights(weightsPath)
    self._paused = True

def on_event(self, event):
if event.type == G.QUIT:
self._running = False
Expand All @@ -77,12 +86,21 @@ def on_event(self, event):
self._paused = True

if 'agent' == self._mode:
self._agent = createModel(shape=self._maze.input_size)
self._agent.load_weights('model.h5')
self._createNewAgent()

if G.K_SPACE == event.key:
self._paused = not self._paused

if 'agent' == self._mode:
if G.K_r == event.key:
self._createMaze()
if G.K_n == event.key:
self._createNewAgent()
if G.K_t == event.key:
network = next((i for i, x in enumerate(self.NETWORKS) if x == self._usedNetwork))
self._usedNetwork = self.NETWORKS[(network + 1) % len(self.NETWORKS)]
self._createNewAgent()

if G.K_ESCAPE == event.key:
self._running = False

Expand Down Expand Up @@ -121,7 +139,7 @@ def on_loop(self):
if 'agent' == self._mode:
probe = self._agent.predict(np.array([self._maze.state2input()]))[0]
for i in self._maze.invalidActions():
probe[i] = -1
probe[i] = -float('inf')
pred = np.argmax(probe)

act = list(MazeActions)[pred]
Expand Down Expand Up @@ -196,12 +214,20 @@ def _renderInfo(self):
False, Colors.BLUE
), (655, 35)
)

if 'agent' == self._mode:
self._display_surf.blit(
self._font.render(
'Network: %s' % (self._usedNetwork),
False, Colors.BLUE
), (655, 55)
)
return

def on_render(self):
self._display_surf.fill(Colors.SILVER)
self._renderMaze()
# self._renderMazeMinimap()
self._renderMazeMinimap()
self._renderInfo()
pygame.display.flip()

Expand Down
Binary file added weights/best.h5
Binary file not shown.
Binary file added weights/latest.h5
Binary file not shown.

0 comments on commit 18e9dde

Please sign in to comment.