diff --git a/CMazeExperience.py b/CMazeExperience.py
new file mode 100644
index 0000000..e491447
--- /dev/null
+++ b/CMazeExperience.py
@@ -0,0 +1,67 @@
+import random
+import numpy as np
+import math
+
+class CMazeExperience:
+  def __init__(self, maxSize):
+    self.maxSize = maxSize
+    self.sizeLimit = (maxSize * 1.1)
+    self.episodes = []
+    self.gamma = 0.5
+    self.minScore = -math.inf
+
+  def addEpisode(self, replay):
+    score = sum(x[2] for x in replay)
+    if score < self.minScore: return
+
+#     for i in range(len(replay)):
+#       state, act, score, nextState = replay[i]
+#       gamma = self.gamma
+#       for j in range(i + 1, len(replay)):
+#         score += gamma * replay[j][2]
+#         gamma *= self.gamma
+    self.episodes.append((replay, score))
+
+    if self.sizeLimit < len(self.episodes):
+      self.update()
+    return
+
+  def update(self):
+    self.episodes = list(
+      sorted(self.episodes, key=lambda x: x[1], reverse=True)
+    )[:self.maxSize]
+    self.minScore = self.episodes[-1][1]
+    print('Min score: %.6f' % self.minScore)
+
+  def __len__(self):
+    return len(self.episodes)
+
+  def take_batch(self, batch_size):
+    batch = []
+    weights = [x[1] for x in self.episodes]
+    while len(batch) < batch_size:
+      episode, _ = random.choices(
+        self.episodes,
+        weights=weights,
+        k=1
+      )[0]
+
+      minibatchIndexes = set(random.choices(
+        np.arange(len(episode)),
+        weights=[abs(x[2]) for x in episode],
+        k=min((5, batch_size - len(batch), len(episode)))
+      ))
+
+      for ind in minibatchIndexes:
+        state, act, score, nextState = episode[ind]
+        nextStateWeight = 1 if ind < len(episode) - 1 else 0
+        batch.append((state, act, score, nextState, nextStateWeight))
+
+
+    return (
+      np.array([x[0] for x in batch]),
+      np.array([x[1] for x in batch]),
+      np.array([x[2] for x in batch]),
+      np.array([x[3] for x in batch]),
+      np.array([x[4] for x in batch]),
+    )
\ No newline at end of file
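
Note on addEpisode above: the commented-out block would replace each step's raw reward with its discounted return before storing the episode. For reference, a standalone sketch of that computation; discountedReturns is a hypothetical helper, with gamma defaulting to the class value of 0.5:

import numpy as np

def discountedReturns(rewards, gamma=0.5):
  # G[i] = r[i] + gamma * r[i + 1] + gamma^2 * r[i + 2] + ...
  returns = np.zeros(len(rewards), dtype=np.float32)
  acc = 0.0
  for i in reversed(range(len(rewards))):
    acc = rewards[i] + gamma * acc
    returns[i] = acc
  return returns

print(discountedReturns([1., -.1, 1.]))  # [1.2  0.4  1. ]
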
diff --git a/Core/CMazeEnviroment.py b/Core/CMazeEnviroment.py
index f271407..e63f575 100644
--- a/Core/CMazeEnviroment.py
+++ b/Core/CMazeEnviroment.py
@@ -7,10 +7,14 @@ class MazeActions(Enum):
   RIGHT = (1, 0)
   UP = (0, -1)
   DOWN = (0, 1)
+
+MAZE_ACTIONS_AS_INT = { x: i for i, x in enumerate(MazeActions) }
+MAZE_ACTIONS = [x for x in MazeActions]
 
 class CMazeEnviroment:
-  def __init__(self, maze, pos, FOV):
+  def __init__(self, maze, pos, FOV, minimapSize):
     self.maze = np.pad(np.array(maze), FOV, constant_values=(1,))
+    self.minimapSize = minimapSize
     self._fov = FOV
 
     x, y = np.array(pos) + FOV
@@ -20,7 +24,8 @@ def __init__(self, maze, pos, FOV):
   def spawnAt(self, x, y):
     self.pos = np.array([y, x])
     self.fog = np.zeros_like(self.maze)
-    self._updateFog()
+    self.moves = np.zeros_like(self.maze)
+    self._update()
     return
 
   def respawn(self):
@@ -33,17 +38,17 @@ def respawn(self):
       break
     return
 
-  def _updateFog(self):
+  def _update(self):
     y, x = self.pos
-    self.fog[
-      x - self._fov:x + self._fov + 1,
-      y - self._fov:y + self._fov + 1
-    ] = 1
+    d = self._fov
+    self.fog[x - d:x + d + 1, y - d:y + d + 1] = 1
+    self.moves[x, y] = 1
     return
 
   def apply(self, action):
     self.pos += action.value
-    self._updateFog()
+    self.lastAction = MAZE_ACTIONS_AS_INT[action]
+    self._update()
     return
 
   def vision(self):
@@ -52,15 +57,38 @@ def vision(self):
       x - self._fov:x + self._fov + 1,
       y - self._fov:y + self._fov + 1
     ]
+
+  def _takeShot(self):
+    maze, fog, moves = self.maze, self.fog, self.moves
+    y, x = self.pos
+    h, w = self.maze.shape
+
+    isXAxisOk = (self.minimapSize < x) and (x < (w - self.minimapSize))
+    isYAxisOk = (self.minimapSize < y) and (y < (h - self.minimapSize))
+    if not (isXAxisOk and isYAxisOk):
+      x += self.minimapSize
+      y += self.minimapSize
+      maze = np.pad(maze, self.minimapSize, constant_values=(1,))
+      fog, moves = (
+        np.pad(data, self.minimapSize, constant_values=(0,)) for data in (fog, moves)
+      )
+
+    d = self.minimapSize
+    return (data[x - d:x + d + 1, y - d:y + d + 1] for data in (maze, fog, moves))
+
+  def minimap(self):
+    #maze, fog, moves = self._takeShot()
+    maze, fog, moves = self.maze, self.fog, self.moves
+    return (maze * fog, moves)
 
   @property
   def state(self):
-    return ((self.vision(), self.fog, ), self.score, self.done)
+    return ((self.minimap(), ), self.score, self.done)
 
   @property
   def done(self):
-    y, x = self._pos
-    return 1 < self.maze[x, y]
+    y, x = self.pos
+    return 0 < self.maze[x, y]
 
   @property
   def score(self):
@@ -70,10 +98,11 @@ def score(self):
 
   def copy(self):
     # dirty copy
-    res = CMazeEnviroment(self.maze, self.pos, self._fov)
+    res = CMazeEnviroment(self.maze, self.pos, self._fov, self.minimapSize)
     res.maze = self.maze.copy()
     res.fog = self.fog.copy()
     res.pos = self.pos.copy()
+    res.moves = self.moves.copy()
     return res
 
   def isPossible(self, action):
@@ -81,4 +110,19 @@ def isPossible(self, action):
     return self.maze[x, y] <= 0
 
   def validActions(self):
-    return [ act for act in MazeActions if self.isPossible(act) ]
\ No newline at end of file
+    return [ act for act in MazeActions if self.isPossible(act) ]
+
+  def validActionsIndex(self):
+    return [ i for i, act in enumerate(MazeActions) if self.isPossible(act) ]
+
+  def invalidActions(self):
+    return [ i for i, act in enumerate(MazeActions) if not self.isPossible(act) ]
+
+  def state2input(self):
+    maze, moves = self.minimap()
+    state = np.dstack((maze, ))
+    return state
+
+  @property
+  def input_size(self):
+    return self.state2input().shape
\ No newline at end of file
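
_takeShot above crops a square window of side 2 * minimapSize + 1 around the agent, padding the maps first whenever the window would cross a border (walls for the maze, zeros for fog and moves). A minimal sketch of the same cropping idea; cropWindow is a hypothetical helper that, unlike the method above, pads unconditionally for brevity:

import numpy as np

def cropWindow(grid, x, y, d, fill=1):
  # pad by d on every side so the window can never leave the array,
  # which shifts the centre cell from (x, y) to (x + d, y + d)
  padded = np.pad(grid, d, constant_values=(fill,))
  return padded[x:x + 2 * d + 1, y:y + 2 * d + 1]

# 5x5 window (d=2) centred on the corner of a 4x4 open maze;
# out-of-bounds cells come back as walls (fill=1)
print(cropWindow(np.zeros((4, 4), dtype=int), 0, 0, 2))
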
diff --git a/model.py b/model.py
new file mode 100644
index 0000000..3c30285
--- /dev/null
+++ b/model.py
@@ -0,0 +1,41 @@
+import tensorflow.keras as keras
+import tensorflow.keras.layers as layers
+
+def convBlock(prev, sz, filters):
+  conv_1 = layers.Convolution2D(filters, (sz, sz), padding="same", activation="relu")(prev)
+  conv_1 = layers.Dropout(0.1)(conv_1)
+  conv_1 = layers.BatchNormalization()(conv_1)
+  return conv_1
+
+def createModel(shape):
+  inputs = res = layers.Input(shape=shape)
+  res = convBlock(res, 3, filters=32)
+  res = convBlock(res, 3, filters=32)
+  res = convBlock(res, 3, filters=32)
+
+  res = layers.Flatten()(res)
+
+  res = layers.Dense(16 ** 2, activation='relu')(res)
+  res = layers.Dropout(.2)(res)
+  res = layers.Dense(16 ** 2, activation='relu')(res)
+  res = layers.Dropout(.2)(res)
+  res = layers.Dense(16 ** 2, activation='relu')(res)
+  res = layers.Dropout(.2)(res)
+  res = layers.Dense(8 ** 2, activation='relu')(res)
+  res = layers.Dropout(.2)(res)
+  res = layers.Dense(8 ** 2, activation='relu')(res)
+  res = layers.Dropout(.2)(res)
+  res = layers.Dense(8 ** 2, activation='relu')(res)
+  res = layers.Dropout(.2)(res)
+  res = layers.Dense(4 ** 2, activation='relu')(res)
+  res = layers.Dropout(.2)(res)
+  res = layers.Dense(4 ** 2, activation='relu')(res)
+  res = layers.Dropout(.2)(res)
+  res = layers.Dense(4 ** 2, activation='relu')(res)
+  res = layers.Dropout(.2)(res)
+
+  res = layers.Dense(4, activation='linear')(res)
+  return keras.Model(
+    inputs=inputs,
+    outputs=res
+  )
\ No newline at end of file
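
The network above is a plain convolutional Q-function: three conv blocks, a stack of shrinking dense layers, and a linear head with one output per maze action. A quick smoke test, assuming the input shape matches what env.input_size reports (e.g. 38x38x1 for a 32x32 maze padded by FOV=3):

import numpy as np
from model import createModel

model = createModel(shape=(38, 38, 1))
q = model.predict(np.zeros((1, 38, 38, 1), dtype=np.float32))
print(q.shape)  # (1, 4): one Q-value per action
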
diff --git a/train.py b/train.py
new file mode 100644
index 0000000..2d44ecf
--- /dev/null
+++ b/train.py
@@ -0,0 +1,146 @@
+# -*- coding: utf-8 -*-
+import sys
+import os
+import tensorflow as tf
+from CMazeExperience import CMazeExperience
+
+if 'COLAB_GPU' in os.environ:
+  # fix module resolution
+  from os.path import dirname
+  sys.path.append(dirname(dirname(dirname(__file__))))
+else: # local GPU
+  gpus = tf.config.experimental.list_physical_devices('GPU')
+  tf.config.experimental.set_virtual_device_configuration(
+    gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1 * 1024)]
+  )
+
+import random
+import numpy as np
+
+from tensorflow.keras.optimizers import Adam
+
+from Core.CMazeEnviroment import CMazeEnviroment, MAZE_ACTIONS
+from model import createModel
+
+def emulate(env, model, exploreRate, exploreDecay, steps, stopOnInvalid=False):
+  episodeReplay = []
+  done = False
+  N = 0
+  while (N < steps) and not done:
+    N += 1
+    act = None
+    valid = env.validActionsIndex()
+    if not valid: break
+
+    state = env.state2input()
+    if random.random() < exploreRate:
+      act = random.choice(valid)
+    else:
+      probe = model.predict(np.array([state]))[0]
+      if not stopOnInvalid:
+        for i in env.invalidActions():
+          probe[i] = -1
+      act = np.argmax(probe)
+
+    if stopOnInvalid and not (act in valid):
+      episodeReplay.append([state, act, -1, env.state2input()])
+      break
+
+    prevScore = env.score
+    env.apply(MAZE_ACTIONS[act])
+    normedScore = 1 if 0 < (env.score - prevScore) else -.1
+    episodeReplay.append([state, act, normedScore, env.state2input()])
+
+    done = env.done
+    exploreRate = max((.01, exploreRate * exploreDecay))
+  return episodeReplay
+
+if __name__ == "__main__":
+  sz = 32
+  env = CMazeEnviroment(
+    maze=(0.8 < np.random.rand(sz, sz)).astype(np.float32),
+    pos=(0, 0),
+    FOV=3,
+    minimapSize=8
+  )
+  memory = CMazeExperience(maxSize=100)
+  done = False
+  batch_size = 64
+  playSteps = 64
+
+  bestModelScore = 0
+  model = createModel(shape=env.input_size)
+  model.compile(
+    optimizer=Adam(lr=1e-3),
+    loss='mean_squared_error'
+  )
+# model.load_weights('model.h5')
+
+  targetModel = createModel(shape=env.input_size)
+  np.set_printoptions(precision=3)
+  # collect data
+  while len(memory) < 50:
+    env.respawn()
+    episodeReplay = emulate(
+      env, model,
+      exploreRate=0.9,
+      exploreDecay=0.9,
+      steps=playSteps,
+      stopOnInvalid=False
+    )
+    #################
+    if 1 < len(episodeReplay):
+      memory.addEpisode(episodeReplay)
+      print(len(memory), env.score)
+  memory.update()
+
+  train_episodes = 500
+  test_episodes = 10
+  exploreRate = 1
+  exploreDecayPerEpoch = .9
+  exploreDecay = .9
+  for epoch in range(5000):
+    print('Epoch %d' % epoch)
+    # train
+    targetModel.set_weights(model.get_weights())
+    lossSum = 0
+    for n in range(train_episodes):
+      states, actions, rewards, nextStates, nextReward = memory.take_batch(batch_size)
+      targets = targetModel.predict(nextStates)
+      targets[np.arange(len(targets)), actions] = rewards + np.max(targets, axis=1) * .9 * nextReward
+
+      lossSum += model.fit(
+        states, targets,
+        epochs=1,
+        verbose=0
+      ).history['loss'][0]
+    print('Avg. train loss: %.4f' % (lossSum / train_episodes))
+    print(targets[0])
+
+    # test
+    print('Epoch %d testing' % epoch)
+    bestScore = scoreSum = movesSum = 0
+    n = 0
+    while n < test_episodes:
+      env.respawn()
+      episodeReplay = emulate(
+        env, model,
+        exploreRate=exploreRate,
+        exploreDecay=exploreDecay,
+        steps=playSteps*2,
+        stopOnInvalid=True
+      )
+      if 1 < len(episodeReplay):
+        memory.addEpisode(episodeReplay)
+        n += 1
+        bestScore = max((bestScore, env.score))
+        scoreSum += env.score
+        movesSum += len(episodeReplay)
+      #################
+    print('Best score: %.3f, avg. score: %.3f, avg. moves: %.1f' % (bestScore, scoreSum / n, movesSum / n))
+    if bestModelScore < scoreSum:
+      bestModelScore = scoreSum
+      print('save best model')
+      model.save_weights('model.h5')
+    model.save_weights('latest.h5')
+    exploreRate *= exploreDecayPerEpoch
\ No newline at end of file
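
The vectorised assignment in the training loop above is the standard one-step Q-learning target: Q(s, a) <- r + 0.9 * max_a' Q_target(s', a') * w, where w (nextReward) is 0 on an episode's final transition, dropping the bootstrap term. As in the loop, the rows predicted for s' also serve as the untouched entries of the target matrix. The same update on toy numbers:

import numpy as np

gamma = .9
rewards    = np.array([1., -.1])
nextReward = np.array([1, 0])  # 0 marks a terminal transition: no bootstrapping
actions    = np.array([1, 2])
targets    = np.array([[.2, 1., -.3, .1],   # Q_target(s', .) per transition
                       [.0, .5,  .4, .2]])

targets[np.arange(len(targets)), actions] = rewards + np.max(targets, axis=1) * gamma * nextReward
print(targets)  # row 0, action 1 -> 1.0 + 0.9 * 1.0 = 1.9; row 1, action 2 -> -0.1
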
diff --git a/view_maze.py b/view_maze.py
index 2ca0dd1..10c6c61 100644
--- a/view_maze.py
+++ b/view_maze.py
@@ -1,10 +1,25 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
+import sys
+import os
+import tensorflow as tf
+
+if 'COLAB_GPU' in os.environ:
+  # fix module resolution
+  from os.path import dirname
+  sys.path.append(dirname(dirname(dirname(__file__))))
+else: # local GPU
+  gpus = tf.config.experimental.list_physical_devices('GPU')
+  tf.config.experimental.set_virtual_device_configuration(
+    gpus[0], [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=1 * 1024)]
+  )
+
 from Core.CMazeEnviroment import CMazeEnviroment, MazeActions
 import numpy as np
 import pygame
 import pygame.locals as G
 import random
+from model import createModel
 
 def createMaze():
   sz = 64
@@ -12,8 +27,9 @@ def createMaze():
   res = CMazeEnviroment(
     maze=maze,
     pos=(0, 0),
-    FOV=3
-  )
+    FOV=3,
+    minimapSize=8
+  )
   res.respawn()
   return res
 
@@ -27,7 +43,8 @@ class Colors:
   PURPLE = (255, 0, 255)
 
 class App:
-  MODES = ['manual', 'random']
+  MODES = ['manual', 'random', 'agent']
+
   def __init__(self):
     self._running = True
     self._display_surf = None
@@ -59,13 +76,17 @@ def on_event(self, event):
         self._mode = self.MODES[(mode + 1) % len(self.MODES)]
         self._paused = True
+
+        if 'agent' == self._mode:
+          self._agent = createModel(shape=self._maze.input_size)
+          self._agent.load_weights('model.h5')
+
       if G.K_SPACE == event.key:
         self._paused = not self._paused
 
       if G.K_ESCAPE == event.key:
         self._running = False
 
-      if 'manual' == self._mode:
+      if 'manual' == self._mode:
         if G.K_r == event.key:
           self._createMaze()
 
@@ -89,25 +110,43 @@ def on_event(self, event):
     return
 
   def on_loop(self):
-    if ('random' == self._mode) and not self._paused:
+    if self._paused: return
+
+    if 'random' == self._mode:
       for _ in range(self._speed):
         actions = self._maze.validActions()
         if actions:
          self._maze.apply(random.choice(actions))
+
+    if 'agent' == self._mode:
+      probe = self._agent.predict(np.array([self._maze.state2input()]))[0]
+      for i in self._maze.invalidActions():
+        probe[i] = -1
+      pred = np.argmax(probe)
+
+      act = list(MazeActions)[pred]
+      if self._maze.isPossible(act):
+        self._maze.apply(act)
     pass
 
   def _renderMaze(self):
     fog = self._maze.fog
     maze = self._maze.maze
+    moves = self._maze.moves
+
     h, w = maze.shape
     dx, dy = delta = np.array([640, 640]) / np.array([w, h])
     for ix in range(w):
       for iy in range(h):
         isDiscovered = 0 < fog[ix, iy]
         isWall = 0 < maze[ix, iy]
+        isWasHere = 0 < moves[ix, iy]
         y, x = delta * np.array([ix, iy])
-        clr = Colors.PURPLE if isWall else Colors.WHITE
+        clr = Colors.WHITE
+        if isWasHere: clr = Colors.GREEN
+        if isWall: clr = Colors.PURPLE
+
         if not isDiscovered:
           clr = np.array(clr) * .3
 
         pygame.draw.rect(self._display_surf, clr, [x, y, dx - 1, dy - 1], 0)
@@ -116,6 +155,33 @@ def _renderMaze(self):
     pygame.draw.rect(self._display_surf, Colors.RED, [x, y, dx - 1, dy - 1], 0)
     return
 
+  def _renderMazeMinimap(self):
+    anchor = np.array((450, 650))
+    maze, moves = self._maze.minimap()
+    h, w = maze.shape
+    dx, dy = delta = 2 * np.array([64, 64]) / np.array([w, h])
+    for ix in range(w):
+      for iy in range(h):
+        isWall = 0 < maze[ix, iy]
+        isWasHere = 0 < moves[ix, iy]
+        isUnknownArea = maze[ix, iy] < 0
+
+        clr = Colors.WHITE
+        if isWasHere: clr = Colors.GREEN
+        if isWall: clr = Colors.PURPLE
+        if isUnknownArea: clr = Colors.BLACK
+
+        y, x = (delta * np.array([ix, iy])) + anchor
+        pygame.draw.rect(self._display_surf, clr, [x, y, dx - 1, dy - 1], 0)
+
+    self._display_surf.blit(
+      self._font.render(
+        'Observed state:',
+        False, Colors.BLUE
+      ), (anchor[1], anchor[0] - 25)
+    )
+    return
+
   def _renderInfo(self):
     self._display_surf.blit(
       self._font.render(
@@ -135,6 +201,7 @@ def _renderInfo(self):
   def on_render(self):
     self._display_surf.fill(Colors.SILVER)
     self._renderMaze()
+#     self._renderMazeMinimap()
     self._renderInfo()
 
     pygame.display.flip()
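
Agent mode in on_loop above acts greedily after masking invalid actions. That selection, isolated into a hypothetical helper; the viewer masks with -1, which only works while Q-values stay above -1, so -inf is the safer general choice:

import numpy as np

def greedyValidAction(qValues, invalidIndexes):
  q = np.array(qValues, dtype=np.float32)
  q[invalidIndexes] = -np.inf  # masked actions can never win the argmax
  return int(np.argmax(q))

print(greedyValidAction([.3, .9, .1, .4], invalidIndexes=[1]))  # 3
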