Commit

huge update
GreenWizard2015 committed Dec 31, 2020
1 parent 18e9dde commit 1cedbc4
Showing 29 changed files with 980 additions and 407 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -3,3 +3,5 @@ __pycache__
/.pydevproject
/.project
/.settings
/charts
/chart.jpg
31 changes: 31 additions & 0 deletions Agent/DQNAgent.py
@@ -0,0 +1,31 @@
import numpy as np
import math

class DQNAgent:
  def __init__(self, model, exploreRate=0, noise=None):
    self._model = model
    self._exploreRate = exploreRate
    self._noise = noise
    return

  def reset(self):
    return

  def process(self, state, actionsMask = [1, 1, 1, 1]):
    return self.processBatch([state], [actionsMask])[0]

  def processBatch(self, states, actionsMask):
    actions = self._model.predict(np.array(states))
    if 0 < self._exploreRate:
      rndIndexes = np.where(np.random.random_sample(actions.shape[0]) < self._exploreRate)
      actions[rndIndexes] = np.random.random_sample(actions.shape)[rndIndexes]

    if not (self._noise is None):
      # softmax
      e_x = np.exp(actions - actions.max(axis=-1, keepdims=True))
      normed = e_x / e_x.sum(axis=-1, keepdims=True)
      # add noise
      actions = normed + (np.random.random_sample(actions.shape) * self._noise)

    actions[np.where(~(1 == np.array(actionsMask)))] = -math.inf
    return actions.argmax(axis=-1)
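
A minimal usage sketch for DQNAgent (not part of the commit): the dummy model below only stands in for a trained Keras network whose predict() returns one Q-value per action for each state in the batch, and the observation shape is made up.

  import numpy as np
  from Agent.DQNAgent import DQNAgent

  class _DummyModel:
    # stand-in for a trained network: batch of states -> batch of 4 Q-values
    def predict(self, states):
      return np.random.random_sample((len(states), 4))

  agent = DQNAgent(_DummyModel(), exploreRate=0.1)
  state = np.zeros((11, 11, 3))                             # hypothetical observation shape
  action = agent.process(state, actionsMask=[1, 0, 1, 1])   # index of the best allowed action
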
67 changes: 67 additions & 0 deletions Agent/DQNEnsembleAgent.py
@@ -0,0 +1,67 @@
import numpy as np
import math
import tensorflow.keras as keras
import tensorflow.keras.layers as layers
import tensorflow as tf

def combineModels(models, combiner):
  shape = models[0].layers[0].input_shape[0][1:]
  inputs = layers.Input(shape=shape)
  actionsMask = layers.Input(shape=(4, ))
  res = layers.Lambda(combiner)([actionsMask] + [ x(inputs) for x in models ])
  return keras.Model(inputs=[inputs, actionsMask], outputs=res)

def maskedSoftmax(mask, inputs):
  mask = tf.where(tf.equal(mask, 1))
  return [
    tf.sparse.to_dense(
      tf.sparse.softmax(
        tf.sparse.SparseTensor(
          indices=mask,
          values=tf.gather_nd(x, mask),
          dense_shape=tf.shape(x, out_type=tf.int64)
        )
      )
    ) for x in inputs
  ]

def multiplyOutputs(inputs):
  outputs = maskedSoftmax(inputs[0], inputs[1:])

  res = 1 + outputs[0]
  for x in outputs[1:]:
    res = tf.math.multiply(res, 1 + x)
  return res

ENSEMBLE_MODE = {
  'multiply': multiplyOutputs
}

class DQNEnsembleAgent:
  def __init__(self, models, mode='multiply', exploreRate=0, noise=None):
    self._model = combineModels(models, ENSEMBLE_MODE.get(mode, mode))
    self._exploreRate = exploreRate
    self._noise = noise
    return

  def reset(self):
    return

  def process(self, state, actionsMask = [1, 1, 1, 1]):
    return self.processBatch([state], [actionsMask])[0]

  def processBatch(self, states, actionsMask):
    actions = self._model.predict([np.array(states), np.array(actionsMask)])
    if 0 < self._exploreRate:
      rndIndexes = np.where(np.random.random_sample(actions.shape[0]) < self._exploreRate)
      actions[rndIndexes] = np.random.random_sample(actions.shape)[rndIndexes]

    if not (self._noise is None):
      # softmax
      e_x = np.exp(actions - actions.max(axis=-1, keepdims=True))
      normed = e_x / e_x.sum(axis=-1, keepdims=True)
      # add noise
      actions = normed + (np.random.random_sample(actions.shape) * self._noise)

    actions[np.where(~(1 == np.array(actionsMask)))] = -math.inf
    return actions.argmax(axis=-1)
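
Roughly what the 'multiply' ensemble mode computes, sketched in plain NumPy with made-up per-model action distributions (in the real agent the masked softmax runs inside the Keras graph via maskedSoftmax):

  import numpy as np

  # masked-softmax outputs of two ensemble members (illustrative numbers);
  # the last action is masked, so both members give it zero mass
  model_a = np.array([0.50, 0.10, 0.40, 0.00])
  model_b = np.array([0.45, 0.30, 0.25, 0.00])

  # multiplyOutputs combines members as a product of (1 + p),
  # so actions that several members agree on get the largest value
  combined = (1 + model_a) * (1 + model_b)
  print(combined.argmax())  # -> 0
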
17 changes: 17 additions & 0 deletions Agent/RandomAgent.py
@@ -0,0 +1,17 @@
import numpy as np
import math

class RandomAgent:
  def __init__(self):
    return

  def reset(self):
    pass

  def process(self, state, actionsMask = [1, 1, 1, 1]):
    return self.processBatch([state], [actionsMask])

  def processBatch(self, states, actionsMask):
    actions = np.random.random_sample((np.array(states).shape[0], 4))
    actions[np.where(~(1 == np.array(actionsMask)))] = -math.inf
    return actions.argmax(axis=-1)
File renamed without changes.
59 changes: 0 additions & 59 deletions CMazeExperience.py

This file was deleted.

38 changes: 25 additions & 13 deletions Core/CMazeEnviroment.py → Core/CMazeEnvironment.py
@@ -11,17 +11,18 @@ class MazeActions(Enum):
MAZE_ACTIONS_AS_INT = { x: i for i, x in enumerate(MazeActions) }
MAZE_ACTIONS = [x for x in MazeActions]

class CMazeEnviroment:
class CMazeEnvironment:
  def __init__(self, maze, pos, FOV, minimapSize):
    self.maze = np.pad(np.array(maze), FOV, constant_values=(1,))
    self.minimapSize = minimapSize
    self._fov = FOV
    self._fov = self.FOV = FOV

    x, y = np.array(pos) + FOV
    self.spawnAt(x, y)
    return

  def spawnAt(self, x, y):
    self._steps = 0
    self.pos = np.array([y, x])
    self.fog = np.zeros_like(self.maze)
    self.moves = np.zeros_like(self.maze)
@@ -42,14 +43,19 @@ def _update(self):
    y, x = self.pos
    d = self._fov
    self.fog[x - d:x + d + 1, y - d:y + d + 1] = 1
    self.moves *= .98
    self.moves[x, y] = 1
    return

  def apply(self, action):
    self._steps += 1
    self.pos += action.value
    self.lastAction = MAZE_ACTIONS_AS_INT[action]
    self._update()
    return

  def isMovingToVisited(self, action):
    y, x = self.pos + action.value
    return 1 == self.moves[x, y]

  def vision(self):
    y, x = self.pos
@@ -81,23 +87,21 @@ def minimap(self):
    return ((maze * fog) - (1 - fog), moves)

  @property
  def state(self):
    return ((self.minimap(), ), self.score, self.done)

  @property
  def done(self):
  def dead(self):
    y, x = self.pos
    return 0 < self.maze[x, y]

  @property
  def score(self):
    h, w = self.fog.shape
    total = h * w
    return np.count_nonzero(self.fog) / total
    return np.count_nonzero(self.fog) * self.minScoreDelta

  @property
  def steps(self):
    return self._steps

  def copy(self):
    # dirty copy
    res = CMazeEnviroment(self.maze, self.pos, self._fov, self.minimapSize)
    res = CMazeEnvironment(self.maze, self.pos, self._fov, self.minimapSize)
    res.maze = self.maze.copy()
    res.fog = self.fog.copy()
    res.pos = self.pos.copy()
@@ -114,6 +118,9 @@ def validActionsIndex(self):
  def validActionsIndex(self):
    return [ i for i, act in enumerate(MazeActions) if self.isPossible(act) ]

  def actionsMask(self):
    return [ (1 if self.isPossible(act) else 0) for act in MazeActions ]

  def invalidActions(self):
    return [ i for i, act in enumerate(MazeActions) if not self.isPossible(act) ]

@@ -124,4 +131,9 @@ def state2input(self):

  @property
  def input_size(self):
    return self.state2input().shape
    return self.state2input().shape

  @property
  def minScoreDelta(self):
    h, w = self.fog.shape
    return 1.0 / (h * w)
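
A quick arithmetic check of the reworked score (numbers are assumed, not from the commit): minScoreDelta is the score value of one revealed cell, so count_nonzero(fog) * minScoreDelta equals the old revealed/total ratio, and a score difference divided by minScoreDelta recovers the number of newly revealed cells — which is what MazeRLWrapper calls discovered.

  h, w = 20, 20                                  # padded maze size (illustrative)
  minScoreDelta = 1.0 / (h * w)                  # 0.0025, score value of one cell
  score = 120 * minScoreDelta                    # 0.3, same as 120 / (h * w)
  discovered = (0.305 - score) / minScoreDelta   # 2.0 newly revealed cells
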
97 changes: 97 additions & 0 deletions Core/MazeRLWrapper.py
@@ -0,0 +1,97 @@
from Core.CMazeEnvironment import CMazeEnvironment, MAZE_ACTIONS
import numpy as np
import math

class MazeRLWrapper:
  def __init__(self, params):
    maze = (
      params.get('obstacles rate', 0.8) < np.random.rand(params['size'], params['size'])
    ).astype(np.float32)

    env = CMazeEnvironment(
      maze=maze,
      pos=(0, 0),
      FOV=params['FOV'],
      minimapSize=params['minimapSize']
    )
    env.respawn()
    self._env = env

    self._stepsLimit = params['loop limit']
    self._minUniqSteps = params.get('min unique positions rate', 0.3)
    self._stopIfLoop = params.get('stop if loop', True)
    self._onlyNewCells = params.get('only new cells reward', False)
    return

  def reset(self):
    self._stopInLoop = False
    self._done = False
    self._env.respawn()
    self._moves = []
    return

  def apply(self, actionIndex):
    act = MAZE_ACTIONS[actionIndex]
    prevState = self.state
    prevScore = self.score
    isNewCell = not self._env.isMovingToVisited(act)
    self._env.apply(act)
    nextState = self.state

    self._done = True
    if self._env.dead: # unreachable due to actions masking
      return nextState, -10, True, prevState

    if 0.95 <= self._env.score:
      return nextState, 0, True, prevState

    if self._movingLoop():
      return nextState, -5, True, prevState

    self._done = False
    reward = 0.3 if isNewCell else 0 # small reward for visiting new cell

    if not self._onlyNewCells:
      discovered = (self._env.score - prevScore) / self._env.minScoreDelta
      reward += 1 + math.log(discovered, 10) if 0 < discovered else -1
    return nextState, reward, False, prevState

  def actionsMask(self):
    return self._env.actionsMask()

  @property
  def state(self):
    return self._env.state2input()

  @property
  def done(self):
    return self._done

  @property
  def hitTheLoop(self):
    return self._stopInLoop

  @property
  def score(self):
    return self._env.score

  @property
  def input_size(self):
    return self._env.input_size

  @property
  def uniqueMoves(self):
    if self._stepsLimit <= len(self._moves):
      return len(set(self._moves)) / len(self._moves)
    return 1

  def _movingLoop(self):
    self._moves.append(str(self._env.pos))
    self._moves = self._moves[1:] if self._stepsLimit < len(self._moves) else self._moves
    self._stopInLoop = self._stopIfLoop and (self.uniqueMoves < self._minUniqSteps)
    return self._stopInLoop

  def Continue(self):
    self._done = False
    self._moves = []
    return
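
Putting the new pieces together, a rough episode loop over this commit's classes (parameter values are made up; note that RandomAgent.process returns a length-1 array rather than a scalar, so the sketch calls processBatch directly):

  from Core.MazeRLWrapper import MazeRLWrapper
  from Agent.RandomAgent import RandomAgent

  env = MazeRLWrapper({
    'size': 40, 'FOV': 3, 'minimapSize': 8,   # illustrative values
    'loop limit': 32,
  })
  agent = RandomAgent()

  env.reset()
  agent.reset()
  while not env.done:
    # mask out moves into walls, then take the agent's chosen action
    action = agent.processBatch([env.state], [env.actionsMask()])[0]
    nextState, reward, done, prevState = env.apply(action)
  print('coverage: %.1f%%, looped: %s' % (100 * env.score, env.hitTheLoop))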