-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
18e9dde
commit 1cedbc4
Showing
29 changed files
with
980 additions
and
407 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,3 +3,5 @@ __pycache__ | |
/.pydevproject | ||
/.project | ||
/.settings | ||
/charts | ||
/chart.jpg |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
import numpy as np | ||
import math | ||
|
||
class DQNAgent:
    """Agent that picks, per state, the best allowed action scored by a model.

    The model only has to expose ``predict(batch)`` returning one row of
    action scores per state. Two optional sources of randomness exist:

    * ``exploreRate`` - probability with which a state's scores are replaced
      by uniformly random ones;
    * ``noise`` - when not ``None``, scores are soft-maxed and uniform noise
      scaled by this factor is added.
    """

    def __init__(self, model, exploreRate=0, noise=None):
        self._model = model
        self._exploreRate = exploreRate
        self._noise = noise

    def reset(self):
        """Nothing to reset: the agent keeps no per-episode state."""
        return

    def process(self, state, actionsMask=None):
        """Return the chosen action index for a single state.

        ``actionsMask`` holds one entry per action, ``1`` meaning "allowed".
        When omitted, all four actions are allowed.
        """
        if actionsMask is None:
            # A fresh list per call avoids sharing a mutable default.
            actionsMask = [1, 1, 1, 1]
        return self.processBatch([state], [actionsMask])[0]

    def processBatch(self, states, actionsMask):
        """Return an array with the chosen action index for every state.

        ``actionsMask`` must have the same shape as the model output; entries
        different from ``1`` mark actions that can never be selected.
        """
        # Work on a private float copy: the scores are modified in place and
        # must be able to hold ``-inf`` even if the model returns integers or
        # a read-only buffer.
        actions = np.array(self._model.predict(np.array(states)), dtype=float)

        if 0 < self._exploreRate:
            # Replace whole rows (states) by random scores.
            randomRows = np.random.random_sample(actions.shape[0]) < self._exploreRate
            actions[randomRows] = np.random.random_sample(actions.shape)[randomRows]

        if self._noise is not None:
            # Numerically stable softmax over the action axis ...
            exps = np.exp(actions - actions.max(axis=-1, keepdims=True))
            normed = exps / exps.sum(axis=-1, keepdims=True)
            # ... followed by additive uniform noise.
            actions = normed + (np.random.random_sample(actions.shape) * self._noise)

        # Forbidden actions must lose every argmax comparison.
        actions[np.asarray(actionsMask) != 1] = -math.inf
        return actions.argmax(axis=-1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
import numpy as np | ||
import math | ||
import tensorflow.keras as keras | ||
import tensorflow.keras.layers as layers | ||
import tensorflow as tf | ||
|
||
def combineModels(models, combiner):
    """Build one Keras model that runs every sub-model and merges the results.

    The combined model takes two inputs - the state and a 4-element actions
    mask - and outputs ``combiner([mask, prediction_1, ..., prediction_n])``
    evaluated inside a ``Lambda`` layer.
    """
    # All sub-models receive the same state input; its shape (without the
    # batch axis) is read from the first layer of the first model.
    stateShape = models[0].layers[0].input_shape[0][1:]
    stateInput = layers.Input(shape=stateShape)
    maskInput = layers.Input(shape=(4, ))

    predictions = [model(stateInput) for model in models]
    combined = layers.Lambda(combiner)([maskInput, *predictions])
    return keras.Model(inputs=[stateInput, maskInput], outputs=combined)
|
||
def maskedSoftmax(mask, inputs):
    """Apply softmax to each tensor in ``inputs`` over the allowed entries only.

    Entries whose mask value equals ``1`` take part in the softmax. All other
    entries are left out of the sparse tensor and therefore come back as the
    ``to_dense`` fill value (zero by default).

    Returns a list with one dense tensor per element of ``inputs``.
    """
    # Coordinates of the allowed entries, shared by every input tensor.
    allowed = tf.where(tf.equal(mask, 1))

    normalized = []
    for scores in inputs:
        sparseScores = tf.sparse.SparseTensor(
            indices=allowed,
            values=tf.gather_nd(scores, allowed),
            dense_shape=tf.shape(scores, out_type=tf.int64)
        )
        normalized.append(tf.sparse.to_dense(tf.sparse.softmax(sparseScores)))
    return normalized
|
||
def multiplyOutputs(inputs):
    """Combine model outputs as the product of ``1 + maskedSoftmax(output)``.

    ``inputs[0]`` is the actions mask, the remaining items are the outputs of
    the individual models.
    """
    first, *others = maskedSoftmax(inputs[0], inputs[1:])

    product = 1 + first
    for probabilities in others:
        product = tf.math.multiply(product, 1 + probabilities)
    return product
|
||
# Maps an ensemble mode name to the function that merges the actions mask and
# the sub-models' outputs into a single tensor.
ENSEMBLE_MODE = {
    'multiply': multiplyOutputs
}
|
||
class DQNEnsembleAgent:
    """Agent that picks the best allowed action from an ensemble of models.

    The sub-models are merged into one model by ``combineModels``. ``mode`` is
    either a key of ``ENSEMBLE_MODE`` or a callable used directly as combiner.

    * ``exploreRate`` - probability with which a state's scores are replaced
      by uniformly random ones;
    * ``noise`` - when not ``None``, scores are soft-maxed and uniform noise
      scaled by this factor is added.
    """

    def __init__(self, models, mode='multiply', exploreRate=0, noise=None):
        combiner = ENSEMBLE_MODE.get(mode, mode)
        if not callable(combiner):
            # Fail early with a readable message instead of deep inside Keras.
            raise ValueError('Unknown ensemble mode: %r' % (mode, ))

        self._model = combineModels(models, combiner)
        self._exploreRate = exploreRate
        self._noise = noise

    def reset(self):
        """Nothing to reset: the agent keeps no per-episode state."""
        return

    def process(self, state, actionsMask=None):
        """Return the chosen action index for a single state.

        ``actionsMask`` holds one entry per action, ``1`` meaning "allowed".
        When omitted, all four actions are allowed.
        """
        if actionsMask is None:
            # A fresh list per call avoids sharing a mutable default.
            actionsMask = [1, 1, 1, 1]
        return self.processBatch([state], [actionsMask])[0]

    def processBatch(self, states, actionsMask):
        """Return an array with the chosen action index for every state.

        The mask is passed to the combined model as its second input and is
        applied once more to the final scores, so entries different from
        ``1`` can never be selected.
        """
        mask = np.array(actionsMask)
        # Work on a private float copy: the scores are modified in place and
        # must be able to hold ``-inf``.
        actions = np.array(self._model.predict([np.array(states), mask]), dtype=float)

        if 0 < self._exploreRate:
            # Replace whole rows (states) by random scores.
            randomRows = np.random.random_sample(actions.shape[0]) < self._exploreRate
            actions[randomRows] = np.random.random_sample(actions.shape)[randomRows]

        if self._noise is not None:
            # Numerically stable softmax over the action axis ...
            exps = np.exp(actions - actions.max(axis=-1, keepdims=True))
            normed = exps / exps.sum(axis=-1, keepdims=True)
            # ... followed by additive uniform noise.
            actions = normed + (np.random.random_sample(actions.shape) * self._noise)

        # Forbidden actions must lose every argmax comparison.
        actions[mask != 1] = -math.inf
        return actions.argmax(axis=-1)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
import numpy as np | ||
import math | ||
|
||
class RandomAgent:
    """Agent that picks a uniformly random action among the allowed ones."""

    def __init__(self):
        return

    def reset(self):
        """Nothing to reset: the agent keeps no state."""
        pass

    def process(self, state, actionsMask=None):
        """Return the chosen action index for a single state.

        ``actionsMask`` holds one entry per action, ``1`` meaning "allowed".
        When omitted, all four actions are allowed.
        """
        if actionsMask is None:
            # A fresh list per call avoids sharing a mutable default.
            actionsMask = [1, 1, 1, 1]
        # Unwrap the one-element batch so a scalar index is returned, exactly
        # like the ``process`` method of the model-based agents.
        return self.processBatch([state], [actionsMask])[0]

    def processBatch(self, states, actionsMask):
        """Return an array with one random allowed action index per state."""
        mask = np.asarray(actionsMask)
        # One random score per (state, action); the number of actions is
        # taken from the mask instead of being hard-coded.
        scores = np.random.random_sample((len(states), mask.shape[-1]))
        # Forbidden actions must lose every argmax comparison.
        scores = np.where(mask == 1, scores, -math.inf)
        return scores.argmax(axis=-1)
File renamed without changes.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
from Core.CMazeEnvironment import CMazeEnvironment, MAZE_ACTIONS | ||
import numpy as np | ||
import math | ||
|
||
class MazeRLWrapper:
    """Reinforcement-learning style wrapper around a random ``CMazeEnvironment``.

    ``params`` keys read by the constructor:

    * ``'size'`` - side of the square maze;
    * ``'obstacles rate'`` (default 0.8) - threshold compared against uniform
      random numbers to fill the maze grid;
    * ``'FOV'``, ``'minimapSize'`` - forwarded to the environment;
    * ``'loop limit'`` - how many recent positions are remembered;
    * ``'min unique positions rate'`` (default 0.3) - minimal share of unique
      positions within that history before the agent is considered looping;
    * ``'stop if loop'`` (default True) - whether looping ends the episode;
    * ``'only new cells reward'`` (default False) - disables the
      discovery-based part of the reward.
    """

    def __init__(self, params):
        size = params['size']
        maze = (
            params.get('obstacles rate', 0.8) < np.random.rand(size, size)
        ).astype(np.float32)

        env = CMazeEnvironment(
            maze=maze,
            pos=(0, 0),
            FOV=params['FOV'],
            minimapSize=params['minimapSize']
        )
        env.respawn()
        self._env = env

        self._stepsLimit = params['loop limit']
        self._minUniqSteps = params.get('min unique positions rate', 0.3)
        self._stopIfLoop = params.get('stop if loop', True)
        self._onlyNewCells = params.get('only new cells reward', False)

        # Episode state. Initialised here as well, so the wrapper is usable
        # even when ``reset()`` has not been called yet.
        self._stopInLoop = False
        self._done = False
        self._moves = []

    def reset(self):
        """Respawn the environment and clear all episode state."""
        self._stopInLoop = False
        self._done = False
        self._env.respawn()
        self._moves = []

    def apply(self, actionIndex):
        """Perform the action and return ``(nextState, reward, done, prevState)``.

        Terminal outcomes: death (-10), score of at least 0.95 (0) and a
        detected movement loop (-5). Otherwise the reward is 0.3 for moving
        to a cell that was not visited before plus, unless only new cells are
        rewarded, a discovery term.
        """
        act = MAZE_ACTIONS[actionIndex]
        prevState = self.state
        prevScore = self.score
        isNewCell = not self._env.isMovingToVisited(act)
        self._env.apply(act)
        nextState = self.state

        self._done = True
        if self._env.dead:  # unreachable due to actions masking
            return nextState, -10, True, prevState

        if 0.95 <= self._env.score:
            return nextState, 0, True, prevState

        if self._movingLoop():
            return nextState, -5, True, prevState

        self._done = False
        reward = 0.3 if isNewCell else 0  # small reward for visiting new cell

        if not self._onlyNewCells:
            # Score gain expressed in units of the environment's minimal
            # score step (presumably the number of newly discovered cells).
            discovered = (self._env.score - prevScore) / self._env.minScoreDelta
            if 0 < discovered:
                # log10 is exact for powers of ten, unlike math.log(x, 10).
                reward += 1 + math.log10(discovered)
            else:
                reward -= 1
        return nextState, reward, False, prevState

    def actionsMask(self):
        """Return the environment's mask of currently allowed actions."""
        return self._env.actionsMask()

    @property
    def state(self):
        """Current environment state converted to model input."""
        return self._env.state2input()

    @property
    def done(self):
        """Whether the last applied action ended the episode."""
        return self._done

    @property
    def hitTheLoop(self):
        """Whether the last loop check reported a movement loop."""
        return self._stopInLoop

    @property
    def score(self):
        """Current score of the environment."""
        return self._env.score

    @property
    def input_size(self):
        """Input size reported by the environment."""
        return self._env.input_size

    @property
    def uniqueMoves(self):
        """Share of unique positions in the history, or 1 while it is not full."""
        movesCount = len(self._moves)
        # The empty check prevents a division by zero for a non-positive limit.
        if movesCount == 0 or movesCount < self._stepsLimit:
            return 1
        return len(set(self._moves)) / movesCount

    def _movingLoop(self):
        """Record the current position and update the loop flag.

        The history keeps at most ``'loop limit'`` positions.
        """
        self._moves.append(str(self._env.pos))
        if self._stepsLimit < len(self._moves):
            self._moves = self._moves[1:]
        self._stopInLoop = bool(self._stopIfLoop) and (self.uniqueMoves < self._minUniqSteps)
        return self._stopInLoop

    def Continue(self):
        """Resume a finished episode: clear the done flag and the history."""
        self._done = False
        self._moves = []
Oops, something went wrong.