Commit

remove doom memory
GreenWizard2015 committed Jan 8, 2021
1 parent 9977f7f commit e11e369
Showing 6 changed files with 37 additions and 83 deletions.
4 changes: 2 additions & 2 deletions Core/MazeRLWrapper.py
@@ -42,11 +42,11 @@ def apply(self, actionIndex):
    if self._env.dead: # unreachable due to actions masking
      return nextState, -10, True, prevState

-   if 0.95 <= self._env.score:
+   if 0.99 <= self._env.score:
      return nextState, 0, True, prevState

    if self._movingLoop():
-     return nextState, -5, True, prevState
+     return nextState, 0, True, prevState

    self._done = False
    reward = 0.3 if isNewCell else 0 # small reward for visiting new cell
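Read on its own, the hunk above changes the wrapper's reward scheme: the episode now ends without a bonus once the score reaches 0.99 (previously 0.95), and getting caught in a movement loop ends the episode with reward 0 instead of -5. Below is a minimal standalone sketch of that reward rule; dead, score, inLoop and isNewCell are stand-in names for illustration, not the wrapper's actual API, and the real apply() also returns the next and previous observations.

# Minimal sketch of the reward rule in MazeRLWrapper.apply after this commit.
# The flag/score arguments are illustrative stand-ins for the wrapper's internal state.
def mazeReward(dead, score, inLoop, isNewCell):
  if dead:               # unreachable in practice due to action masking
    return -10, True
  if 0.99 <= score:      # maze explored "enough" -> terminate, no extra reward
    return 0, True
  if inLoop:             # movement loop detected -> terminate; the -5 penalty is gone
    return 0, True
  return (0.3 if isNewCell else 0), False  # small reward for visiting a new cell

# Example: an agent stuck in a loop now ends the episode with reward 0.
print(mazeReward(dead=False, score=0.5, inLoop=True, isNewCell=False))  # (0, True)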
1 change: 0 additions & 1 deletion README.md
@@ -64,7 +64,6 @@

 # Ideas and experiments

-- [ ] Replace the division of memory/episodes into a main set and a set collected after hitting a loop.
 - [ ] Implement distillation of several policies, using additional rewards or other methods.
 - [ ] Compare an agent trained without a teacher against one trained with a teacher. (500 epochs)
 - [ ] Train an agent that receives no information about its own movements (only data about the environment).
55 changes: 17 additions & 38 deletions distillation.py
@@ -106,10 +106,6 @@ def learn_environment(teacher, model, params):
  ]

  memory = CebPrioritized(maxSize=5000, sampleWeight='abs')
- doomMemory = CebLinear(
-   maxSize=params.get('max steps after loop', 16) * 10000,
-   sampleWeight='abs'
- )
  trainableModel, teacherPower = wrapStudentModel(model)
  ######################################################
  def withTeacherPredictions(replay):
@@ -119,24 +115,20 @@ def withTeacherPredictions(replay):

  def testModel(EXPLORE_RATE):
    for e in environments: e.reset()
-   replays = Utils.emulateBatch(
-     environments,
-     DQNAgent(model, exploreRate=EXPLORE_RATE, noise=params.get('agent noise', 0)),
-     maxSteps=params.get('max test steps')
-   )
-   for replay, _ in replays:
-     if params.get('clip replay', False):
-       replay = Utils.clipReplay(replay, loopLimit=LOOP_LIMIT)
-     if BOOTSTRAPPED_STEPS < len(replay):
-       memory.addEpisode(withTeacherPredictions(replay), terminated=True)
+   replays = [replay for replay, _ in Utils.emulateBatch(
+     environments,
+     DQNAgent(model, exploreRate=EXPLORE_RATE, noise=params.get('agent noise', 0)),
+     maxSteps=params.get('max test steps')
+   )
+   ]

-   scores = [x.score for x in environments]
    ################
-   # collect bad experience
-   envs = [e for e in environments if e.hitTheLoop]
-   if envs:
+   # explore if hit the loop
+   envsIndexes = [i for i, e in enumerate(environments) if e.hitTheLoop]
+   if envsIndexes:
+     envs = [environments[i] for i in envsIndexes]
      for e in envs: e.Continue()
-     replays = Utils.emulateBatch(
+     exploreReplays = Utils.emulateBatch(
        envs,
        DQNAgent(
          model,
@@ -145,11 +137,13 @@ def testModel(EXPLORE_RATE):
        ),
        maxSteps=params.get('max steps after loop', 16)
      )
-     for replay, _ in replays:
-       if BOOTSTRAPPED_STEPS < len(replay):
-         doomMemory.addEpisode(withTeacherPredictions(replay), terminated=True)
+     for ind, (replay, _) in zip(envsIndexes, exploreReplays):
+       replays[ind] += replay[1:]
    ################
-   return scores
+   for replay in replays:
+     if BOOTSTRAPPED_STEPS < len(replay):
+       memory.addEpisode(withTeacherPredictions(replay), terminated=True)
+   return [x.score for x in environments]
  ######################################################
  # collect some experience
  for _ in range(2):
@@ -181,19 +175,6 @@ def testModel(EXPLORE_RATE):
      }
    )
    print('Avg. train loss: %.4f' % trainLoss)
-
-   if BATCH_SIZE < len(doomMemory):
-     trainLoss = train(
-       model, trainableModel, doomMemory,
-       {
-         'gamma': GAMMA,
-         'batchSize': BATCH_SIZE,
-         'steps': BOOTSTRAPPED_STEPS,
-         'episodes': params['train doom episodes'](epoch),
-         'alpha': params.get('doom alpha', lambda _: alpha)(epoch)
-       }
-     )
-     print('Avg. train doom loss: %.4f' % trainLoss)
    ##################
    # test
    print('Testing...')
@@ -249,13 +230,11 @@ def testModel(EXPLORE_RATE):
    'warm up epochs': 0,
    'test episodes': 128,
    'train episodes': lambda _: 128,
-   'train doom episodes': lambda _: 32,

    'alpha': lambda _: 1,
    'explore rate': lambda _: 0,

    'agent noise': 0.01,
-   'clip replay': True,

    'explore rate after loop': 0.2,
    'agent noise after loop': 0.1,
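Both distillation.py (above) and learn_environment.py (below) rework testModel in the same way. The following is a minimal standalone sketch of the new flow under stated assumptions: environments, model, params, memory, Utils, DQNAgent and BOOTSTRAPPED_STEPS come from the enclosing learn_environment function, the post-loop exploration settings are assumed to match the config keys shown in the diffs, and distillation.py additionally passes each replay through withTeacherPredictions before storing it.

# Sketch of the reworked testModel: one rollout per environment, extra exploration for
# environments that hit a loop, spliced replays, and a single prioritized memory
# (the separate "doom" memory is removed by this commit).
def testModel(EXPLORE_RATE):
  for e in environments: e.reset()
  replays = [replay for replay, _ in Utils.emulateBatch(
    environments,
    DQNAgent(model, exploreRate=EXPLORE_RATE, noise=params.get('agent noise', 0)),
    maxSteps=params.get('max test steps')
  )]

  # continue only the environments that got stuck in a movement loop
  envsIndexes = [i for i, e in enumerate(environments) if e.hitTheLoop]
  if envsIndexes:
    envs = [environments[i] for i in envsIndexes]
    for e in envs: e.Continue()
    exploreReplays = Utils.emulateBatch(
      envs,
      DQNAgent(
        model,
        exploreRate=params.get('explore rate after loop', 0.2),  # assumed from the config keys
        noise=params.get('agent noise after loop', 0.1)
      ),
      maxSteps=params.get('max steps after loop', 16)
    )
    # splice the continuation onto the original replay, dropping the duplicated boundary state
    for ind, (replay, _) in zip(envsIndexes, exploreReplays):
      replays[ind] += replay[1:]

  # normal and post-loop experience now both go into the single prioritized memory
  for replay in replays:
    if BOOTSTRAPPED_STEPS < len(replay):
      memory.addEpisode(replay, terminated=True)
  return [x.score for x in environments]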
56 changes: 17 additions & 39 deletions learn_environment.py
@@ -5,47 +5,36 @@
 import Utils
 import fit_stage
 import os
-from Utils.ExperienceBuffers.CebLinear import CebLinear

 def learn_environment(model, params):
  NAME = params['name']
  BATCH_SIZE = params['batch size']
  GAMMA = params['gamma']
  BOOTSTRAPPED_STEPS = params['bootstrapped steps']
- LOOP_LIMIT = params['maze']['loop limit']
  metrics = {}

  environments = [
    MazeRLWrapper(params['maze']) for _ in range(params['test episodes'])
  ]

  memory = CebPrioritized(maxSize=5000, sampleWeight='abs')
- doomMemory = CebLinear(
-   maxSize=params.get('max steps after loop', 16) * 1000,
-   sampleWeight='abs'
- )
-
  ######################################################
  def testModel(EXPLORE_RATE):
    for e in environments: e.reset()
-   replays = Utils.emulateBatch(
-     environments,
-     DQNAgent(model, exploreRate=EXPLORE_RATE, noise=params.get('agent noise', 0)),
-     maxSteps=params.get('max test steps')
-   )
-   for replay, _ in replays:
-     if params.get('clip replay', False):
-       replay = Utils.clipReplay(replay, loopLimit=LOOP_LIMIT)
-     if BOOTSTRAPPED_STEPS < len(replay):
-       memory.addEpisode(replay, terminated=True)
+   replays = [replay for replay, _ in Utils.emulateBatch(
+     environments,
+     DQNAgent(model, exploreRate=EXPLORE_RATE, noise=params.get('agent noise', 0)),
+     maxSteps=params.get('max test steps')
+   )
+   ]

-   scores = [x.score for x in environments]
    ################
-   # collect bad experience
-   envs = [e for e in environments if e.hitTheLoop]
-   if envs:
+   # explore if hit the loop
+   envsIndexes = [i for i, e in enumerate(environments) if e.hitTheLoop]
+   if envsIndexes:
+     envs = [environments[i] for i in envsIndexes]
      for e in envs: e.Continue()
-     replays = Utils.emulateBatch(
+     exploreReplays = Utils.emulateBatch(
        envs,
        DQNAgent(
          model,
@@ -54,11 +43,13 @@ def testModel(EXPLORE_RATE):
        ),
        maxSteps=params.get('max steps after loop', 16)
      )
-     for replay, _ in replays:
-       if BOOTSTRAPPED_STEPS < len(replay):
-         doomMemory.addEpisode(replay, terminated=True)
+     for ind, (replay, _) in zip(envsIndexes, exploreReplays):
+       replays[ind] += replay[1:]
    ################
-   return scores
+   for replay in replays:
+     if BOOTSTRAPPED_STEPS < len(replay):
+       memory.addEpisode(replay, terminated=True)
+   return [x.score for x in environments]
  ######################################################
  # collect some experience
  for _ in range(2):
@@ -86,19 +77,6 @@ def testModel(EXPLORE_RATE):
      }
    )
    print('Avg. train loss: %.4f' % trainLoss)
-
-   if BATCH_SIZE < len(doomMemory):
-     trainLoss = fit_stage.train(
-       model, doomMemory,
-       {
-         'gamma': GAMMA,
-         'batchSize': BATCH_SIZE,
-         'steps': BOOTSTRAPPED_STEPS,
-         'episodes': params['train doom episodes'](epoch),
-         'alpha': params.get('doom alpha', lambda _: alpha)(epoch)
-       }
-     )
-     print('Avg. train doom loss: %.4f' % trainLoss)
    ##################
    # test
    print('Testing...')
2 changes: 0 additions & 2 deletions train.py
@@ -52,13 +52,11 @@ def getModel(shape):
  'warm up epochs': 0,
  'test episodes': 128,
  'train episodes': lambda _: 128,
- 'train doom episodes': lambda _: 32,

  'alpha': lambda _: 1,
  'explore rate': lambda _: 0,

  'agent noise': 0.01,
- 'clip replay': True,

  'explore rate after loop': 0.2,
  'agent noise after loop': 0.1
2 changes: 1 addition & 1 deletion view_maze.py
@@ -20,7 +20,7 @@
 from model import createModel

 def createMaze():
-  sz = 16 * 4
+  sz = 64
   maze = (0.8 < np.random.rand(sz, sz)).astype(np.float32)
   res = CMazeEnvironment(
     maze=maze,
