
Commit

Added params
maurock committed Mar 15, 2020
1 parent 3f9d065 commit ee82e90
Showing 5 changed files with 199 additions and 49 deletions.
54 changes: 27 additions & 27 deletions DQN.py
@@ -5,25 +5,41 @@
import numpy as np
import pandas as pd
from operator import add

import collections

class DQNAgent(object):

def __init__(self):
def __init__(self, params):
self.reward = 0
self.gamma = 0.9
self.dataframe = pd.DataFrame()
self.short_memory = np.array([])
self.agent_target = 1
self.agent_predict = 0
self.learning_rate = 0.0005
self.model = self.network()
self.epsilon = 0
self.learning_rate = params['learning_rate']
self.epsilon = 1
self.actual = []
self.memory = []
self.first_layer = params['first_layer_size']
self.second_layer = params['second_layer_size']
self.third_layer = params['third_layer_size']
self.memory = collections.deque(maxlen=params['memory_size'])
self.weights = params['weights_path']
self.load_weights = params['load_weights']
self.model = self.network()

def get_state(self, game, player, food):
def network(self):
model = Sequential()
model.add(Dense(output_dim=self.first_layer, activation='relu', input_dim=11))
model.add(Dense(output_dim=self.second_layer, activation='relu'))
model.add(Dense(output_dim=self.third_layer, activation='relu'))
model.add(Dense(output_dim=3, activation='softmax'))
opt = Adam(self.learning_rate)
model.compile(loss='mse', optimizer=opt)

if self.load_weights:
model.load_weights(self.weights)
return model

def get_state(self, game, player, food):
state = [
(player.x_change == 20 and player.y_change == 0 and ((list(map(add, player.position[-1], [20, 0])) in player.position) or
player.position[-1][0] + 20 >= (game.game_width - 20))) or (player.x_change == -20 and player.y_change == 0 and ((list(map(add, player.position[-1], [-20, 0])) in player.position) or
@@ -73,28 +89,12 @@ def set_reward(self, player, crash):
self.reward = 10
return self.reward

def network(self, weights=None):
model = Sequential()
model.add(Dense(output_dim=120, activation='relu', input_dim=11))
model.add(Dropout(0.15))
model.add(Dense(output_dim=120, activation='relu'))
model.add(Dropout(0.15))
model.add(Dense(output_dim=120, activation='relu'))
model.add(Dropout(0.15))
model.add(Dense(output_dim=3, activation='softmax'))
opt = Adam(self.learning_rate)
model.compile(loss='mse', optimizer=opt)

if weights:
model.load_weights(weights)
return model

def remember(self, state, action, reward, next_state, done):
self.memory.append((state, action, reward, next_state, done))

def replay_new(self, memory):
if len(memory) > 1000:
minibatch = random.sample(memory, 1000)
def replay_new(self, memory, batch_size):
if len(memory) > batch_size:
minibatch = random.sample(memory, batch_size)
else:
minibatch = memory
for state, action, reward, next_state, done in minibatch:
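The loop body is cut off by the diff view here. For orientation, a minimal sketch of the standard DQN update that such a replay loop typically performs (assuming the Keras model and gamma set in __init__ above; illustrative, not necessarily the exact committed code):

    # Illustrative sketch of one DQN update per sampled transition.
    # Assumes self.model and self.gamma as defined in __init__ above.
    for state, action, reward, next_state, done in minibatch:
        target = reward
        if not done:
            # Bellman target: reward plus discounted best predicted future value
            target = reward + self.gamma * np.amax(
                self.model.predict(np.array(next_state).reshape((1, 11)))[0])
        target_f = self.model.predict(np.array(state).reshape((1, 11)))
        target_f[0][np.argmax(action)] = target
        self.model.fit(np.array(state).reshape((1, 11)), target_f, epochs=1, verbose=0)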
127 changes: 127 additions & 0 deletions bayesOpt.py
@@ -0,0 +1,127 @@
# -*- coding: utf-8 -*-
"""
Created on Sun Mar 15 21:10:29 2020
@author: mauro
"""
from snakeClass import run
from utils import create_log
from GPyOpt.methods import BayesianOptimization
import datetime

##################
# Set parameters #
##################
params = dict()

params['epsilon_decay_linear'] = 1/5000
params['learning_rate'] = 0.00001
params['first_layer'] = 1000
params['second_layer'] = 200
params['third_layer'] = 400
params['episodes'] = 150
params['memory_size'] = 2500
params['batch_size'] = 500
params['load_weights'] = False
params['bayesian_optimization'] = True

# Folders
params['reference_path'] = 'images\\reference\\reference_scene3_128x128_5120spp.png'
params['path_SSIM_total'] = 'logs\\SSIM_total_' + str(datetime.datetime.now().strftime("%Y%m%d%H%M%S")) +'.txt'


class BayesianOptimizer():
def __init__(self, params):
self.params = params

def optimize_raytracer(self):
def optimize(inputs):
print("INPUT", inputs)
inputs = inputs[0]

# Variables to optimize
self.params["learning_rate"] = inputs[0]
lr_string = '{:.8f}'.format(self.params["learning_rate"])[2:]
self.params["dense_layer"] = int(inputs[1])
self.params["state_layer"] = int(inputs[2])
self.params["advantage_layer"] = int(inputs[3])
self.params["epsilon_decay_linear"] = int(inputs[4])

params['name_scenario'] =
params['weights'] = 'weights/'

params['img_title'] = 'DDQN_scene{}_lr{}_struct{}_{}_{}_eps{}'.format(params['scene'],
lr_string,
params['dense_layer'],
params['state_layer'],
params['advantage_layer'],
params['epsilon_decay_linear'])
params['weights_path'] = 'weights_scene{}_'.format(params['scene']) + params['img_title'] + '.h5'
params['training'] = True
print(self.params)
ssim = main(self.params)
self.counter += 1
return ssim

self.counter = 0
optim_params = [
{"name": "learning_rate", "type": "continuous", "domain": (0.000001, 0.00005)},
{"name": "dense_layer", "type": "discrete", "domain": (100,200,300,400,500,600,700,800,900,1000)},
{"name": "state_layer", "type": "discrete", "domain": (100, 200, 300, 400, 500,600,700)},
{"name": "advantage_layer", "type": "discrete", "domain": (100, 200, 300, 400, 500,600,700)},
{"name":'epsilon_decay_linear', "type": "discrete", "domain": (2000,3000,4000,5000,6000,7000,8000,9000,10000)}
]

bayes_optimizer = BayesianOptimization(f=optimize,
domain=optim_params,
initial_design_numdata=6,
acquisition_type="EI",
exact_feval=True,
maximize=True)

bayes_optimizer.run_optimization(max_iter=19)
print('Optimized learning rate: ', bayes_optimizer.x_opt[0])
print('Optimized dense layer: ', bayes_optimizer.x_opt[1])
print('Optimized state layer: ', bayes_optimizer.x_opt[2])
print('Optimized advantage layer: ', bayes_optimizer.x_opt[3])
print('Optimized epsilon linear decay: ', bayes_optimizer.x_opt[4])

with open(params['path_SSIM_total'], 'a') as file:
file.write("Best parameters: \n")
file.write('Optimized learning rate: ' + str(bayes_optimizer.x_opt[0]) + "\n")
file.write('Optimized dense layer: ' + str(bayes_optimizer.x_opt[1]) + "\n")
file.write('Optimized state layer: ' + str(bayes_optimizer.x_opt[2]) + "\n")
file.write('Optimized advantage layer: ' + str(bayes_optimizer.x_opt[3]) + "\n")
file.write('Optimized epsilon linear decay: ' + str(bayes_optimizer.x_opt[4]))
return self.params
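The objective above calls main() and logs SSIM, and it uses scene/dense_layer/state_layer/advantage_layer keys that are not defined in this repository's parameters (they appear to come from a different project). A minimal sketch of the same GPyOpt pattern wrapped around this repository's run() and parameter names (a hypothetical adaptation; it assumes run() is changed to return the cumulative score over all episodes):

    # Hypothetical objective for the snake agent (sketch, not the committed code).
    # Assumes run() is modified to return the cumulative score over all episodes,
    # and that params also carries the weights_path/batch_size keys run() expects.
    def optimize_snake(inputs):
        inputs = inputs[0]
        params['learning_rate'] = float(inputs[0])
        params['first_layer_size'] = int(inputs[1])
        params['second_layer_size'] = int(inputs[2])
        params['third_layer_size'] = int(inputs[3])
        params['epsilon_decay_linear'] = 1 / float(inputs[4])
        total_score = run(params['episodes'], params['load_weights'],
                          False, 0, params)  # no display, full speed
        return total_score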





##################
# Main #
##################
if __name__ == '__main__':

# Traditional training and testing
if params['bayesian_optimization'] == False:
# Set automatic parameters
lr_string = '{:.8f}'.format(params["learning_rate"])[2:]
params['img_title'] = 'DDQN_scene{}_lr{}_struct{}_{}_{}_eps{}_NotOpt'.format(params['scene'],lr_string,
params['dense_layer'],params['state_layer'],params['advantage_layer'],params['epsilon_decay_linear'])
params['weight'] = 'weights_scene{}_'.format(params['scene']) + params['img_title'] + '.h5'

# Custom weight
#params['weight'] = 'weights_scene3_DDQN_scene3_lr00001000_struct1000_200_400_eps5000_NotOpt.h5'

main(params)

# Bayesian Optimization
else:
bayesOpt = BayesianOptimizer(params)
bayesOpt.optimize_raytracer()




67 changes: 45 additions & 22 deletions snakeClass.py
@@ -8,11 +8,27 @@
from random import randint
from keras.utils import to_categorical

WEIGHTS_FILENAME = 'weights.hdf5'
WEIGHTS_FILENAME = 'weights3.hdf5'

#################################
# Define parameters manually #
#################################
def define_parameters():
params = dict()
params['epsilon_decay_linear'] = 1/75
params['learning_rate'] = 0.00001
params['first_layer_size'] = 1000 # neurons in the first layer
params['second_layer_size'] = 200 # neurons in the second layer
params['third_layer_size'] = 400 # neurons in the third layer
params['episodes'] = 150
params['memory_size'] = 2500
params['batch_size'] = 500
params['weights_path'] = 'weights/weights2.hdf5'
params['load_weights'] = False
return params
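With epsilon_decay_linear = 1/75 and the linear schedule applied in run() below (epsilon = 1 - games * decay), exploration fades out and reaches zero after 75 games. A quick check of the schedule (run() does not clamp the value; a negative epsilon simply disables random moves):

    # Quick check of the linear epsilon schedule used in run() below.
    decay = 1 / 75
    for games in (0, 25, 50, 75):
        print(games, round(1 - games * decay, 3))  # 1.0, 0.667, 0.333, 0.0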


class Game:

def __init__(self, game_width, game_height):
pygame.display.set_caption('SnakeGen')
self.game_width = game_width
@@ -26,7 +42,6 @@ def __init__(self, game_width, game_height):


class Player(object):

def __init__(self, game):
x = 0.45 * game.game_width
y = 0.5 * game.game_height
@@ -93,7 +108,6 @@ def display_player(self, x, y, food, game):


class Food(object):

def __init__(self):
self.x_food = 240
self.y_food = 200
@@ -153,14 +167,14 @@ def update_screen():
pygame.display.update()


def initialize_game(player, game, food, agent):
def initialize_game(player, game, food, agent, batch_size):
state_init1 = agent.get_state(game, player, food) # [0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0]
action = [1, 0, 0]
player.do_move(action, player.x, player.y, game, food, agent)
state_init2 = agent.get_state(game, player, food)
reward1 = agent.set_reward(player, game.crash)
agent.remember(state_init1, action, reward1, state_init2, game.crash)
agent.replay_new(agent.memory)
agent.replay_new(agent.memory, batch_size)


def plot_seaborn(array_counter, array_score):
Expand All @@ -176,12 +190,17 @@ def plot_seaborn(array_counter, array_score):
plt.show()


def run(episodes, load_weights, display_option, speed):
def test(params):
agent_test = DQNAgent(params)


def run(episodes, load_weights, display_option, speed, params):
pygame.init()
agent = DQNAgent()
weights_filepath = os.path.join(os.getcwd(), WEIGHTS_FILENAME)
if load_weights and os.path.isfile(weights_filepath):
agent.model.load_weights(weights_filepath)
agent = DQNAgent(params)
weights_filepath = os.path.join(os.getcwd(), params['weights_path'])
#if load_weights and os.path.isfile(weights_filepath):
# agent.model.load_weights(weights_filepath)
# print("weights loaded")

counter_games = 0
score_plot = []
@@ -198,19 +217,19 @@ def run(episodes, load_weights, display_option, speed):
food1 = game.food

# Perform first move
initialize_game(player1, game, food1, agent)
initialize_game(player1, game, food1, agent, params['batch_size'])
if display_option:
display(player1, food1, game, record)

while not game.crash:
# agent.epsilon is set to give randomness to actions
agent.epsilon = 80 - counter_games
agent.epsilon = 1 - (counter_games * params['epsilon_decay_linear'])

# get old state
state_old = agent.get_state(game, player1, food1)

# perform random actions based on agent.epsilon, or choose the action
if randint(0, 200) < agent.epsilon:
if randint(0, 1) < agent.epsilon:
final_move = to_categorical(randint(0, 2), num_classes=3)
else:
# predict action based on the old state
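The prediction branch is truncated by the diff at this point. A minimal sketch of the full epsilon-greedy choice under the new fractional epsilon (illustrative; it assumes a uniform draw in [0, 1) for the comparison and that state_old is an 11-element NumPy array, whereas the committed line above draws randint(0, 1)):

    # Sketch of the full epsilon-greedy step (illustrative, not the committed code).
    import random
    import numpy as np
    from keras.utils import to_categorical

    if random.uniform(0, 1) < agent.epsilon:
        final_move = to_categorical(random.randint(0, 2), num_classes=3)      # explore
    else:
        prediction = agent.model.predict(state_old.reshape((1, 11)))          # exploit
        final_move = to_categorical(np.argmax(prediction[0]), num_classes=3)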
Expand All @@ -221,7 +240,7 @@ def run(episodes, load_weights, display_option, speed):
player1.do_move(final_move, player1.x, player1.y, game, food1, agent)
state_new = agent.get_state(game, player1, food1)

# set treward for the new state
# set reward for the new state
reward = agent.set_reward(player1, game.crash)

# train short memory base on the new action and state
Expand All @@ -233,23 +252,27 @@ def run(episodes, load_weights, display_option, speed):
if display_option:
display(player1, food1, game, record)
pygame.time.wait(speed)

agent.replay_new(agent.memory)
print(f"Memory size: {len(agent.memory)}")
print(f"Epsilon: {agent.epsilon}")
agent.replay_new(agent.memory, params['batch_size'])
counter_games += 1
print(f'Game {counter_games} Score: {game.score}')
score_plot.append(game.score)
counter_plot.append(counter_games)
agent.model.save_weights(WEIGHTS_FILENAME)
agent.model.save_weights(params['weights_path'])
plot_seaborn(counter_plot, score_plot)


if __name__ == '__main__':
# Set options to activate or deactivate the game view, and its speed
pygame.font.init()
parser = argparse.ArgumentParser()
parser.add_argument("--episodes", type=int, default=150)
parser.add_argument("--display", type=bool, default=False)
params = define_parameters()
parser.add_argument("--episodes", type=int, default=params['episodes']) # TODO: delete
parser.add_argument("--display", type=bool, default=False)
parser.add_argument("--speed", type=int, default=0)
parser.add_argument("--load-weights", type=bool, default=False)
parser.add_argument("--load-weights", type=bool, default=params['load_weights']) # TODO: delete
args = parser.parse_args()
run(args.episodes, args.display, args.speed, args.load_weights)
params['bayesian_optimization'] = False # Use bayesOpt.py for Bayesian Optimization
params['load_weights'] = args.load_weights
run(args.episodes, args.load_weights, args.display, args.speed, params)
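For reference, a programmatic equivalent of this __main__ block for launching training from another script (a minimal sketch; the display and speed values are illustrative, and keyword arguments are used to match run()'s signature order):

    # Minimal sketch: launch training from another script (illustrative values).
    import pygame
    from snakeClass import define_parameters, run

    pygame.font.init()
    params = define_parameters()
    params['bayesian_optimization'] = False
    run(episodes=params['episodes'],
        load_weights=params['load_weights'],
        display_option=True,   # show the game window
        speed=50,              # pygame.time.wait() delay between frames (ms)
        params=params)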
Binary file removed weights.hdf5
Binary file not shown.
Binary file added weights/weights2.hdf5
Binary file not shown.
