
Commit

cleaned up codebase
David Foster committed Jan 26, 2018
1 parent 1b22955 commit d5aa6e7
Showing 44 changed files with 772 additions and 19,699 deletions.
Binary file modified .DS_Store
Binary file not shown.
2 changes: 2 additions & 0 deletions .gitignore
@@ -105,3 +105,5 @@ analysis/
archive/
ipynb/
run_archive/
code_archive/
games/archive/
9,286 changes: 159 additions & 9,127 deletions .ipynb_checkpoints/run-checkpoint.ipynb

Large diffs are not rendered by default.

31 changes: 5 additions & 26 deletions MCTS.py
@@ -11,20 +11,16 @@ def __init__(self, state):
self.state = state
self.playerTurn = state.playerTurn
self.id = state.id

self.edges = []


def isLeaf(self):
if len(self.edges) > 0:
return False
else:
return True


class Edge():


def __init__(self, inNode, outNode, prior, action):
self.id = inNode.state.id + '|' + outNode.state.id
self.inNode = inNode
@@ -37,12 +33,11 @@ def __init__(self, inNode, outNode, prior, action):
'W': 0,
'Q': 0,
'P': prior,
#'nu': np.random.dirichlet([config.ALPHA] * 10)
}



class MCTS():

def __init__(self, root, cpuct):
self.root = root
self.tree = {}
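Each edge created above carries four running statistics. A minimal standalone restatement for reference (the class name `EdgeStats` is hypothetical; the fields and their meanings match the diff):

```python
class EdgeStats:
    """Per-edge MCTS statistics, as stored in the stats dict above."""
    def __init__(self, prior: float):
        self.N = 0        # visit count for this edge
        self.W = 0.0      # total value backed up through this edge
        self.Q = 0.0      # mean value, maintained as W / N
        self.P = prior    # prior probability from the policy network

    def update(self, value: float) -> None:
        self.N += 1
        self.W += value
        self.Q = self.W / self.N
```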
@@ -52,7 +47,7 @@ def __init__(self, root, cpuct):
def __len__(self):
return len(self.tree)

def _moveToLeaf(self):
def moveToLeaf(self):

lg.logger_mcts.info('------MOVING TO LEAF------')

@@ -80,23 +75,13 @@ def _moveToLeaf(self):
Nb = Nb + edge.stats['N']

for idx, (action, edge) in enumerate(currentNode.edges):
#nextState, _, _ = currentNode.state.takeAction(action)
#nextNode = self.tree[nextState.id]


# U = self.cpuct * \
# ((1-epsilon) * nextNode.stats['P'] + epsilon * nu[idx] ) * \
# np.sqrt(Nb) / (1 + nextNode.stats['N'])
U = self.cpuct * \
((1-epsilon) * edge.stats['P'] + epsilon * nu[idx] ) * \
np.sqrt(Nb) / (1 + edge.stats['N'])

#Q = nextNode.stats['Q']
Q = edge.stats['Q']

# lg.logger_mcts.info('action: %d... N = %d, P = %f, nu = %f, adjP = %f, W = %f, Q = %f, U = %f, Q+U = %f'
# , action, nextNode.stats['N'], round(nextNode.stats['P'],6), round(nu[idx],6), ((1-epsilon) * nextNode.stats['P'] + epsilon * nu[idx] )
# , round(nextNode.stats['W'],6), round(Q,6), round(U,6), round(Q+U,6))
lg.logger_mcts.info('action: %d (%d)... N = %d, P = %f, nu = %f, adjP = %f, W = %f, Q = %f, U = %f, Q+U = %f'
, action, action % 7, edge.stats['N'], round(edge.stats['P'],6), round(nu[idx],6), ((1-epsilon) * edge.stats['P'] + epsilon * nu[idx] )
, round(edge.stats['W'],6), round(Q,6), round(U,6), round(Q+U,6))
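The selection rule in this hunk is the PUCT formula, U = cpuct * ((1-epsilon)*P + epsilon*nu) * sqrt(Nb) / (1+N), with Dirichlet noise nu mixed into the priors for exploration; the edge maximising Q + U is followed. A vectorised sketch of that computation (the epsilon and alpha values here are placeholders; in the repo they presumably come from config, as the commented-out `config.ALPHA` above suggests):

```python
import numpy as np

def puct_scores(P, N, W, cpuct=1.0, epsilon=0.2, alpha=0.8):
    """Sketch of the Q+U computation above, one entry per child edge."""
    P, N, W = (np.asarray(x, dtype=float) for x in (P, N, W))
    Q = np.where(N > 0, W / np.maximum(N, 1), 0.0)  # mean value per edge
    nu = np.random.dirichlet([alpha] * len(P))      # exploration noise
    Nb = N.sum()                                    # parent visit count
    U = cpuct * ((1 - epsilon) * P + epsilon * nu) * np.sqrt(Nb) / (1 + N)
    return Q + U

# The simulation then follows the edge with the highest score:
# best = int(np.argmax(puct_scores(priors, visits, totals)))
```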
@@ -110,19 +95,15 @@ def _moveToLeaf(self):

newState, value, done = currentNode.state.takeAction(simulationAction) #the value of the newState from the POV of the new playerTurn
currentNode = simulationEdge.outNode
#currentNode = self.tree[newState.id]
#breadcrumbs.append({"nodeId": currentNode.id, "playerTurn": currentNode.state.playerTurn})
breadcrumbs.append(simulationEdge)
#lg.logger_mcts.info('moving to...%s', currentNode.id)



lg.logger_mcts.info('DONE...%d', done)
#lg.logger_mcts.info('BREADCRUMBS...%s', breadcrumbs)

return currentNode, value, done, breadcrumbs

def _backFill(self, leaf, value, breadcrumbs):


def backFill(self, leaf, value, breadcrumbs):
lg.logger_mcts.info('------DOING BACKFILL------')

currentPlayer = leaf.state.playerTurn
@@ -138,8 +119,6 @@ def _backFill(self, leaf, value, breadcrumbs):
edge.stats['W'] = edge.stats['W'] + value * direction
edge.stats['Q'] = edge.stats['W'] / edge.stats['N']



lg.logger_mcts.info('updating edge with value %f for player %d... N = %d, W = %f, Q = %f'
, value * direction
, playerTurn
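The backfill hunk above propagates the leaf evaluation back along the visited path, flipping the sign of the value for edges played by the opponent. A compact sketch (it assumes each edge exposes a `playerTurn` attribute for its source node, which the full `Edge` class appears to record):

```python
def back_fill(breadcrumbs, leaf_player, value):
    """Sketch of the update loop above: N += 1, W += signed value, Q = W/N."""
    for edge in breadcrumbs:
        direction = 1 if edge.playerTurn == leaf_player else -1
        edge.stats['N'] += 1
        edge.stats['W'] += value * direction
        edge.stats['Q'] = edge.stats['W'] / edge.stats['N']
```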
Binary file added MCTS.pyc
Binary file not shown.
150 changes: 34 additions & 116 deletions agent.py
@@ -1,13 +1,12 @@
# %matplotlib inline

import numpy as np
import logging
import random

import MCTS as mc
from game import GameState
from loss import cemse_loss, _cemse, softmax_cross_entropy_with_logits
from utils import setup_logger
from loss import softmax_cross_entropy_with_logits

import config
import loggers as lg
import time
@@ -17,26 +16,18 @@
import pylab as pl



# moveToLeaf_time = []
# evaluateLeaf_time = []
# backFill_time = []
# total_time = []


class User():
def __init__(self, name, state_size, action_size):
self.name = name
self.state_size = state_size
self.action_size = action_size

def act(self, state, tau):

action = input('Enter your chosen action: ')
pi = np.zeros(self.action_size)
pi[action] = 1
value = -1
NN_value = -1
value = None
NN_value = None
return (action, pi, value, NN_value)


@@ -63,42 +54,37 @@ def __init__(self, name, state_size, action_size, mcts_simulations, cpuct, model
self.val_policy_loss = []


def _buildMCTS(self, state):
lg.logger_mcts.info('****** BUILDING NEW MCTS TREE FOR AGENT %s ******', self.name)
self.root = mc.Node(state)
self.mcts = mc.MCTS(self.root, self.cpuct)
def simulate(self):

def _changeRootMCTS(self, state):
lg.logger_mcts.info('****** CHANGING ROOT OF MCTS TREE TO %s FOR AGENT %s ******', state.id, self.name)
self.mcts.root = self.mcts.tree[state.id]
lg.logger_mcts.info('ROOT NODE...%s', self.mcts.root.state.id)
self.mcts.root.state.render(lg.logger_mcts)
lg.logger_mcts.info('CURRENT PLAYER...%d', self.mcts.root.state.playerTurn)

##### MOVE THE LEAF NODE
leaf, value, done, breadcrumbs = self.mcts.moveToLeaf()
leaf.state.render(lg.logger_mcts)

##### EVALUATE THE LEAF NODE
value, breadcrumbs = self.evaluateLeaf(leaf, value, done, breadcrumbs)

##### BACKFILL THE VALUE THROUGH THE TREE
self.mcts.backFill(leaf, value, breadcrumbs)


def act(self, state, tau):

if self.mcts == None or state.id not in self.mcts.tree:
#print('building root from scratch')
#lg.logger_mcts.info('building root from scratch')

self._buildMCTS(state)
self.buildMCTS(state)
else:
self._changeRootMCTS(state)
self.changeRootMCTS(state)

#### run the simulation
for sim in range(self.MCTSsimulations):
lg.logger_mcts.info('***************************')
lg.logger_mcts.info('****** SIMULATION %d ******', sim + 1)
lg.logger_mcts.info('***************************')
#moveToLeaf_time, evaluateLeaf_time, backFill_time, total_time = self.simulate()
self.simulate()

# print('move to leaf time: ', np.mean(moveToLeaf_time))
# print('evaluate leaf time: ', np.mean(evaluateLeaf_time))
# print('backfill time: ', np.mean(backFill_time))
# print('total time: ', np.mean(total_time))

# print('move to leaf length: ', len(moveToLeaf_time))
# print('----------')

#### get action values
pi, values = self.getAV(1)
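Taken together, the act() flow in this hunk is: re-use the search tree when the current state is already in it (otherwise build a fresh one), run the configured number of simulations, then read off the visit-count policy. A condensed restatement (illustrative only; method names follow the diff):

```python
def act_sketch(agent, state, tau):
    """Condensed restatement of act() above; not the repo's exact code."""
    # Reuse the existing tree if it already contains this state.
    if agent.mcts is None or state.id not in agent.mcts.tree:
        agent.buildMCTS(state)
    else:
        agent.changeRootMCTS(state)

    # Each simulation: select to a leaf, evaluate it, back up the value.
    for _ in range(agent.MCTSsimulations):
        agent.simulate()

    pi, values = agent.getAV(1)                 # visit-count policy
    action, value = agent.chooseAction(pi, values, tau)
    return action, pi, value
```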

@@ -114,74 +100,34 @@ def act(self, state, tau):
lg.logger_mcts.info('MCTS PERCEIVED VALUE...%f', value)
lg.logger_mcts.info('NN PERCEIVED VALUE...%f', NN_value)

# if self.mcts != None:
# print(len(self.mcts.tree))
# else:
# print(0)

#print(action)

return (action, pi, value, NN_value)

def simulate(self):

lg.logger_mcts.info('ROOT NODE...%s', self.mcts.root.state.id)
self.mcts.root.state.render(lg.logger_mcts)
lg.logger_mcts.info('CURRENT PLAYER...%d', self.mcts.root.state.playerTurn)


t0 = time.time()
leaf, value, done, breadcrumbs = self.mcts._moveToLeaf() #the value of the position from the POV of the player in the leaf node
leaf.state.render(lg.logger_mcts)

t1 = time.time()
value, breadcrumbs = self._evaluateLeaf(leaf, value, done, breadcrumbs) #the value of the position from the POV of the player in the leaf node
t2 = time.time()
self.mcts._backFill(leaf, value, breadcrumbs)
t3 = time.time()

# moveToLeaf_time.append(t1-t0)
# evaluateLeaf_time.append(t2 - t1)
# backFill_time.append(t3 - t2)
# total_time.append(t3 - t0)

# return ((moveToLeaf_time, evaluateLeaf_time, backFill_time, total_time))





def get_preds(self, state):
#predict the leaf
inputToModel = np.array([self.model.convertToModelInput(state)]) #or currentPLayer?
inputToModel = np.array([self.model.convertToModelInput(state)])

preds = self.model.predict(inputToModel)
value_array = preds[0]
logits_array = preds[1]
#value = np.tanh(preds[0])
value = value_array[0]

logits = logits_array[0]
#print(value)
#print(logits)

allowedActions = state.allowedActions()
allowedActions = state.allowedActions

mask = np.ones(logits.shape,dtype=bool) #np.ones_like(a,dtype=bool)
mask = np.ones(logits.shape,dtype=bool)
mask[allowedActions] = False
logits[mask] = -100

#SOFTMAX
odds = np.exp(logits)
probs = odds / np.sum(odds) ###put this just before the for?

# #SIGMOID
# probs = 1/(1+np.exp(-logits))

return ((value, probs, allowedActions))
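get_preds above masks illegal moves before the softmax, so they receive effectively zero probability. A standalone sketch of that step (the max-shift is added here for numerical stability; the diff applies the exponential directly):

```python
import numpy as np

def masked_policy(logits, allowed_actions):
    """Sketch of the mask + softmax step in get_preds above."""
    logits = np.array(logits, dtype=float)
    mask = np.ones(logits.shape, dtype=bool)
    mask[allowed_actions] = False
    logits[mask] = -100                     # same sentinel as the diff
    odds = np.exp(logits - logits.max())    # shift for numerical stability
    return odds / odds.sum()

# Example: masked_policy([0.3, 1.2, -0.5, 0.8], allowed_actions=[0, 3])
# concentrates all probability mass on actions 0 and 3.
```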


def _evaluateLeaf(self, leaf, value, done, breadcrumbs):
def evaluateLeaf(self, leaf, value, done, breadcrumbs):

lg.logger_mcts.info('------EVALUATING LEAF------')

@@ -205,10 +151,7 @@ def _evaluateLeaf(self, leaf, value, done, breadcrumbs):
newEdge = mc.Edge(leaf, node, probs[idx], action)
leaf.edges.append((action, newEdge))

#breadcrumbs.append(newEdge)
else:
#if leaf.currentPlayer == -leaf.state.playerTurn:
# value = -value
lg.logger_mcts.info('GAME VALUE FOR %d: %f', leaf.playerTurn, value)

return ((value, breadcrumbs))
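When the leaf is not terminal, evaluateLeaf expands it: each legal action gets a child node (created if the position is unseen) and an edge seeded with the policy prior, as the `mc.Edge(leaf, node, probs[idx], action)` line above shows. A sketch of that expansion step, reconstructed from the fragments visible in this hunk (it assumes the diff's `mc` module and a `takeAction` method on states):

```python
import MCTS as mc

def expand_leaf(tree, leaf, probs, allowed_actions):
    """Sketch of the expansion implied by evaluateLeaf above."""
    for idx, action in enumerate(allowed_actions):
        new_state, _, _ = leaf.state.takeAction(action)
        if new_state.id not in tree:          # first visit to this position
            node = mc.Node(new_state)
            tree[new_state.id] = node
        else:
            node = tree[new_state.id]
        leaf.edges.append((action, mc.Edge(leaf, node, probs[idx], action)))
```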
@@ -229,11 +172,9 @@ def getAV(self, tau):

def chooseAction(self, pi, values, tau):
if tau == 0:
#action = np.argmax(pi)
actions = np.argwhere(pi == max(pi))
action = random.choice(actions)[0]
else:
#print(sum(pi[:-1]))
action_idx = np.random.multinomial(1, pi)
action = np.where(action_idx==1)[0][0]
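chooseAction above implements the usual temperature rule: tau == 0 plays greedily (argmax over pi, with ties broken at random), while any other tau samples a move in proportion to pi. A standalone sketch:

```python
import random
import numpy as np

def choose_action(pi, tau):
    """Sketch of chooseAction above: greedy at tau == 0, sampled otherwise."""
    if tau == 0:
        best = np.argwhere(pi == np.max(pi)).flatten()
        return int(random.choice(best))
    draw = np.random.multinomial(1, pi)       # one-hot draw from pi
    return int(np.where(draw == 1)[0][0])
```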

@@ -248,63 +189,40 @@ def replay(self, ltmemory):
for i in xrange(config.TRAINING_LOOPS):
minibatch = random.sample(ltmemory, min(config.BATCH_SIZE, len(ltmemory)))

#minibatch[0]['state'].render(lg.logger_memory)
#minibatch = ltmemory

training_states = np.array([self.model.convertToModelInput(row['state']) for row in minibatch])
training_targets = {'value_head': np.array([row['value'] for row in minibatch])
, 'policy_head': np.array([row['AV'] for row in minibatch])}

fit = self.model.fit(training_states, training_targets, epochs=config.EPOCHS, verbose=1, validation_split=0, batch_size = 32)
lg.logger_mcts.info('NEW LOSS %s', fit.history)
#print(fit.history)


self.train_overall_loss.append(round(fit.history['loss'][config.EPOCHS - 1],4))
self.train_value_loss.append(round(fit.history['value_head_loss'][config.EPOCHS - 1],4))
self.train_policy_loss.append(round(fit.history['policy_head_loss'][config.EPOCHS - 1],4))

# self.val_overall_loss.append(round(fit.history['val_loss'][config.EPOCHS - 1],4))
# self.val_value_loss.append(round(fit.history['val_value_head_loss'][config.EPOCHS - 1],4))
# self.val_policy_loss.append(round(fit.history['val_policy_head_loss'][config.EPOCHS - 1],4))



plt.plot(self.train_overall_loss, 'k')
plt.plot(self.train_value_loss, 'k-')
plt.plot(self.train_value_loss, 'k:')
plt.plot(self.train_policy_loss, 'k--')
# plt.plot(self.val_overall_loss, 'r')
# plt.plot(self.val_value_loss, 'r-')
# plt.plot(self.val_policy_loss, 'r--')

plt.legend(['train_overall_loss', 'train_value_loss', 'train_policy_loss'
#, 'val_overall_loss', 'val_value_loss', 'val_policy_loss'
], loc='lower left')
plt.legend(['train_overall_loss', 'train_value_loss', 'train_policy_loss'], loc='lower left')

display.clear_output(wait=True)
display.display(pl.gcf())
pl.gcf().clear()
time.sleep(1.0)

#print('TRAIN OVERALL LOSS: ' + str(self.train_overall_loss))
#print('TRAIN VALUE LOSS: ' + str(self.train_value_loss))
#print('TRAIN POLICY LOSS: ' + str(self.train_policy_loss))

# print('VAL OVERALL LOSS: ' + str(self.val_overall_loss))
# print('VAL VALUE LOSS: ' + str(self.val_value_loss))
# print('VAL POLICY LOSS: ' + str(self.val_policy_loss))
print('\n')



self.model.printWeightAverages()
#self.model.viewLayers()





def predict(self, inputToModel):
preds = self.model.predict(inputToModel)
return preds

def buildMCTS(self, state):
lg.logger_mcts.info('****** BUILDING NEW MCTS TREE FOR AGENT %s ******', self.name)
self.root = mc.Node(state)
self.mcts = mc.MCTS(self.root, self.cpuct)

def changeRootMCTS(self, state):
lg.logger_mcts.info('****** CHANGING ROOT OF MCTS TREE TO %s FOR AGENT %s ******', state.id, self.name)
self.mcts.root = self.mcts.tree[state.id]
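Finally, the replay() hunk trains from long-term memory: each training loop samples a minibatch and fits both network heads against the stored game value and action-visit distribution. A condensed sketch (note the diff's `xrange` is Python 2; `range` is used here):

```python
import random
import numpy as np

def replay_sketch(agent, ltmemory, config):
    """Condensed view of the replay() training loop above."""
    for _ in range(config.TRAINING_LOOPS):
        minibatch = random.sample(ltmemory,
                                  min(config.BATCH_SIZE, len(ltmemory)))
        states = np.array([agent.model.convertToModelInput(row['state'])
                           for row in minibatch])
        targets = {'value_head': np.array([row['value'] for row in minibatch]),
                   'policy_head': np.array([row['AV'] for row in minibatch])}
        agent.model.fit(states, targets, epochs=config.EPOCHS,
                        verbose=1, validation_split=0, batch_size=32)
```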
Binary file added agent.pyc
Binary file not shown.
