
Commit

cleaned up codebase
David Foster committed Jan 26, 2018
1 parent 1b22955 commit d5aa6e7
Showing 44 changed files with 772 additions and 19,699 deletions.
Binary file modified .DS_Store
Binary file not shown.
2 changes: 2 additions & 0 deletions .gitignore
@@ -105,3 +105,5 @@ analysis/
archive/
ipynb/
run_archive/
code_archive/
games/archive/
9,286 changes: 159 additions & 9,127 deletions .ipynb_checkpoints/run-checkpoint.ipynb

Large diffs are not rendered by default.

31 changes: 5 additions & 26 deletions MCTS.py
@@ -11,20 +11,16 @@ def __init__(self, state):
self.state = state
self.playerTurn = state.playerTurn
self.id = state.id

self.edges = []


def isLeaf(self):
if len(self.edges) > 0:
return False
else:
return True


class Edge():


def __init__(self, inNode, outNode, prior, action):
self.id = inNode.state.id + '|' + outNode.state.id
self.inNode = inNode
@@ -37,12 +33,11 @@ def __init__(self, inNode, outNode, prior, action):
'W': 0,
'Q': 0,
'P': prior,
#'nu': np.random.dirichlet([config.ALPHA] * 10)
}



class MCTS():

def __init__(self, root, cpuct):
self.root = root
self.tree = {}
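Each edge created above carries four running statistics. A minimal standalone restatement for reference (the class name `EdgeStats` is hypothetical; the fields and their meanings match the diff):

```python
class EdgeStats:
    """Per-edge MCTS statistics, as stored in the stats dict above."""
    def __init__(self, prior: float):
        self.N = 0        # visit count for this edge
        self.W = 0.0      # total value backed up through this edge
        self.Q = 0.0      # mean value, maintained as W / N
        self.P = prior    # prior probability from the policy network

    def update(self, value: float) -> None:
        self.N += 1
        self.W += value
        self.Q = self.W / self.N
```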
@@ -52,7 +47,7 @@ def __init__(self, root, cpuct):
def __len__(self):
return len(self.tree)

def _moveToLeaf(self):
def moveToLeaf(self):

lg.logger_mcts.info('------MOVING TO LEAF------')

@@ -80,23 +75,13 @@ def _moveToLeaf(self):
Nb = Nb + edge.stats['N']

for idx, (action, edge) in enumerate(currentNode.edges):
#nextState, _, _ = currentNode.state.takeAction(action)
#nextNode = self.tree[nextState.id]


# U = self.cpuct * \
# ((1-epsilon) * nextNode.stats['P'] + epsilon * nu[idx] ) * \
# np.sqrt(Nb) / (1 + nextNode.stats['N'])
U = self.cpuct * \
((1-epsilon) * edge.stats['P'] + epsilon * nu[idx] ) * \
np.sqrt(Nb) / (1 + edge.stats['N'])

#Q = nextNode.stats['Q']
Q = edge.stats['Q']

# lg.logger_mcts.info('action: %d... N = %d, P = %f, nu = %f, adjP = %f, W = %f, Q = %f, U = %f, Q+U = %f'
# , action, nextNode.stats['N'], round(nextNode.stats['P'],6), round(nu[idx],6), ((1-epsilon) * nextNode.stats['P'] + epsilon * nu[idx] )
# , round(nextNode.stats['W'],6), round(Q,6), round(U,6), round(Q+U,6))
lg.logger_mcts.info('action: %d (%d)... N = %d, P = %f, nu = %f, adjP = %f, W = %f, Q = %f, U = %f, Q+U = %f'
, action, action % 7, edge.stats['N'], round(edge.stats['P'],6), round(nu[idx],6), ((1-epsilon) * edge.stats['P'] + epsilon * nu[idx] )
, round(edge.stats['W'],6), round(Q,6), round(U,6), round(Q+U,6))
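The selection rule in this hunk is the PUCT formula, U = cpuct * ((1-epsilon)*P + epsilon*nu) * sqrt(Nb) / (1+N), with Dirichlet noise nu mixed into the priors for exploration; the edge maximising Q + U is followed. A vectorised sketch of that computation (the epsilon and alpha values here are placeholders; in the repo they presumably come from config, as the commented-out `config.ALPHA` above suggests):

```python
import numpy as np

def puct_scores(P, N, W, cpuct=1.0, epsilon=0.2, alpha=0.8):
    """Sketch of the Q+U computation above, one entry per child edge."""
    P, N, W = (np.asarray(x, dtype=float) for x in (P, N, W))
    Q = np.where(N > 0, W / np.maximum(N, 1), 0.0)  # mean value per edge
    nu = np.random.dirichlet([alpha] * len(P))      # exploration noise
    Nb = N.sum()                                    # parent visit count
    U = cpuct * ((1 - epsilon) * P + epsilon * nu) * np.sqrt(Nb) / (1 + N)
    return Q + U

# The simulation then follows the edge with the highest score:
# best = int(np.argmax(puct_scores(priors, visits, totals)))
```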
@@ -110,19 +95,15 @@ def _moveToLeaf(self):

newState, value, done = currentNode.state.takeAction(simulationAction) #the value of the newState from the POV of the new playerTurn
currentNode = simulationEdge.outNode
#currentNode = self.tree[newState.id]
#breadcrumbs.append({"nodeId": currentNode.id, "playerTurn": currentNode.state.playerTurn})
breadcrumbs.append(simulationEdge)
#lg.logger_mcts.info('moving to...%s', currentNode.id)



lg.logger_mcts.info('DONE...%d', done)
#lg.logger_mcts.info('BREADCRUMBS...%s', breadcrumbs)

return currentNode, value, done, breadcrumbs

def _backFill(self, leaf, value, breadcrumbs):


def backFill(self, leaf, value, breadcrumbs):
lg.logger_mcts.info('------DOING BACKFILL------')

currentPlayer = leaf.state.playerTurn
@@ -138,8 +119,6 @@ def _backFill(self, leaf, value, breadcrumbs):
edge.stats['W'] = edge.stats['W'] + value * direction
edge.stats['Q'] = edge.stats['W'] / edge.stats['N']



lg.logger_mcts.info('updating edge with value %f for player %d... N = %d, W = %f, Q = %f'
, value * direction
, playerTurn
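The backfill hunk above propagates the leaf evaluation back along the visited path, flipping the sign of the value for edges played by the opponent. A compact sketch (it assumes each edge exposes a `playerTurn` attribute for its source node, which the full `Edge` class appears to record):

```python
def back_fill(breadcrumbs, leaf_player, value):
    """Sketch of the update loop above: N += 1, W += signed value, Q = W/N."""
    for edge in breadcrumbs:
        direction = 1 if edge.playerTurn == leaf_player else -1
        edge.stats['N'] += 1
        edge.stats['W'] += value * direction
        edge.stats['Q'] = edge.stats['W'] / edge.stats['N']
```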
Binary file added MCTS.pyc
Binary file not shown.
150 changes: 34 additions & 116 deletions agent.py
@@ -1,13 +1,12 @@
# %matplotlib inline

import numpy as np
import logging
import random

import MCTS as mc
from game import GameState
from loss import cemse_loss, _cemse, softmax_cross_entropy_with_logits
from utils import setup_logger
from loss import softmax_cross_entropy_with_logits

import config
import loggers as lg
import time
@@ -17,26 +16,18 @@
import pylab as pl



# moveToLeaf_time = []
# evaluateLeaf_time = []
# backFill_time = []
# total_time = []


class User():
def __init__(self, name, state_size, action_size):
self.name = name
self.state_size = state_size
self.action_size = action_size

def act(self, state, tau):

action = input('Enter your chosen action: ')
pi = np.zeros(self.action_size)
pi[action] = 1
value = -1
NN_value = -1
value = None
NN_value = None
return (action, pi, value, NN_value)


@@ -63,42 +54,37 @@ def __init__(self, name, state_size, action_size, mcts_simulations, cpuct, model
self.val_policy_loss = []


def _buildMCTS(self, state):
lg.logger_mcts.info('****** BUILDING NEW MCTS TREE FOR AGENT %s ******', self.name)
self.root = mc.Node(state)
self.mcts = mc.MCTS(self.root, self.cpuct)
def simulate(self):

def _changeRootMCTS(self, state):
lg.logger_mcts.info('****** CHANGING ROOT OF MCTS TREE TO %s FOR AGENT %s ******', state.id, self.name)
self.mcts.root = self.mcts.tree[state.id]
lg.logger_mcts.info('ROOT NODE...%s', self.mcts.root.state.id)
self.mcts.root.state.render(lg.logger_mcts)
lg.logger_mcts.info('CURRENT PLAYER...%d', self.mcts.root.state.playerTurn)

##### MOVE THE LEAF NODE
leaf, value, done, breadcrumbs = self.mcts.moveToLeaf()
leaf.state.render(lg.logger_mcts)

##### EVALUATE THE LEAF NODE
value, breadcrumbs = self.evaluateLeaf(leaf, value, done, breadcrumbs)

##### BACKFILL THE VALUE THROUGH THE TREE
self.mcts.backFill(leaf, value, breadcrumbs)


def act(self, state, tau):

if self.mcts == None or state.id not in self.mcts.tree:
#print('building root from scratch')
#lg.logger_mcts.info('building root from scratch')

self._buildMCTS(state)
self.buildMCTS(state)
else:
self._changeRootMCTS(state)
self.changeRootMCTS(state)

#### run the simulation
for sim in range(self.MCTSsimulations):
lg.logger_mcts.info('***************************')
lg.logger_mcts.info('****** SIMULATION %d ******', sim + 1)
lg.logger_mcts.info('***************************')
#moveToLeaf_time, evaluateLeaf_time, backFill_time, total_time = self.simulate()
self.simulate()

# print('move to leaf time: ', np.mean(moveToLeaf_time))
# print('evaluate leaf time: ', np.mean(evaluateLeaf_time))
# print('backfill time: ', np.mean(backFill_time))
# print('total time: ', np.mean(total_time))

# print('move to leaf length: ', len(moveToLeaf_time))
# print('----------')

#### get action values
pi, values = self.getAV(1)
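Taken together, the act() flow in this hunk is: re-use the search tree when the current state is already in it (otherwise build a fresh one), run the configured number of simulations, then read off the visit-count policy. A condensed restatement (illustrative only; method names follow the diff):

```python
def act_sketch(agent, state, tau):
    """Condensed restatement of act() above; not the repo's exact code."""
    # Reuse the existing tree if it already contains this state.
    if agent.mcts is None or state.id not in agent.mcts.tree:
        agent.buildMCTS(state)
    else:
        agent.changeRootMCTS(state)

    # Each simulation: select to a leaf, evaluate it, back up the value.
    for _ in range(agent.MCTSsimulations):
        agent.simulate()

    pi, values = agent.getAV(1)                 # visit-count policy
    action, value = agent.chooseAction(pi, values, tau)
    return action, pi, value
```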

@@ -114,74 +100,34 @@ def act(self, state, tau):
lg.logger_mcts.info('MCTS PERCEIVED VALUE...%f', value)
lg.logger_mcts.info('NN PERCEIVED VALUE...%f', NN_value)

# if self.mcts != None:
# print(len(self.mcts.tree))
# else:
# print(0)

#print(action)

return (action, pi, value, NN_value)

def simulate(self):

lg.logger_mcts.info('ROOT NODE...%s', self.mcts.root.state.id)
self.mcts.root.state.render(lg.logger_mcts)
lg.logger_mcts.info('CURRENT PLAYER...%d', self.mcts.root.state.playerTurn)


t0 = time.time()
leaf, value, done, breadcrumbs = self.mcts._moveToLeaf() #the value of the position from the POV of the player in the leaf node
leaf.state.render(lg.logger_mcts)

t1 = time.time()
value, breadcrumbs = self._evaluateLeaf(leaf, value, done, breadcrumbs) #the value of the position from the POV of the player in the leaf node
t2 = time.time()
self.mcts._backFill(leaf, value, breadcrumbs)
t3 = time.time()

# moveToLeaf_time.append(t1-t0)
# evaluateLeaf_time.append(t2 - t1)
# backFill_time.append(t3 - t2)
# total_time.append(t3 - t0)

# return ((moveToLeaf_time, evaluateLeaf_time, backFill_time, total_time))





def get_preds(self, state):
#predict the leaf
inputToModel = np.array([self.model.convertToModelInput(state)]) #or currentPLayer?
inputToModel = np.array([self.model.convertToModelInput(state)])

preds = self.model.predict(inputToModel)
value_array = preds[0]
logits_array = preds[1]
#value = np.tanh(preds[0])
value = value_array[0]

logits = logits_array[0]
#print(value)
#print(logits)

allowedActions = state.allowedActions()
allowedActions = state.allowedActions

mask = np.ones(logits.shape,dtype=bool) #np.ones_like(a,dtype=bool)
mask = np.ones(logits.shape,dtype=bool)
mask[allowedActions] = False
logits[mask] = -100

#SOFTMAX
odds = np.exp(logits)
probs = odds / np.sum(odds) ###put this just before the for?

# #SIGMOID
# probs = 1/(1+np.exp(-logits))

return ((value, probs, allowedActions))
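get_preds above masks illegal moves before the softmax, so they receive effectively zero probability. A standalone sketch of that step (the max-shift is added here for numerical stability; the diff applies the exponential directly):

```python
import numpy as np

def masked_policy(logits, allowed_actions):
    """Sketch of the mask + softmax step in get_preds above."""
    logits = np.array(logits, dtype=float)
    mask = np.ones(logits.shape, dtype=bool)
    mask[allowed_actions] = False
    logits[mask] = -100                     # same sentinel as the diff
    odds = np.exp(logits - logits.max())    # shift for numerical stability
    return odds / odds.sum()

# Example: masked_policy([0.3, 1.2, -0.5, 0.8], allowed_actions=[0, 3])
# concentrates all probability mass on actions 0 and 3.
```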


def _evaluateLeaf(self, leaf, value, done, breadcrumbs):
def evaluateLeaf(self, leaf, value, done, breadcrumbs):

lg.logger_mcts.info('------EVALUATING LEAF------')

@@ -205,10 +151,7 @@ def _evaluateLeaf(self, leaf, value, done, breadcrumbs):
newEdge = mc.Edge(leaf, node, probs[idx], action)
leaf.edges.append((action, newEdge))

#breadcrumbs.append(newEdge)
else:
#if leaf.currentPlayer == -leaf.state.playerTurn:
# value = -value
lg.logger_mcts.info('GAME VALUE FOR %d: %f', leaf.playerTurn, value)

return ((value, breadcrumbs))
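When the leaf is not terminal, evaluateLeaf expands it: each legal action gets a child node (created if the position is unseen) and an edge seeded with the policy prior, as the `mc.Edge(leaf, node, probs[idx], action)` line above shows. A sketch of that expansion step, reconstructed from the fragments visible in this hunk (it assumes the diff's `mc` module and a `takeAction` method on states):

```python
import MCTS as mc

def expand_leaf(tree, leaf, probs, allowed_actions):
    """Sketch of the expansion implied by evaluateLeaf above."""
    for idx, action in enumerate(allowed_actions):
        new_state, _, _ = leaf.state.takeAction(action)
        if new_state.id not in tree:          # first visit to this position
            node = mc.Node(new_state)
            tree[new_state.id] = node
        else:
            node = tree[new_state.id]
        leaf.edges.append((action, mc.Edge(leaf, node, probs[idx], action)))
```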
@@ -229,11 +172,9 @@ def getAV(self, tau):

def chooseAction(self, pi, values, tau):
if tau == 0:
#action = np.argmax(pi)
actions = np.argwhere(pi == max(pi))
action = random.choice(actions)[0]
else:
#print(sum(pi[:-1]))
action_idx = np.random.multinomial(1, pi)
action = np.where(action_idx==1)[0][0]
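chooseAction above implements the usual temperature rule: tau == 0 plays greedily (argmax over pi, with ties broken at random), while any other tau samples a move in proportion to pi. A standalone sketch:

```python
import random
import numpy as np

def choose_action(pi, tau):
    """Sketch of chooseAction above: greedy at tau == 0, sampled otherwise."""
    if tau == 0:
        best = np.argwhere(pi == np.max(pi)).flatten()
        return int(random.choice(best))
    draw = np.random.multinomial(1, pi)       # one-hot draw from pi
    return int(np.where(draw == 1)[0][0])
```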

@@ -248,63 +189,40 @@ def replay(self, ltmemory):
for i in xrange(config.TRAINING_LOOPS):
minibatch = random.sample(ltmemory, min(config.BATCH_SIZE, len(ltmemory)))

#minibatch[0]['state'].render(lg.logger_memory)
#minibatch = ltmemory

training_states = np.array([self.model.convertToModelInput(row['state']) for row in minibatch])
training_targets = {'value_head': np.array([row['value'] for row in minibatch])
, 'policy_head': np.array([row['AV'] for row in minibatch])}

fit = self.model.fit(training_states, training_targets, epochs=config.EPOCHS, verbose=1, validation_split=0, batch_size = 32)
lg.logger_mcts.info('NEW LOSS %s', fit.history)
#print(fit.history)


self.train_overall_loss.append(round(fit.history['loss'][config.EPOCHS - 1],4))
self.train_value_loss.append(round(fit.history['value_head_loss'][config.EPOCHS - 1],4))
self.train_policy_loss.append(round(fit.history['policy_head_loss'][config.EPOCHS - 1],4))

# self.val_overall_loss.append(round(fit.history['val_loss'][config.EPOCHS - 1],4))
# self.val_value_loss.append(round(fit.history['val_value_head_loss'][config.EPOCHS - 1],4))
# self.val_policy_loss.append(round(fit.history['val_policy_head_loss'][config.EPOCHS - 1],4))



plt.plot(self.train_overall_loss, 'k')
plt.plot(self.train_value_loss, 'k-')
plt.plot(self.train_value_loss, 'k:')
plt.plot(self.train_policy_loss, 'k--')
# plt.plot(self.val_overall_loss, 'r')
# plt.plot(self.val_value_loss, 'r-')
# plt.plot(self.val_policy_loss, 'r--')

plt.legend(['train_overall_loss', 'train_value_loss', 'train_policy_loss'
#, 'val_overall_loss', 'val_value_loss', 'val_policy_loss'
], loc='lower left')
plt.legend(['train_overall_loss', 'train_value_loss', 'train_policy_loss'], loc='lower left')

display.clear_output(wait=True)
display.display(pl.gcf())
pl.gcf().clear()
time.sleep(1.0)

#print('TRAIN OVERALL LOSS: ' + str(self.train_overall_loss))
#print('TRAIN VALUE LOSS: ' + str(self.train_value_loss))
#print('TRAIN POLICY LOSS: ' + str(self.train_policy_loss))

# print('VAL OVERALL LOSS: ' + str(self.val_overall_loss))
# print('VAL VALUE LOSS: ' + str(self.val_value_loss))
# print('VAL POLICY LOSS: ' + str(self.val_policy_loss))
print('\n')



self.model.printWeightAverages()
#self.model.viewLayers()





def predict(self, inputToModel):
preds = self.model.predict(inputToModel)
return preds

def buildMCTS(self, state):
lg.logger_mcts.info('****** BUILDING NEW MCTS TREE FOR AGENT %s ******', self.name)
self.root = mc.Node(state)
self.mcts = mc.MCTS(self.root, self.cpuct)

def changeRootMCTS(self, state):
lg.logger_mcts.info('****** CHANGING ROOT OF MCTS TREE TO %s FOR AGENT %s ******', state.id, self.name)
self.mcts.root = self.mcts.tree[state.id]
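Finally, the replay() hunk trains from long-term memory: each training loop samples a minibatch and fits both network heads against the stored game value and action-visit distribution. A condensed sketch (note the diff's `xrange` is Python 2; `range` is used here):

```python
import random
import numpy as np

def replay_sketch(agent, ltmemory, config):
    """Condensed view of the replay() training loop above."""
    for _ in range(config.TRAINING_LOOPS):
        minibatch = random.sample(ltmemory,
                                  min(config.BATCH_SIZE, len(ltmemory)))
        states = np.array([agent.model.convertToModelInput(row['state'])
                           for row in minibatch])
        targets = {'value_head': np.array([row['value'] for row in minibatch]),
                   'policy_head': np.array([row['AV'] for row in minibatch])}
        agent.model.fit(states, targets, epochs=config.EPOCHS,
                        verbose=1, validation_split=0, batch_size=32)
```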
Binary file added agent.pyc
Binary file not shown.
