Skip to content

Commit

Permalink
Fix to main scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
DanielSlater committed Feb 18, 2017
1 parent 80b9d67 commit 5bd4aed
Show file tree
Hide file tree
Showing 8 changed files with 43 additions and 11 deletions.
2 changes: 1 addition & 1 deletion common/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def monte_carlo_move_func(board_state, side):
return monte_carlo_tree_search_uct(game_spec, board_state, side, 100000)[1]

results = []
for _ in range(games_vs_random / 2):
for _ in range(int(games_vs_random / 2)):
result = game_spec.play_game(make_move,
game_spec.get_random_player_func(),
log=log_games)
Expand Down
8 changes: 3 additions & 5 deletions policy_gradient.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,19 @@

from common.network_helpers import create_network
from games.tic_tac_toe import TicTacToeGameSpec
from games.tic_tac_toe_x import TicTacToeXGameSpec
from techniques.train_policy_gradient import train_policy_gradients

HIDDEN_NODES = (100, 100, 100)
BATCH_SIZE = 100 # every how many games to do a parameter update?
LEARN_RATE = 1e-6
LEARN_RATE = 1e-4
PRINT_RESULTS_EVERY_X = 1000 # every how many games to print the results
NETWORK_FILE_PATH = None#'current_network.p' # path to save the network to
NUMBER_OF_GAMES_TO_RUN = 1000000

# To play a different game, change this to another spec, e.g. TicTacToeXGameSpec or ConnectXGameSpec. Getting these
# to run well may require tuning the hyperparameters a bit.
game_spec = TicTacToeXGameSpec(4, 3)
game_spec = TicTacToeGameSpec()

create_network_func = functools.partial(create_network, game_spec.board_squares(), (300, 200, 100, 100))
create_network_func = functools.partial(create_network, game_spec.board_squares(), (100, 100, 100))

train_policy_gradients(game_spec, create_network_func, NETWORK_FILE_PATH,
number_of_games=NUMBER_OF_GAMES_TO_RUN,
Expand Down
2 changes: 1 addition & 1 deletion policy_gradient_historical_competition.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from games.tic_tac_toe import TicTacToeGameSpec
from techniques.train_policy_gradient_historic import train_policy_gradients_vs_historic

HIDDEN_NODES = (100, 80, 60, 40)
HIDDEN_NODES = (100, 100, 100)
SAVE_HISTORICAL_NETWORK_EVERY = 10000
game_spec = TicTacToeGameSpec()

Expand Down
3 changes: 1 addition & 2 deletions tests/games/test_connect_4.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,4 @@ def test_has_winner(self):
self.assertEqual(1, has_winner(board_state), 4)

def test_play_game(self):
play_game(random_player, random_player)

play_game(random_player, random_player)
15 changes: 15 additions & 0 deletions tests/techniques/test_create_positions_set.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from unittest import TestCase

from games.tic_tac_toe import TicTacToeGameSpec
from techniques.create_positions_set import create_positions_set


class TestCreatePositionsSet(TestCase):
    """Tests for ``create_positions_set`` using the tic-tac-toe game spec."""

    def setUp(self):
        # A fresh game spec is built for every test method.
        self._game_spec = TicTacToeGameSpec()

    def test_create_positions(self):
        """The returned collection holds at least the requested number of positions."""
        requested_count = 100
        random_player = self._game_spec.get_random_player_func()

        generated = create_positions_set(self._game_spec, requested_count, random_player)

        # Strictly greater than (requested - 1), i.e. at least `requested_count` entries.
        self.assertGreater(len(generated), requested_count - 1)
15 changes: 15 additions & 0 deletions tests/techniques/test_train_policy_gradient_historic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from unittest import TestCase

from games.tic_tac_toe import TicTacToeGameSpec
from techniques.create_positions_set import create_positions_set


class TestCreatePositionsSet(TestCase):
    """Checks the size of the collection produced by ``create_positions_set``.

    NOTE(review): this file is named after the historic policy-gradient
    trainer, yet the class only exercises ``create_positions_set`` -- it looks
    like a copy of the create-positions test. Confirm whether a dedicated test
    for the historic trainer was intended here.
    """

    def setUp(self):
        # Every test method gets its own tic-tac-toe spec instance.
        self._game_spec = TicTacToeGameSpec()

    def test_create_positions(self):
        """Asking for 100 positions must produce a result with more than 99 entries."""
        spec = self._game_spec
        wanted = 100

        result = create_positions_set(spec, wanted, spec.get_random_player_func())

        lower_bound_exclusive = wanted - 1
        self.assertGreater(len(result), lower_bound_exclusive)
5 changes: 5 additions & 0 deletions tic_tac_toe_5_4/network.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import tensorflow as tf

from common.benchmark import benchmark
from games.tic_tac_toe_x import TicTacToeXGameSpec

tic_tac_toe_5_4_game_spec = TicTacToeXGameSpec(5, 4)
Expand Down Expand Up @@ -75,3 +76,7 @@ def create_convolutional_network():
# convolution_weights_5, convolution_bias_5,
# feed_forward_weights_1, feed_forward_bias_1,
feed_forward_weights_2, feed_forward_bias_2]

file_path = 'convolutional_net_5_4_l_c_4_f_1_other_fresh.p'

benchmark(tic_tac_toe_5_4_game_spec, file_path, create_convolutional_network)
4 changes: 2 additions & 2 deletions value_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
get_deterministic_network_move
from games.tic_tac_toe import TicTacToeGameSpec

HIDDEN_NODES_VALUE = (120, 100, 80, 60, 40)
HIDDEN_NODES_REINFORCEMENT = (100, 80, 60, 40)
HIDDEN_NODES_VALUE = (100, 100, 100)
HIDDEN_NODES_REINFORCEMENT = (100, 100, 100)
BATCH_SIZE = 100 # every how many games to do a parameter update?
LEARN_RATE = 1e-4
REINFORCEMENT_NETWORK_PATH = 'current_network.p'
Expand Down

0 comments on commit 5bd4aed

Please sign in to comment.