Fix to main scripts

lorerave85 · Feb 18, 2017 · 5bd4aed
1 parent 80b9d67
commit 5bd4aed

Showing 8 changed files with 43 additions and 11 deletions.
diff --git a/common/benchmark.py b/common/benchmark.py
@@ -40,7 +40,7 @@ def monte_carlo_move_func(board_state, side):
             return monte_carlo_tree_search_uct(game_spec, board_state, side, 100000)[1]
 
         results = []
-        for _ in range(games_vs_random / 2):
+        for _ in range(int(games_vs_random / 2)):
             result = game_spec.play_game(make_move,
                                          game_spec.get_random_player_func(),
                                          log=log_games)

diff --git a/policy_gradient.py b/policy_gradient.py
@@ -19,21 +19,19 @@
 
 from common.network_helpers import create_network
 from games.tic_tac_toe import TicTacToeGameSpec
-from games.tic_tac_toe_x import TicTacToeXGameSpec
 from techniques.train_policy_gradient import train_policy_gradients
 
-HIDDEN_NODES = (100, 100, 100)
 BATCH_SIZE = 100  # every how many games to do a parameter update?
-LEARN_RATE = 1e-6
+LEARN_RATE = 1e-4
 PRINT_RESULTS_EVERY_X = 1000  # every how many games to print the results
 NETWORK_FILE_PATH = None#'current_network.p'  # path to save the network to
 NUMBER_OF_GAMES_TO_RUN = 1000000
 
 # To play a different game, change this to another spec, e.g. TicTacToeXGameSpec or ConnectXGameSpec.
 # Getting these to run well may require tuning the hyperparameters a bit.
-game_spec = TicTacToeXGameSpec(4, 3)
+game_spec = TicTacToeGameSpec()
 
-create_network_func = functools.partial(create_network, game_spec.board_squares(), (300, 200, 100, 100))
+create_network_func = functools.partial(create_network, game_spec.board_squares(), (100, 100, 100))
 
 train_policy_gradients(game_spec, create_network_func, NETWORK_FILE_PATH,
                        number_of_games=NUMBER_OF_GAMES_TO_RUN,

diff --git a/policy_gradient_historical_competition.py b/policy_gradient_historical_competition.py
@@ -18,7 +18,7 @@
 from games.tic_tac_toe import TicTacToeGameSpec
 from techniques.train_policy_gradient_historic import train_policy_gradients_vs_historic
 
-HIDDEN_NODES = (100, 80, 60, 40)
+HIDDEN_NODES = (100, 100, 100)
 SAVE_HISTORICAL_NETWORK_EVERY = 10000
 game_spec = TicTacToeGameSpec()
 

diff --git a/tests/games/test_connect_4.py b/tests/games/test_connect_4.py
@@ -26,5 +26,4 @@ def test_has_winner(self):
         self.assertEqual(1, has_winner(board_state), 4)
 
     def test_play_game(self):
-        play_game(random_player, random_player)
-
+        play_game(random_player, random_player)
diff --git a/tests/techniques/test_create_positions_set.py b/tests/techniques/test_create_positions_set.py
@@ -0,0 +1,15 @@
+from unittest import TestCase
+
+from games.tic_tac_toe import TicTacToeGameSpec
+from techniques.create_positions_set import create_positions_set
+
+
+class TestCreatePositionsSet(TestCase):
+    def setUp(self):
+        self._game_spec = TicTacToeGameSpec()
+
+    def test_create_positions(self):
+        number_of_positions = 100
+        positions = create_positions_set(self._game_spec, number_of_positions, self._game_spec.get_random_player_func())
+
+        self.assertGreater(len(positions), number_of_positions-1)
diff --git a/tests/techniques/test_train_policy_gradient_historic.py b/tests/techniques/test_train_policy_gradient_historic.py
@@ -0,0 +1,15 @@
+from unittest import TestCase
+
+from games.tic_tac_toe import TicTacToeGameSpec
+from techniques.create_positions_set import create_positions_set
+
+
+class TestCreatePositionsSet(TestCase):
+    def setUp(self):
+        self._game_spec = TicTacToeGameSpec()
+
+    def test_create_positions(self):
+        number_of_positions = 100
+        positions = create_positions_set(self._game_spec, number_of_positions, self._game_spec.get_random_player_func())
+
+        self.assertGreater(len(positions), number_of_positions-1)
diff --git a/tic_tac_toe_5_4/network.py b/tic_tac_toe_5_4/network.py
@@ -1,5 +1,6 @@
 import tensorflow as tf
 
+from common.benchmark import benchmark
 from games.tic_tac_toe_x import TicTacToeXGameSpec
 
 tic_tac_toe_5_4_game_spec = TicTacToeXGameSpec(5, 4)
@@ -75,3 +76,7 @@ def create_convolutional_network():
                                        # convolution_weights_5, convolution_bias_5,
                                        # feed_forward_weights_1, feed_forward_bias_1,
                                        feed_forward_weights_2, feed_forward_bias_2]
+
+file_path = 'convolutional_net_5_4_l_c_4_f_1_other_fresh.p'
+
+benchmark(tic_tac_toe_5_4_game_spec, file_path, create_convolutional_network)
diff --git a/value_network.py b/value_network.py
@@ -15,8 +15,8 @@
     get_deterministic_network_move
 from games.tic_tac_toe import TicTacToeGameSpec
 
-HIDDEN_NODES_VALUE = (120, 100, 80, 60, 40)
-HIDDEN_NODES_REINFORCEMENT = (100, 80, 60, 40)
+HIDDEN_NODES_VALUE = (100, 100, 100)
+HIDDEN_NODES_REINFORCEMENT = (100, 100, 100)
 BATCH_SIZE = 100  # every how many games to do a parameter update?
 LEARN_RATE = 1e-4
 REINFORCEMENT_NETWORK_PATH = 'current_network.p'