-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
109 lines (94 loc) · 3.08 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import numpy as np
from mctsnodes import *
from mcts import MCTS
from tictactoeboard import TicTacToeState
import random
# Score counters updated by judge():
#   mt - games won by the MCTS agent
#   rt - games won by the random agent
#   tt - tied games
#   at - all games counted so far
mt = rt = tt = at = 0

# Side value passed to TicTacToeMove for the random agent's moves
# (the board displays -1 as 'O').
rside = -1
def init():  # Initialization
    """Set up the opening position and let MCTS play the first move.

    The 3x3 board starts with 1 at (1, 0) and (1, 1) and -1 at (0, 1) and
    (1, 2); every other cell is 0.  The position is handed to the MCTS
    search with side 1 to move, and the node chosen by ``best_action(500)``
    supplies the result.

    Returns:
        A ``(state, board)`` pair: the state of the chosen node and that
        state's ``board`` attribute.
    """
    opening = np.array(
        [
            [0, -1, 0],
            [1, 1, -1],
            [0, 0, 0],
        ],
        dtype=float,  # same dtype np.zeros((3, 3)) would produce
    )
    start = TicTacToeState(state=opening, next_to_move=1)
    chosen = MCTS(MCTSNode(state=start, parent=None)).best_action(500)
    first_state = chosen.state
    return first_state, first_state.board
def graphics(board):  # Display board
    """Print a 3x3 board as a text grid on standard output.

    Each row is printed on its own line, prefixed by its row index and a
    ``|`` separator.  Cells equal to 0 are shown as ``_``, 1 as ``X`` and
    -1 as ``O``; any other value prints nothing for that cell.  A line of
    underscores closes the grid.

    Args:
        board: Anything indexable as ``board[i][j]`` for ``i, j`` in 0..2
            (nested lists or a 2-D array).
    """
    for i in range(3):
        print("")
        print("{0:3}".format(i).center(8) + "|", end='')
        for j in range(3):
            # Read from the argument, not from a module-level variable, so
            # the function draws whatever board the caller passes in.
            cell = board[i][j]
            if cell == 0:
                print('_'.center(8), end='')
            elif cell == 1:
                print('X'.center(8), end='')
            elif cell == -1:
                print('O'.center(8), end='')
    print("")
    print("______________________________")
def agent_random(board):  # Random player, random moves
    """Choose a random empty cell of a 3x3 board.

    A cell is empty when its value equals 0.  Every empty cell is equally
    likely to be chosen.

    Args:
        board: Anything indexable as ``board[i][j]`` for ``i, j`` in 0..2.

    Returns:
        A ``(row, column)`` tuple of the chosen empty cell.

    Raises:
        ValueError: If the board has no empty cell (sampling until an empty
            cell turns up would never finish in that case).
    """
    empty_cells = [
        (i, j)
        for i in range(3)
        for j in range(3)
        if board[i][j] == 0
    ]
    if not empty_cells:
        raise ValueError("no empty cell left on the board")
    return random.choice(empty_cells)
def get_action(state, max_attempts=100):  # Get random player's move
    """Return a legal move for the random agent on ``state``.

    A cell is drawn with ``agent_random(state.board)`` and wrapped in a
    ``TicTacToeMove`` for side ``rside``.  If building the move fails or
    ``state.move_legal`` rejects it, "invalid move" is printed and another
    attempt is made.

    Args:
        state: Game state exposing ``board`` and ``move_legal(move)``.
        max_attempts: Upper bound on the number of tries before giving up.

    Returns:
        The first move accepted by ``state.move_legal``.

    Raises:
        RuntimeError: If no legal move was produced within ``max_attempts``
            tries; the last underlying error, if any, is attached as the
            cause so that it is not silently lost.
    """
    last_error = None
    for _ in range(max_attempts):
        try:
            x, y = agent_random(state.board)
            move = TicTacToeMove(x, y, rside)
        except Exception as err:
            # Keep the error for the final report instead of discarding it.
            last_error = err
            move = None
        if move is not None and state.move_legal(move):
            return move
        print("invalid move")
    raise RuntimeError(
        "no legal move found after {} attempts".format(max_attempts)
    ) from last_error
def judge(state):  # Determine win/lose
    """Report whether the game has ended and record the outcome.

    When ``state.game_over()`` is true, the value of ``state.game_result``
    selects the message and the counter to bump:

    * ``1.0``  - prints "MCTS Win!" and increments ``mt``
    * ``0.0``  - prints "Tie!" and increments ``tt``
    * ``-1.0`` - prints "Random Agent Win!" and increments ``rt``

    Each of those cases also increments the total ``at``.  Any other result
    leaves the counters untouched.

    Returns:
        1 if the game is over, -1 otherwise.
    """
    global mt, rt, tt, at

    if not state.game_over():
        return -1

    outcome = state.game_result
    if outcome == 1.0:
        print("MCTS Win!")
        mt, at = mt + 1, at + 1
    elif outcome == 0.0:
        print("Tie!")
        tt, at = tt + 1, at + 1
    elif outcome == -1.0:
        print("Random Agent Win!")
        rt, at = rt + 1, at + 1
    return 1
for _ in range(100):  # Play 100 games
    print(at + 1, 'game :')
    # Set up the opening position once per game.  Doing this inside the
    # turn loop would throw away every game that is not finished after a
    # single exchange of moves and skew the statistics.
    new_state, new_board = init()
    while True:
        # Random agent's turn.
        move1 = get_action(new_state)
        new_state = new_state.move(move1)
        new_board = new_state.board
        # The random agent's move can end the game as well; check before
        # asking MCTS to search from a finished position.
        if judge(new_state) == 1:
            graphics(new_board)
            break

        # MCTS agent's turn (side 1).
        board_state = TicTacToeState(state=new_board, next_to_move=1)
        root = MCTSNode(state=board_state, parent=None)
        mcts = MCTS(root)
        # Return best moves after 500 simulations
        best_node = mcts.best_action(500)
        new_state = best_node.state
        new_board = new_state.board
        # judge() updates the score counters, so call it once per position.
        if judge(new_state) == 1:
            graphics(new_board)
            break

print('Total Games Played: ', at)
if at:
    # Rates are only defined once at least one game has been counted.
    print('MCTS Win Rate: ', mt / at * 100, '%')
    print('Tie Rate: ', tt / at * 100, '%')
    print('Random Agent Win Rate: ', rt / at * 100, '%')