-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtic-tac-toe_montecarlo.py
executable file
·93 lines (83 loc) · 2.83 KB
/
tic-tac-toe_montecarlo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
"""
Name: Monte Carlo Tic-Tac-Toe
Author: jraleman
Year: 2014
"""
try:
import poc_ttt_gui
import poc_ttt_provided as provided
except ImportError:
import assets.poc_ttt_gui as poc_ttt_gui
import assets.poc_ttt_provided as provided
import random
# Constants for Monte Carlo simulator
# Number of trials handed to provided.play_game / poc_ttt_gui.run_gui below
# (presumably forwarded to mc_move as its `trials` argument -- confirm in
# poc_ttt_provided).
NTRIALS = 10
# Score adjustment mc_update_scores applies to squares held by the player
# being scored.
MCMATCH = 1.0
# Score adjustment mc_update_scores applies to the other squares it scores.
MCOTHER = 1.0
def mc_trial(board, player):
    """
    Play one random game to completion on ``board``, starting with ``player``.

    The board is modified in place: for as long as ``board.check_win()``
    reports no result, a randomly chosen empty square is filled for the
    current player and the turn passes to the other player. Nothing is
    returned; callers inspect ``board`` afterwards.
    """
    while board.check_win() is None:
        # Ask for the empty squares once per turn and pick one uniformly.
        row, col = random.choice(board.get_empty_squares())
        board.move(row, col, player)
        player = provided.switch_player(player)
def mc_update_scores(scores, board, player):
    """
    Fold the result of one finished trial into the running ``scores`` grid.

    ``scores`` is a list of lists indexed ``[row][col]`` and is updated in
    place. ``board`` is the trial board after the game was played out and
    ``player`` is the player the scores are being kept for.

    Unfinished games and draws leave ``scores`` untouched. Otherwise the
    winner's squares are rewarded and the loser's squares are penalised,
    using MCMATCH as the weight for ``player``'s own squares and MCOTHER as
    the weight for the opponent's squares. Squares held by neither player
    (still empty when the game ended) carry no information about the result
    and are left unchanged.
    """
    winner = board.check_win()
    if winner is None or winner == provided.DRAW:
        return

    opponent = provided.switch_player(player)
    # +1 when ``player`` won the trial, -1 when the opponent did.
    sign = 1 if winner == player else -1
    dim = board.get_dim()
    for row in range(dim):
        for col in range(dim):
            owner = board.square(row, col)
            if owner == player:
                scores[row][col] += sign * MCMATCH
            elif owner == opponent:
                scores[row][col] -= sign * MCOTHER
def mc_move(board, player, trials):
    """
    Pick the next move for ``player`` by Monte Carlo simulation.

    Runs ``trials`` random playouts, each on its own ``board.clone()``,
    accumulates the outcome of every playout into a score grid indexed
    ``[row][col]``, and returns whatever ``get_best_move`` selects from that
    grid for the current ``board``: a move in the form of a tuple, or
    ``None`` when the board has no empty square.

    ``trials`` is the integer number of playouts. Zero or a negative count
    runs no playouts, so every score stays 0 and the move is chosen at random
    among the empty squares.
    """
    dim = board.get_dim()
    scores = [[0] * dim for _ in range(dim)]
    for _ in range(trials):
        trial_board = board.clone()
        mc_trial(trial_board, player)
        mc_update_scores(scores, trial_board, player)
    return get_best_move(board, scores)
def get_best_move(board, scores):
    """
    Choose the next move: an empty square with the highest score.

    ``scores`` is indexed ``[row][col]``. Only the squares returned by
    ``board.get_empty_squares()`` are considered, and ties for the best score
    are broken at random. Returns the chosen square, or ``None`` when the
    board has no empty squares.
    """
    empty_squares = board.get_empty_squares()
    if not empty_squares:
        # No available moves.
        return None
    best = max(scores[square[0]][square[1]] for square in empty_squares)
    candidates = [
        square
        for square in empty_squares
        if scores[square[0]][square[1]] == best
    ]
    return random.choice(candidates)
# Script entry: both calls run as plain module-level statements.
# Hand mc_move and the trial count to the provided game driver.
# NOTE(review): the meaning of the trailing False flag is defined in
# poc_ttt_provided -- confirm there.
provided.play_game(mc_move, NTRIALS, False)
# Launch the GUI with mc_move as the move function.
# NOTE(review): 3 is presumably the board dimension and provided.PLAYERX the
# side the machine plays -- confirm against poc_ttt_gui.run_gui.
poc_ttt_gui.run_gui(3, provided.PLAYERX, mc_move, NTRIALS, False)