-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtictactoe.py
73 lines (59 loc) · 2.42 KB
/
tictactoe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import numpy as np
class TicTacToe:
def __init__(self):
self.row_count = 3
self.column_count = 3
self.action_size = self.row_count * self.column_count
def get_initial_state(self):
return np.zeros((self.row_count, self.column_count))
def get_next_state(self, state, action, player):
row = action // self.column_count
column = action % self.column_count
state[row, column] = player
return state
def get_valid_moves(self, state):
return (state.reshape(-1) == 0).astype(np.uint8)
def check_win(self, state, action):
if action is None:
return False
row = action // self.column_count
column = action % self.column_count
player = state[row, column]
return (
np.sum(state[row, :]) == player * self.column_count
or np.sum(state[:, column]) == player * self.row_count
or np.sum(np.diag(state)) == player * self.row_count
or np.sum(np.diag(np.flip(state, axis=0))) == player * self.row_count
)
def get_value_and_terminated(self, state, action):
"""
Determines the value and termination status of a game state after a given action.
Args:
state (np.ndarray): The current state of the game board.
action (int): The action to be evaluated.
Returns:
tuple: A tuple containing:
- int: The value of the state (1 if the action results in a win, 0 otherwise).
- bool: True if the game is terminated (either win or draw), False otherwise.
"""
if self.check_win(state, action):
return 1, True
if np.sum(self.get_valid_moves(state)) == 0:
return 0, True
return 0, False
def get_opponent(self, player):
return -player
def get_opponent_value(self, value):
return -value
def change_perspective(self, state, player):
return state * player
def get_encoded_state(self, state):
"""
Encodes the 2D game state into a binary representation suitable for neural network input:
a 3D binary tensor with shape (3, height, width).
"""
return np.stack((
state == -1, # all squares where player -1 has played
state == 0, # all empty squares
state == 1 # all squares where player 1 has played
)).astype(np.float32)