-
Notifications
You must be signed in to change notification settings - Fork 0
MuZero and AlphaZero
Skirlax edited this page Mar 6, 2024
·
1 revision
Maybe you are in a situation where you want to evaluate your trained network against a player that is not defined in the library. In that case, you can simply create your own player and use it with the library as long as you implement the required features.
class Player(ABC):
    """
    Abstract base class for players.

    To create a custom player, extend this class and implement every
    abstract method: ``__init__``, ``choose_move`` and
    ``make_fresh_instance``. A subclass that leaves any of them out cannot
    be instantiated.
    """

    @abstractmethod
    def __init__(self, game_manager: AlphaZeroGame, **kwargs):
        """
        Build the player.

        :param game_manager: The game manager the player works with.
        :param kwargs: Player-specific keyword arguments. Subclasses can
            pass this dict to ``init_kwargs`` to expose each entry as an
            attribute.
        """
        pass

    @abstractmethod
    def choose_move(self, board: np.ndarray, **kwargs) -> tuple[int, int]:
        """
        Choose the move to play on ``board``.

        :param board: The current board.
        :param kwargs: Extra, player-specific arguments for this call.
        :return: The chosen move (annotated as a pair of ints; presumably
            board coordinates - confirm against the game manager).
        """
        pass

    @abstractmethod
    def make_fresh_instance(self):
        """
        Return a new instance of this player.

        NOTE(review): what "fresh" has to reset is decided by each
        subclass; this base class does not define it.
        """
        pass

    def init_kwargs(self, kwargs: dict):
        """
        Set every entry of ``kwargs`` as an attribute on this player.

        :param kwargs: Mapping of attribute name to value. An existing
            attribute with the same name is overwritten.
        """
        for key, value in kwargs.items():
            setattr(self, key, value)
To define a custom player, simply extend this class and implement all the required methods.
The constructor always has to take the game manager and **kwargs. You can use **kwargs if your player needs to take special arguments. In that case, call the init_kwargs method in the constructor and pass it the dictionary containing the keyword arguments that were supplied to the constructor.
class NetPlayer(Player):
    """
    Player that chooses its moves with a Monte Carlo tree search.

    ``choose_move`` reads ``self.network`` and
    ``self.monte_carlo_tree_search``. The constructor never assigns them
    by name; they are expected to be supplied as keyword arguments, which
    ``init_kwargs`` turns into attributes.
    """

    def __init__(self, game_manager: TicTacToeGameManager, **kwargs):
        """
        :param game_manager: Game manager used to select and convert moves.
        :param kwargs: Extra attributes for the player (expected to include
            ``network`` and ``monte_carlo_tree_search``). Stored as
            ``self.kwargs`` and also set as individual attributes.
        """
        self.game_manager = game_manager
        self.name = self.__class__.__name__
        self.kwargs = kwargs
        self.init_kwargs(kwargs)

    def choose_move(self, board: np.ndarray, **kwargs) -> tuple[int, int]:
        """
        Run a tree search on ``board`` and return the selected move.

        :param board: The current board.
        :param kwargs: Must contain ``current_player``, ``device`` and
            ``tau``. May contain ``unravel`` (defaults to ``True``).
        :return: The move converted by ``game_manager.network_to_board``
            when ``unravel`` is truthy, otherwise the move exactly as
            returned by ``game_manager.select_move``.
        :raises KeyError: If any of the required keyword arguments is
            missing.
        """
        try:
            current_player = kwargs["current_player"]
            device = kwargs["device"]
            tau = kwargs["tau"]
        except KeyError as err:
            # Chain the original error so the traceback names the missing key.
            raise KeyError("Missing keyword argument. Please supply kwargs: current_player, device, tau") from err
        pi, _ = self.monte_carlo_tree_search.search(self.network, board, current_player, device, tau=tau)
        move = self.game_manager.select_move(pi)
        # NOTE(review): presumably clears the search tree root so the next
        # search starts from scratch - confirm against the MCTS class.
        self.monte_carlo_tree_search.step_root(None)
        unravel = kwargs.get("unravel", True)
        return self.game_manager.network_to_board(move) if unravel else move

    def make_fresh_instance(self):
        """
        Return a new ``NetPlayer`` with a fresh game manager.

        Without this method the class stays abstract and cannot be
        instantiated. The stored constructor kwargs are passed on as they
        are, so the objects they hold are shared with the new instance.
        """
        return NetPlayer(self.game_manager.make_fresh_instance(), **self.kwargs)
class HumanPlayer(Player):
    """
    Player whose moves are entered by a human through the game manager.
    """

    def __init__(self, game_manager: TicTacToeGameManager, **kwargs):
        """
        :param game_manager: Game manager that collects the human input.
        :param kwargs: Extra attributes for the player. Stored as
            ``self.kwargs`` and also set as individual attributes.
        """
        self.game_manager = game_manager
        self.kwargs = kwargs
        self.name = type(self).__name__
        # Runs last so entries in kwargs can override the attributes above.
        self.init_kwargs(kwargs)

    def choose_move(self, board: np.ndarray, **kwargs) -> tuple[int, int]:
        """
        Ask the game manager for the human's move on ``board``.

        :param board: The current board.
        :param kwargs: Unused by this player.
        :return: Whatever ``game_manager.get_human_input`` returns.
        :raises RuntimeError: If the game manager is in headless mode.
        """
        if not self.game_manager.headless:
            return self.game_manager.get_human_input(board)
        raise RuntimeError("Cannot play with a human player in headless mode.")

    def make_fresh_instance(self):
        """
        Return a new ``HumanPlayer`` built on a fresh game manager and the
        keyword arguments this player was constructed with.
        """
        fresh_manager = self.game_manager.make_fresh_instance()
        return HumanPlayer(fresh_manager, **self.kwargs)