-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 8d31958
Showing
28 changed files
with
913 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
<?xml version="1.0" encoding="UTF-8"?> | ||
<module type="PYTHON_MODULE" version="4"> | ||
<component name="NewModuleRootManager" inherit-compiler-output="true"> | ||
<exclude-output /> | ||
<content url="file://$MODULE_DIR$"> | ||
<sourceFolder url="file://$MODULE_DIR$/py" isTestSource="false" /> | ||
</content> | ||
<orderEntry type="inheritedJdk" /> | ||
<orderEntry type="sourceFolder" forTests="false" /> | ||
</component> | ||
</module> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
# AI 2048 | ||
|
||
[2048](https://gabrielecirulli.github.io/2048/) is a well-known game. If you've never played it, I suggest playing it a few times to learn its rules. | ||
|
||
This project builds an AI that plays the game 2048. To be honest, this is my first attempt at making an AI for a game. (It is also my first practical deep-learning project that has succeeded.) | ||
|
||
In fact, it is not hard at all to make an AI for this game. (Maybe that's why I succeeded 🙂 ) So I think this project may be suitable for beginners, and that's why I am sharing it here. | ||
|
||
|
||
# Project structure | ||
|
||
The main entry points of this project are the Python scripts in the directory `./py`. All of these scripts should be run from that directory (i.e. with `./py` as the working directory). | ||
- `./py/game_console.py` - a console-based game 2048 | ||
- `./py/game_random.py` - the game played by an AI that acts randomly (well, maybe that is not really "intelligence") | ||
- `./py/game_aix-corex.py` - a set of AIs built step by step, each one stronger than the last. | ||
- `./py/game_ai2-core3_extreme.py` - this is almost the best AI in this project, but it takes a lot of time to compute. | ||
- `./py/game_assist2_core3_extreme.py` - a script that lets the best AI assist you while you play 2048. | ||
- `./py/train_corex.py` - a set of scripts that train the neural network "core" used by the AIs. | ||
- `./py/match.py` - a script that tests all the AIs and compares how strong they are. | ||
|
||
The directory `./graph` stores the trained neural network graphs of the cores. Of course, you can move it somewhere else and train your own core. | ||
|
||
The directory `./.idea` and the file `./AI2048.iml` are project files for **IntelliJ IDEA**. If you use it, you can import this project directly. | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
import tensorflow as tf | ||
|
||
from nerve.FullConnectedNetwork import FullConnectedNetwork | ||
from nerve.NerveCore import NerveCore | ||
|
||
|
||
class EvaluateCore(NerveCore):
    """Board-evaluation network built on top of ``NerveCore``.

    The graph takes a batch of 4x4 boards and produces one scalar score per
    board. It also contains the squared-error loss and the Adam training op
    that ``run_train`` drives.
    """

    def __init__(self, path=None, name=None):
        """Initialise the base core, then fetch graph handles by name.

        ``path`` and ``name`` are forwarded unchanged to ``NerveCore``.
        NOTE(review): this relies on ``NerveCore.__init__`` leaving
        ``self.graph`` populated (loaded or built via ``create_graph``) -
        confirm against NerveCore.
        """
        super().__init__(path, name)
        with self.graph.as_default() as graph:
            tensor = graph.get_tensor_by_name

            # inference handles
            self.board = tensor("board:0")
            self.score = tensor("score:0")

            # training handles
            self.real_score = tensor("real_score:0")
            self.loss = tensor("loss:0")
            self.ave_loss = tensor("ave_loss:0")
            self.learning_rate = tensor("learning_rate:0")
            self.train = graph.get_operation_by_name("train")

    def create_graph(self):
        """Build the network, the loss and the optimiser inside ``self.graph``.

        Ops are created in a fixed order so that automatically generated op
        names stay stable; every tensor looked up in ``__init__`` is given an
        explicit name here.
        """
        with self.graph.as_default():
            # network input: a batch of boards
            board = tf.placeholder(tf.float32, shape=[None, 4, 4], name="board")  # [-1,4,4]

            # cut every board into its 8 length-4 lines, 4 along each board axis
            lines = [
                *tf.unstack(board, axis=-1),  # 4*[-1,4]
                *tf.unstack(board, axis=-2),  # 4*[-1,4]
            ]  # 8*[-1,4]

            # the same FullConnectedNetwork instance analyses every line
            line_analyser = FullConnectedNetwork([4, 16, 16, 16], tf.nn.relu)
            per_line = tf.stack([line_analyser.apply(line) for line in lines], -2)  # [-1,8,16]
            flattened = tf.reshape(per_line, [-1, 8 * 16])  # [-1,8*16]

            # a second network combines the 8 line analyses
            final_analyser = FullConnectedNetwork([8 * 16, 64, 32], tf.nn.relu)
            final = final_analyser.apply(flattened)  # [-1,32]

            # the score of a board is the mean over the last axis of `final`
            score = tf.reduce_mean(final, -1, name="score")  # [-1]

            # training: squared error against the externally supplied target
            real_score = tf.placeholder(tf.float32, name="real_score")  # [-1]
            loss = tf.square(score - real_score, name="loss")  # [-1]
            ave_loss = tf.reduce_mean(loss, name="ave_loss")

            # non-trainable variable so that run_train can override it via feed_dict
            learning_rate = tf.Variable(0.005, trainable=False, name="learning_rate")
            tf.train.AdamOptimizer(learning_rate).minimize(ave_loss, name="train")

            self.sess.run(tf.global_variables_initializer())

    def run_evaluate(self, val_board):
        """Return the network score for every board in ``val_board``."""
        return self.sess.run(fetches=self.score, feed_dict={self.board: val_board})

    def run_train(self, val_board, val_real_score, val_learning_rate=None):
        """Run one optimiser step on a batch.

        ``val_learning_rate`` overrides the stored learning rate for this
        call only when it is not ``None``. Returns the session results for
        ``[train, loss, score]`` in that order.
        """
        feeds = {self.board: val_board, self.real_score: val_real_score}
        if val_learning_rate is not None:
            feeds[self.learning_rate] = val_learning_rate
        return self.sess.run(
            fetches=[self.train, self.loss, self.score],
            feed_dict=feeds,
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
from abc import ABCMeta, abstractmethod | ||
|
||
from game.Action import Action | ||
from game.Pop import Pop | ||
from game.utils import apply_action, apply_pop | ||
|
||
|
||
class Tree(metaclass=ABCMeta):
    """Base node of the look-ahead search tree over 2048 boards.

    A node owns one board. Leaves (``branches is None``) receive a
    ``board_score`` from an external evaluator through the gather/dispatch
    pair below; inner nodes combine their children in ``compute_tree_score``.

    The class uses ``metaclass=ABCMeta`` (the Python 3 spelling), so the
    ``@abstractmethod`` declarations are actually enforced: a subclass must
    implement ``grow_next_branches`` and ``compute_tree_score`` before it can
    be instantiated.
    """

    def __init__(self, board):
        self.board = board
        # index of this node's board inside the list built by the last gather
        self.board_id = None
        # evaluator result for this board; None until dispatched
        self.board_score = None

        # factor applied to the growth budget when descending into this node
        self.weight = None

        # child nodes; None while this node is still a leaf
        self.branches = None

    def evaluate_board_score_gather(self, pending: list):
        """Append the board of every unscored leaf to ``pending``.

        Each such leaf remembers its index in ``pending`` in ``board_id`` so
        the matching result can be handed back by
        ``evaluate_board_score_dispatch``. Returns ``pending``.
        """
        if self.branches is not None:
            for branch in self.branches:
                branch.evaluate_board_score_gather(pending)
        elif self.board_score is None:
            self.board_id = len(pending)
            pending.append(self.board)
        return pending

    def evaluate_board_score_dispatch(self, result):
        """Hand ``result[board_id]`` to every leaf registered by the last gather."""
        if self.branches is not None:
            for branch in self.branches:
                branch.evaluate_board_score_dispatch(result)
        elif self.board_id is not None:
            self.board_score = result[self.board_id]
            # clear the slot so a later dispatch cannot reuse a stale index
            self.board_id = None

    def grow_tree(self, variant):
        """Expand the tree recursively while the budget ``variant`` allows.

        The budget passed to each child is ``variant * child.weight``;
        recursion stops at a node whose ``grow_next_branches`` returns a
        falsy value.
        """
        if self.grow_next_branches(variant):
            for branch in self.branches:
                branch.grow_tree(variant * branch.weight)

    @abstractmethod
    def grow_next_branches(self, variant: float = None) -> bool:
        """Create ``self.branches`` if allowed; return whether the node has branches."""
        pass

    @abstractmethod
    def compute_tree_score(self):
        """Return the score of the subtree rooted at this node."""
        pass
|
||
|
||
class ActionTree(Tree):
    """Node holding the board right after a player move.

    Its children are ``PopTree`` nodes, one per (empty cell, new tile value)
    combination that the game may pop next.
    """
    # Kept for fidelity with the original; Python 3 ignores ``__metaclass__``.
    __metaclass__ = ABCMeta

    def __init__(self, board, action: Action):
        super().__init__(board)
        # the move that produced ``board``
        self.action = action

    def grow_next_branches(self, variant: float = None) -> bool:
        """Create one ``PopTree`` child per possible pop.

        Returns ``True`` when the node has branches afterwards (already
        grown, or grown now) and ``False`` when ``variant`` is given and is
        smaller than the number of branches that would be created.
        """
        if self.branches is not None:
            return True

        # empty cells in row-major order
        empty_cells = [
            (row, column)
            for row in range(4)
            for column in range(4)
            if self.board[row][column] == 0
        ]

        branch_count = 2 * len(empty_cells)
        if variant is not None and variant < branch_count:
            return False

        # NOTE(review): each cell contributes (0.875 + 0.125) / branch_count,
        # so the children's probabilities sum to 0.5 rather than 1 because
        # branch_count already counts both tile values - confirm this scaling
        # is intended.
        children = []
        for cell in empty_cells:
            for value, rate in ((1, 0.875), (2, 0.125)):
                pop = Pop(cell, value)
                popped_board = apply_pop(self.board, pop)
                children.append(PopTree(popped_board, rate / branch_count, pop))
        self.branches = children
        return True

    def compute_tree_score(self):
        """Return the leaf's own score, or the possibility-weighted sum of the children."""
        if self.branches is None:
            return self.board_score
        return sum(
            branch.compute_tree_score() * branch.possibility
            for branch in self.branches
        )

    def pick_branch(self, pop: Pop):
        """Return the child matching ``pop``.

        When the node has not been grown, a fresh ``PopTree`` with
        possibility 1 is built instead. Returns ``None`` if the node is grown
        but no child carries an equal pop.
        """
        if self.branches is None:
            return PopTree(apply_pop(self.board, pop), 1, pop)
        for branch in self.branches:
            if branch.pop == pop:
                return branch
        return None
|
||
|
||
class PopTree(Tree):
    """Node holding the board right after a tile has popped.

    Its children are ``ActionTree`` nodes, one per action that changes the
    board.
    """

    def __init__(self, board, possibility: float, pop: Pop):
        super().__init__(board)
        # the pop that produced ``board`` and the probability assigned to it
        self.pop = pop
        self.possibility = possibility
        # the growth budget shrinks by the same probability
        self.weight = possibility

    def grow_next_branches(self, variant: float = None) -> bool:
        """Create one ``ActionTree`` child per action that changes the board.

        Returns ``True`` when the node has branches afterwards (possibly an
        empty list if no action changes the board) and ``False`` when
        ``variant`` is given and is smaller than the number of candidates.
        """
        if self.branches is not None:
            return True

        candidates = []
        for action in Action:
            moved_board, changed = apply_action(self.board, action)
            if changed:
                candidates.append(ActionTree(moved_board, action))

        candidate_count = len(candidates)
        if variant is not None and variant < candidate_count:
            return False

        # every candidate move gets an equal share of the growth budget
        for child in candidates:
            child.weight = 1.0 / candidate_count
        self.branches = candidates
        return True

    def compute_tree_score(self):
        """Return the leaf's own score, or the best child score (0 if there are no children)."""
        if self.branches is None:
            return self.board_score
        return max(
            (branch.compute_tree_score() for branch in self.branches),
            default=0,
        )

    def pick_branch(self, action: Action):
        """Return the child reached by ``action``.

        When the node has not been grown, a fresh ``ActionTree`` is built
        from the result of ``apply_action`` without checking whether the
        board changed. Returns ``None`` if the node is grown but no child
        carries ``action``.
        """
        if self.branches is None:
            moved_board, _ = apply_action(self.board, action)
            return ActionTree(moved_board, action)
        for branch in self.branches:
            if branch.action == action:
                return branch
        return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
import numpy as np | ||
|
||
from ai.EvaluateCore import EvaluateCore | ||
from game.Game import Game | ||
|
||
|
||
def train_core(core: EvaluateCore, game: Game, train_count, group_size=None, learning_rate=0.005):
    """Train ``core`` on ``train_count`` recorded games, reporting per group.

    For every game the recorded boards are used as one batch; the target of
    each board is the number of recorded boards that follow it
    (``len - 1, ..., 1, 0``). After each group of games a summary line is
    printed and the graph is saved.

    :param core: evaluator providing ``run_train`` and ``save_graph``.
    :param game: game providing ``begin_loop_recorded`` and ``round``.
    :param train_count: total number of games to play and train on.
    :param group_size: games per report/save; defaults to ``train_count``.
    :param learning_rate: learning rate forwarded to ``core.run_train``.
    :raises ValueError: if training is requested with a non-positive
        ``group_size`` (previously this could never make progress).
    """
    if group_size is None:
        group_size = train_count
    if train_count > 0 and group_size <= 0:
        raise ValueError("group_size must be positive")

    train_id = 0
    while train_id < train_count:
        item_id = 0
        sum_distance = 0
        sum_round = 0
        while train_id < train_count and item_id < group_size:
            val_board = game.begin_loop_recorded()
            val_real_score = np.arange(0, len(val_board), 1.0)[::-1]
            _, val_loss, _ = core.run_train(val_board, val_real_score, learning_rate)

            # sqrt of the squared error = absolute distance to the target
            sum_distance += np.average(np.sqrt(val_loss))
            sum_round += game.round
            train_id += 1
            item_id += 1

        # Use the number of games actually played in this group (item_id):
        # the last group may be shorter than group_size, and dividing by
        # group_size would understate its averages and mislabel its range.
        print("train[", train_id - item_id, ":", train_id, "]", "\t",
              "ave_distance=", sum_distance / item_id, "\t",
              "ave_max_round", sum_round / item_id)
        core.save_graph()
|
||
|
||
def match_core(matches):
    """Play every listed game repeatedly and print the average round reached.

    ``matches`` is an iterable of ``(name, game, count)`` tuples. Each game
    is run ``count`` times through ``game.begin_loop()`` and ``game.round``
    is read after every run. Per-run lines, a per-game average and a final
    summary are printed; nothing is returned.
    """
    summary = []
    for name, game, count in matches:
        print()
        print(name, ":")

        rounds = []
        for index in range(count):
            game.begin_loop()
            reached = game.round
            print(name, "[", index, "]", " max_round =", reached)
            rounds.append(reached)

        average = sum(rounds) / count
        print(name, " ave_max_round =", average)
        summary.append((name, average))

    print()
    print()
    print("Summary:")
    for name, average in summary:
        print(name, "\tave_max_round =", average)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
from enum import Enum | ||
|
||
|
||
class Action(Enum):
    """The four moves of the game; each member's value is the arrow character for its direction."""
    # Definition order is also the order produced by ``for action in Action``.
    UP = "↑"
    DOWN = "↓"
    RIGHT = "→"
    LEFT = "←"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
from abc import ABCMeta, abstractmethod | ||
|
||
import numpy as np | ||
|
||
from game.Action import Action | ||
from game.Pop import Pop | ||
from game.utils import get_action_map, apply_pop | ||
|
||
|
||
class Game:
    """Skeleton of a 2048 game loop; subclasses decide which action to take.

    One round consists of applying the action chosen by ``on_get_action``,
    popping a new tile, and recomputing ``action_map`` for the new board.
    The game is over when ``action_map`` is empty.
    """
    # NOTE: Python 3 ignores ``__metaclass__``, so the @abstractmethod markers
    # below are documentation only. This is left unchanged on purpose:
    # enforcing them could break existing subclasses that do not override
    # the ``on_dead`` hook.
    __metaclass__ = ABCMeta

    def __init__(self):
        self.board = None
        # value returned by get_action_map(board); indexed by action to get
        # the resulting board, and empty when the game is over
        self.action_map = None
        # number of rounds completed without dying
        self.round = 0

        self.last_action = 0
        self.last_pop = 0

    def begin_loop(self):
        """Play one full game, from a fresh board until the game is over."""
        self.init_game()
        while self.next_round():
            pass

    def begin_loop_recorded(self):
        """Play one full game and return the list of boards it went through.

        The list holds the board before every round, followed by the final
        board reached by the round that ended the game.
        """
        self.init_game()
        history = [self.board]
        while self.next_round():
            history.append(self.board)
        history.append(self.board)
        return history

    def on_init_board(self):
        """Create an empty 4x4 integer board and pop the first two tiles."""
        # ``np.int`` was only an alias of the builtin ``int`` and has been
        # removed from NumPy, so the builtin is used directly.
        self.board = np.zeros([4, 4], int)
        self.apply_pop()
        self.apply_pop()

    def init_game(self):
        """Reset the round counter, the board and the action map."""
        self.round = 0
        self.on_init_board()
        self.action_map = get_action_map(self.board)

    def next_round(self):
        """Play one round; return ``False`` if the game is over afterwards.

        ``round`` is only incremented when the game continues.
        """
        self.apply_action()
        self.apply_pop()

        self.action_map = get_action_map(self.board)
        if len(self.action_map) == 0:
            self.on_dead()
            return False

        self.round += 1
        return True

    @abstractmethod
    def on_dead(self):
        """Hook called once when no action is left."""
        pass

    def apply_action(self):
        """Ask for an action and replace the board with its precomputed result."""
        action = self.on_get_action()
        self.board = self.action_map[action]
        self.last_action = action

    @abstractmethod
    def on_get_action(self) -> Action:
        """Return the action to play; it is used to index ``self.action_map``."""
        pass

    def apply_pop(self):
        """Ask for a pop and apply it to the board."""
        pop = self.on_get_pop()
        self.board = apply_pop(self.board, pop)
        self.last_pop = pop

    def on_get_pop(self) -> Pop:
        """Pick a random empty cell and a tile value for the next pop.

        The cell is chosen uniformly among the empty cells (scanned in
        row-major order); the value is 2 with probability 0.125 and 1
        otherwise.
        """
        empty_places = [
            (row, column)
            for row in range(4)
            for column in range(4)
            if self.board[row][column] == 0
        ]
        # the cell is drawn before the value, as in the original, so the
        # order of random draws stays unchanged
        index = int(np.random.uniform() * len(empty_places))
        position = empty_places[index]

        value = 2 if np.random.uniform() < 0.125 else 1

        return Pop(position, value)
Oops, something went wrong.