Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
ZhengKeli committed Nov 30, 2017
0 parents commit 8d31958
Show file tree
Hide file tree
Showing 28 changed files with 913 additions and 0 deletions.
11 changes: 11 additions & 0 deletions AI2048.iml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/py" isTestSource="false" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
25 changes: 25 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# AI 2048

[2048](https://gabrielecirulli.github.io/2048/) is a well-known game. If you have never played it, I suggest playing it a few times to learn its rules.

This project builds an AI that plays the game 2048. To be honest, this is my first attempt at making an AI for a game. (It is also my first practical deep-learning project that has succeeded.)

In fact, it is not hard at all to make the AI for this game. (Maybe that's why I can succeed 🙂 ) So I think this project may be suitable for beginners and that's why I share this project here.


# Project structure

The main entry points of this project are the Python scripts in the directory `./py`. All of these scripts should be run from that directory (i.e. the working directory is `./py`).
- `./py/game_console.py` - a console-based version of the game 2048.
- `./py/game_random.py` - the game played by an AI that acts randomly (well, maybe that is not really "intelligence").
- `./py/game_aix-corex.py` - a set of AIs that are built step by step, each one stronger than the last.
- `./py/game_ai2-core3_extreme.py` - this is almost the best AI in this project, but it takes a lot of time to compute.
- `./py/game_assist2_core3_extreme.py` - a script that lets the best AI assist you while you play 2048.
- `./py/train_corex.py` - a set of scripts that train the neural network "core" used by the AIs.
- `./py/match.py` - a script that tests all the AIs and compares how strong they are.

The directory `./graph` stores the trained neural network graphs of the cores. Of course, you can move it somewhere else and train your own core.

The directory `./.idea` and file `./AI2048.iml` are project files of **Intellij IDEA**. If you are using it, you can import this project.


62 changes: 62 additions & 0 deletions py/ai/EvaluateCore.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import tensorflow as tf

from nerve.FullConnectedNetwork import FullConnectedNetwork
from nerve.NerveCore import NerveCore


class EvaluateCore(NerveCore):
    """Network that maps a batch of 4x4 boards to one scalar score per board.

    The graph endpoints are looked up by name after the base-class
    constructor has run, so ``create_graph`` must register every tensor and
    operation under exactly the names used in ``__init__``.
    NOTE(review): presumably ``NerveCore.__init__`` either loads a saved
    graph or calls ``create_graph`` -- confirm against NerveCore.
    """

    def __init__(self, path=None, name=None):
        super().__init__(path, name)
        with self.graph.as_default() as graph:
            lookup = graph.get_tensor_by_name

            # Inference endpoints.
            self.board = lookup("board:0")
            self.score = lookup("score:0")
            self.real_score = lookup("real_score:0")

            # Training endpoints.
            self.loss = lookup("loss:0")
            self.ave_loss = lookup("ave_loss:0")
            self.learning_rate = lookup("learning_rate:0")
            self.train = graph.get_operation_by_name("train")

    def create_graph(self):
        """Build the evaluation network and its training ops, then initialise variables."""
        with self.graph.as_default():
            # --- network ---------------------------------------------------
            board = tf.placeholder(tf.float32, shape=[None, 4, 4], name="board")  # [-1,4,4]

            # Slice the board along each of its two spatial axes: 4 + 4
            # vectors of shape [-1,4], all fed through one shared analyser.
            lines = tf.unstack(board, axis=-1) + tf.unstack(board, axis=-2)  # 8*[-1,4]

            line_analyser = FullConnectedNetwork([4, 16, 16, 16], tf.nn.relu)
            per_line = [line_analyser.apply(line) for line in lines]  # 8*[-1,16]
            merged = tf.reshape(tf.stack(per_line, -2), [-1, 8 * 16])  # [-1,8*16]

            final_analyser = FullConnectedNetwork([8 * 16, 64, 32], tf.nn.relu)
            final = final_analyser.apply(merged)  # [-1,32]

            # Collapse the 32 output units into one scalar per board.
            score = tf.reduce_mean(final, -1, name="score")  # [-1]

            # --- training --------------------------------------------------
            real_score = tf.placeholder(tf.float32, name="real_score")  # [-1]
            loss = tf.square(score - real_score, name="loss")  # [-1]
            ave_loss = tf.reduce_mean(loss, name="ave_loss")

            # Non-trainable variable so the rate can be overridden per run
            # through feed_dict (see run_train).
            learning_rate = tf.Variable(0.005, trainable=False, name="learning_rate")
            tf.train.AdamOptimizer(learning_rate).minimize(ave_loss, name="train")

            self.sess.run(tf.global_variables_initializer())

    def run_evaluate(self, val_board):
        """Return the predicted score for every board in ``val_board``."""
        return self.sess.run(fetches=self.score, feed_dict={self.board: val_board})

    def run_train(self, val_board, val_real_score, val_learning_rate=None):
        """Run one optimisation step.

        Returns the fetched ``[train, loss, score]`` values. When
        ``val_learning_rate`` is given it replaces the stored learning rate
        for this step only.
        """
        feeds = {self.board: val_board, self.real_score: val_real_score}
        if val_learning_rate is not None:
            feeds[self.learning_rate] = val_learning_rate
        return self.sess.run(
            fetches=[self.train, self.loss, self.score],
            feed_dict=feeds,
        )
136 changes: 136 additions & 0 deletions py/ai/Tree.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
from abc import ABCMeta, abstractmethod

from game.Action import Action
from game.Pop import Pop
from game.utils import apply_action, apply_pop


class Tree(metaclass=ABCMeta):
    """Abstract node of the game search tree.

    A node holds a board, an optional score for that board, a ``weight``
    used to scale the growth budget passed to it, and its child nodes
    (``branches``; ``None`` means the node has not been expanded).

    Leaf scores are filled in two passes so that all boards can be scored
    in one batch: ``evaluate_board_score_gather`` collects the unscored leaf
    boards, and ``evaluate_board_score_dispatch`` writes the batch result
    back to the same leaves.

    The class uses ``metaclass=ABCMeta`` (the Python 3 form); a
    ``__metaclass__`` class attribute is ignored by Python 3 and would leave
    the abstract methods unenforced.
    """

    def __init__(self, board):
        self.board = board
        # Index of this node's board in the pending batch; only set between
        # a gather pass and the matching dispatch pass.
        self.board_id = None
        self.board_score = None

        self.weight = None

        self.branches = None

    def evaluate_board_score_gather(self, pending: list):
        """Append every unscored leaf board below this node to ``pending``.

        Each such leaf remembers its position in ``pending`` in ``board_id``.
        Returns ``pending`` itself.
        """
        if self.branches is not None:
            for branch in self.branches:
                branch.evaluate_board_score_gather(pending)
        elif self.board_score is None:
            self.board_id = len(pending)
            pending.append(self.board)
        return pending

    def evaluate_board_score_dispatch(self, result):
        """Copy scores from ``result`` to the leaves registered by the gather pass."""
        if self.branches is not None:
            for branch in self.branches:
                branch.evaluate_board_score_dispatch(result)
        elif self.board_id is not None:
            self.board_score = result[self.board_id]
            # Clear the slot so a later dispatch cannot reuse a stale index.
            self.board_id = None

    def grow_tree(self, variant):
        """Expand this subtree recursively.

        ``variant`` is the growth budget handed to ``grow_next_branches``;
        each child receives the budget scaled by its own ``weight``.
        """
        if self.grow_next_branches(variant):
            for branch in self.branches:
                branch.grow_tree(variant * branch.weight)

    @abstractmethod
    def grow_next_branches(self, variant: float = None) -> bool:
        """Create the direct children; return True if the node has branches afterwards."""

    @abstractmethod
    def compute_tree_score(self):
        """Return the score of this subtree."""


class ActionTree(Tree):
    """Node reached by applying a player action; its children are tile pops."""

    __metaclass__ = ABCMeta

    def __init__(self, board, action: Action):
        super().__init__(board)
        self.action = action

    def grow_next_branches(self, variant: float = None) -> bool:
        """Create one ``PopTree`` child per (empty cell, popped value) pair.

        Returns True when branches exist afterwards, False when the budget
        ``variant`` is smaller than the number of children to create.
        """
        if self.branches is not None:
            return True

        empty_cells = [
            (row, column)
            for row in range(4)
            for column in range(4)
            if self.board[row][column] == 0
        ]

        branch_count = 2 * len(empty_cells)
        if variant is not None and variant < branch_count:
            return False

        # NOTE(review): rates are divided by branch_count (2 * empty cells),
        # so the child possibilities sum to 0.5 rather than 1 -- confirm
        # whether this scaling is intended.
        pops = [
            (Pop(cell, value), rate)
            for cell in empty_cells
            for value, rate in ((1, 0.875), (2, 0.125))
        ]
        self.branches = [
            PopTree(apply_pop(self.board, pop), rate / branch_count, pop)
            for pop, rate in pops
        ]
        return True

    def compute_tree_score(self):
        """Return the board score for a leaf, else the possibility-weighted sum of child scores."""
        if self.branches is None:
            return self.board_score
        return sum(
            branch.compute_tree_score() * branch.possibility
            for branch in self.branches
        )

    def pick_branch(self, pop: Pop):
        """Return the child for ``pop``; build a fresh one if this node is unexpanded.

        Returns None when the node is expanded but no child matches.
        """
        if self.branches is None:
            return PopTree(apply_pop(self.board, pop), 1, pop)
        return next((branch for branch in self.branches if branch.pop == pop), None)


class PopTree(Tree):
    """Node reached by a tile pop; its children are the player's possible actions."""

    def __init__(self, board, possibility: float, pop: Pop):
        super().__init__(board)
        self.pop = pop
        self.possibility = possibility
        # The growth weight of a pop node is its possibility.
        self.weight = possibility

    def grow_next_branches(self, variant: float = None) -> bool:
        """Create one ``ActionTree`` child per action that changes the board.

        Returns True when branches exist afterwards, False when the budget
        ``variant`` is smaller than the number of children to create.
        """
        if self.branches is not None:
            return True

        outcomes = ((action, apply_action(self.board, action)) for action in Action)
        candidates = [
            ActionTree(moved_board, action)
            for action, (moved_board, changed) in outcomes
            if changed
        ]
        candidate_count = len(candidates)

        if variant is not None and variant < candidate_count:
            return False

        # Split the growth budget evenly between the available actions.
        for candidate in candidates:
            candidate.weight = 1.0 / candidate_count
        self.branches = candidates
        return True

    def compute_tree_score(self):
        """Return the board score for a leaf, else the best child score (0 if no child)."""
        if self.branches is None:
            return self.board_score
        return max(
            (branch.compute_tree_score() for branch in self.branches),
            default=0,
        )

    def pick_branch(self, action: Action):
        """Return the child for ``action``; build a fresh one if this node is unexpanded.

        Returns None when the node is expanded but no child matches.
        """
        if self.branches is None:
            moved_board, _ = apply_action(self.board, action)
            return ActionTree(moved_board, action)
        return next(
            (branch for branch in self.branches if branch.action == action),
            None,
        )
55 changes: 55 additions & 0 deletions py/ai/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import numpy as np

from ai.EvaluateCore import EvaluateCore
from game.Game import Game


def train_core(core: "EvaluateCore", game: "Game", train_count, group_size=None, learning_rate=0.005):
    """Train ``core`` on ``train_count`` self-played games, logging and saving per group.

    Each game is played with ``game.begin_loop_recorded()``. The training
    target for the i-th recorded board is the number of recorded boards
    that follow it, so the last board gets 0.

    After every group of up to ``group_size`` games the average distance
    (square root of the loss) and average ``game.round`` are printed and
    the graph is saved. Statistics are averaged over the games actually
    played in the group, so a shorter final group is reported correctly.

    :param core: network to train; must provide ``run_train`` and ``save_graph``.
    :param game: game to play; must provide ``begin_loop_recorded`` and ``round``.
    :param train_count: total number of games to train on.
    :param group_size: games per log/save group; defaults to ``train_count``.
    :param learning_rate: learning rate passed to every training step.
    :raises ValueError: if games are requested but ``group_size`` is not positive.
    """
    if group_size is None:
        group_size = train_count
    if train_count > 0 and group_size <= 0:
        # A non-positive group could never make progress.
        raise ValueError("group_size must be positive")

    train_id = 0
    while train_id < train_count:
        group_start = train_id
        group_end = min(group_start + group_size, train_count)
        sum_distance = 0
        sum_round = 0
        while train_id < group_end:
            val_board = game.begin_loop_recorded()
            # Descending targets: len-1, ..., 1, 0.
            val_real_score = np.arange(0, len(val_board), 1.0)[::-1]
            _, val_loss, _ = core.run_train(val_board, val_real_score, learning_rate)

            sum_distance += np.average(np.sqrt(val_loss))
            sum_round += game.round
            train_id += 1

        played = train_id - group_start
        print("train[", group_start, ":", train_id, "]", "\t",
              "ave_distance=", sum_distance / played, "\t",
              "ave_max_round", sum_round / played)
        core.save_graph()


def match_core(matches):
    """Play each game ``count`` times and print per-game and average rounds.

    ``matches`` is an iterable of ``(name, game, count)`` tuples. For every
    entry the game is run with ``game.begin_loop()`` and ``game.round`` is
    read afterwards; a summary of all averages is printed at the end.
    """
    summary = []
    for name, game, count in matches:
        print()
        print(name, ":")

        rounds = []
        for index in range(count):
            game.begin_loop()
            rounds.append(game.round)
            print(name, "[", index, "]", " max_round =", rounds[-1])

        average = sum(rounds) / count
        print(name, " ave_max_round =", average)
        summary.append((name, average))

    print()
    print()
    print("Summary:")
    for name, average in summary:
        print(name, "\tave_max_round =", average)
8 changes: 8 additions & 0 deletions py/game/Action.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from enum import Enum


class Action(Enum):
    """The four moves a player can make; each value is the arrow glyph for that move."""

    # Member order is the order in which code iterating over Action sees the moves.
    UP = "↑"
    DOWN = "↓"
    RIGHT = "→"
    LEFT = "←"
91 changes: 91 additions & 0 deletions py/game/Game.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
from abc import ABCMeta, abstractmethod

import numpy as np

from game.Action import Action
from game.Pop import Pop
from game.utils import get_action_map, apply_pop


class Game:
    """Base class driving one game of 2048 on a 4x4 integer board.

    Subclasses supply the player by implementing ``on_get_action`` and may
    react to the end of a game in ``on_dead``. Tile pops are random by
    default (``on_get_pop``).

    Attributes written by this class: ``board`` (current board),
    ``action_map`` (result of ``get_action_map`` for the current board,
    indexed by action), ``round`` (number of completed rounds),
    ``last_action`` and ``last_pop`` (most recent action / pop).
    """

    # NOTE(review): this is the Python 2 spelling and has no effect on
    # Python 3, so the abstract methods below are not enforced. Left
    # unchanged because subclasses outside this file may rely on the
    # default no-op implementations -- confirm before switching to
    # ``metaclass=ABCMeta``.
    __metaclass__ = ABCMeta

    def __init__(self):
        self.board = None
        self.action_map = None
        self.round = 0

        self.last_action = 0
        self.last_pop = 0

    def begin_loop(self):
        """Play one full game, from a fresh board until no action is left."""
        self.init_game()
        while self.next_round():
            pass

    def begin_loop_recorded(self):
        """Play one full game and return the list of boards seen.

        The board is recorded before every round, and once more after the
        final round that ends the game.
        """
        self.init_game()
        history = []
        while True:
            history.append(self.board)
            if not self.next_round():
                history.append(self.board)
                break
        return history

    def on_init_board(self):
        """Create an empty board and pop the two starting tiles."""
        # Builtin ``int`` is what the removed ``np.int`` alias stood for.
        self.board = np.zeros([4, 4], int)
        self.apply_pop()
        self.apply_pop()

    def init_game(self):
        """Reset the round counter, the board and the action map."""
        self.round = 0
        self.on_init_board()
        self.action_map = get_action_map(self.board)

    def next_round(self):
        """Apply one action and one pop.

        Returns False (after calling ``on_dead``) when the resulting board
        has no available action; otherwise increments ``round`` and returns
        True.
        """
        self.apply_action()
        self.apply_pop()

        self.action_map = get_action_map(self.board)
        if len(self.action_map) == 0:
            self.on_dead()
            return False

        self.round += 1
        return True

    @abstractmethod
    def on_dead(self):
        """Hook called when the game ends."""
        pass

    def apply_action(self):
        """Ask for an action and replace the board with that action's result."""
        action = self.on_get_action()
        self.board = self.action_map[action]
        self.last_action = action

    @abstractmethod
    def on_get_action(self) -> "Action":
        """Return the action to apply to the current board."""
        pass

    def apply_pop(self):
        """Ask for a pop and apply it to the board."""
        pop = self.on_get_pop()
        self.board = apply_pop(self.board, pop)
        self.last_pop = pop

    def on_get_pop(self) -> "Pop":
        """Pick a uniformly random empty cell and a value for the new tile.

        The value is 2 with probability 0.125 and 1 otherwise.
        """
        empty_places = [
            (row, column)
            for row in range(4)
            for column in range(4)
            if self.board[row][column] == 0
        ]
        index = int(np.random.uniform() * len(empty_places))
        position = empty_places[index]

        value = 2 if np.random.uniform() < 0.125 else 1

        return Pop(position, value)
Loading

0 comments on commit 8d31958

Please sign in to comment.