Initial commit

ZhengKeli · Nov 30, 2017 · 8d31958 · 8d31958
commit 8d31958
Show file tree

Hide file tree

Showing 28 changed files with 913 additions and 0 deletions.
diff --git a/AI2048.iml b/AI2048.iml
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager" inherit-compiler-output="true">
+    <exclude-output />
+    <content url="file://$MODULE_DIR$">
+      <sourceFolder url="file://$MODULE_DIR$/py" isTestSource="false" />
+    </content>
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
diff --git a/README.md b/README.md
@@ -0,0 +1,25 @@
+# AI 2048
+
+[2048](https://gabrielecirulli.github.io/2048/) is a well-known game. If you have never played it, I suggest you play it a few times to learn its rules.
+
+This project builds an AI that plays 2048. To be honest, this is my first attempt at making an AI for a game (and my first practical deep-learning project that actually succeeded).
+
+In fact, it is not hard at all to make the AI for this game. (Maybe that's why I can succeed 🙂 ) So I think this project may be suitable for beginners and that's why I share this project here.
+
+
+# Project structure
+
+The main entry points of this project are the Python scripts in the `./py` directory. All of these scripts should be run from that directory (i.e. the working directory is `./py`).
+- `./py/game_console.py` - a console-based game 2048
+- `./py/game_random.py` - the game played by an AI that acts randomly (well, maybe that is not really "intelligence")
+- `./py/game_aix-corex.py` - a set of AIs built step by step, each one stronger than the last.
+- `./py/game_ai2-core3_extreme.py` - this is almost the best AI in this project, but it takes a lot of time to compute.
+- `./py/game_assist2_core3_extreme.py` - scripts that can let the best AI assist you when playing 2048.
+- `./py/train_corex.py` - a set of scripts that train the neural network "cores" used by the AIs.
+- `./py/match.py` - a script that tests all the AIs and compares how strong they are.
+
+The directory `./graph` stores the trained neural network graphs of the cores. Of course, you can move it somewhere else and train your own core.
+
+The directory `./.idea` and the file `./AI2048.iml` are project files for **IntelliJ IDEA**. If you use it, you can import this project.
+
+
diff --git a/py/ai/EvaluateCore.py b/py/ai/EvaluateCore.py
@@ -0,0 +1,62 @@
+import tensorflow as tf
+
+from nerve.FullConnectedNetwork import FullConnectedNetwork
+from nerve.NerveCore import NerveCore
+
+
+class EvaluateCore(NerveCore):
+    """Network core that scores a 4x4 board with a single scalar.
+
+    The graph and session (``self.graph``, ``self.sess``) are provided by
+    ``NerveCore``; every node used here is looked up by its graph name.
+    """
+
+    def __init__(self, path=None, name=None):
+        """Set up the base core, then cache handles to the named graph nodes.
+
+        NOTE(review): assumes ``NerveCore.__init__`` has loaded or built the
+        graph (via ``create_graph``) before the lookups below -- confirm there.
+        """
+        super().__init__(path, name)
+        with self.graph.as_default() as graph:
+            # placeholders and the predicted score
+            self.board = graph.get_tensor_by_name("board:0")
+            self.score = graph.get_tensor_by_name("score:0")
+            self.real_score = graph.get_tensor_by_name("real_score:0")
+
+            # training nodes
+            self.loss = graph.get_tensor_by_name("loss:0")
+            self.ave_loss = graph.get_tensor_by_name("ave_loss:0")
+            self.learning_rate = graph.get_tensor_by_name("learning_rate:0")
+            self.train = graph.get_operation_by_name("train")
+
+    def create_graph(self):
+        """Build the evaluation network and its training ops inside ``self.graph``.
+
+        Nodes are given explicit names because ``__init__`` retrieves them by
+        name; the local Python variables are only used while wiring the graph.
+        """
+        with self.graph.as_default():
+            # nerve network
+            board = tf.placeholder(tf.float32, shape=[None, 4, 4], name="board")  # [-1,4,4]
+
+            # NOTE: unstacking along the last axis yields board[:, :, j] and
+            # along axis -2 yields board[:, i, :], so the two names are swapped
+            # relative to the usual row/column meaning. Both groups are used,
+            # so only the order of the 8 lines is affected.
+            rows = tf.unstack(board, axis=-1)  # 4*[-1,4]
+            cols = tf.unstack(board, axis=-2)  # 4*[-1,4]
+            lines = [line for group in [rows, cols] for line in group]  # 8*[-1,4]
+
+            # one shared analyser is applied to each of the 8 lines
+            line_analyser = FullConnectedNetwork([4, 16, 16, 16], tf.nn.relu)
+            analysed = [line_analyser.apply(line) for line in lines]  # 8*[-1,16]
+            analysed = tf.stack(analysed, -2)  # [-1,8,16]
+            analysed = tf.reshape(analysed, [-1, 8 * 16])  # [-1,8*16]
+
+            final_analyser = FullConnectedNetwork([8 * 16, 64, 32], tf.nn.relu)
+            final = final_analyser.apply(analysed)  # [-1,32]
+
+            # the score is the mean of the 32 final activations
+            score = tf.reduce_mean(final, -1, name="score")  # [-1]
+
+            # train
+            real_score = tf.placeholder(tf.float32, name="real_score")  # [-1]
+            loss = tf.square(score - real_score, name="loss")  # [-1]
+            ave_loss = tf.reduce_mean(loss, name="ave_loss")
+
+            # second positional argument is ``trainable``: the rate is a plain
+            # non-trainable variable that ``run_train`` may override by feeding it
+            learning_rate = tf.Variable(0.005, False, name="learning_rate")
+            train = tf.train.AdamOptimizer(learning_rate).minimize(ave_loss, name="train")
+
+            self.sess.run(tf.global_variables_initializer())
+
+    def run_evaluate(self, val_board):
+        """Return the predicted score for each board in ``val_board``."""
+        return self.sess.run(
+            fetches=self.score,
+            feed_dict={self.board: val_board}
+        )
+
+    def run_train(self, val_board, val_real_score, val_learning_rate=None):
+        """Run one optimiser step on a batch.
+
+        ``val_learning_rate`` overrides the stored learning rate for this step
+        when it is not ``None``. Returns the fetched values in the order
+        ``[train, loss, score]``.
+        """
+        feed_dict = {self.board: val_board, self.real_score: val_real_score, }
+        if val_learning_rate is not None:
+            feed_dict[self.learning_rate] = val_learning_rate
+        return self.sess.run(
+            fetches=[self.train, self.loss, self.score],
+            feed_dict=feed_dict
+        )
diff --git a/py/ai/Tree.py b/py/ai/Tree.py
@@ -0,0 +1,136 @@
+from abc import ABCMeta, abstractmethod
+
+from game.Action import Action
+from game.Pop import Pop
+from game.utils import apply_action, apply_pop
+
+
+class Tree(metaclass=ABCMeta):
+    """Abstract node of the search tree that alternates actions and tile pops.
+
+    A node holds a board, optionally a list of child branches, and (for a leaf)
+    the score assigned to its board by an external evaluator.
+
+    The class uses ``metaclass=ABCMeta`` so that the abstract methods are
+    actually enforced; a ``__metaclass__`` attribute is ignored by Python 3.
+    """
+
+    def __init__(self, board):
+        self.board = board
+        # index of this board in the pending batch while its score is awaited
+        self.board_id = None
+        # score of this board, filled in by evaluate_board_score_dispatch
+        self.board_score = None
+
+        # factor applied to the growth budget when grow_tree descends into this node
+        self.weight = None
+
+        # child nodes; None until grow_next_branches has succeeded
+        self.branches = None
+
+    def evaluate_board_score_gather(self, pending: list):
+        """Append the boards of all unscored leaves to ``pending`` and return it.
+
+        Each such leaf remembers its position in ``pending`` in ``board_id`` so
+        that ``evaluate_board_score_dispatch`` can hand the result back.
+        """
+        if self.branches is not None:
+            for branch in self.branches:
+                branch.evaluate_board_score_gather(pending)
+        elif self.board_score is None:
+            self.board_id = len(pending)
+            pending.append(self.board)
+        return pending
+
+    def evaluate_board_score_dispatch(self, result):
+        """Store ``result[board_id]`` on every leaf registered by the gather pass."""
+        if self.branches is not None:
+            for branch in self.branches:
+                branch.evaluate_board_score_dispatch(result)
+        elif self.board_id is not None:
+            self.board_score = result[self.board_id]
+            self.board_id = None
+
+    def grow_tree(self, variant):
+        """Recursively grow the tree while the budget ``variant`` allows it.
+
+        The budget handed to each child is ``variant`` scaled by that child's
+        ``weight``; growth stops where ``grow_next_branches`` returns False.
+        """
+        if self.grow_next_branches(variant):
+            for branch in self.branches:
+                branch.grow_tree(variant * branch.weight)
+
+    @abstractmethod
+    def grow_next_branches(self, variant: float = None) -> bool:
+        """Create ``self.branches`` if allowed; return whether branches exist."""
+        pass
+
+    @abstractmethod
+    def compute_tree_score(self):
+        """Combine leaf scores into the score of this subtree."""
+        pass
+
+
+class ActionTree(Tree):
+    """Node reached by a player action; its branches are the possible tile pops."""
+    __metaclass__ = ABCMeta
+
+    def __init__(self, board, action: Action):
+        super().__init__(board)
+        self.action = action
+
+    def grow_next_branches(self, variant: float = None) -> bool:
+        """Create one ``PopTree`` per (empty cell, popped value) pair.
+
+        Returns True when branches exist afterwards, False when ``variant`` is
+        smaller than the number of branches that would be created.
+
+        NOTE(review): each branch gets ``rate / (2 * empty cells)``, so the
+        possibilities of all branches sum to 0.5 rather than 1 -- confirm that
+        this scaling is intended.
+        """
+        if self.branches is not None:
+            return True
+
+        empty_cells = [
+            (row, column)
+            for row in range(4)
+            for column in range(4)
+            if self.board[row][column] == 0
+        ]
+
+        branch_count = 2 * len(empty_cells)
+        if variant is not None and variant < branch_count:
+            return False
+
+        grown = []
+        for cell in empty_cells:
+            for value, rate in ((1, 0.875), (2, 0.125)):
+                pop = Pop(cell, value)
+                grown.append(PopTree(apply_pop(self.board, pop), rate / branch_count, pop))
+        self.branches = grown
+        return True
+
+    def compute_tree_score(self):
+        """Return the leaf score, or the possibility-weighted sum over branches."""
+        if self.branches is None:
+            return self.board_score
+        return sum(branch.compute_tree_score() * branch.possibility for branch in self.branches)
+
+    def pick_branch(self, pop: Pop):
+        """Return the branch for ``pop``; build a fresh one if nothing was grown.
+
+        Returns None when branches exist but none of them matches ``pop``.
+        """
+        if self.branches is None:
+            return PopTree(apply_pop(self.board, pop), 1, pop)
+        return next((branch for branch in self.branches if branch.pop == pop), None)
+
+
+class PopTree(Tree):
+    """Node reached by a tile pop; its branches are the actions that change the board."""
+
+    def __init__(self, board, possibility: float, pop: Pop):
+        super().__init__(board)
+        self.pop = pop
+        self.possibility = possibility
+        # the possibility doubles as the budget factor used by grow_tree
+        self.weight = possibility
+
+    def grow_next_branches(self, variant: float = None) -> bool:
+        """Create one ``ActionTree`` per action that changes the board.
+
+        Returns True when branches exist afterwards, False when ``variant`` is
+        smaller than the number of such actions. Every created branch gets an
+        equal weight of ``1 / number of branches``.
+        """
+        if self.branches is not None:
+            return True
+
+        candidates = []
+        for action in Action:
+            moved_board, changed = apply_action(self.board, action)
+            if not changed:
+                continue
+            candidates.append(ActionTree(moved_board, action))
+
+        candidate_count = len(candidates)
+        if variant is not None and variant < candidate_count:
+            return False
+
+        # the division stays inside the loop so an empty list never divides by zero
+        for candidate in candidates:
+            candidate.weight = 1.0 / candidate_count
+        self.branches = candidates
+        return True
+
+    def compute_tree_score(self):
+        """Return the leaf score, or the best branch score (0 with no branches)."""
+        if self.branches is None:
+            return self.board_score
+        return max((branch.compute_tree_score() for branch in self.branches), default=0)
+
+    def pick_branch(self, action: Action):
+        """Return the branch for ``action``; build a fresh one if nothing was grown.
+
+        Returns None when branches exist but none of them matches ``action``.
+        """
+        if self.branches is None:
+            moved_board, _changed = apply_action(self.board, action)
+            return ActionTree(moved_board, action)
+        return next((branch for branch in self.branches if branch.action == action), None)
diff --git a/py/ai/utils.py b/py/ai/utils.py
@@ -0,0 +1,55 @@
+import numpy as np
+
+from ai.EvaluateCore import EvaluateCore
+from game.Game import Game
+
+
+def train_core(core: EvaluateCore, game: Game, train_count, group_size=None, learning_rate=0.005):
+    """Train ``core`` on ``train_count`` recorded games, reporting per group.
+
+    Each game is played with ``game.begin_loop_recorded()``; the training
+    target of the i-th recorded board is the number of boards recorded after
+    it (a descending count ending at 0 for the final board).
+
+    After every ``group_size`` games (or fewer for the last group) the average
+    root loss and average ``game.round`` of that group are printed and the
+    graph is saved. ``group_size`` defaults to ``train_count``.
+
+    Raises:
+        ValueError: if there is work to do and ``group_size`` is below 1
+            (this previously ended in a ZeroDivisionError or an endless loop).
+    """
+    if group_size is None:
+        group_size = train_count
+    if train_count > 0 and group_size < 1:
+        raise ValueError("group_size must be at least 1")
+
+    train_id = 0
+    while train_id < train_count:
+        group_start = train_id
+        sum_distance = 0
+        sum_round = 0
+        while train_id < train_count and train_id - group_start < group_size:
+            val_board = game.begin_loop_recorded()
+            val_real_score = np.arange(0, len(val_board), 1.0)[::-1]
+            _, val_loss, _ = core.run_train(val_board, val_real_score, learning_rate)
+
+            sum_distance += np.average(np.sqrt(val_loss))
+            sum_round += game.round
+            train_id += 1
+
+        # average over the games actually played: the last group may be short
+        played = train_id - group_start
+        print("train[", group_start, ":", train_id, "]", "\t",
+              "ave_distance=", sum_distance / played, "\t",
+              "ave_max_round", sum_round / played)
+        core.save_graph()
+
+
+def match_core(matches):
+    """Play every contender several times and print the average final round.
+
+    Args:
+        matches: iterable of ``(name, game, count)``; ``game.begin_loop()`` is
+            run ``count`` times and ``game.round`` is read after each run.
+
+    Returns:
+        A list of ``(name, ave_max_round)`` in input order. The average is NaN
+        for a contender with a non-positive ``count`` (this previously raised
+        ZeroDivisionError for a count of 0).
+    """
+    result = []
+    for (name, game, count) in matches:
+        print()
+        print(name, ":")
+
+        sum_round = 0
+        for i in range(count):
+            game.begin_loop()
+            max_round = game.round
+            print(name, "[", i, "]", " max_round =", max_round)
+            sum_round += max_round
+
+        ave_max_round = sum_round / count if count > 0 else float("nan")
+        print(name, " ave_max_round =", ave_max_round)
+        result.append((name, ave_max_round))
+
+    print()
+    print()
+    print("Summary:")
+    for (name, ave_max_round) in result:
+        print(name, "\tave_max_round =", ave_max_round)
+    return result
diff --git a/py/game/Action.py b/py/game/Action.py
@@ -0,0 +1,8 @@
+from enum import Enum
+
+
+class Action(Enum):
+    """The four moves of the game; each value is the arrow glyph for that direction."""
+    # Declaration order is the order in which ``for action in Action`` iterates.
+    UP = "↑"
+    DOWN = "↓"
+    RIGHT = "→"
+    LEFT = "←"
diff --git a/py/game/Game.py b/py/game/Game.py
@@ -0,0 +1,91 @@
+from abc import ABCMeta, abstractmethod
+
+import numpy as np
+
+from game.Action import Action
+from game.Pop import Pop
+from game.utils import get_action_map, apply_pop
+
+
+class Game:
+    """Skeleton of one 2048 game loop.
+
+    Subclasses choose the move in ``on_get_action`` and may react to the end of
+    the game in ``on_dead``; tile pops are random unless ``on_get_pop`` is
+    overridden.
+    """
+    # NOTE(review): ``__metaclass__`` is the Python 2 spelling and is ignored by
+    # Python 3, so the abstract methods below are not enforced. It is left
+    # unchanged because subclasses outside this file may not override on_dead.
+    __metaclass__ = ABCMeta
+
+    def __init__(self):
+        self.board = None
+        # result of get_action_map(board); indexed by action to get the next board
+        self.action_map = None
+        # number of completed rounds in the current game
+        self.round = 0
+
+        self.last_action = 0
+        self.last_pop = 0
+
+    def begin_loop(self):
+        """Play one full game from a fresh board until no move is left."""
+        self.init_game()
+        while self.next_round():
+            pass
+
+    def begin_loop_recorded(self):
+        """Play one full game and return the board seen at the start of every
+        round followed by the final board."""
+        self.init_game()
+        history = [self.board]
+        while self.next_round():
+            history.append(self.board)
+        # next_round has already produced the final (dead) board
+        history.append(self.board)
+        return history
+
+    def on_init_board(self):
+        """Create an empty 4x4 integer board and pop two tiles onto it."""
+        # builtin int replaces the np.int alias, which NumPy 1.24 removed
+        self.board = np.zeros([4, 4], int)
+        self.apply_pop()
+        self.apply_pop()
+
+    def init_game(self):
+        """Reset the round counter, build the board and compute its action map."""
+        self.round = 0
+        self.on_init_board()
+        self.action_map = get_action_map(self.board)
+
+    def next_round(self):
+        """Apply one action and one pop.
+
+        Returns False (after calling ``on_dead``) when the new board has an
+        empty action map, otherwise increments ``round`` and returns True.
+        """
+        self.apply_action()
+        self.apply_pop()
+
+        self.action_map = get_action_map(self.board)
+        if len(self.action_map) == 0:
+            self.on_dead()
+            return False
+
+        self.round += 1
+        return True
+
+    @abstractmethod
+    def on_dead(self):
+        """Hook called once when the game has no move left."""
+        pass
+
+    def apply_action(self):
+        """Ask for an action and replace the board with its entry in the action map."""
+        action = self.on_get_action()
+        self.board = self.action_map[action]
+        self.last_action = action
+
+    @abstractmethod
+    def on_get_action(self) -> Action:
+        """Return the action to play on the current board."""
+        pass
+
+    def apply_pop(self):
+        """Ask for a pop and apply it to the board."""
+        pop = self.on_get_pop()
+        self.board = apply_pop(self.board, pop)
+        self.last_pop = pop
+
+    def on_get_pop(self) -> Pop:
+        """Pick a uniformly random empty cell and a value of 2 (probability
+        0.125) or 1 (otherwise).
+
+        Raises IndexError when the board has no empty cell.
+        """
+        empty_places = [
+            (row, column)
+            for row in range(4)
+            for column in range(4)
+            if self.board[row][column] == 0
+        ]
+        # the position is drawn before the value, keeping the random stream order
+        index = int(np.random.uniform() * len(empty_places))
+        position = empty_places[index]
+
+        value = 2 if np.random.uniform() < 0.125 else 1
+
+        return Pop(position, value)