diff --git a/Dockerfile b/Dockerfile
index 9d3e624e..3ca26ee3 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM tensorflow/tensorflow:0.12.0
+FROM tensorflow/tensorflow:1.0.0
 
 ENV LANG C.UTF-8
 RUN apt-get update && apt-get install -y bc
@@ -9,7 +9,7 @@ RUN rm -rf /notebooks/*
 
 COPY caicloud.tensorflow /caicloud.tensorflow
 COPY Deep_Learning_with_TensorFlow/datasets /notebooks/Deep_Learning_with_TensorFlow/datasets
-COPY Deep_Learning_with_TensorFlow/0.12.0 /notebooks/Deep_Learning_with_TensorFlow/0.12.0
+COPY Deep_Learning_with_TensorFlow/1.0.0 /notebooks/Deep_Learning_with_TensorFlow/1.0.0
 COPY run_tf.sh /run_tf.sh
 
 CMD ["/run_tf.sh"]
diff --git a/caicloud.tensorflow/caicloud/clever/examples/recommandation/README.md b/caicloud.tensorflow/caicloud/clever/examples/recommandation/README.md
new file mode 100644
index 00000000..304d8137
--- /dev/null
+++ b/caicloud.tensorflow/caicloud/clever/examples/recommandation/README.md
@@ -0,0 +1,41 @@
+# Solving a Recommendation Problem with TensorFlow
+
+## Dataset
+We use the [MovieLens ratings dataset](http://grouplens.org/datasets/movielens/) to model a recommendation problem. Records in the dataset have the following format:
+```
+1::1193::5::978300760
+1::661::3::978302109
+1::914::3::978301968
+1::3408::4::978300275
+1::2355::5::978824291
+```
+Each line contains one user's rating of one movie; for example, the first line says that user 1 gave movie 1193 a rating of 5. The last column is a timestamp, which this example does not use. The goal is, for a given (user, movie) pair, to predict the rating that user would give that movie.
+
+Run the following command to download the data:
+```
+./download_data.sh
+```
+
+
+## Training
+The following script trains the model locally:
+```
+./train_model.sh
+```
+
+Running this script produces output similar to the following:
+```
+Training begins @ 2017-05-18 00:24:33.373159
+Eval RMSE at round 0 is: 2.81291127205
+Eval RMSE at round 2000 is: 0.945966959
+Eval RMSE at round 4000 is: 0.933194696903
+Eval RMSE at round 6000 is: 0.927836835384
+Eval RMSE at round 8000 is: 0.923974812031
+Eval RMSE at round 10000 is: 0.92291110754
+Eval RMSE at round 12000 is: 0.919465661049
+Eval RMSE at round 14000 is: 0.918680250645
+Eval RMSE at round 16000 is: 0.917023718357
+Eval RMSE at round 18000 is: 0.915674805641
+Eval RMSE at round 20000 is: 0.91452050209
+Eval RMSE at round 22000 is: 0.915164649487
+```
\ No newline at end of file
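For readers who want to inspect the data before training, the ratings file can be loaded with pandas exactly as `get_data()` in `train.py` below does. This is a minimal standalone sketch of that logic; it assumes `download_data.sh` has already been run, so the data sits under `/tmp/movielens`:

```python
# Minimal sketch: parse the MovieLens ml-1m ratings file and split 90/10,
# mirroring get_data() in train.py below.
import numpy as np
import pandas as pd

col_names = ["user", "item", "rate", "st"]
df = pd.read_csv("/tmp/movielens/ml-1m/ratings.dat", sep="::",
                 header=None, names=col_names, engine="python")
df["user"] -= 1   # shift IDs to be zero-based for embedding lookups
df["item"] -= 1
rows = len(df)
df = df.iloc[np.random.permutation(rows)].reset_index(drop=True)
split_index = int(rows * 0.9)
train_df, test_df = df[0:split_index], df[split_index:]
print(train_df.head())
```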
diff --git a/caicloud.tensorflow/caicloud/clever/examples/recommandation/download_data.sh b/caicloud.tensorflow/caicloud/clever/examples/recommandation/download_data.sh
new file mode 100755
index 00000000..7f1027b4
--- /dev/null
+++ b/caicloud.tensorflow/caicloud/clever/examples/recommandation/download_data.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+
+DATA_DIR=/tmp/movielens
+SIZE=1m
+mkdir -p ${DATA_DIR}
+wget http://files.grouplens.org/datasets/movielens/ml-${SIZE}.zip -O ${DATA_DIR}/ml-${SIZE}.zip
+unzip ${DATA_DIR}/ml-${SIZE}.zip -d ${DATA_DIR}
\ No newline at end of file
diff --git a/caicloud.tensorflow/caicloud/clever/examples/recommandation/train.py b/caicloud.tensorflow/caicloud/clever/examples/recommandation/train.py
new file mode 100644
index 00000000..9347b20c
--- /dev/null
+++ b/caicloud.tensorflow/caicloud/clever/examples/recommandation/train.py
@@ -0,0 +1,141 @@
+# coding=utf-8
+
+import time
+
+import numpy as np
+import tensorflow as tf
+import pandas as pd
+
+from caicloud.clever.tensorflow import dist_base
+from caicloud.clever.tensorflow import model_exporter
+
+tf.app.flags.DEFINE_string("export_dir",
+                           "/tmp/saved_model/movie",
+                           "model export directory path.")
+
+tf.app.flags.DEFINE_integer("batch_size", 128, "training batch size.")
+tf.app.flags.DEFINE_integer("embedding_dim", 50, "embedding dimension.")
+
+FLAGS = tf.app.flags.FLAGS
+USER_NUM = 6040
+ITEM_NUM = 3952
+
+def get_data():
+    col_names = ["user", "item", "rate", "st"]
+    df = pd.read_csv("/tmp/movielens/ml-1m/ratings.dat", sep="::", header=None, names=col_names, engine='python')
+
+    # Shift user/item IDs to be zero-based so they can index the embeddings.
+    df["user"] -= 1
+    df["item"] -= 1
+    for col in ("user", "item"):
+        df[col] = df[col].astype(np.int32)
+    df["rate"] = df["rate"].astype(np.float32)
+
+    # Shuffle, then split 90% train / 10% test.
+    rows = len(df)
+    print("Total number of instances: {}".format(rows))
+    df = df.iloc[np.random.permutation(rows)].reset_index(drop=True)
+    split_index = int(rows * 0.9)
+    return df[0:split_index], df[split_index:]
+
+class ShuffleIterator(object):
+    """Endless iterator that yields random mini-batches of its input columns."""
+    def __init__(self, inputs, batch_size=10):
+        self.inputs = inputs
+        self.batch_size = batch_size
+        self.num_cols = len(self.inputs)
+        self.len = len(self.inputs[0])
+        self.inputs = np.transpose(np.vstack([np.array(self.inputs[i]) for i in range(self.num_cols)]))
+
+    def __len__(self):
+        return self.len
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        return self.next()
+
+    def next(self):
+        # Sample a batch of row indices with replacement.
+        ids = np.random.randint(0, self.len, (self.batch_size,))
+        out = self.inputs[ids, :]
+        return [out[:, i] for i in range(self.num_cols)]
+
+_train, _test = get_data()
+_iter_train = ShuffleIterator([_train["user"], _train["item"], _train["rate"]], batch_size=FLAGS.batch_size)
+_train_op = None
+_infer = None
+_global_step = None
+_user_batch = None
+_item_batch = None
+_rate_batch = None
+_cost = None
+_rmse = None
+_local_step = 0
+
+def inference(user_batch, item_batch, dim):
+    # Look up user and item embeddings, concatenate them, and apply a
+    # single linear layer to predict the rating.
+    w_user = tf.get_variable("embd_user", shape=[USER_NUM, dim],
+                             initializer=tf.truncated_normal_initializer(stddev=0.02))
+    w_item = tf.get_variable("embd_item", shape=[ITEM_NUM, dim],
+                             initializer=tf.truncated_normal_initializer(stddev=0.02))
+
+    input1 = tf.nn.embedding_lookup(w_user, user_batch)
+    input2 = tf.nn.embedding_lookup(w_item, item_batch)
+    features = tf.concat([input1, input2], 1)
+
+    w = tf.get_variable("w", shape=[2*dim, 1], initializer=tf.truncated_normal_initializer(stddev=0.02))
+    b = tf.get_variable("b", shape=[1], initializer=tf.constant_initializer(1))
+    infer = tf.transpose(tf.matmul(features, w) + b, name="infer")
+    return infer
+
+def model_fn(sync, num_replicas):
+    global _train_op, _infer, _user_batch, _item_batch, _rate_batch, _rmse, _cost, _global_step
+
+    _user_batch = tf.placeholder(tf.int32, shape=[None], name="user")
+    _item_batch = tf.placeholder(tf.int32, shape=[None], name="item")
+    _rate_batch = tf.placeholder(tf.float32, shape=[None], name="rate")
+
+    _infer = inference(_user_batch, _item_batch, FLAGS.embedding_dim)
+    _global_step = tf.contrib.framework.get_or_create_global_step()
+
+    _cost = tf.square(_infer - _rate_batch)
+    optimizer = tf.train.AdamOptimizer(0.001)
+    _train_op = optimizer.minimize(_cost, global_step=_global_step)
+
+    _rmse = tf.sqrt(tf.reduce_mean(_cost))
+
+    def rmse_evaluate_fn(session):
+        return session.run(_rmse, feed_dict={
+            _user_batch: _test["user"], _item_batch: _test["item"], _rate_batch: _test["rate"]})
+
+    # Configure model export.
+    model_export_spec = model_exporter.ModelExportSpec(
+        export_dir=FLAGS.export_dir,
+        input_tensors={"user": _user_batch, "item": _item_batch},
+        output_tensors={"infer": _infer})
+
+    # Define how the evaluation metric (RMSE) is computed.
+    model_metric_ops = {
+        "rmse": rmse_evaluate_fn
+    }
+
+    return dist_base.ModelFnHandler(
+        global_step=_global_step,
+        optimizer=optimizer,
+        model_metric_ops=model_metric_ops,
+        model_export_spec=model_export_spec,
+        summary_op=None)
+
+def train_fn(session, num_global_step):
+    global _train_op, _infer, _user_batch, _item_batch, _rate_batch, _rmse, _local_step, _cost
+
+    users, items, rates = next(_iter_train)
+    session.run(_train_op, feed_dict={_user_batch: users, _item_batch: items, _rate_batch: rates})
+
+    # Evaluate on the held-out test set every 2000 local steps.
+    if _local_step % 2000 == 0:
+        rmse, infer, cost = session.run([_rmse, _infer, _cost], feed_dict={_user_batch: _test["user"], _item_batch: _test["item"], _rate_batch: _test["rate"]})
+        print("Eval RMSE at round {} is: {}".format(num_global_step, rmse))
+
+    _local_step += 1
+    return False
+
+if __name__ == '__main__':
+    distTfRunner = dist_base.DistTensorflowRunner(model_fn=model_fn, gen_init_fn=None)
+    distTfRunner.run(train_fn)
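The model built by `inference()` is simply a linear regressor over concatenated user and item embeddings: the predicted rating is `concat(e_user, e_item) · w + b`. As a sanity check, here is the same scoring rule restated in plain NumPy; the shapes follow the TensorFlow code above, and the random parameters are only for illustration:

```python
# NumPy restatement of inference(): rating ~= concat(e_user, e_item) . w + b
import numpy as np

def predict(w_user, w_item, w, b, users, items):
    # w_user: [USER_NUM, dim], w_item: [ITEM_NUM, dim], w: [2*dim, 1], b: [1]
    feats = np.concatenate([w_user[users], w_item[items]], axis=1)  # [batch, 2*dim]
    return feats.dot(w)[:, 0] + b[0]                                # [batch]

# Example with random parameters (illustrative only):
dim = 50
w_user = np.random.randn(6040, dim) * 0.02
w_item = np.random.randn(3952, dim) * 0.02
w = np.random.randn(2 * dim, 1) * 0.02
b = np.ones(1)
print(predict(w_user, w_item, w, b, np.array([0, 1]), np.array([1192, 660])))
```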
diff --git a/caicloud.tensorflow/caicloud/clever/examples/recommandation/train_model.sh b/caicloud.tensorflow/caicloud/clever/examples/recommandation/train_model.sh
new file mode 100755
index 00000000..dd2d7c42
--- /dev/null
+++ b/caicloud.tensorflow/caicloud/clever/examples/recommandation/train_model.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+#
+# Copyright 2017 Caicloud authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+rm -rf /tmp/caicloud-dist-tf
+rm -rf /tmp/saved_model/movie
+
+export TF_MAX_STEPS=30000
+export TF_SAVE_CHECKPOINTS_SECS=60
+export TF_SAVE_SUMMARIES_STEPS=1000
+python train.py
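After training, the model is exported under `FLAGS.export_dir` (`/tmp/saved_model/movie`). Assuming the caicloud `model_exporter` writes a standard TensorFlow SavedModel, something like the following could load it and query a few ratings; the `"serve"` tag and the tensor names (`user:0`, `item:0`, `infer:0`, matching the named placeholders and the named transpose in `train.py`) are assumptions about the export layout, not a documented caicloud API:

```python
# Hedged sketch: load the exported model and score a few (user, item) pairs.
# Assumes a standard SavedModel tagged "serve" under /tmp/saved_model/movie.
import tensorflow as tf

with tf.Session(graph=tf.Graph()) as sess:
    tf.saved_model.loader.load(sess, ["serve"], "/tmp/saved_model/movie")
    g = sess.graph
    scores = sess.run(g.get_tensor_by_name("infer:0"),
                      feed_dict={g.get_tensor_by_name("user:0"): [0, 1],
                                 g.get_tensor_by_name("item:0"): [1192, 660]})
    print(scores)  # predicted ratings, shape [1, batch] due to the transpose
```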