From 9fb9afe79f9133400c5be04b031ee59a049b5ef2 Mon Sep 17 00:00:00 2001
From: "perhapszzy@sina.com" <perhapszzy@sina.com>
Date: Sat, 3 Jun 2017 06:05:26 +0800
Subject: [PATCH] add restful client and clip the gradient.

---
 .../examples/recommandation/restful_client.py | 41 +++++++++++++++++++
 .../clever/examples/recommandation/train.py   | 39 ++++++++++++------
 2 files changed, 68 insertions(+), 12 deletions(-)
 create mode 100644 caicloud.tensorflow/caicloud/clever/examples/recommandation/restful_client.py

diff --git a/caicloud.tensorflow/caicloud/clever/examples/recommandation/restful_client.py b/caicloud.tensorflow/caicloud/clever/examples/recommandation/restful_client.py
new file mode 100644
index 00000000..94fa154e
--- /dev/null
+++ b/caicloud.tensorflow/caicloud/clever/examples/recommandation/restful_client.py
@@ -0,0 +1,41 @@
+# coding=utf-8
+# Copyright 2017 Caicloud authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import print_function
+
+import tensorflow as tf
+from caicloud.clever.serving.client import restful_client
+from caicloud.clever.serving.client import serving_error
+
+make_ndarray = tf.contrib.util.make_ndarray
+
+client = restful_client.RESTfulClient('192.168.16.42:31036')
+
+def run():
+    inputs = {
+        'user': tf.contrib.util.make_tensor_proto([1], shape=[1]),
+        'item': tf.contrib.util.make_tensor_proto([2], shape=[1]),
+    }
+    try:
+        outputs = client.call_predict(inputs)
+        result = outputs["infer"]
+        print('score: {0}'.format(make_ndarray(result)[0][0]))
+    except serving_error.ServingRESTfulError as e:
+        print('serving error,\n status: {0},\n reason: {1},\n body: {2}'.format(
+            e.status, e.reason, e.body))
+
+if __name__ == '__main__':
+    run()
diff --git a/caicloud.tensorflow/caicloud/clever/examples/recommandation/train.py b/caicloud.tensorflow/caicloud/clever/examples/recommandation/train.py
index 9347b20c..4457ac62 100644
--- a/caicloud.tensorflow/caicloud/clever/examples/recommandation/train.py
+++ b/caicloud.tensorflow/caicloud/clever/examples/recommandation/train.py
@@ -5,24 +5,26 @@
 import numpy as np
 import tensorflow as tf
 import pandas as pd
+import os
 
 from caicloud.clever.tensorflow import dist_base
 from caicloud.clever.tensorflow import model_exporter
 
-tf.app.flags.DEFINE_string("export_dir",
-                           "/tmp/saved_model/movie",
-                           "model export directory path.")
-
-tf.app.flags.DEFINE_string("batch_size", 128, "training batch size.")
-tf.app.flags.DEFINE_string("embedding_dim", 50, "embedding dimension.")
+tf.app.flags.DEFINE_string("export_dir", "/tmp/saved_model/movie", "model export directory path.")
+tf.app.flags.DEFINE_string("data_dir", "/caicloud/admin/hengfengPOC/data", "path where data is located.")
+tf.app.flags.DEFINE_integer("batch_size", 128, "training batch size.")
+tf.app.flags.DEFINE_integer("embedding_dim", 50, "embedding dimension.")
+tf.app.flags.DEFINE_float("learning_rate", 0.01, "learning rate.")
 
 FLAGS = tf.app.flags.FLAGS
+
 USER_NUM = 6040
 ITEM_NUM = 3952
 
 def get_data():
     col_names = ["user", "item", "rate", "st"]
-    df = pd.read_csv("/tmp/movielens/ml-1m/ratings.dat", sep="::", header=None, names=col_names, engine='python')
+    datafile = os.path.join(FLAGS.data_dir, "ml-1m/ratings.dat")
+    df = pd.read_csv(datafile, sep="::", header=None, names=col_names, engine='python')
     df["user"] -= 1
     df["item"] -= 1
 
@@ -96,9 +98,19 @@ def model_fn(sync, num_replicas):
     _global_step = tf.contrib.framework.get_or_create_global_step()
 
     _cost = tf.square(_infer - _rate_batch)
-    optimizer = tf.train.AdamOptimizer(0.001)
-    _train_op = optimizer.minimize(_cost, global_step=_global_step)
-
+    optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
+
+    if sync:
+        optimizer = tf.train.SyncReplicasOptimizer(
+            optimizer,
+            replicas_to_aggregate=num_replicas,
+            total_num_replicas=num_replicas,
+            name="mnist_sync_replicas")
+
+    gradients, variables = zip(*optimizer.compute_gradients(_cost))
+    gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
+    _train_op = optimizer.apply_gradients(zip(gradients, variables), global_step=_global_step)
+
     _rmse = tf.sqrt(tf.reduce_mean(_cost))
 
     def rmse_evalute_fn(session):
@@ -129,8 +141,11 @@ def train_fn(session, num_global_step):
     users, items, rates = next(_iter_train)
     session.run(_train_op, feed_dict={_user_batch: users, _item_batch: items, _rate_batch: rates})
 
-    if _local_step % 2000 == 0:
-        rmse, infer, cost = session.run([_rmse, _infer, _cost], feed_dict={_user_batch: _test["user"], _item_batch: _test["item"], _rate_batch: _test["rate"]})
+    if _local_step % 200 == 0:
+        rmse, infer, cost = session.run(
+            [_rmse, _infer, _cost],
+            feed_dict={_user_batch: _test["user"], _item_batch: _test["item"], _rate_batch: _test["rate"]})
+        print("Eval RMSE at round {} is: {}".format(num_global_step, rmse))
 
     _local_step += 1