From 9fb9afe79f9133400c5be04b031ee59a049b5ef2 Mon Sep 17 00:00:00 2001
From: "perhapszzy@sina.com" <perhapszzy@sina.com>
Date: Sat, 3 Jun 2017 06:05:26 +0800
Subject: [PATCH] add restful client and clip the gradient.

---
 .../examples/recommandation/restful_client.py | 41 +++++++++++++++++++
 .../clever/examples/recommandation/train.py   | 39 ++++++++++++------
 2 files changed, 68 insertions(+), 12 deletions(-)
 create mode 100644 caicloud.tensorflow/caicloud/clever/examples/recommandation/restful_client.py

diff --git a/caicloud.tensorflow/caicloud/clever/examples/recommandation/restful_client.py b/caicloud.tensorflow/caicloud/clever/examples/recommandation/restful_client.py
new file mode 100644
index 00000000..94fa154e
--- /dev/null
+++ b/caicloud.tensorflow/caicloud/clever/examples/recommandation/restful_client.py
@@ -0,0 +1,41 @@
+# coding=utf-8
+# Copyright 2017 Caicloud authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+from __future__ import print_function
+
+import tensorflow as tf
+from caicloud.clever.serving.client import restful_client
+from caicloud.clever.serving.client import serving_error
+
+make_ndarray = tf.contrib.util.make_ndarray
+
+client = restful_client.RESTfulClient('192.168.16.42:31036')
+
+def run():
+    inputs = {
+        'user': tf.contrib.util.make_tensor_proto([1], shape=[1]),
+        'item': tf.contrib.util.make_tensor_proto([2], shape=[1]),
+    }
+    try:
+        outputs = client.call_predict(inputs)
+        result = outputs["infer"]
+        print('score: {0}'.format(make_ndarray(result)[0][0]))
+    except serving_error.ServingRESTfulError as e:
+        print('serving error,\n status: {0},\n reason: {1},\n body: {2}'.format(
+            e.status, e.reason, e.body))
+
+if __name__ == '__main__':
+    run()
diff --git a/caicloud.tensorflow/caicloud/clever/examples/recommandation/train.py b/caicloud.tensorflow/caicloud/clever/examples/recommandation/train.py
index 9347b20c..4457ac62 100644
--- a/caicloud.tensorflow/caicloud/clever/examples/recommandation/train.py
+++ b/caicloud.tensorflow/caicloud/clever/examples/recommandation/train.py
@@ -5,24 +5,26 @@
 import numpy as np
 import tensorflow as tf
 import pandas as pd
+import os
 
 from caicloud.clever.tensorflow import dist_base
 from caicloud.clever.tensorflow import model_exporter
 
-tf.app.flags.DEFINE_string("export_dir",
-                           "/tmp/saved_model/movie",
-                           "model export directory path.")
-
-tf.app.flags.DEFINE_string("batch_size", 128, "training batch size.")
-tf.app.flags.DEFINE_string("embedding_dim", 50, "embedding dimension.")
+tf.app.flags.DEFINE_string("export_dir", "/tmp/saved_model/movie", "model export directory path.")
+tf.app.flags.DEFINE_string("data_dir", "/caicloud/admin/hengfengPOC/data", "path where data is located.")
+tf.app.flags.DEFINE_integer("batch_size", 128, "training batch size.")
+tf.app.flags.DEFINE_integer("embedding_dim", 50, "embedding dimension.")
+tf.app.flags.DEFINE_float("learning_rate", 0.01, "learning rate.")
 
 FLAGS = tf.app.flags.FLAGS
+
 USER_NUM = 6040
 ITEM_NUM = 3952
 
 def get_data():
     col_names = ["user", "item", "rate", "st"]
-    df = pd.read_csv("/tmp/movielens/ml-1m/ratings.dat", sep="::", header=None, names=col_names, engine='python')
+    datafile = os.path.join(FLAGS.data_dir, "ml-1m/ratings.dat")
+    df = pd.read_csv(datafile, sep="::", header=None, names=col_names, engine='python')
     df["user"] -= 1
     df["item"] -= 1
 
@@ -96,9 +98,19 @@ def model_fn(sync, num_replicas):
     _global_step = tf.contrib.framework.get_or_create_global_step()
 
     _cost = tf.square(_infer - _rate_batch)
-    optimizer = tf.train.AdamOptimizer(0.001)
-    _train_op = optimizer.minimize(_cost, global_step=_global_step)
-
+    optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
+
+    if sync:
+        optimizer = tf.train.SyncReplicasOptimizer(
+            optimizer,
+            replicas_to_aggregate=num_replicas,
+            total_num_replicas=num_replicas,
+            name="mnist_sync_replicas")
+
+    gradients, variables = zip(*optimizer.compute_gradients(_cost))
+    gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
+    _train_op = optimizer.apply_gradients(zip(gradients, variables), global_step=_global_step)
+
     _rmse = tf.sqrt(tf.reduce_mean(_cost))
 
     def rmse_evalute_fn(session):
@@ -129,8 +141,11 @@ def train_fn(session, num_global_step):
     users, items, rates = next(_iter_train)
     session.run(_train_op, feed_dict={_user_batch: users, _item_batch: items, _rate_batch: rates})
 
-    if _local_step % 2000 == 0:
-        rmse, infer, cost = session.run([_rmse, _infer, _cost], feed_dict={_user_batch: _test["user"], _item_batch: _test["item"], _rate_batch: _test["rate"]})
+    if _local_step % 200 == 0:
+        rmse, infer, cost = session.run(
+            [_rmse, _infer, _cost],
+            feed_dict={_user_batch: _test["user"], _item_batch: _test["item"], _rate_batch: _test["rate"]})
+        print("Eval RMSE at round {} is: {}".format(num_global_step, rmse))
 
     _local_step += 1