forked from Lapis-Hong/wide_deep

Commit 2a1e6ff (parent: 41cdad3)
add weight column; add l1, l2 reg; add weight decay lr
lapis-hong committed Mar 30, 2018
Showing 20 changed files with 755 additions and 554 deletions.
@@ -7,3 +7,6 @@
 *.egg-info
 dist
 build
+
+#
+model
Large diffs are not rendered by default.
@@ -1,64 +1,82 @@
-# Model Parameter Configuration
-
-# Wide Parameters
-# optimizer: one of {`Adagrad`, `Adam`, `Ftrl`, `RMSProp`, `SGD`}
-linear:
-  linear_optimizer: 'Ftrl'
-  wide_learning_rate: 0.1
-  # regularization parameters, optional
-  wide_l1: 0.5
-  wide_l2: 1
+### Model Parameter Configuration
+
+## Linear Parameters
+
+# linear_optimizer:
+#   Required. One of {`Adagrad`, `Adam`, `Ftrl`, `RMSProp`, `SGD`}, or a
+#   tf.train.Optimizer instance to pass optimizer-specific args.
+# linear_initial_learning_rate:
+#   Optional. Initial learning rate; defaults to 0.05 if not specified.
+#   Can be overridden by the learning rate arg of a tf.train.Optimizer instance.
+# linear_decay_rate:
+#   Optional. Decay rate for each epoch; defaults to 1 if not specified.
+#   Leave empty or set to 1 to disable learning rate decay.
+#   decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)
+#   After training for a long time, set a suitably small learning rate and turn the decay off.
+linear_optimizer: Ftrl  # tf.train.FtrlOptimizer(learning_rate=0.1, l1_regularization_strength=0.5, l2_regularization_strength=1)
+linear_initial_learning_rate: 0.05
+linear_decay_rate: 0.8
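The decay formula in the comments above is the exponential schedule that TensorFlow 1.x provides as tf.train.exponential_decay. A minimal sketch of wiring it to the Ftrl optimizer shown in the inline comment; steps_per_epoch and the concrete values are illustrative placeholders, not taken from this commit:

import tensorflow as tf

# Placeholder values standing in for the config keys above.
initial_learning_rate = 0.05  # linear_initial_learning_rate
decay_rate = 0.8              # linear_decay_rate
steps_per_epoch = 10000       # assumed: one decay period per epoch

global_step = tf.train.get_or_create_global_step()

# decayed_learning_rate = learning_rate * decay_rate ^ (global_step / decay_steps)
learning_rate = tf.train.exponential_decay(
    learning_rate=initial_learning_rate,
    global_step=global_step,
    decay_steps=steps_per_epoch,
    decay_rate=decay_rate,
    staircase=False)

# The Ftrl optimizer with the L1/L2 strengths from the inline comment above.
optimizer = tf.train.FtrlOptimizer(
    learning_rate=learning_rate,
    l1_regularization_strength=0.5,
    l2_regularization_strength=1.0)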
-# DNN Parameters
-# connected_mode: one of {`simple`, `first_dense`, `last_dense`, `dense`, `resnet`}
-#   or arbitrary connections index tuples.
-#   1. `simple`: normal dnn architecture.
-#   2. `first_dense`: add additional connections from the first input layer to all hidden layers.
-#   3. `last_dense`: add additional connections from all previous layers to the last layer.
-#   4. `dense`: add additional connections between all layers, similar to DenseNet.
-#   5. `resnet`: add additional connections between adjacent layers, similar to ResNet.
-#   6. arbitrary connections list: add additional connections from layer_0 to layer_1, like 0-1.
-#      eg: [0-1,0-3,1-2]; indexes start from zero (input_layer), max index is len(hidden_units), smaller index first.
-# To use a multi-DNN model, set nested hidden_units, eg: [[1024,512,256], [512,256]].
-# connected_mode can be set per DNN, eg: ['simple', 'dense'], or shared if only 'simple' is set.
-# Only the above 2 network architecture parameters can differ; other parameters are shared across a multi-DNN model.
-dnn:
-  # network architecture
-  hidden_units: [1024,512,256]
-  connected_mode: 'simple'
-  dnn_optimizer: 'Adagrad'
-  deep_learning_rate: 0.1
-  activation_function: 'tf.nn.relu'
-  # regularization parameters, optional, set empty to default to None
-  deep_l1: 0.01
-  deep_l2: 0.01
-  dropout:
-  batch_normalization: 1  # bool
+## DNN Parameters
+
+# dnn_hidden_units: a list giving the number of units in each hidden layer.
+#   Set nested hidden_units, eg: [[1024,512,256], [512,256]], for a multi-DNN model.
+# dnn_connected_mode:
+#   One of {`simple`, `first_dense`, `last_dense`, `dense`, `resnet`} or an arbitrary connections list.
+#   1. `simple`: normal dnn architecture.
+#   2. `first_dense`: add connections from the first input layer to all hidden layers.
+#   3. `last_dense`: add connections from all previous layers to the last layer.
+#   4. `dense`: add connections between all layers, similar to DenseNet.
+#   5. `resnet`: add connections between adjacent layers, similar to ResNet.
+#   6. arbitrary connections list: add a connection from layer_0 to layer_1 as 0-1.
+#      eg: [0-1,0-3,1-2]; indexes start from zero (input_layer), max index is len(hidden_units), smaller index first.
+#   Can be set per DNN, eg: ['simple', 'dense'], or shared if only 'simple' is set.
+
+# dnn_optimizer:
+# dnn_initial_learning_rate: if not specified, defaults to 0.05.
+# dnn_decay_rate:
+#   For the above 3 parameters, see the linear section; the same values are shared across a multi-DNN model.
+# dnn_activation_function:
+#   One of {`sigmoid`, `tanh`, `relu`, `relu6`, `leaky_relu`, `crelu`, `elu`, `selu`, `softplus`, `softsign`}
+# dnn_l1: L1 regularization for dense layers; set 0 or leave empty to disable.
+# dnn_l2: L2 regularization for dense layers; set 0 or leave empty to disable.
+# dnn_dropout: dropout rate; 0.1 would drop out 10% of input units. Set 0 or leave empty to disable.
+# dnn_batch_normalization: bool; set 1 or True to enable batch normalization.
+dnn_hidden_units: [1024,512,256]
+dnn_connected_mode: simple
+dnn_optimizer: Adagrad
+dnn_initial_learning_rate: 0.05
+dnn_decay_rate: 0.8
+dnn_activation_function: relu
+dnn_l1: 0.1
+dnn_l2: 0.1
+dnn_dropout:
+dnn_batch_normalization: 1

-# CNN Parameters
-cnn:
-  # A flag to override the data format used in the model. channels_first
-  # provides a performance boost on GPU but is not always compatible
-  # with CPU. If left unspecified, the data format will be chosen
-  # automatically based on whether TensorFlow was built for CPU or GPU.
-  use_flag: 0
-  data_format:
-  height: 224
-  width: 224
-  num_channels: 3
-  cnn_optimizer: 'Adagrad'
-  weight_decay: 2e-4  # 0.0002 performs better than the originally suggested 0.0001.
-  momentum: 0.9
-  num_iamges_train:
-  num_iamges_test:
-  use_distortion: 0
-  # if using resnet
-  resnet_size: 50  # choices: 18, 34, 50, 101, 152, 200
+## CNN Parameters
+# TODO
+
+# cnn_use_flag: bool; set 0 to not combine the CNN model.
+# cnn_data_format: `channels_first` or `channels_last`.
+#   channels_first provides a performance boost on GPU but is not always compatible with CPU.
+#   If unspecified, chosen automatically based on whether TensorFlow was built for CPU or GPU.
+# ...
+
+cnn_use_flag: 0
+#cnn_data_format:
+#cnn_height: 224
+#cnn_width: 224
+#cnn_num_channels: 3
+cnn_optimizer: 'Adagrad'
+cnn_initial_learning_rate: 0.05
+cnn_decay_rate: 0.8
+#cnn_weight_decay: 2e-4  # 0.0002 performs better than the originally suggested 0.0001.
+#cnn_momentum: 0.9
+#cnn_num_iamges_train:
+#cnn_num_iamges_test:
+#cnn_use_distortion: 0
+## if using resnet
+#cnn_resnet_size: 50  # choices: 18, 34, 50, 101, 152, 200
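The dnn_connected_mode options documented in the diff above add skip connections between layers. A hypothetical sketch of how an arbitrary connections list such as [0-1,0-3,1-2] could be parsed and wired with TF 1.x dense layers; the function names and wiring are assumptions for illustration, not this repository's actual implementation:

import tensorflow as tf

def parse_connections(conn_list):
    # Parse entries like '0-1' into (from_layer, to_layer) index pairs.
    return [tuple(int(i) for i in conn.split('-')) for conn in conn_list]

def connected_dnn(inputs, hidden_units, connections):
    # Index 0 is the input layer; indexes 1..len(hidden_units) are the hidden layers.
    outputs = [inputs]
    for k, units in enumerate(hidden_units, start=1):
        # The previous layer always feeds layer k; concat any extra skip connections into k.
        extra = [outputs[lo] for (lo, hi) in connections if hi == k and lo != k - 1]
        net = tf.concat([outputs[k - 1]] + extra, axis=-1) if extra else outputs[k - 1]
        outputs.append(tf.layers.dense(net, units, activation=tf.nn.relu))
    return outputs[-1]

# e.g. hidden_units [1024,512,256] with connected_mode [0-1,0-3,1-2]:
# last_hidden = connected_dnn(features, [1024, 512, 256],
#                             parse_connections(['0-1', '0-3', '1-2']))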
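Similarly, dnn_l1, dnn_l2, dnn_dropout, and dnn_batch_normalization map naturally onto standard TF 1.x layer options. A sketch of one hidden layer under those settings; the helper and its defaults are illustrative, not code from this commit:

import tensorflow as tf

def dnn_hidden_layer(net, units, l1=0.1, l2=0.1, dropout=None,
                     batch_norm=True, is_training=True):
    # L1/L2 kernel regularization on the dense weights (dnn_l1 / dnn_l2).
    regularizer = tf.contrib.layers.l1_l2_regularizer(scale_l1=l1, scale_l2=l2)
    net = tf.layers.dense(net, units, activation=tf.nn.relu,
                          kernel_regularizer=regularizer)
    if batch_norm:  # dnn_batch_normalization
        net = tf.layers.batch_normalization(net, training=is_training)
    if dropout:     # dnn_dropout: rate=0.1 drops out 10% of input units
        net = tf.layers.dropout(net, rate=dropout, training=is_training)
    return net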
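Since the file is plain YAML, empty values such as `dnn_dropout:` load as None, which lines up with the "set 0 or empty to disable" convention in the comments. A small assumed loader sketch; the path conf/model.yaml is a guess at this repo's layout:

import yaml

# Assumed path; the repo's actual config location may differ.
with open('conf/model.yaml') as f:
    conf = yaml.safe_load(f)

dropout = conf.get('dnn_dropout')                # None -> dropout disabled
decay_rate = conf.get('linear_decay_rate') or 1  # empty or 1 -> no LR decay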