1. add sparse model for NFM/AFM/DeepFM 2. extract layers
lixiang.2533 committed May 1, 2020
1 parent 52add8c commit 519d474
Showing 5 changed files with 219 additions and 43 deletions.
54 changes: 54 additions & 0 deletions layers.py
@@ -0,0 +1,54 @@
import tensorflow as tf
from utils import add_layer_summary

def sparse_embedding(feature_size, embedding_size, field_size, feat_ids, feat_vals, add_summary):
with tf.variable_scope('Sparse_Embedding'):
v = tf.get_variable( shape=[feature_size, embedding_size],
initializer=tf.truncated_normal_initializer(),
name='embedding_weight' )

embedding_matrix = tf.nn.embedding_lookup( v, feat_ids ) # batch * field_size * embedding_size
embedding_matrix = tf.multiply( embedding_matrix, tf.reshape(feat_vals, [-1, field_size,1] ) )

if add_summary:
add_layer_summary( 'embedding_matrix', embedding_matrix )

return embedding_matrix


def sparse_linear(feature_size, feat_ids, feat_vals, add_summary):
with tf.variable_scope('Linear_output'):
weight = tf.get_variable( shape=[feature_size],
initializer=tf.truncated_normal_initializer(),
name='linear_weight' )
bias = tf.get_variable( shape=[1],
initializer=tf.glorot_uniform_initializer(),
name='linear_bias' )

linear_output = tf.nn.embedding_lookup( weight, feat_ids )
linear_output = tf.reduce_sum( tf.multiply( linear_output, feat_vals ), axis=1, keepdims=True )
linear_output = tf.add( linear_output, bias )

if add_summary:
add_layer_summary('linear_output', linear_output)

return linear_output


def stack_dense_layer(dense, hidden_units, dropout_rate, batch_norm, mode, add_summary):
with tf.variable_scope('Dense'):
for i, unit in enumerate(hidden_units):
dense = tf.layers.dense(dense, units = unit, activation = 'relu',
name = 'dense{}'.format(i))
if batch_norm:
dense = tf.layers.batch_normalization(dense, center = True, scale = True,
trainable = True,
training = (mode == tf.estimator.ModeKeys.TRAIN))
if dropout_rate > 0:
dense = tf.layers.dropout(dense, rate = dropout_rate,
training = (mode == tf.estimator.ModeKeys.TRAIN))

if add_summary:
add_layer_summary(dense.name, dense)

return dense
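
For context (not part of the diff above): a minimal sketch of how the three helpers added in layers.py compose into a generic linear-plus-deep forward pass. The toy_model_fn name, the sizes, and the feature dict layout are illustrative assumptions, not values from the repo; the real models read their sizes from FRAPPE_PARAMS.

import tensorflow as tf
from layers import sparse_embedding, sparse_linear, stack_dense_layer

def toy_model_fn(features, mode):
    # assumed toy sizes, purely for illustration
    feature_size, field_size, embedding_size = 5000, 10, 16

    feat_ids = tf.reshape(features['feat_ids'], [-1, field_size])    # batch * field_size
    feat_vals = tf.reshape(features['feat_vals'], [-1, field_size])  # batch * field_size

    # batch * field_size * embedding_size, already scaled by feat_vals
    emb = sparse_embedding(feature_size, embedding_size, field_size,
                           feat_ids, feat_vals, add_summary=False)

    # first-order term: batch * 1
    linear = sparse_linear(feature_size, feat_ids, feat_vals, add_summary=False)

    # flatten the embeddings and run them through the shared MLP helper
    deep = tf.reshape(emb, [-1, field_size * embedding_size])
    deep = stack_dense_layer(deep, hidden_units=[64, 32, 1], dropout_rate=0.2,
                             batch_norm=True, mode=mode, add_summary=False)  # batch * 1

    return linear + deep  # batch * 1 logit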
83 changes: 71 additions & 12 deletions model/AFM/AFM.py
@@ -1,7 +1,7 @@
"""
paper
Jun Xiao,H ao Ye ,2017, Attentional Factorization Machines - Learning the Weight of Feature Interactions via Attention Networks
Jun Xiao, Hao Ye ,2017, Attentional Factorization Machines - Learning the Weight of Feature Interactions via Attention Networks
"""


@@ -10,25 +10,72 @@
from config import *
from model.AFM.preprocess import build_features
from utils import tf_estimator_model, add_layer_summary, build_estimator_helper
from layers import sparse_embedding, sparse_linear, stack_dense_layer

@tf_estimator_model
def model_fn(features, labels, mode, params):
def model_fn_dense(features, labels, mode, params):
dense_feature, sparse_feature = build_features()
dense = tf.feature_column.input_layer(features, dense_feature) # lz linear concat of embedding
sparse = tf.feature_column.input_layer(features, sparse_feature)

feature_size = len( dense_feature )
field_size = len( dense_feature )
embedding_size = dense_feature[0].variable_shape.as_list()[-1]
embedding_matrix = tf.reshape( dense, [-1, feature_size, embedding_size] ) # batch * feature_size *emb_size
embedding_matrix = tf.reshape( dense, [-1, field_size, embedding_size] ) # batch * field_size *emb_size

with tf.variable_scope('Linear_part'):
linear_output = tf.layers.dense(sparse, units=1)
add_layer_summary( 'linear_output', linear_output )

with tf.variable_scope('Elementwise_Interaction'):
elementwise_list = []
for i in range(feature_size):
for j in range(i+1, feature_size):
for i in range(field_size):
for j in range(i+1, field_size):
vi = tf.gather(embedding_matrix, indices=i, axis=1, batch_dims=0,name = 'vi') # batch * emb_size
vj = tf.gather(embedding_matrix, indices=j, axis=1, batch_dims=0,name = 'vj')
elementwise_list.append(tf.multiply(vi,vj)) # batch * emb_size
elementwise_matrix = tf.stack(elementwise_list) # (N*(N-1)/2) * batch * emb_size
elementwise_matrix = tf.transpose(elementwise_matrix, [1,0,2]) # batch * (N*(N-1)/2) * emb_size

with tf.variable_scope('Attention_Net'):
# 2 fully connected layer
dense = tf.layers.dense(elementwise_matrix, units = params['attention_factor'], activation = 'relu') # batch * (N*(N-1)/2) * t
add_layer_summary( dense.name, dense )
attention_weight = tf.layers.dense(dense, units=1, activation = 'softmax') # batch *(N*(N-1)/2) * 1
add_layer_summary( attention_weight.name, attention_weight)

with tf.variable_scope('Attention_pooling'):
interaction_output = tf.reduce_sum(tf.multiply(elementwise_matrix, attention_weight), axis=1) # batch * emb_size
interaction_output = tf.layers.dense(interaction_output, units=1) # batch * 1

with tf.variable_scope('output'):
y = interaction_output + linear_output
add_layer_summary( 'output', y )

return y


@tf_estimator_model
def model_fn_sparse(features, labels, mode, params):
# hyper parameter
field_size = FRAPPE_PARAMS['field_size']
feature_size = FRAPPE_PARAMS['feature_size']
embedding_size = FRAPPE_PARAMS['embedding_size']

# extract feature
feat_ids = tf.reshape(features['feat_ids'], shape = [-1, field_size]) # batch * field_size
feat_vals = tf.reshape(features['feat_vals'], shape = [-1, field_size]) # batch * field_size

# extract embedding
embedding_matrix = sparse_embedding(feature_size, embedding_size, field_size,
feat_ids, feat_vals, add_summary = True)

# linear output
linear_output = sparse_linear(feature_size, feat_ids, feat_vals, add_summary = True)

with tf.variable_scope('Elementwise_Interaction'):
elementwise_list = []
for i in range(field_size):
for j in range(i+1, field_size):
vi = tf.gather(embedding_matrix, indices=i, axis=1, batch_dims=0,name = 'vi') # batch * emb_size
vj = tf.gather(embedding_matrix, indices=j, axis=1, batch_dims=0,name = 'vj')
elementwise_list.append(tf.multiply(vi,vj)) # batch * emb_size
@@ -53,11 +100,23 @@ def model_fn(features, labels, mode, params):
return y



build_estimator = build_estimator_helper(
{'dense':model_fn},
params = {'attention_factor':3,
'dropout_rate':0.2,
'learning_rate' :0.002,
'hidden_units':[5,5]
}
model_fn = {
'dense' : model_fn_dense,
'sparse' : model_fn_sparse
},
params = {
'dense':{
'attention_factor':3,
'dropout_rate':0.2,
'learning_rate' :0.002
},
'sparse':{
'attention_factor': 16,
'dropout_rate': 0.2,
'learning_rate': 0.002,
'hidden_units': [128, 64, 1]
}
}
)
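
As a reading aid (not part of the commit), the pairwise interaction and attention pooling implemented by both model_fn variants correspond to the AFM formulation from the paper cited in the docstring:

\hat{y}_{AFM}(x) = w_0 + \sum_{i=1}^{n} w_i x_i + p^{\top} \sum_{i=1}^{n}\sum_{j=i+1}^{n} a_{ij}\,(v_i \odot v_j)\,x_i x_j,
\qquad a_{ij} = \operatorname{softmax}\big(h^{\top}\operatorname{ReLU}(W\,(v_i \odot v_j)\,x_i x_j + b)\big)

Here the Elementwise_Interaction scope builds the (v_i ⊙ v_j) terms, Attention_Net computes the a_{ij} weights with attention_factor as the hidden dimension of W, and Attention_pooling performs the weighted sum followed by the final projection.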
29 changes: 19 additions & 10 deletions model/DeepFM/DeepFM.py
@@ -61,14 +61,14 @@ def model_fn_sparse(features, labels, mode, params):
feat_vals = tf.reshape(features['feat_vals'], shape = [-1, field_size]) # batch * field_size

with tf.variable_scope('FM_component'):
bias = tf.get_variable(shape =[1], name = 'linear_bias')
weight = tf.get_variable(shape = [feature_size], name = 'linear_weight')
v = tf.get_variable(shape = [feature_size, embedding_size], name = 'embedding_weight')
bias = tf.get_variable(shape =[1], name = 'linear_bias', initializer = tf.glorot_uniform_initializer())
weight = tf.get_variable(shape = [feature_size], name = 'linear_weight', initializer = tf.truncated_normal_initializer())
v = tf.get_variable(shape = [feature_size, embedding_size], name = 'embedding_weight', initializer = tf.truncated_normal_initializer())

with tf.variable_scope( 'Linear' ):
# batch_size * feature_size -> batch_size * field_size -> batch_size * 1
linear_output = tf.reduce_sum(tf.multiply(tf.nn.embedding_lookup(weight, feat_ids), feat_vals) , axis=1, keepdims=True)
linear_output = tf.add(linear_output,bias)
linear_output = tf.add(linear_output,bias) # batch * 1
add_layer_summary( 'linear_output', linear_output )

with tf.variable_scope( 'second_order' ):
@@ -79,7 +79,7 @@ def model_fn_sparse(features, labels, mode, params):
sum_square = tf.pow( tf.reduce_sum( embedding_matrix, axis=1 ), 2 )
square_sum = tf.reduce_sum( tf.pow(embedding_matrix,2), axis=1 )

fm_output = tf.reduce_sum(tf.subtract( sum_square, square_sum) * 0.5, axis=1, keepdims=True)
fm_output = tf.reduce_sum(tf.subtract( sum_square, square_sum) * 0.5, axis=1, keepdims=True) # batch * 1
add_layer_summary('fm_output', fm_output)

with tf.variable_scope('Deep_component'):
@@ -92,19 +92,28 @@ def model_fn_sparse(features, labels, mode, params):
add_layer_summary( dense.name, dense )

with tf.variable_scope( 'output' ):
y = dense + fm_output+ linear_output
y = dense + fm_output+ linear_output # batch * 1
add_layer_summary( 'output', y )

return y

build_estimator = build_estimator_helper(
{'dense' : model_fn_dense,
'sparse': model_fn_sparse
},
params = {
model_fn = {
'dense' : model_fn_dense,
'sparse': model_fn_sparse
},
params = {
'dense': {
'dropout_rate': 0.2,
'learning_rate' :0.001,
'hidden_units':[20,10,1]
},
'sparse': {
'dropout_rate':0.2,
'learning_rate':0.002,
'hidden_units':[128,64,1]

}
}
)
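
For reference (not part of the diff), the second_order scope relies on the standard O(kn) rewrite of the pairwise FM term, which is exactly what sum_square, square_sum and the 0.5 factor compute per batch row:

\sum_{i=1}^{n}\sum_{j=i+1}^{n} \langle v_i, v_j \rangle\, x_i x_j
  = \frac{1}{2} \sum_{f=1}^{k}\left[\Big(\sum_{i=1}^{n} v_{i,f}\, x_i\Big)^{2} - \sum_{i=1}^{n} v_{i,f}^{2}\, x_i^{2}\right]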

88 changes: 69 additions & 19 deletions model/NFM/NFM.py
@@ -10,42 +10,92 @@
from config import *
from model.NFM.preprocess import build_features
from utils import tf_estimator_model, add_layer_summary, build_estimator_helper
from layers import sparse_embedding, sparse_linear, stack_dense_layer

@tf_estimator_model
def model_fn(features, labels, mode, params):
dense_feature = build_features()
dense = tf.feature_column.input_layer(features, dense_feature) # lz linear concat of embedding
def model_fn_dense(features, labels, mode, params):
dense_feature, sparse_feature = build_features()
dense = tf.feature_column.input_layer(features, dense_feature)
sparse = tf.feature_column.input_layer(features, sparse_feature)

feature_size = len( dense_feature )
field_size = len( dense_feature )
embedding_size = dense_feature[0].variable_shape.as_list()[-1]
embedding_matrix = tf.reshape( dense, [-1, feature_size, embedding_size] ) # batch * feature_size *emb_size
embedding_matrix = tf.reshape( dense, [-1, field_size, embedding_size] ) # batch * field_size *emb_size

with tf.variable_scope('Linear_output'):
linear_output = tf.layers.dense( sparse, units=1 )
add_layer_summary( 'linear_output', linear_output )

with tf.variable_scope('BI_Pooling'):
sum_square = tf.pow(tf.reduce_sum(embedding_matrix, axis=1), 2)
square_sum = tf.reduce_sum(tf.pow(embedding_matrix, 2), axis=1)
dense = tf.subtract(sum_square, square_sum)
add_layer_summary( dense.name, dense )

with tf.variable_scope('Dense'):
for i, unit in enumerate( params['hidden_units'] ):
dense = tf.layers.dense( dense, units=unit, activation='relu', name='dense{}'.format( i ) )
dense = tf.layers.batch_normalization( dense, center=True, scale=True, trainable=True,
training=(mode == tf.estimator.ModeKeys.TRAIN) )
dense = tf.layers.dropout( dense, rate=params['dropout_rate'],
training=(mode == tf.estimator.ModeKeys.TRAIN) )
add_layer_summary( dense.name, dense)
dense = stack_dense_layer(dense, params['hidden_units'],
dropout_rate = params['dropout_rate'], batch_norm = params['batch_norm'],
mode = mode, add_summary = True)

with tf.variable_scope('output'):
y = tf.layers.dense(dense, units=1, name = 'output')
y = linear_output + dense
add_layer_summary( 'output', y )

return y

@tf_estimator_model
def model_fn_sparse(features, labels, mode, params):
# hyper parameter
field_size = FRAPPE_PARAMS['field_size']
feature_size = FRAPPE_PARAMS['feature_size']
embedding_size = FRAPPE_PARAMS['embedding_size']

# extract feature
feat_ids = tf.reshape(features['feat_ids'], shape = [-1, field_size]) # batch * field_size
feat_vals = tf.reshape(features['feat_vals'], shape = [-1, field_size]) # batch * field_size

# extract embedding
embedding_matrix = sparse_embedding(feature_size, embedding_size, field_size,
feat_ids, feat_vals, add_summary=True)

# linear output
linear_output = sparse_linear(feature_size, feat_ids, feat_vals, add_summary= True)

with tf.variable_scope('BI_Pooling'):
sum_square = tf.pow(tf.reduce_sum(embedding_matrix, axis=1),2)
square_sum = tf.reduce_sum(tf.pow(embedding_matrix,2), axis=1)
dense = tf.subtract(sum_square, square_sum)
add_layer_summary( dense.name, dense )

# fully connected stacked dense layers
dense = stack_dense_layer( dense, params['hidden_units'],
dropout_rate=params['dropout_rate'], batch_norm=params['batch_norm'],
mode = mode, add_summary = True)

with tf.variable_scope( 'output' ):
y = linear_output + dense
add_layer_summary( 'output', y )

return y


build_estimator = build_estimator_helper(
{'dense':model_fn},
params = {'dropout_rate':0.2,
'learning_rate' :0.002,
'hidden_units':[5,5]
}
model_fn = {
'dense' : model_fn_dense,
'sparse': model_fn_sparse
},
params = {
'dense': {
'dropout_rate': 0.2,
'batch_norm': True,
'learning_rate' :0.001,
'hidden_units':[20,10,1]
},
'sparse': {
'dropout_rate': 0.2,
'batch_norm': True,
'learning_rate': 0.002,
'hidden_units': [128,64,1]

}
}
)
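
A small standalone check (plain NumPy, not part of the commit) of the BI_Pooling trick used in both NFM model_fn variants: the sum-square minus square-sum expression equals twice the sum of pairwise element-wise products, with the factor of two absorbed by the dense layers that follow. The array sizes are arbitrary illustrative choices.

import numpy as np

rng = np.random.RandomState(0)
emb = rng.randn(3, 4, 5)                 # batch=3, field_size=4, embedding_size=5

sum_square = np.sum(emb, axis=1) ** 2    # batch * embedding_size
square_sum = np.sum(emb ** 2, axis=1)    # batch * embedding_size
pooled = sum_square - square_sum         # what the BI_Pooling scope computes

pairwise = np.zeros_like(pooled)
for i in range(emb.shape[1]):
    for j in range(i + 1, emb.shape[1]):
        pairwise += emb[:, i, :] * emb[:, j, :]

assert np.allclose(pooled, 2.0 * pairwise)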
8 changes: 6 additions & 2 deletions model/NFM/preprocess.py
@@ -3,13 +3,17 @@

def build_features():
f_dense = []
f_sparse = []
# categorical features
for col, config in EMB_CONFIGS.items():
ind = tf.feature_column.categorical_column_with_hash_bucket(col, hash_bucket_size = config['hash_size'])
f_sparse.append( tf.feature_column.indicator_column(ind))
f_dense.append( tf.feature_column.embedding_column(ind, dimension = config['emb_size']) )

for col, config in BUCKET_CONFIGS.items():
bucket = tf.feature_column.bucketized_column( tf.feature_column.numeric_column( col ), boundaries = config['bin'] )
bucket = tf.feature_column.bucketized_column( tf.feature_column.numeric_column( col ), boundaries=config['bin'] )
f_sparse.append( bucket)
f_dense.append( tf.feature_column.embedding_column(bucket, dimension = config['emb_size']) )

return f_dense
return f_dense, f_sparse
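
A minimal, self-contained illustration (hypothetical column name and sizes, not from the repo config) of the dense/sparse split that build_features now returns: the embedding columns feed the deep part via input_layer, while the indicator and bucketized columns feed the linear part.

import tensorflow as tf

ind = tf.feature_column.categorical_column_with_hash_bucket('item', hash_bucket_size=100)
sparse_cols = [tf.feature_column.indicator_column(ind)]              # linear / wide input
dense_cols = [tf.feature_column.embedding_column(ind, dimension=8)]  # deep input

features = {'item': tf.constant([['apple'], ['banana']])}
wide_input = tf.feature_column.input_layer(features, sparse_cols)    # batch * hash_bucket_size
deep_input = tf.feature_column.input_layer(features, dense_cols)     # batch * 8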
