# a2_poistion_wise_feed_forward.py (forked from brightmart/text_classification)
# -*- coding: utf-8 -*-
import tensorflow as tf
import time
"""
Position-wise Feed-Forward Networks
In addition to attention sub-layers, each of the layers in our encoder and decoder contains a fully
connected feed-forward network, which is applied to each position separately and identically. This
consists of two linear transformations with a ReLU activation in between.
FFN(x) = max(0, xW1 + b1)W2 + b2
While the linear transformations are the same across different positions, they use different parameters
from layer to layer. Another way of describing this is as two convolutions with kernel size 1.
The dimensionality of input and output is d_model = 512, and the inner layer has dimensionality d_ff = 2048.
"""
class PositionWiseFeedFoward(object): #TODO make it parallel
"""
    position-wise feed-forward network. Formula:
    FFN(x) = max(0, xW1 + b1)W2 + b2
"""
def __init__(self,x,layer_index,d_model=512,d_ff=2048):
"""
:param x: shape should be:[batch,sequence_length,d_model]
:param layer_index: index of layer
:return: shape:[sequence_length,d_model]
"""
shape_list=x.get_shape().as_list()
assert(len(shape_list)==3)
self.x=x
self.layer_index=layer_index
self.d_model=d_model
self.d_ff=d_ff
self.initializer = tf.random_normal_initializer(stddev=0.1)
def position_wise_feed_forward_fn(self):
"""
x: [batch,sequence_length,d_model]
:return: [batch,sequence_length,d_model]
"""
        # scope by layer index so each layer gets its own parameters, as described above
        with tf.variable_scope("position_wise_feed_forward_%d" % self.layer_index):
            #1.conv1: kernel [1,d_model] applies the same linear map at every position, then ReLU: max(0,xW1+b1)
            x_expanded=tf.expand_dims(self.x,axis=3) #[batch,sequence_length,d_model,1]
            output_conv1=tf.layers.conv2d(
                x_expanded,filters=self.d_ff,kernel_size=[1,self.d_model],padding="VALID",
                name='conv1',kernel_initializer=self.initializer,activation=tf.nn.relu
            ) #[batch,sequence_length,1,d_ff]
            output_conv1=tf.transpose(output_conv1,[0,1,3,2]) #[batch,sequence_length,d_ff,1]
            #2.conv2: kernel [1,d_ff] implements the second linear map: (.)W2+b2
            output_conv2=tf.layers.conv2d(
                output_conv1,filters=self.d_model,kernel_size=[1,self.d_ff],padding="VALID",
                name='conv2',kernel_initializer=self.initializer,activation=None
            ) #[batch,sequence_length,1,d_model]
            output=tf.squeeze(output_conv2,axis=2) #explicit axis, so batch_size=1 is not squeezed away. [batch,sequence_length,d_model]
        return output #[batch,sequence_length,d_model]
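# An alternative sketch of the same computation, written literally as the "two convolutions
# with kernel size 1" described in the module docstring; an assumption: TF 1.x's
# tf.layers.conv1d. With kernel_size=1 no expand_dims/transpose/squeeze is needed.
def ffn_conv1d_sketch(x, layer_index, d_model=512, d_ff=2048):  # hypothetical helper, for illustration only
    with tf.variable_scope("ffn_conv1d_%d" % layer_index):
        hidden = tf.layers.conv1d(x, filters=d_ff, kernel_size=1, activation=tf.nn.relu)  # [batch,seq_len,d_ff]
        return tf.layers.conv1d(hidden, filters=d_model, kernel_size=1)                   # [batch,seq_len,d_model]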
#test function for position_wise_feed_forward_fn
#timing: the old version took 35.6s at length=8000; this version takes 0.03s
def test_position_wise_feed_forward_fn():
start=time.time()
    x=tf.ones((8,1000,512)) #batch_size=8, sequence_length=1000, d_model=512
    layer_index=0
    position_wise_feed_forward=PositionWiseFeedFoward(x,layer_index)
    output=position_wise_feed_forward.position_wise_feed_forward_fn()
    end=time.time()
    print("x:",x,";output:",output,";time spent:",(end-start))
return output
def test():
with tf.Session() as sess:
result=test_position_wise_feed_forward_fn()
sess.run(tf.global_variables_initializer())
result_=sess.run(result)
print("result_:",result_)
#test()