# 2-hidden layer NN in TensorFlow
# This code is not optimized for speed.
# It's just to get something working, using the principles we know.
# For the class Data Science: Practical Deep Learning Concepts in Theano and TensorFlow
# https://deeplearningcourses.com/c/data-science-deep-learning-in-theano-tensorflow
# https://www.udemy.com/data-science-deep-learning-in-theano-tensorflow
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future

import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from util import get_normalized_data, y2indicator
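# Note: this script uses the TensorFlow 1.x graph API (tf.placeholder, tf.Session);
# under TensorFlow 2 it would likely need the tf.compat.v1 equivalents, with
# eager execution disabled, but the logic below is unchanged either way.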
def error_rate(p, t):
    return np.mean(p != t)
# copy this first part from theano2.py
def main():
    # step 1: get the data and define all the usual variables
    Xtrain, Xtest, Ytrain, Ytest = get_normalized_data()

    max_iter = 15
    print_period = 50
    lr = 0.00004
    reg = 0.01
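    # y2indicator (from util) turns the integer labels into an NxK one-hot
    # indicator matrix, matching the (None, K) targets placeholder defined below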
    Ytrain_ind = y2indicator(Ytrain)
    Ytest_ind = y2indicator(Ytest)

    N, D = Xtrain.shape
    batch_sz = 500
    n_batches = N // batch_sz

    # add an extra layer just for fun
    M1 = 300
    M2 = 100
    K = 10
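    # randomly initialize the weights, scaling each by 1/sqrt(fan-in) so the
    # initial activations stay at a reasonable scale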
    W1_init = np.random.randn(D, M1) / np.sqrt(D)
    b1_init = np.zeros(M1)
    W2_init = np.random.randn(M1, M2) / np.sqrt(M1)
    b2_init = np.zeros(M2)
    W3_init = np.random.randn(M2, K) / np.sqrt(M2)
    b3_init = np.zeros(K)

    # define variables and expressions
    X = tf.placeholder(tf.float32, shape=(None, D), name='X')
    T = tf.placeholder(tf.float32, shape=(None, K), name='T')
    W1 = tf.Variable(W1_init.astype(np.float32))
    b1 = tf.Variable(b1_init.astype(np.float32))
    W2 = tf.Variable(W2_init.astype(np.float32))
    b2 = tf.Variable(b2_init.astype(np.float32))
    W3 = tf.Variable(W3_init.astype(np.float32))
    b3 = tf.Variable(b3_init.astype(np.float32))

    # define the model
    Z1 = tf.nn.relu( tf.matmul(X, W1) + b1 )
    Z2 = tf.nn.relu( tf.matmul(Z1, W2) + b2 )
    Yish = tf.matmul(Z2, W3) + b3 # remember, the cost function does the softmaxing! weird, right?

    # softmax_cross_entropy_with_logits_v2 takes in the "logits"
    # if you wanted to know the actual output of the neural net,
    # you could pass "Yish" into tf.nn.softmax(logits)
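    # for example (not used below): probs = tf.nn.softmax(Yish)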
    cost = tf.reduce_sum(tf.nn.softmax_cross_entropy_with_logits_v2(logits=Yish, labels=T))

    # we choose the optimizer but don't implement the algorithm ourselves
    # let's go with RMSprop, since we just learned about it.
    # it includes momentum!
    train_op = tf.train.RMSPropOptimizer(lr, decay=0.99, momentum=0.9).minimize(cost)

    # we'll use this to calculate the error rate
    predict_op = tf.argmax(Yish, 1)

    costs = []
    init = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(init)

        for i in range(max_iter):
            for j in range(n_batches):
                Xbatch = Xtrain[j*batch_sz:(j*batch_sz + batch_sz),]
                Ybatch = Ytrain_ind[j*batch_sz:(j*batch_sz + batch_sz),]

                session.run(train_op, feed_dict={X: Xbatch, T: Ybatch})
                if j % print_period == 0:
                    test_cost = session.run(cost, feed_dict={X: Xtest, T: Ytest_ind})
                    prediction = session.run(predict_op, feed_dict={X: Xtest})
                    err = error_rate(prediction, Ytest)
                    print("Cost / err at iteration i=%d, j=%d: %.3f / %.3f" % (i, j, test_cost, err))
                    costs.append(test_cost)

    plt.plot(costs)
    plt.show()

    # increase max_iter and notice how the test cost starts to increase.
    # are we overfitting by adding that extra layer?
    # how would you add regularization to this model?
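    # one possible sketch (not wired in here; names are illustrative): add an
    # L2 penalty using the `reg` constant defined above, before building train_op:
    #   rcost = reg * sum(tf.nn.l2_loss(p) for p in [W1, W2, W3])
    #   cost = cost + rcost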

if __name__ == '__main__':
    main()