logistic_regression.py
import numpy as np
import scipy.optimize as opt
import matplotlib.pyplot as plot
from matplotlib import cm


def sigmoid(X):
    # Elementwise logistic sigmoid
    return 1./(1. + np.exp(-X))
def logistic_cost(theta, X, y):
    m = y.shape[0]
    if len(theta.shape) < 2:
        theta = theta[:, np.newaxis]
    h = sigmoid(np.dot(X, theta))
    # The direct versions below, with the minus sign distributed, did not work
    # very well, possibly due to numerical issues:
    # return np.ravel(1./m*np.sum(-y*np.log(h) - (1.-y)*np.log(1.-h)))
    # return np.ravel(1./m*(np.dot(-y.T,np.log(h)) - np.dot((1.-y.T),np.log(1.-h))))
    # Another parameterization that works:
    # return np.ravel(-1./m*(np.dot(y.T,np.log(h)) + np.dot((1.-y).T,np.log(1.-h))))
    # The form below was chosen since it seems the most direct
    return np.ravel(-1./m*np.sum(y*np.log(h) + (1.-y)*np.log(1.-h)))
def logistic_cost_grad(theta, X, y):
    m = y.shape[0]
    if len(theta.shape) < 2:
        theta = theta[:, np.newaxis]
    h = sigmoid(np.dot(X, theta))
    # Gradient of the unregularized cost: (1/m) * X^T (h - y)
    return np.ravel(1./m*np.dot(X.T, h - y))
def logistic_cost_reg(theta, X, y, l=.1):
    m = y.shape[0]
    if len(theta.shape) < 2:
        theta = theta[:, np.newaxis]
    # L2 penalty on all parameters except the bias term
    reg = l/(2.*m)*np.sum(theta[1:, :]**2.)
    return np.ravel(logistic_cost(theta, X, y) + reg)
def logistic_cost_reg_grad(theta, X, y, l=.1):
    m = y.shape[0]
    if len(theta.shape) < 2:
        theta = theta[:, np.newaxis]
    # Regularization term of the gradient: (l/m) * theta, with the bias term excluded
    reg = float(l)/m*np.vstack((np.zeros((1, 1)), theta[1:, :]))
    return logistic_cost_grad(theta, X, y) + np.ravel(reg)
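
# A minimal sanity check of the cost and gradient code above, on tiny
# hand-made data (hypothetical values, purely for illustration). With
# theta = 0 the hypothesis is 0.5 for every example, so the unregularized
# cost should come out to -log(0.5) ~= 0.693 regardless of the labels, and
# check_grad (finite differences vs. the analytic gradient) should print a
# value close to zero.
X_toy = np.array([[1., 2., -1.], [1., -0.5, 0.3], [1., 1.5, 2.2]])
y_toy = np.array([[1.], [0.], [1.]])
theta_toy = np.zeros(3)
print("Toy cost at theta=0: " + str(logistic_cost(theta_toy, X_toy, y_toy)))
print("Toy check_grad: " + str(
    opt.check_grad(logistic_cost, logistic_cost_grad, theta_toy, X_toy, y_toy)))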
# Perform one-vs-all classification of handwritten digits
# (scikit-learn's 8x8 digits dataset)
from sklearn import datasets
digits = datasets.load_digits()
X = digits.data

# Show a random grid of example digits; each digit is an 8x8 image
plotdim = 4
randind = np.random.randint(X.shape[0], size=(plotdim, plotdim))
f1, axarr = plot.subplots(plotdim, plotdim)
for i in range(plotdim):
    for j in range(plotdim):
        axarr[i, j].get_xaxis().set_visible(False)
        axarr[i, j].get_yaxis().set_visible(False)
        axarr[i, j].imshow(X[randind[i, j], :].reshape(8, 8), cmap=cm.gray)
# These digits are randomly ordered; if given an ORDERED set
# of training data, make sure to shuffle it before splitting
# Scale features to [0, 1] (pixel values in this dataset range from 0 to 16)
X = digits.data/16.
# Subtract the mean, though it doesn't appear to make a big difference
X = X - np.mean(X)
y = digits.target[:, np.newaxis]
# Add a bias (intercept) column of ones
X = np.hstack((np.ones((X.shape[0], 1)), X))
# Expect 10 labels for the digits 0-9
num_labels = np.amax(y) - np.amin(y) + 1
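
# One-vs-all setup: the loop below fits one binary classifier per class,
# using (y == c) as the 0/1 targets. For example, with hypothetical labels
# y = [[2], [0], [2]], the expression (y == 2) yields [[True], [False], [True]],
# and numpy treats those booleans as 0/1 inside the cost and gradient arithmetic.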
all_theta = np.zeros((X.shape[1], num_labels))
for c in range(num_labels):
    # Initialize theta to zeros and pass it into the optimization routine
    theta0 = np.zeros((X.shape[1],))
    # fmin_cg requires the cost and cost_grad functions to return flattened 1D arrays!
    # Unregularized version:
    # theta = opt.fmin_cg(logistic_cost, theta0, fprime=logistic_cost_grad, args=(X, (y == c)), maxiter=50)
    # Use the regularized cost function
    theta = opt.fmin_cg(logistic_cost_reg, theta0, fprime=logistic_cost_reg_grad, args=(X, (y == c)), maxiter=50)
    all_theta[:, c] = theta

# The builtin check_grad function can perform numerical gradient checking,
# ensuring the gradient code is consistent with the cost:
# print(opt.check_grad(logistic_cost, logistic_cost_grad, theta, X, y))
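
# A hedged sketch of the gradient check mentioned above, applied to the
# regularized cost/gradient pair actually used for training; the printed value
# should be small (near zero) if the analytic gradient matches the
# finite-difference approximation. The binary targets of the last one-vs-all
# class are reused here.
print("check_grad (regularized): " + str(
    opt.check_grad(logistic_cost_reg, logistic_cost_reg_grad, theta, X, (y == num_labels - 1))))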
# Predict by picking the class whose classifier gives the largest hypothesis value
h = sigmoid(np.dot(X, all_theta))
pred = np.argmax(h, axis=1)[:, np.newaxis]
accuracy = 100.*np.sum(y == pred)/float(len(y))
print("Classification accuracy (%): " + str(accuracy))

# Annotate the example digits with their predicted labels:
# green title if correct, red if misclassified
for i in range(plotdim):
    for j in range(plotdim):
        ind = randind[i, j]
        if y[ind] == pred[ind]:
            text_color = 'g'
        else:
            text_color = 'r'
        axarr[i, j].set_title("Label=" + str(pred[ind, 0]), color=text_color)
plot.suptitle("Overall classification accuracy (%): " + str(round(accuracy, 2)))
plot.show()