forked from kaleko/CourseraML
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
watched lectures and completed first half of homework
- Loading branch information
David Kaleko
committed
Dec 11, 2015
1 parent
8495617
commit 111f024
Showing
1 changed file
with
354 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,354 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Programming Exercise 2: Logistic Regression" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"%matplotlib inline\n", | ||
"import numpy as np\n", | ||
"import matplotlib.pyplot as plt\n", | ||
"import pandas as pd" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### 1 Logistic Regression" | ||
] | ||
}, | ||
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"#Comma separated training set for part 1; the last column is the label\n", | |
"datafile = 'data/ex2data1.txt'\n", | |
"#!head $datafile\n", | |
"#unpack=True transposes what is read, so cols holds one entry per file column\n", | |
"cols = np.loadtxt(datafile,delimiter=',',usecols=(0,1,2),unpack=True)\n", | |
"#Label column, turned into the usual \"y\" column vector\n", | |
"y = np.array(cols[-1:]).T\n", | |
"m = y.size # number of training examples\n", | |
"#Remaining columns form the usual \"X\" matrix, with the column of 1's inserted in front\n", | |
"X = np.insert(np.array(cols[:-1]).T,0,1,axis=1)" | |
] | |
}, | |
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"#### 1.1 Visualizing the data" | ||
] | ||
}, | ||
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"#Divide the sample into two: rows with positive classification (y == 1)\n", | |
"#and rows with negative classification (y == 0).\n", | |
"#Boolean masks select the rows in one step and keep both results 2-D even when\n", | |
"#a class has no members, so later column slicing such as pos[:,1] stays valid.\n", | |
"pos = X[y.ravel() == 1]\n", | |
"neg = X[y.ravel() == 0]\n", | |
"#Check to make sure I included all entries\n", | |
"print \"Included everything? \",(len(pos)+len(neg) == X.shape[0])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def plotData():\n", | |
"    \"\"\"Plot columns 1 and 2 of the notebook-level pos and neg arrays on a new 10x6 figure.\"\"\"\n", | |
"    plt.figure(figsize=(10,6))\n", | |
"    #One series per class: black plus signs for pos, yellow circles for neg\n", | |
"    for samples, marker, name in ((pos,'k+','Admitted'), (neg,'yo','Not admitted')):\n", | |
"        plt.plot(samples[:,1],samples[:,2],marker,label=name)\n", | |
"    plt.xlabel('Exam 1 score')\n", | |
"    plt.ylabel('Exam 2 score')\n", | |
"    plt.legend()\n", | |
"    plt.grid(True)\n", | |
"\n", | |
"plotData()" | |
] | |
}, | |
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"#### 1.2 Implementation" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"from scipy.special import expit #Vectorized sigmoid function" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"#Quick check that expit is what I think it is\n", | ||
"myx = np.arange(-10,10,.1)\n", | ||
"plt.plot(myx,expit(myx))\n", | ||
"plt.title(\"Woohoo this looks like a sigmoid function to me.\")\n", | ||
"plt.grid(True)" | ||
] | ||
}, | ||
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"#Hypothesis function and cost function for logistic regression\n", | |
"def h(theta,X):\n", | |
"    \"\"\"\n", | |
"    Logistic hypothesis function: the sigmoid (expit) of the product of X and theta.\n", | |
"    theta is the parameter vector; X is a matrix with one example per row,\n", | |
"    or a single example given as a flat array.\n", | |
"    \"\"\"\n", | |
"    return expit(np.dot(X,theta))\n", | |
"\n", | |
"def computeCost(mytheta,myX,myy):\n", | |
"    \"\"\"\n", | |
"    Logistic regression cost (no regularization), averaged over the examples.\n", | |
"    mytheta is an n- dimensional vector of parameters (column vector or flat array)\n", | |
"    myX is a matrix with n- columns and m- rows\n", | |
"    myy is a matrix with m- rows and 1 column\n", | |
"    Returns the cost as a float.\n", | |
"    \"\"\"\n", | |
"    #note to self: *.shape is (rows, columns)\n", | |
"    labels = np.array(myy)\n", | |
"    #Count the examples from the labels that were passed in, not from the notebook-level m,\n", | |
"    #so the result stays correct for any data set handed to this function\n", | |
"    num_examples = labels.size\n", | |
"    predictions = h(mytheta,myX)\n", | |
"    #Both terms are computed from the arguments only (no notebook-level X or y)\n", | |
"    cost_when_1 = np.dot(-labels.T,np.log(predictions))\n", | |
"    cost_when_0 = np.dot((1-labels).T,np.log(1-predictions))\n", | |
"    return float((1./num_examples) * np.sum(cost_when_1 - cost_when_0))" | |
] | |
}, | |
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"#Check that with theta as zeros, cost returns about 0.693:\n", | ||
"initial_theta = np.zeros((X.shape[1],1))\n", | ||
"computeCost(initial_theta,X,y)" | ||
] | ||
}, | ||
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"#As an alternative to OCTAVE's 'fminunc', we'll use the scipy.optimize function \"fmin\"\n", | |
"from scipy import optimize\n", | |
"\n", | |
"def optimizeTheta(mytheta,myX,myy):\n", | |
"    \"\"\"\n", | |
"    Minimize computeCost over theta for the data (myX, myy), starting from mytheta.\n", | |
"    Runs at most 400 iterations of optimize.fmin.\n", | |
"    Returns a tuple: (theta found by fmin, cost at that theta).\n", | |
"    \"\"\"\n", | |
"    #Forward the caller's data to the cost function rather than the notebook-level X and y\n", | |
"    result = optimize.fmin(computeCost, x0=mytheta, args=(myX, myy), maxiter=400, full_output=True)\n", | |
"    #With full_output=True the first two entries are the minimizer and its function value\n", | |
"    return result[0], result[1]" | |
] | |
}, | |
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"theta, mincost = optimizeTheta(initial_theta,X,y)\n", | ||
"#That's pretty cool. Black boxes ftw" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"#\"Call your costFunction function using the optimal parameters of θ. \n", | ||
"#You should see that the cost is about 0.203.\"\n", | ||
"print computeCost(theta,X,y)" | ||
] | ||
}, | ||
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"#Plotting the decision boundary: compute two points and draw a line between them\n", | |
"#Decision boundary occurs when h = 0.5, i.e. when\n", | |
"#theta0 + theta1*x1 + theta2*x2 = 0\n", | |
"#y=mx+b is replaced by x2 = (-1/theta2)(theta0 + theta1*x1)\n", | |
"\n", | ||
"boundary_xs = np.array([np.min(X[:,1]), np.max(X[:,1])])\n", | ||
"boundary_ys = (-1./theta[2])*(theta[0] + theta[1]*boundary_xs)\n", | ||
"plotData()\n", | ||
"plt.plot(boundary_xs,boundary_ys,'b-',label='Decision Boundary')\n", | ||
"plt.legend()" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": false | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"#For a student with an Exam 1 score of 45 and an Exam 2 score of 85, \n", | ||
"#you should expect to see an admission probability of 0.776.\n", | ||
"print h(theta,np.array([1, 45.,85.]))" | ||
] | ||
}, | ||
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def makePrediction(mytheta, myx):\n", | |
"    \"\"\"Return True wherever the hypothesis h(mytheta, myx) is at least 0.5.\"\"\"\n", | |
"    probability = h(mytheta,myx)\n", | |
"    return probability >= 0.5\n", | |
"\n", | |
"#Compute the fraction of samples I got correct:\n", | |
"#a positive sample is correct when predicted True, a negative one when predicted False\n", | |
"pos_correct = float(np.sum(makePrediction(theta,pos)))\n", | |
"neg_correct = float(np.sum(~makePrediction(theta,neg)))\n", | |
"tot = len(pos)+len(neg)\n", | |
"prcnt_correct = (pos_correct+neg_correct)/tot\n", | |
"print \"Fraction of training samples correctly predicted: %f.\" % prcnt_correct" | |
] | |
}, | |
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### 2 Regularized Logistic Regression" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"#### 2.1 Visualizing the data" | ||
] | ||
}, | ||
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"#Comma separated training set for part 2; the last column is the label\n", | |
"datafile = 'data/ex2data2.txt'\n", | |
"#!head $datafile\n", | |
"#unpack=True transposes what is read, so cols holds one entry per file column\n", | |
"cols = np.loadtxt(datafile,delimiter=',',usecols=(0,1,2),unpack=True)\n", | |
"#Label column, turned into the usual \"y\" column vector\n", | |
"y = np.array(cols[-1:]).T\n", | |
"m = y.size # number of training examples\n", | |
"#Remaining columns form the usual \"X\" matrix, with the column of 1's inserted in front\n", | |
"X = np.insert(np.array(cols[:-1]).T,0,1,axis=1)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"#Divide the sample into two: rows with positive classification (y == 1)\n", | |
"#and rows with negative classification (y == 0).\n", | |
"#Boolean masks select the rows in one step and keep both results 2-D even when\n", | |
"#a class has no members, so later column slicing such as pos[:,1] stays valid.\n", | |
"pos = X[y.ravel() == 1]\n", | |
"neg = X[y.ravel() == 0]\n", | |
"#Check to make sure I included all entries\n", | |
"print \"Included everything? \",(len(pos)+len(neg) == X.shape[0])" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def plotData():\n", | |
"    \"\"\"Plot columns 1 and 2 of the notebook-level pos and neg arrays on a new 6x6 figure.\"\"\"\n", | |
"    #Square figure to emphasize circular features\n", | |
"    plt.figure(figsize=(6,6))\n", | |
"    #One series per class: black plus signs for pos, yellow circles for neg\n", | |
"    for samples, marker, name in ((pos,'k+','y=1'), (neg,'yo','y=0')):\n", | |
"        plt.plot(samples[:,1],samples[:,2],marker,label=name)\n", | |
"    plt.xlabel('Microchip Test 1')\n", | |
"    plt.ylabel('Microchip Test 2')\n", | |
"    plt.legend()\n", | |
"    plt.grid(True)\n", | |
"\n", | |
"plotData()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### 2.2 Feature mapping" | |
] | |
} | |
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 2", | ||
"language": "python", | ||
"name": "python2" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 2 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython2", | ||
"version": "2.7.5" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 0 | ||
} |