Skip to content

Commit

Permalink
watched lectures and completed first half of homework
Browse files Browse the repository at this point in the history
  • Loading branch information
David Kaleko committed Dec 11, 2015
1 parent 8495617 commit 111f024
Showing 1 changed file with 354 additions and 0 deletions.
354 changes: 354 additions & 0 deletions ex2/ex2.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,354 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Programming Exercise 2: Logistic Regression"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"%matplotlib inline\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1 Logistic Regression"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"datafile = 'data/ex2data1.txt'\n",
"#!head $datafile\n",
"#Read in comma separated data; unpack=True yields one row per file column\n",
"cols = np.loadtxt(datafile, delimiter=',', usecols=(0,1,2), unpack=True)\n",
"##Form the usual \"X\" matrix and \"y\" vector:\n",
"##every row of cols but the last is a feature, the last row holds the labels.\n",
"##X also gets the usual column of 1's inserted in front (the intercept term).\n",
"X = np.insert(np.array(cols[:-1]).T, 0, 1, axis=1)\n",
"y = np.array(cols[-1:]).T\n",
"m = y.size # number of training examples"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 1.1 Visualizing the data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#Divide the sample into two: ones with positive classification, one with null classification\n",
"#Boolean masks on the flattened labels select the rows without a Python-level loop,\n",
"#and keep pos/neg two-dimensional even when a class has no members\n",
"pos = X[np.ravel(y) == 1]\n",
"neg = X[np.ravel(y) == 0]\n",
"#Check to make sure I included all entries\n",
"print \"Included everything? \",(len(pos)+len(neg) == X.shape[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def plotData():\n",
"    \"\"\"\n",
"    Scatter plot of the training set on a new 10x6 figure.\n",
"    Reads the notebook-level arrays pos and neg; column 1 goes on the\n",
"    x axis (exam 1 score) and column 2 on the y axis (exam 2 score).\n",
"    \"\"\"\n",
"    plt.figure(figsize=(10,6))\n",
"    #One series per class: (rows, marker format, legend label)\n",
"    series = ((pos,'k+','Admitted'), (neg,'yo','Not admitted'))\n",
"    for rows, fmt, name in series:\n",
"        plt.plot(rows[:,1],rows[:,2],fmt,label=name)\n",
"    plt.xlabel('Exam 1 score')\n",
"    plt.ylabel('Exam 2 score')\n",
"    plt.legend()\n",
"    plt.grid(True)\n",
"\n",
"plotData()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 1.2 Implementation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"from scipy.special import expit #Vectorized sigmoid function"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#Sanity check: expit evaluated on a grid from -10 up to 10 should trace the S-shaped sigmoid\n",
"myx = np.arange(-10,10,.1)\n",
"sigmoid_of_myx = expit(myx)\n",
"plt.plot(myx, sigmoid_of_myx)\n",
"plt.title(\"Woohoo this looks like a sigmoid function to me.\")\n",
"plt.grid(True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#Hypothesis function and cost function for logistic regression\n",
"def h(theta,X): #Logistic hypothesis function\n",
"    \"\"\"\n",
"    Return expit(X . theta): the sigmoid of the linear combination of the\n",
"    features in X weighted by theta.\n",
"    \"\"\"\n",
"    return expit(np.dot(X,theta))\n",
"\n",
"def computeCost(mytheta,myX,myy): #Cost function\n",
"    \"\"\"\n",
"    Logistic-regression cost (mean cross-entropy) of mytheta on (myX, myy).\n",
"\n",
"    mytheta is the parameter vector with n entries (flat, or n rows and 1 column)\n",
"    myX is a matrix with n columns and m rows\n",
"    myy holds the m labels (0 or 1), flat or as m rows and 1 column\n",
"\n",
"    Only the arguments are used -- no notebook-level X, y or m -- so the\n",
"    cost can be evaluated on any data set, not just the one currently loaded.\n",
"    Returns a plain Python float.\n",
"    \"\"\"\n",
"    #note to self: *.shape is (rows, columns)\n",
"    #Flatten both sides so flat and column-vector inputs give the same result\n",
"    labels = np.asarray(myy, dtype=float).ravel()\n",
"    hyp = np.asarray(h(mytheta,myX)).ravel()\n",
"    n_examples = labels.size #number of training examples in *this* data set\n",
"    log_likelihood = np.dot(labels,np.log(hyp)) + np.dot(1.-labels,np.log(1.-hyp))\n",
"    return float(-log_likelihood/n_examples)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#Check that with theta as zeros, cost returns about 0.693:\n",
"#(theta = 0 makes h equal 0.5 for every example, so the cost is -log(0.5) = ln 2)\n",
"#One zero per column of X, stored as a column vector\n",
"initial_theta = np.zeros((X.shape[1],1))\n",
"computeCost(initial_theta,X,y)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#As an alternative to OCTAVE's 'fminunc' we'll use the scipy.optimize function \"fmin\"\n",
"from scipy import optimize\n",
"\n",
"def optimizeTheta(mytheta,myX,myy):\n",
"    \"\"\"\n",
"    Minimize computeCost over theta with scipy.optimize.fmin (downhill simplex).\n",
"\n",
"    mytheta is the initial guess for theta\n",
"    myX, myy are the data matrix and labels handed on to computeCost\n",
"    Returns the tuple (theta at the minimum, cost at that theta).\n",
"    \"\"\"\n",
"    #Forward the arguments themselves, not the notebook-level X and y, so the\n",
"    #fit runs on whatever data set the caller passes in\n",
"    result = optimize.fmin(computeCost, x0=mytheta, args=(myX, myy), maxiter=400, full_output=True)\n",
"    #With full_output=True the first two entries are the minimizer and the minimum value\n",
"    return result[0], result[1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#Fit theta to the training data, starting from the all-zeros initial_theta;\n",
"#mincost is the cost value optimizeTheta returns alongside the fitted theta\n",
"theta, mincost = optimizeTheta(initial_theta,X,y)\n",
"#That's pretty cool. Black boxes ftw"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#\"Call your costFunction function using the optimal parameters of θ.\n",
"#You should see that the cost is about 0.203.\"\n",
"#(presumably quoted from the exercise handout; computeCost plays the role of costFunction here)\n",
"print computeCost(theta,X,y)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#Plotting the decision boundary: two points, draw a line between\n",
"#Decision boundary occurs when h = 0.5, i.e. when the sigmoid's argument is zero:\n",
"#theta0 + theta1*x1 + theta2*x2 = 0\n",
"#y=mx+b is replaced by x2 = (-1/theta2)(theta0 + theta1*x1)\n",
"#The two x values are the smallest and largest entries in column 1 of X\n",
"\n",
"boundary_xs = np.array([np.min(X[:,1]), np.max(X[:,1])])\n",
"boundary_ys = (-1./theta[2])*(theta[0] + theta[1]*boundary_xs)\n",
"plotData()\n",
"plt.plot(boundary_xs,boundary_ys,'b-',label='Decision Boundary')\n",
"plt.legend()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#For a student with an Exam 1 score of 45 and an Exam 2 score of 85,\n",
"#you should expect to see an admission probability of 0.776.\n",
"#The leading 1 is the intercept term, matching the column of 1's inserted into X\n",
"print h(theta,np.array([1, 45.,85.]))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def makePrediction(mytheta, myx):\n",
"    \"\"\"Predict the positive class (True) wherever the hypothesis h is at least 0.5.\"\"\"\n",
"    probability = h(mytheta,myx)\n",
"    return probability >= 0.5\n",
"\n",
"#Compute the fraction of training samples classified correctly:\n",
"#a positive sample counts when predicted True, a negative one when predicted False\n",
"pos_correct = float(makePrediction(theta,pos).sum())\n",
"neg_correct = float((~makePrediction(theta,neg)).sum())\n",
"tot = len(pos) + len(neg)\n",
"prcnt_correct = (pos_correct + neg_correct)/tot\n",
"print \"Fraction of training samples correctly predicted: %f.\" % prcnt_correct"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2 Regularized Logistic Regression"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 2.1 Visualizing the data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"datafile = 'data/ex2data2.txt'\n",
"#!head $datafile\n",
"#Read in comma separated data; unpack=True yields one row per file column\n",
"cols = np.loadtxt(datafile, delimiter=',', usecols=(0,1,2), unpack=True)\n",
"##Form the usual \"X\" matrix and \"y\" vector:\n",
"##every row of cols but the last is a feature, the last row holds the labels.\n",
"##X also gets the usual column of 1's inserted in front (the intercept term).\n",
"X = np.insert(np.array(cols[:-1]).T, 0, 1, axis=1)\n",
"y = np.array(cols[-1:]).T\n",
"m = y.size # number of training examples"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"#Divide the sample into two: ones with positive classification, one with null classification\n",
"#Boolean masks on the flattened labels select the rows without a Python-level loop,\n",
"#and keep pos/neg two-dimensional even when a class has no members\n",
"pos = X[np.ravel(y) == 1]\n",
"neg = X[np.ravel(y) == 0]\n",
"#Check to make sure I included all entries\n",
"print \"Included everything? \",(len(pos)+len(neg) == X.shape[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def plotData():\n",
"    \"\"\"\n",
"    Scatter plot of the microchip data set on a new square 6x6 figure.\n",
"    Reads the notebook-level arrays pos and neg; column 1 goes on the\n",
"    x axis (test 1) and column 2 on the y axis (test 2).\n",
"    \"\"\"\n",
"    plt.figure(figsize=(6,6)) #Draw it square to emphasize circular features\n",
"    #One series per class: (rows, marker format, legend label)\n",
"    series = ((pos,'k+','y=1'), (neg,'yo','y=0'))\n",
"    for rows, fmt, name in series:\n",
"        plt.plot(rows[:,1],rows[:,2],fmt,label=name)\n",
"    plt.xlabel('Microchip Test 1')\n",
"    plt.ylabel('Microchip Test 2')\n",
"    plt.legend()\n",
"    plt.grid(True)\n",
"\n",
"plotData()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### 2.2 Feature mapping"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.5"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

0 comments on commit 111f024

Please sign in to comment.