forked from kaleko/CourseraML
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ex1 ipython notebook with mandatory problems completed, will do optional problems and tidy up the code next
- Loading branch information
David Kaleko
committed
Dec 8, 2015
1 parent
17874be
commit 08fbc30
Showing
1 changed file
with
201 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,201 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Programming Exercise 1: Linear Regression" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"%matplotlib inline\n", | ||
"import numpy as np\n", | ||
"import matplotlib.pyplot as plt" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"### 2 Linear regression with one variable" | ||
] | ||
}, | ||
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# The training set is a comma-separated text file; only its first two columns are read\n", | |
"datafile = 'data/ex1data1.txt'\n", | |
"# unpack=True hands back each requested column as its own array\n", | |
"x, y = np.loadtxt(\n", | |
"    datafile,\n", | |
"    usecols=(0, 1),\n", | |
"    delimiter=',',\n", | |
"    unpack=True,\n", | |
")\n", | |
"m = y.size  # one entry of y per training example" | |
] | |
}, | |
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"#### 2.1 Plotting the Data" | ||
] | ||
}, | ||
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Scatter the raw training set before fitting anything to it\n", | |
"plt.figure(figsize=(10, 6))\n", | |
"plt.plot(x, y, 'rx', markersize=10)  # red crosses, no connecting line\n", | |
"plt.ylabel('Profit in $10,000s')\n", | |
"plt.grid(True)  # grid lines make the values easier to read off\n", | |
"plt.xlabel('Population of City in 10,000s')" | |
] | |
}, | |
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"#### 2.2 Gradient Descent" | ||
] | ||
}, | ||
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Design matrix with one column per training example: the first row is all ones\n", | |
"# (the x0 = 1 intercept feature) and the second row holds the x values\n", | |
"X = np.vstack((np.ones(m), x))\n", | |
"alpha = 0.01  # step size used by gradient descent\n", | |
"iterations = 1500  # number of gradient descent steps" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def h(theta, xvec):\n", | |
"    \"\"\"Linear hypothesis: dot product of the parameters with the features.\n", | |
"\n", | |
"    xvec may be a single feature vector (a scalar is returned) or a matrix with\n", | |
"    one example per column, like X (one prediction per example is returned).\n", | |
"    \"\"\"\n", | |
"    return np.dot(theta, xvec)\n", | |
"\n", | |
"def computeCost(theta, X):\n", | |
"    \"\"\"Squared-error cost J(theta) = 1/(2m) * sum_i (h(theta, x_i) - y_i)**2.\n", | |
"\n", | |
"    X holds one training example per column. The targets y and the example\n", | |
"    count m are read from the notebook namespace.\n", | |
"    \"\"\"\n", | |
"    residuals = h(theta, X) - y  # every prediction error from one matrix product\n", | |
"    return (1./(2*m)) * np.sum(residuals**2)\n", | |
"\n", | |
"#Test that running computeCost with 0's as theta returns 32.07:\n", | |
"print(computeCost(np.zeros(2), X))  # parenthesised so it runs on Python 2 and 3" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"#Actual gradient descent minimizing routine\n", | |
"jvec = [] #Used to plot cost as function of iteration\n", | |
"theta = np.zeros(X.shape[0]) #One parameter per row of X, all starting at zero\n", | |
"for _ in xrange(iterations):\n", | |
"    # The prediction errors are computed once, from the current theta, before any\n", | |
"    # component changes. This makes the update simultaneous: no component's\n", | |
"    # gradient ever sees an already-updated value of another component.\n", | |
"    errors = np.dot(theta, X) - y\n", | |
"    # np.dot(X, errors)[i] is sum_k errors[k]*X[i][k], the gradient for theta[i] (up to 1/m)\n", | |
"    theta = theta - (alpha/m)*np.dot(X, errors)\n", | |
"    jvec.append(computeCost(theta,X))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"# Cost recorded after each gradient descent step; with no explicit x values,\n", | |
"# plot() places each entry at its index in jvec\n", | |
"plt.plot(jvec, 'bo')\n", | |
"plt.title('Convergence of Cost Function')\n", | |
"plt.ylabel('Cost function')\n", | |
"plt.xlabel('Iteration number')\n", | |
"plt.grid(True)\n", | |
"dummy = plt.xlim(-50, 1600)  # binding the return value keeps its repr out of the cell output" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [], | |
"source": [ | |
"def myfit(xval):\n", | |
"    \"\"\"Evaluate the fitted line at xval using the notebook-level theta.\"\"\"\n", | |
"    intercept, slope = theta[0], theta[1]\n", | |
"    return intercept + slope*xval\n", | |
"\n", | |
"# Overlay the fitted line on the training scatter as a visual sanity check\n", | |
"plt.figure(figsize=(10, 6))\n", | |
"plt.plot(x, y, 'rx', label='Training Data', markersize=10)\n", | |
"plt.plot(x, myfit(x), 'b-',\n", | |
"         label='Hypothesis: h(x) = %0.2f + %0.2fx' % (theta[0], theta[1]))\n", | |
"plt.xlabel('Population of City in 10,000s')\n", | |
"plt.ylabel('Profit in $10,000s')\n", | |
"plt.grid(True)\n", | |
"plt.legend()" | |
] | |
}, | |
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"#### 2.4 Visualizing _J($\\theta$)_" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"collapsed": true | ||
}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 2", | ||
"language": "python", | ||
"name": "python2" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 2 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython2", | ||
"version": "2.7.5" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 0 | ||
} |