Commit

Update ex6

jtlowery committed Mar 9, 2017
1 parent d58af54 commit 5de26f3
Showing 13 changed files with 100 additions and 102 deletions.
4 changes: 1 addition & 3 deletions ex6/dataset3Params.py
@@ -8,7 +8,7 @@ def dataset3Params(X, y, Xval, yval):
cross-validation set.
"""

# You need to return the following variables correctly.
# You need to return the following variables correctly.
C = 1
sigma = 0.3

@@ -23,7 +23,5 @@ def dataset3Params(X, y, Xval, yval):
# Note: You can compute the prediction error using
# mean(double(predictions ~= yval))
#


# =========================================================================
return C, sigma
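
A minimal sketch of the grid search this stub is meant to hold, assuming scikit-learn's svm.SVC is used for the cross-validation search (as it is elsewhere in this exercise); the candidate grid follows the values suggested in the course handout, and the name dataset3Params_sketch is illustrative only:

import numpy as np
from sklearn import svm

def dataset3Params_sketch(X, y, Xval, yval):
    # Try each (C, sigma) pair and keep the one with the lowest validation error.
    candidates = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
    best_error = np.inf
    best_C, best_sigma = 1, 0.3
    for C in candidates:
        for sigma in candidates:
            gamma = 1.0 / (2.0 * sigma ** 2)          # express the kernel width as sklearn's gamma
            model = svm.SVC(C=C, kernel='rbf', gamma=gamma).fit(X, y)
            predictions = model.predict(Xval)
            error = np.mean(predictions != yval)      # Python form of mean(double(predictions ~= yval))
            if error < best_error:
                best_error, best_C, best_sigma = error, C, sigma
    return best_C, best_sigma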
8 changes: 3 additions & 5 deletions ex6/emailFeatures.py
@@ -6,10 +6,10 @@ def emailFeatures(word_indices):
produces a feature vector from the word indices.
"""

# Total number of words in the dictionary
# Total number of words in the dictionary
n = 1899

# You need to return the following variables correctly.
# You need to return the following variables correctly.
x = np.zeros(n)
# ====================== YOUR CODE HERE ======================
# Instructions: Fill in this function to return a feature vector for the
@@ -48,8 +48,6 @@ def emailFeatures(word_indices):
# x = [ 0 0 0 0 1 0 0 0 ... 0 0 0 0 1 ... 0 0 0 1 0 ..]
#
#


# =========================================================================

return x
return x
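
A minimal sketch of the feature mapping described in the instructions above, assuming word_indices holds 1-based indices into the 1899-word vocabulary; the helper name is illustrative only:

import numpy as np

def emailFeatures_sketch(word_indices):
    n = 1899                                 # total number of words in the dictionary
    x = np.zeros(n)
    x[np.asarray(word_indices) - 1] = 1      # shift the 1-based vocabulary indices to 0-based and mark them
    return x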
26 changes: 13 additions & 13 deletions ex6/ex6.py
@@ -31,7 +31,7 @@
# the data.
#

print 'Loading and Visualizing Data ...'
print('Loading and Visualizing Data ...')

# Load from ex6data1:
# You will have X, y in your environment
@@ -42,7 +42,7 @@
# Plot training data
plotData(X, y)

raw_input("Program paused. Press Enter to continue...")
input('Program paused. Press Enter to continue...')

## ==================== Part 2: Training Linear SVM ====================
# The following code will train a linear SVM on the dataset and plot the
@@ -55,7 +55,7 @@
X = data['X']
y = data['y'].flatten()

print 'Training Linear SVM ...'
print('Training Linear SVM ...')

# You should try to change the C value below and see how the decision
# boundary varies (e.g., try C = 1000)
@@ -65,13 +65,13 @@
model = clf.fit(X, y)
visualizeBoundaryLinear(X, y, model)

raw_input("Program paused. Press Enter to continue...")
input('Program paused. Press Enter to continue...')

## =============== Part 3: Implementing Gaussian Kernel ===============
# You will now implement the Gaussian kernel to use
# with the SVM. You should complete the code in gaussianKernel.py
#
print 'Evaluating the Gaussian Kernel ...'
print('Evaluating the Gaussian Kernel ...')

x1 = np.array([1, 2, 1])
x2 = np.array([0, 4, -1])
@@ -81,14 +81,14 @@
# print 'Gaussian Kernel between x1 = [1 2 1], x2 = [0 4 -1], sigma = %0.5f : ' \
# '\t%f\n(this value should be about 0.324652)\n' % (sigma, sim)

raw_input("Program paused. Press Enter to continue...")
input('Program paused. Press Enter to continue...')

## =============== Part 4: Visualizing Dataset 2 ================
# The following code will load the next dataset into your environment and
# plot the data.
#

print 'Loading and Visualizing Data ...'
print('Loading and Visualizing Data ...')

# Load from ex6data2:
# You will have X, y in your environment
@@ -99,13 +99,13 @@
# Plot training data
plotData(X, y)

raw_input("Program paused. Press Enter to continue...")
input('Program paused. Press Enter to continue...')

## ========== Part 5: Training SVM with RBF Kernel (Dataset 2) ==========
# After you have implemented the kernel, we can now use it to train the
# SVM classifier.
#
print 'Training SVM with RBF Kernel (this may take 1 to 2 minutes) ...'
print('Training SVM with RBF Kernel (this may take 1 to 2 minutes) ...')

# Load from ex6data2:
# You will have X, y in your environment
@@ -126,14 +126,14 @@
model = clf.fit(X, y)
visualizeBoundary(X, y, model)

raw_input("Program paused. Press Enter to continue...")
input('Program paused. Press Enter to continue...')

## =============== Part 6: Visualizing Dataset 3 ================
# The following code will load the next dataset into your environment and
# plot the data.
#

print 'Loading and Visualizing Data ...'
print('Loading and Visualizing Data ...')

# Load from ex6data3:
# You will have X, y in your environment
@@ -144,7 +144,7 @@
# Plot training data
plotData(X, y)

raw_input("Program paused. Press Enter to continue...")
input('Program paused. Press Enter to continue...')

## ========== Part 7: Training SVM with RBF Kernel (Dataset 3) ==========

@@ -167,5 +167,5 @@
model = clf.fit(X, y)
visualizeBoundary(X, y, model)

raw_input("Program paused. Press Enter to continue...")
input('Program paused. Press Enter to continue...')
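
Parts 5 and 7 above train the RBF-kernel SVM with scikit-learn rather than a hand-written kernel, so the exercise's sigma has to be translated into sklearn's gamma. A hedged sketch of that translation, assuming the C = 1, sigma = 0.1 values the original exercise uses for dataset 2:

import scipy.io
from sklearn import svm

data = scipy.io.loadmat('ex6data2.mat')     # same file as in Part 5 above
X = data['X']
y = data['y'].flatten()

sigma = 0.1                                  # Gaussian kernel width from the exercise text
gamma = 1.0 / (2.0 * sigma ** 2)             # SVC's rbf kernel is exp(-gamma * ||x1 - x2||^2)
clf = svm.SVC(C=1, kernel='rbf', gamma=gamma)
model = clf.fit(X, y)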

38 changes: 19 additions & 19 deletions ex6/ex6_spam.py
@@ -31,25 +31,25 @@
# complete the code in processEmail.py to produce a word indices vector
# for a given email.

print 'Preprocessing sample email (emailSample1.txt)'
print('Preprocessing sample email (emailSample1.txt)')

# Extract Features
file = open('emailSample1.txt', 'r')
file_contents = file.readlines()
word_indices = processEmail(''.join(file_contents))

# Print Stats
print 'Word Indices: '
print word_indices
print('Word Indices: ')
print(word_indices)

raw_input("Program paused. Press Enter to continue...")
input('Program paused. Press Enter to continue...')

## ==================== Part 2: Feature Extraction ====================
# Now, you will convert each email into a vector of features in R^n.
# You should complete the code in emailFeatures.py to produce a feature
# vector for a given email.

print 'Extracting features from sample email (emailSample1.txt)'
print('Extracting features from sample email (emailSample1.txt)')

# Extract Features
file = open('emailSample1.txt')
@@ -58,10 +58,10 @@
features = emailFeatures(word_indices)

# Print Stats
print 'Length of feature vector: %d'% features.size
print 'Number of non-zero entries: %d'% sum(features > 0)
print('Length of feature vector: %d' % features.size)
print('Number of non-zero entries: %d' % sum(features > 0))

raw_input("Program paused. Press Enter to continue...")
input('Program paused. Press Enter to continue...')

## =========== Part 3: Train Linear SVM for Spam Classification ========
# In this section, you will train a linear classifier to determine if an
@@ -73,16 +73,16 @@
X = data['X']
y = data['y'].flatten()

print 'Training Linear SVM (Spam Classification)'
print '(this may take 1 to 2 minutes) ...'
print('Training Linear SVM (Spam Classification)')
print('(this may take 1 to 2 minutes) ...')

C = 0.1
clf = svm.SVC(C=C, kernel='linear', tol=1e-3, max_iter=200)
model = clf.fit(X, y)

p = model.predict(X)

print 'Training Accuracy: %f', np.mean(np.double(p == y)) * 100
print('Training Accuracy: %f' % (np.mean(np.double(p == y)) * 100))

## =================== Part 4: Test Spam Classification ================
# After training the classifier, we can evaluate it on a test set. We have
@@ -94,11 +94,11 @@
Xtest = data['Xtest']
ytest = data['ytest']

print 'Evaluating the trained Linear SVM on a test set ...'
print('Evaluating the trained Linear SVM on a test set ...')

p = model.predict(Xtest)

print 'Test Accuracy: %f', np. mean(np.double(p == ytest)) * 100
print('Test Accuracy: %f' % (np.mean(np.double(p == ytest)) * 100))


## ================= Part 5: Top Predictors of Spam ====================
@@ -111,17 +111,17 @@

# Sort the weights and obtain the vocabulary list

t = sorted(list(enumerate(model.coef_[0])),key=lambda e: e[1], reverse=True)
t = sorted(list(enumerate(model.coef_[0])), key=lambda e: e[1], reverse=True)
d = OrderedDict(t)
idx = list(d.keys())
weight = list(d.values())
vocabList = getVocabList()

print 'Top predictors of spam: '
print('Top predictors of spam: ')
for i in range(15):
print ' %-15s (%f)' %(vocabList[idx[i]], weight[i])
print(' %-15s (%f)' %(vocabList[idx[i]], weight[i]))

print 'Program paused. Press enter to continue.'
print('Program paused. Press enter to continue.')

## =================== Part 6: Try Your Own Emails =====================
# Now that you've trained the spam classifier, you can use it on your own
@@ -144,6 +144,6 @@
x = emailFeatures(word_indices)
p = model.predict(x)

print 'Processed %s\n\nSpam Classification: %d' % (filename, p)
print '(1 indicates spam, 0 indicates not spam)'
print('Processed %s\n\nSpam Classification: %d' % (filename, p))
print('(1 indicates spam, 0 indicates not spam)')
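
One caveat for Part 6: scikit-learn's predict expects a 2-D array of shape (n_samples, n_features), so the single feature vector from emailFeatures typically needs reshaping first. A small sketch, assuming filename, word_indices, and model are already defined as in the script above:

x = emailFeatures(word_indices).reshape(1, -1)   # one sample, n features
p = int(model.predict(x)[0])                     # 1 indicates spam, 0 indicates not spam
print('Processed %s\n\nSpam Classification: %d' % (filename, p))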

12 changes: 5 additions & 7 deletions ex6/gaussianKernel.py
@@ -6,11 +6,11 @@ def gaussianKernel(x1, x2, sigma):
and returns the value in sim
"""

# Ensure that x1 and x2 are column vectors
# x1 = x1.ravel()
# x2 = x2.ravel()
# Ensure that x1 and x2 are column vectors
# x1 = x1.ravel()
# x2 = x2.ravel()

# You need to return the following variables correctly.
# You need to return the following variables correctly.
sim = 0

# ====================== YOUR CODE HERE ======================
@@ -19,7 +19,5 @@ def gaussianKernel(x1, x2, sigma):
# sigma
#
#


# =============================================================
return sim
return sim
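
A minimal sketch of the Gaussian (RBF) kernel this stub should compute; with the ex6.py test values x1 = [1, 2, 1], x2 = [0, 4, -1] and the exercise's sigma = 2 it evaluates to about 0.324652, matching the check quoted in ex6.py. The helper name is illustrative only:

import numpy as np

def gaussianKernel_sketch(x1, x2, sigma):
    x1, x2 = x1.ravel(), x2.ravel()                              # make sure both inputs are flat vectors
    return np.exp(-np.sum((x1 - x2) ** 2) / (2.0 * sigma ** 2))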
8 changes: 4 additions & 4 deletions ex6/getVocabList.py
@@ -7,13 +7,13 @@ def getVocabList():
and returns a list of the words in vocabList.
"""

## Read the fixed vocabulary list
## Read the fixed vocabulary list
with open('vocab.txt') as f:

# Store all dictionary words in cell array vocab{}
# Store all dictionary words in cell array vocab{}

# For ease of implementation, we use a struct to map the strings => integers
# In practice, you'll want to use some form of hashmap
# For ease of implementation, we use a struct to map the strings => integers
# In practice, you'll want to use some form of hashmap
vocabList = []
for line in f:
idx, w = line.split()
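
The rest of getVocabList is truncated in this view; a hedged sketch of how the vocabulary is typically read from vocab.txt (each line holding an index and a word), with the helper name being illustrative:

def getVocabList_sketch():
    vocabList = []
    with open('vocab.txt') as f:
        for line in f:
            idx, w = line.split()    # each line: "<index> <word>"
            vocabList.append(w)      # keep only the word; list position encodes the index
    return vocabList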
7 changes: 5 additions & 2 deletions ex6/linearKernel.py
@@ -1,13 +1,16 @@
import numpy as np


def linearKernel(x1, x2):
"""returns a linear kernel between x1 and x2
and returns the value in sim
"""

# Ensure that x1 and x2 are column vectors
# Ensure that x1 and x2 are column vectors
x1 = x1.ravel()
x2 = x2.ravel()

# Compute the kernel
# Compute the kernel
sim = x1.T.dot(x2) # dot product

return sim
10 changes: 6 additions & 4 deletions ex6/plotData.py
@@ -1,6 +1,8 @@
import matplotlib.pyplot as plt
import numpy as np
from show import show


def plotData(X, y):
"""plots the data points with + for the positive examples
and o for the negative examples. X is assumed to be a Mx2 matrix.
@@ -9,11 +9,11 @@ def plotData(X, y):
"""
plt.figure()

# Find Indices of Positive and Negative Examples
pos = np.where(y==1, True, False).flatten()
neg = np.where(y==0, True, False).flatten()
# Find Indices of Positive and Negative Examples
pos = np.where(y == 1, True, False).flatten()
neg = np.where(y == 0, True, False).flatten()

# Plot Examples
# Plot Examples
plt.plot(X[pos,0], X[pos, 1], 'k+', linewidth=1, markersize=7)
plt.plot(X[neg,0], X[neg, 1], 'ko', color='y', markersize=7)
show()
1 change: 1 addition & 0 deletions ex6/porterStemmer.py
@@ -29,6 +29,7 @@

import sys


class PorterStemmer:

def __init__(self):