Commit

add file->linear classification

dailiang committed May 12, 2013
0 parents commit cab9e68
Showing 5 changed files with 205 additions and 0 deletions.
14 changes: 14 additions & 0 deletions README.md
@@ -0,0 +1,14 @@
Machine Learning Visualization
==============
Overview
-------------
A collection of common machine learning algorithms implemented in Python, with plots that visualize how each one behaves. The following algorithms are included:

* Linear classification (MSE, Perceptron Algorithm, LMS Algorithm, etc.)
* Clustering (K-means, ...)

Requirements
---------------

* Python 2.7
* numpy, scipy, matplotlib
53 changes: 53 additions & 0 deletions linear-classification/LMS.py
@@ -0,0 +1,53 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

import copy as cp

from data import *

def lms_alg(inputs, targets, eta=0.05, weights=None, nIteration=1):
    ''' Batch LMS training; eta is the learning rate. '''
    if weights is None:
        # default arguments are evaluated only once at definition time,
        # so draw the random initial weights inside the function
        weights = np.random.rand(3, 1)*0.1 - 0.05
    weights_start = cp.deepcopy(weights)
    # n iterations of batch gradient descent on the squared error
    for n in range(nIteration):
        outputs = np.dot(inputs, weights)
        # LMS update: step along the negative gradient of the error
        weights += eta/(nData1 + nData2)*np.dot(inputs.T, targets - outputs)
    outputs = np.dot(inputs, weights)
    # Threshold the outputs
    outputs = np.where(outputs > 0, 1, -1)
    return (outputs, weights_start, weights)

if __name__ == "__main__":
    final = lms_alg(inputs, targets, nIteration=8)

    # count the number of misclassified points
    outputs_final = final[0] - targets
    outputs_final = np.where(outputs_final == 0, 0, 1)
    nMis = sum(outputs_final)
    print 'Num of misclassified:'
    print nMis
    weights_start = final[1]
    weights = final[2]
    # compute the loss
    outputs = np.dot(inputs, weights)
    Loss = sum((targets - outputs)**2)
    print 'Init Weight:'
    print weights_start
    print 'Final Weight:'
    print weights
    print 'Loss:'
    print Loss

    # plot the decision boundaries: w0*x + w1*y - w2 = 0  =>  y = (w2 - w0*x)/w1
    classifier_x = np.linspace(-8, 8, 100)
    classifier_y = (weights[2] - weights[0]*classifier_x)/weights[1]
    classifier_y_start = (weights_start[2] - weights_start[0]*classifier_x)/weights_start[1]
    plt.axis([-8, 8, -6, 6])
    plt.plot(x1, y1, 'ro', x2, y2, 'bo')
    plt.plot(classifier_x, classifier_y_start, 'm--', classifier_x, classifier_y, 'g-')
    plt.legend(["+1", "-1", "Init", "Final"])
    plt.xlabel('x')
    plt.ylabel('y')
    plt.show()
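
A quick way to see the batch LMS rule converging is to sweep the iteration count and watch the squared-error loss shrink. The sketch below is not part of the commit; it assumes it is run from inside linear-classification/ so that data.py and LMS.py are importable.

# Hedged sketch: the loss should shrink as nIteration grows (up to the
# noise of the random initial weights drawn inside lms_alg).
import numpy as np
from data import inputs, targets
from LMS import lms_alg

for n in [1, 2, 4, 8, 16]:
    _, _, w = lms_alg(inputs, targets, nIteration=n)
    loss = np.sum((targets - np.dot(inputs, w))**2)
    print 'nIteration=%d  Loss=%.4f' % (n, loss)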
40 changes: 40 additions & 0 deletions linear-classification/MSE.py
@@ -0,0 +1,40 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

from data import *

# Normal equations: W = (X^T X)^{-1} X^T T

# targets are also built in data.py; they are recomputed here for clarity
t1 = np.ones((1, nData1))
t2 = -np.ones((1, nData2))
targets = np.concatenate((t1, t2), axis=1).T
X = inputs
weights = np.linalg.solve(np.dot(X.T, X), np.dot(X.T, targets))

if __name__ == "__main__":
    outputs = np.dot(X, weights)
    # compute the loss
    Loss = sum((targets - outputs)**2)
    # Threshold the outputs
    outputs = np.where(outputs > 0, 1, -1)
    # count the misclassified points
    outputs = outputs - targets
    outputs = np.where(outputs == 0, 0, 1)
    nMis = sum(outputs)
    print 'Num of misclassified datapoints:'
    print nMis
    print 'Final weights:'
    print weights
    print 'Loss:'
    print Loss


    # plot the decision boundary: y = (w2 - w0*x)/w1
    classifier_x = np.linspace(-8, 8, 100)
    classifier_y = (weights[2] - weights[0]*classifier_x)/weights[1]
    plt.axis([-8, 8, -6, 6])
    plt.plot(x1, y1, 'ro', x2, y2, 'bo')
    plt.plot(classifier_x, classifier_y, 'g-')
    plt.legend(['+1', '-1'])
    plt.xlabel('x')
    plt.ylabel('y')
    plt.show()
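
As a sanity check on the normal-equation solution, it can be compared with numpy's built-in least-squares solver. This sketch is not part of the commit; it assumes the inputs and targets arrays from data.py.

# Hedged sketch: both solvers should agree up to numerical precision.
import numpy as np
from data import inputs, targets

w_normal = np.linalg.solve(np.dot(inputs.T, inputs),
                           np.dot(inputs.T, targets))
w_lstsq = np.linalg.lstsq(inputs, targets)[0]
print 'max abs difference:', np.max(np.abs(w_normal - w_lstsq))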
45 changes: 45 additions & 0 deletions linear-classification/data.py
@@ -0,0 +1,45 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

import matplotlib.pyplot as plt
import numpy as np

# Draw two groups of data, (x1, y1) and (x2, y2), each with 200
# datapoints, from 2-D normal distributions.

# define the mean values and covariance matrices
m1 = [-5, 0]
m2 = [5, 0]
cov1 = [[1, 0.5], [0.5, 1]]
cov2 = cov1

# Generate random data
np.random.seed(0)
x1,y1 = np.random.multivariate_normal(m1, cov1, 200).T
np.random.seed(1)
x2,y2 = np.random.multivariate_normal(m2, cov2, 200).T

# arrange the data: append a -1 bias column and stack the two groups
nData1 = np.shape(x1)[0]
nData2 = np.shape(x2)[0]
X1 = np.concatenate(([x1], [y1], -np.ones((1,nData1))), axis=0).T
X2 = np.concatenate(([x2], [y2], -np.ones((1,nData2))), axis=0).T
inputs = np.concatenate((X1, X2), axis=0)
# Targets
t1 = np.ones((1, nData1))
t2 = -np.ones((1, nData2))
targets = np.concatenate((t1, t2), axis=1).T


if __name__ == "__main__":
    # what is the relationship between the covariance matrix and the
    # shape of the distribution?
    eigenval, eigenvec = np.linalg.eig(cov1)
    print "The eigenvalues of the cov matrix are:\n", eigenval
    print "The eigenvectors of the cov matrix are:\n", eigenvec

    # plot
    plt.axis([-8, 8, -6, 6])
    plt.plot(x1, y1, 'ro', x2, y2, 'bo')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.show()
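
To answer the question raised in the __main__ block above: the eigenvectors of the covariance matrix point along the principal axes of the sampled ellipse, and the square roots of the eigenvalues scale the spread along those axes. A sketch (using the same cov1, not part of the commit) that overlays the scaled eigenvectors on a fresh sample:

# Hedged sketch: visualize covariance eigenvectors as principal axes.
import numpy as np
import matplotlib.pyplot as plt

cov1 = [[1, 0.5], [0.5, 1]]
eigenval, eigenvec = np.linalg.eig(cov1)
pts = np.random.multivariate_normal([0, 0], cov1, 500)
plt.plot(pts[:, 0], pts[:, 1], 'b.')
for val, vec in zip(eigenval, eigenvec.T):
    # draw each axis with length 2*sqrt(eigenvalue)
    plt.plot([0, 2*np.sqrt(val)*vec[0]], [0, 2*np.sqrt(val)*vec[1]], 'r-')
plt.axis('equal')
plt.show()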
53 changes: 53 additions & 0 deletions linear-classification/perceptron.py
@@ -0,0 +1,53 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

import copy as cp

from data import *

# The loss, summed over the misclassified datapoints X, is:
#   Loss(w) = sum( sign(Xw) * (Xw - target) )
# and gradient descent on it uses the gradient:
#   dLoss/dw = sign(Xw) * X

def pcn_train(inputs, targets, eta=0.25, weights=None, nIteration=1):
    ''' Perceptron training phase; eta is the learning rate. '''
    if weights is None:
        # default arguments are evaluated only once at definition time,
        # so draw the random initial weights inside the function
        weights = np.random.rand(3, 1)*0.1 - 0.05
    weights_start = cp.deepcopy(weights)
    for n in range(nIteration):
        # Run the network forward
        outputs = np.dot(inputs, weights)
        # Threshold the outputs
        outputs = np.where(outputs > 0, 1, -1)
        # Update the weights on the misclassified points
        weights += eta*np.dot(inputs.T, targets - outputs)
    # final outputs
    outputs = np.dot(inputs, weights)
    # Threshold the outputs
    outputs = np.where(outputs > 0, 1, -1)
    return (outputs, weights_start, weights)

if __name__ == "__main__":
    # train
    final = pcn_train(inputs, targets, nIteration=4)
    # count the misclassified points
    outputs_final = final[0] - targets
    outputs_final = np.where(outputs_final == 0, 0, 1)
    nMis = sum(outputs_final)
    print 'Num of misclassified datapoints:'
    print nMis
    weights_start = final[1]
    weights = final[2]
    print 'The init weights:'
    print weights_start
    print 'The final weights:'
    print weights

    # plot the decision boundaries: y = (w2 - w0*x)/w1
    classifier_x = np.linspace(-8, 8, 100)
    classifier_y = (weights[2] - weights[0]*classifier_x)/weights[1]
    classifier_y_start = (weights_start[2] - weights_start[0]*classifier_x)/weights_start[1]
    plt.axis([-8, 8, -6, 6])
    plt.plot(x1, y1, 'ro', x2, y2, 'bo')
    plt.plot(classifier_x, classifier_y_start, 'm--', classifier_x, classifier_y, 'g-')
    plt.legend(['+1', '-1', 'init', 'final'])
    plt.xlabel('x')
    plt.ylabel('y')
    plt.show()
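
Because the two clusters are well separated (means at -5 and +5), the perceptron should reach zero misclassifications within a few batch updates. A sketch (run from inside linear-classification/, not part of the commit) that checks this:

# Hedged sketch: misclassifications should drop to 0 quickly on
# linearly separable data.
import numpy as np
from data import inputs, targets
from perceptron import pcn_train

for n in range(1, 6):
    outputs, _, _ = pcn_train(inputs, targets, nIteration=n)
    print 'nIteration=%d  misclassified=%d' % (n, np.sum(outputs != targets))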
