Commit cab9e68
dailiang committed May 12, 2013 (0 parents)
Showing 5 changed files with 205 additions and 0 deletions.
@@ -0,0 +1,14 @@
Machine Learning Visualization
==============
Overview
-------------
A collection of common machine learning algorithms implemented in Python; I am plotting them for visualization. The following algorithms are included:

* Linear classification (MLP, perceptron algorithm, LMS algorithm, etc.)
* Clustering (K-means)

Requirements
---------------

* Python 2.7
* numpy, scipy, matplotlib
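Each classifier script builds on a shared data module (imported as `from data import *`). A minimal usage sketch, assuming the shared module is saved as data.py (the scripts' imports confirm the module name; everything else below is illustrative):

# Minimal sketch of loading the shared dataset (assumes data.py on the path).
# inputs is N x 3 with rows [x, y, -1]; targets is N x 1 with values +-1.
from data import inputs, targets, plt, x1, y1, x2, y2

print inputs.shape    # (400, 3)
print targets.shape   # (400, 1)
plt.plot(x1, y1, 'ro', x2, y2, 'bo')
plt.show()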
@@ -0,0 +1,53 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

import copy as cp

from data import *   # brings in np, plt, inputs, targets, x1, y1, x2, y2, nData1, nData2


def lms_alg(inputs, targets, eta=0.05, weights=None, nIteration=1):
    ''' LMS (Widrow-Hoff) training; eta is the learning rate '''
    if weights is None:
        # small random initial weights in [-0.05, 0.05)
        weights = np.random.rand(3, 1)*0.1 - 0.05
    weights_start = cp.deepcopy(weights)
    # n iterations of the batch LMS update on the raw (unthresholded) outputs
    for n in range(nIteration):
        outputs = np.dot(inputs, weights)
        weights += eta/(nData1+nData2)*np.dot(inputs.T, targets-outputs)
    outputs = np.dot(inputs, weights)
    # Threshold the outputs
    outputs = np.where(outputs > 0, 1, -1)
    return (outputs, weights_start, weights)


if __name__ == "__main__":
    final = lms_alg(inputs, targets, nIteration=8)

    # Count the number of misclassified points
    outputs_final = final[0] - targets
    outputs_final = np.where(outputs_final == 0, 0, 1)
    nMis = sum(outputs_final)
    print 'Num of misclassified:'
    print nMis
    weights_start = final[1]
    weights = final[2]
    outputs = np.dot(inputs, weights)
    # Compute the loss
    Loss = sum((targets-outputs)**2)
    print 'Init Weight:'
    print weights_start
    print 'Final Weight:'
    print weights
    print 'Loss:'
    print Loss

    # plot: decision boundary w0*x + w1*y - w2 = 0, i.e. y = (w2 - w0*x)/w1
    classifier_x = np.linspace(-8, 8, 100)
    classifier_y = (weights[2] - weights[0]*classifier_x)/weights[1]
    classifier_y_start = (weights_start[2] - weights_start[0]*classifier_x)/weights_start[1]
    plt.axis([-8, 8, -6, 6])
    plt.plot(x1, y1, 'ro', x2, y2, 'bo')
    plt.plot(classifier_x, classifier_y_start, 'm--', classifier_x, classifier_y, 'g-')
    plt.legend(["+1", "-1", "Init", "Final"])
    plt.xlabel('x')
    plt.ylabel('y')
    plt.show()
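The batch update above is plain gradient descent on the sum-of-squares error L(w) = 0.5 * ||t - Xw||^2, whose gradient is -X^T (t - Xw). A minimal self-contained sketch of one such step (the arrays here are made-up illustrative data, not the repo's):

# One LMS step equals one gradient-descent step on
# L(w) = 0.5 * sum((t - Xw)**2), since dL/dw = -X^T (t - Xw).
import numpy as np

np.random.seed(2)
X = np.random.rand(6, 3)
t = np.random.rand(6, 1)
w = np.zeros((3, 1))
eta = 0.05

grad = -np.dot(X.T, t - np.dot(X, w))   # gradient of the squared-error loss
w_new = w - eta/X.shape[0]*grad         # same as weights += eta/N * X^T (t - Xw)
print w_new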
@@ -0,0 +1,40 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

from data import *   # brings in np, plt, inputs, targets, x1, y1, x2, y2

# Normal equations: W = (X^T X)^{-1} X^T T
# (targets is already built in data.py, so there is no need to rebuild it here)

X = inputs
weights = np.linalg.solve(np.dot(X.T, X), np.dot(X.T, targets))

if __name__ == "__main__":
    outputs = np.dot(X, weights)
    # Loss
    Loss = sum((targets - outputs)**2)
    # Threshold the outputs
    outputs = np.where(outputs > 0, 1, -1)
    outputs -= targets
    outputs = np.where(outputs == 0, 0, 1)
    nMis = sum(outputs)
    print 'Num of misclassified datapoints:'
    print nMis
    print 'Final weights:'
    print weights
    print 'Loss:'
    print Loss

    # plot: decision boundary y = (w2 - w0*x)/w1
    classifier_x = np.linspace(-8, 8, 100)
    classifier_y = (weights[2] - weights[0]*classifier_x)/weights[1]
    plt.axis([-8, 8, -6, 6])
    plt.plot(x1, y1, 'ro', x2, y2, 'bo')
    plt.plot(classifier_x, classifier_y, 'g-')
    plt.legend(['+1', '-1'])
    plt.xlabel('x')
    plt.ylabel('y')
    plt.show()
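As a sanity check, solving the normal equations should agree with numpy's built-in least-squares routine. A small self-contained comparison on made-up data (X and t below are illustrative, not the repo's dataset):

# Verify that the normal-equation solve matches np.linalg.lstsq.
import numpy as np

np.random.seed(3)
X = np.random.rand(10, 3)
t = np.random.rand(10, 1)

w_normal = np.linalg.solve(np.dot(X.T, X), np.dot(X.T, t))
w_lstsq = np.linalg.lstsq(X, t)[0]
print np.allclose(w_normal, w_lstsq)   # True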
@@ -0,0 +1,45 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

import matplotlib.pyplot as plt
import numpy as np

# From a 2-D normal distribution we draw two groups of data, (x1, y1)
# and (x2, y2), each with 200 datapoints.

# Define the mean vectors and covariance matrices.
m1 = [-5, 0]
m2 = [5, 0]
cov1 = [[1, 0.5], [0.5, 1]]
cov2 = cov1

# Generate random data
np.random.seed(0)
x1, y1 = np.random.multivariate_normal(m1, cov1, 200).T
np.random.seed(1)
x2, y2 = np.random.multivariate_normal(m2, cov2, 200).T

# Arrange the data: append a -1 bias column to each input
nData1 = np.shape(x1)[0]
nData2 = np.shape(x2)[0]
X1 = np.concatenate(([x1], [y1], -np.ones((1, nData1))), axis=0).T
X2 = np.concatenate(([x2], [y2], -np.ones((1, nData2))), axis=0).T
inputs = np.concatenate((X1, X2), axis=0)
# Targets: +1 for the first group, -1 for the second
t1 = np.ones((1, nData1))
t2 = -np.ones((1, nData2))
targets = np.concatenate((t1, t2), axis=1).T


if __name__ == "__main__":
    # What is the relationship between the covariance matrix and the
    # shape of the distribution?
    eigenval, eigenvec = np.linalg.eig(cov1)
    print "The eigenvalues of the cov matrix are:\n", eigenval
    print "The eigenvectors of the cov matrix are:\n", eigenvec

    # plot
    plt.axis([-8, 8, -6, 6])
    plt.plot(x1, y1, 'ro', x2, y2, 'bo')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.show()
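The eigendecomposition answers the question posed in the code: the eigenvectors of the covariance matrix point along the axes of the data's ellipse, and the square roots of the eigenvalues scale those axes. For cov1 = [[1, 0.5], [0.5, 1]] this is easy to verify by hand; a small illustrative check:

# For cov = [[1, 0.5], [0.5, 1]] the eigenvalues are 1 + 0.5 = 1.5 and
# 1 - 0.5 = 0.5, with eigenvectors along [1, 1] and [1, -1]: the point
# cloud is an ellipse tilted 45 degrees, long axis ~ sqrt(1.5).
import numpy as np

cov = np.array([[1.0, 0.5], [0.5, 1.0]])
vals, vecs = np.linalg.eig(cov)
print vals    # 1.5 and 0.5 (order may vary)
print vecs    # columns are unit vectors along [1, 1] and [1, -1]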
@@ -0,0 +1,53 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

import copy as cp

from data import *   # brings in np, plt, inputs, targets, x1, y1, x2, y2

# Loss over the misclassified points (with y = sign(x.w)):
#   Loss(w) = sum of sign(x.w) * (x.w - target)  over misclassified x
# Gradient descent on this loss gives dLoss/dw = sign(x.w) * x per point,
# which in batch form is the update  w += eta * X^T (targets - outputs):
# correctly classified points contribute 0, misclassified ones +-2x.


def pcn_train(inputs, targets, eta=0.25, weights=None, nIteration=1):
    ''' perceptron training phase; eta is the learning rate '''
    if weights is None:
        # small random initial weights in [-0.05, 0.05)
        weights = np.random.rand(3, 1)*0.1 - 0.05
    weights_start = cp.deepcopy(weights)
    for n in range(nIteration):
        # Run the network forward
        outputs = np.dot(inputs, weights)
        # Threshold the outputs
        outputs = np.where(outputs > 0, 1, -1)
        weights += eta*np.dot(inputs.T, targets-outputs)
    # final outputs
    outputs = np.dot(inputs, weights)
    # Threshold the outputs
    outputs = np.where(outputs > 0, 1, -1)
    return (outputs, weights_start, weights)


if __name__ == "__main__":
    # train
    final = pcn_train(inputs, targets, nIteration=4)
    outputs_final = final[0] - targets
    outputs_final = np.where(outputs_final == 0, 0, 1)
    nMis = sum(outputs_final)
    print 'Num of misclassified datapoints:'
    print nMis
    weights_start = final[1]
    weights = final[2]
    print 'The initial weights:'
    print weights_start
    print 'The final weights:'
    print weights

    # plot: decision boundary y = (w2 - w0*x)/w1
    classifier_x = np.linspace(-8, 8, 100)
    classifier_y = (weights[2] - weights[0]*classifier_x)/weights[1]
    classifier_y_start = (weights_start[2] - weights_start[0]*classifier_x)/weights_start[1]
    plt.axis([-8, 8, -6, 6])
    plt.plot(x1, y1, 'ro', x2, y2, 'bo')
    plt.plot(classifier_x, classifier_y_start, 'm--', classifier_x, classifier_y, 'g-')
    plt.legend(['+1', '-1', 'init', 'final'])
    plt.xlabel('x')
    plt.ylabel('y')
    plt.show()
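To see why only misclassified points move the weights in the update above: with outputs thresholded to {-1, +1}, targets - outputs is 0 on correctly classified points and +-2 on wrong ones. A tiny illustrative check (the arrays here are made up, not the repo's data):

# Only misclassified points contribute to X^T (targets - outputs).
import numpy as np

targets = np.array([[1], [-1], [1]])
outputs = np.array([[1], [1], [-1]])   # last two points misclassified
print targets - outputs                # [[0], [-2], [2]]: zeros drop out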