forked from RKDSOne/-machine-learning-module
Commit eda7351 (1 parent: fb47d66)
Showing 20 changed files with 324 additions and 0 deletions.
@@ -0,0 +1,38 @@
%This demo uses the function svd, which is closely related to PCA in that it
%provides a decomposition of a matrix
%X = USV' and so X'X = VSU'USV' = VS^2V', where V is the matrix whose
%columns are the eigenvectors of X'X, i.e. a scaled version of the principal
%directions, and the matrix U has columns which are the principal components,
%or the projections of X onto V, i.e. U = XVinv(S).
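%(Illustrative aside, not in the original file: the relations above can be
%checked directly on a small random matrix; the names Xs,Us,Ss,Vs are
%hypothetical -
%   Xs = randn(6,4); [Us,Ss,Vs] = svd(Xs,'econ');
%   norm(Xs'*Xs - Vs*Ss^2*Vs')   %~0, so V holds the eigenvectors of X'X
%   norm(Us - Xs*Vs/Ss)          %~0, so U = X*V*inv(S)
%)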

clear
load('olivettifaces.mat');
Selected_Face = 125;%312;%255;

X = faces';
[N_faces,D] = size(X);
mean_face = mean(X);
X = X - repmat(mean_face,N_faces,1);
fprintf('Performing PCA.... stay tuned\n');
[U,S,V] = svd(X);
subplot(131)
imagesc(reshape(X(Selected_Face,:)+mean_face,sqrt(D),sqrt(D)));
title('Original Image');

recon_err = [];

for i=1:N_faces
    %reconstruct the selected face from the first i principal components
    X_Reconst = U(Selected_Face,1:i)*S(1:i,1:i)*V(:,1:i)' + mean_face;
    subplot(132)
    imagesc(reshape(X_Reconst',sqrt(D),sqrt(D)));
    title('Reconstructed Image');drawnow;
    %root-mean-square error between the reconstruction and the original face
    recon_err = [recon_err;sqrt(mean((X_Reconst - (X(Selected_Face,:) + mean_face)).^2,2))];
    colormap gray
    subplot(133)
    plot(1:i,recon_err,'LineWidth',3);
    title('Reconstruction Error');
    pause(0.1)
    fprintf('%d: Reconstruction Error = %f\n',i,recon_err(i))
end
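%Illustrative addition (not in the original commit): the squared singular
%values are proportional to the variance captured by each principal
%component, so the cumulative fraction of variance explained can be
%plotted as well.
var_explained = cumsum(diag(S).^2)./sum(diag(S).^2);
figure; plot(var_explained,'LineWidth',3);
title('Cumulative Fraction of Variance Explained');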
@@ -0,0 +1,40 @@
function [B,D]=power_pca(C)
%A little routine to compute PCA, given a covariance matrix C, using the
%power method with deflation
N = size(C,1);

threshold = 1e-3;
Max_Its = 1000;

%loop round all dimensions of the covariance matrix
for n=1:N

    %initialise a random vector and set its norm to unity
    x = randn(N,1);
    y = x./sqrt(x'*x);

    %monitor convergence
    err = 1e20;
    its = 1;

    %main loop to compute a single eigenvector - iterate while not yet
    %converged AND the iteration cap has not been reached
    while (err > threshold) && (its < Max_Its)
        x = C*y;
        y_new = x./sqrt(x'*x);

        err = sum((y_new - y).^2);
        y = y_new;

        %set eigenvalue - since y has unit norm, ||C*y|| tends to the
        %dominant eigenvalue
        D(n) = sqrt(x'*x);

        %increment counter
        its = its + 1;
    end

    %set the column vectors to be the found eigenvectors
    B(:,n) = y_new;

    %deflate the covariance matrix so the next pass finds the next
    %largest eigenvector
    C = C - D(n)*y_new*y_new';
end
D = diag(D);
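A quick sanity check for power_pca (an illustrative sketch, not part of this commit; the variable names are hypothetical):

    A = randn(200,5);
    C = cov(A);                     %5 x 5 sample covariance
    [B,D] = power_pca(C);
    disp(diag(D)');                 %eigenvalues, largest first
    disp(sort(eig(C),'descend')');  %MATLAB's eig should agree closely

Eigenvector signs may differ between the two, and agreement holds only to within the 1e-3 convergence threshold.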
@@ -0,0 +1,19 @@
function K = kernel_func(X1,X2,kernel_,T,p)
%Evaluate the kernel matrix between the rows of X1 and X2.
%kernel_ selects 'gauss' or 'poly'; T scales the squared distances
%(acting as an inverse squared width) for 'gauss' and the inner products
%for 'poly'; p is the polynomial degree.

[N1,d] = size(X1);
[N2,d] = size(X2);

switch kernel_

    case 'gauss'
        K = exp(-distSqrd(X1,X2,T));

    case 'poly'
        K = (1+X1*T*X2').^p;
end

function D2 = distSqrd(X,Y,T)
%scaled squared Euclidean distances between all rows of X and all rows of Y
nx = size(X,1);
ny = size(Y,1);

D2 = sum((X.^2)*T,2)*ones(1,ny) + ones(nx,1)*sum((Y.^2)*T,2)' - 2*(X*T*Y');
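For instance (an illustrative call, not taken from the original files):

    X = randn(10,2);
    K = kernel_func(X,X,'gauss',1,1);  %10 x 10 Gaussian kernel matrix
    %K(i,j) = exp(-||x_i - x_j||^2), so diag(K) is all ones and K is symmetric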
@@ -0,0 +1,53 @@
function [z,e] = kernel_kmeans(X,K,Max_Its,kwidth)
%This is a simple implementation of Kernel K-means clustering - an
%interesting paper which proposed kernel based K-means clustering is [1]
%Girolami, M, Mercer Kernel-Based Clustering in Feature Space,
%IEEE Trans Neural Networks, 13(3), 780-784, 2002.

%Create the kernel matrix.
[N,D] = size(X);
C = kernel_func(X,X,'gauss',kwidth,1);

%initialise the indicator matrix to a random segmentation of the data
Z = zeros(N,K);
for n = 1:N
    Z(n,rand_int(K)) = 1;
end

%main loop
for its = 1:Max_Its
    %compute the similarity of each data point to each cluster mean in
    %feature space - note we do not need to compute, store or update a mean
    %vector as we are using the kernel trick - cool eh?
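    %(added note: in feature space the squared distance from phi(x_n) to the
    %cluster mean m_k = (1/Nk)*sum_{i in k} phi(x_i) expands as
    %   ||phi(x_n) - m_k||^2 = K(x_n,x_n) - (2/Nk)*sum_{i in k} K(x_n,x_i)
    %                          + (1/Nk^2)*sum_{i,j in k} K(x_i,x_j)
    %which is exactly the three terms computed below, with Z(:,k) picking
    %out the members of cluster k)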
    for k=1:K
        Nk = sum(Z(:,k));
        if Nk == 0
            %guard against an empty cluster (avoids division by zero)
            Y(:,k) = inf(N,1);
        else
            Y(:,k) = diag(C) - 2*C*Z(:,k)./Nk + Z(:,k)'*C*Z(:,k)./(Nk^2);
        end
    end

    %Now we find the cluster assignment for each point based on the minimum
    %distance of the point from the mean centres in feature space using the
    %Y matrix of dissimilarities
    [~,j] = min(Y,[],2);

    %this simply updates the indicator matrix Z reflecting the new
    %allocations of data points to clusters
    Z = zeros(N,K);
    for n=1:N
        Z(n,j(n)) = 1;
    end

    %compute the overall error
    e = sum(sum(Z.*Y))./N;
    fprintf('%d Error = %f\n', its, e);
end

%return the clusters that each data point has been allocated to
for n=1:N
    z(n) = find(Z(n,:));
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%this is a little utility function which returns a random integer between 1
%& Max_Int.
function u = rand_int(Max_Int)
u = ceil(Max_Int*rand);
@@ -0,0 +1,64 @@
function [M,j,e] = kmeans(X,K,Max_Its)

%This is a simple and naive implementation of the standard K-means
%clustering algorithm for the Machine Learning II course.

%The data matrix X (N x D) is passed as argument
%The number of clusters K is passed as argument
%The maximum nos of iterations Max_Its is passed as argument

%The function returns the matrix M (K x D) - corresponding to the K mean
%vector values

%The function returns the clusters which each point has been allocated to
%(1..K) in the vector j.

[N,D] = size(X); %N - nos of data points, D - dimension of data
I = randperm(N); %a random permutation of the integers 1:N - required
                 %to set initial mean values

M = X(I(1:K),:); %M is the initial K x D matrix of mean values -
                 %simply set to the values of K randomly selected data
                 %points

for n=1:Max_Its
    %Create distance matrix which is N x K, indicating the distance each data
    %point is from the current mean values (of which there are K)
    for k=1:K
        Dist(:,k) = sum((X - repmat(M(k,:),N,1)).^2,2);
    end

    %Now we simply find which of the K means each data point is nearest to -
    %so we find the minimum of the K distances for each data point. This
    %operation can be achieved in one line of Matlab using the min function.
    [~,j] = min(Dist,[],2);

    %Now that we have the new allocations of points to clusters based on
    %the minimum distances obtained from the previous operation, we can
    %revise our estimate of the position of each mean vector by simply
    %taking the mean value of all points which have been allocated to each
    %cluster, using the following simple routine.

    for k=1:K
        if any(j==k) %only update the means of non-empty clusters
            M(k,:) = mean(X(j==k,:),1);
        end
    end

    %we create an N x K dimensional indicator matrix - each row will have a
    %1 in the column corresponding to the cluster that the data point (row)
    %has been allocated to - this is really only required to compute the
    %overall error associated with the current partitioning.

    Z = zeros(N,K);
    for m=1:N
        Z(m,j(m)) = 1;
    end

    %This simply prints the current value of the error criterion which
    %K-means is trying to minimise - the mean squared distance of each
    %point from its allocated cluster centre.
    e = sum(sum(Z.*Dist)./N);
    fprintf('%d Error = %f\n', n, e);
end
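An illustrative call on synthetic data (a sketch, not part of the commit; note that this local kmeans shadows the Statistics Toolbox function of the same name):

    X = [randn(100,2); randn(100,2)+5]; %two well-separated blobs
    [M,j,e] = kmeans(X,2,20);
    plot(X(j==1,1),X(j==1,2),'.',X(j==2,1),X(j==2,2),'ro');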
@@ -0,0 +1,81 @@
%week_8_demo_1
%image segmentation. This is a rather nice demo which shows how clustering
%methods can be employed in segmenting images into self-similar regions

%segmenting a gray scale image of a face - achieved by clustering each of
%the pixel values based on gray level
load('olivettifaces.mat');
[H,j] = kmeans(faces(:,1),5,10);

figure
colormap gray
subplot(121)
imagesc(reshape(faces(:,1),64,64));drawnow
subplot(122)
imagesc(reshape(j,64,64));drawnow

%segmenting a jpg image of water lilies - the image is represented such that
%each pixel is a three dimensional vector in RGB space, so we perform pixel
%clustering based on colour values - this is quite a nice image which
%demonstrates that the leaves and flowers of the water lilies can be
%separated from each other and segmented from the background - this is
%due to the uniform colours across each of the leaves and flowers.
clear
X = imread('water_lillies.jpg','jpg');
A = [double(reshape(X(:,:,1),600*800,1))...
     double(reshape(X(:,:,2),600*800,1))...
     double(reshape(X(:,:,3),600*800,1))];
[H,j,e] = kmeans(A,3,10);
figure
subplot(121)
imagesc(X);drawnow
subplot(122)
imagesc(reshape(j,600,800));drawnow

%this is another very nice example, as the dog, water, grass & road can be
%segmented. However this also shows the variability in the solutions
%obtained - for a single run you may or may not get a good segmentation
%into each of the regions of interest. So in the following loop K-means is
%run multiple times, storing the segmentation which yields the smallest error
%- which should correspond to the best segmentation.
clear
X = imread('wee_dog.jpg','jpg');
X = X(15:end-15,:,:); %crop image
A = [double(reshape(X(:,:,1),71*100,1))...
     double(reshape(X(:,:,2),71*100,1))...
     double(reshape(X(:,:,3),71*100,1))];
[H,j,e] = kmeans(A,4,20);
figure
subplot(121)
imagesc(X);drawnow
subplot(122)
imagesc(reshape(j,71,100));drawnow

%here we run the K-means algorithm on the image of the wee dog one hundred
%times. We retain only the segmentation yielding the smallest value of the
%error and also look at the distribution of the errors achieved - quite
%interesting.

A = A - repmat(mean(A),size(A,1),1);
A = A./repmat(std(A),size(A,1),1);
E = [];
emin = 1e100;
for i=1:100
    [H,j,e] = kmeans(A,4,20);
    if e < emin
        emin = e; %keep track of the smallest error seen so far
        j_min = j;
    end
    E = [E;e];
end
figure
subplot(121)
hist(E)
subplot(122)
imagesc(reshape(j_min,71,100));drawnow
@@ -0,0 +1,29 @@
%week 8 demo 2
%This script demonstrates a situation where standard K-means will fail: when
%two distinct clusters of data share the same mean value. This is achieved
%by making one cluster of data such that points are uniformly distributed
%within an annulus which is centred at the point (0,0). The second cluster
%corresponds to data which has an isotropic Gaussian distribution, centred
%at (0,0), whose variance is sufficiently small that points in this cluster
%are distinct from those within the annulus.
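%(Illustrative aside: if wk8_demo_dat is not to hand, data of the kind
%described can be generated along these lines - the radii, variance and
%sample sizes here are assumptions, not values taken from the file:
%   theta = 2*pi*rand(200,1); r = 4 + rand(200,1);          %annulus
%   X = [[r.*cos(theta) r.*sin(theta)]; 0.5*randn(200,2)];  %plus inner Gaussian
%)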

clear
load wk8_demo_dat;
%Run standard K-means clustering assuming K = 2 - the true value
[H,j,e] = kmeans(X,2,30);
subplot(121)
plot(X(j==1,1),X(j==1,2),'.');
hold on
plot(X(j==2,1),X(j==2,2),'ro');
title('K-Means Clustering');

%Run Kernel K-means assuming K = 2, AND the parameter of the kernel (the
%width for an RBF) is also passed - clearly this has to be selected in some
%reasonable way - cross validation is a practical way to achieve this.
[j,e] = kernel_kmeans(X,2,30,1);
subplot(122)
plot(X(j==1,1),X(j==1,2),'.');
hold on
plot(X(j==2,1),X(j==2,2),'ro');
title('Kernel K-Means Clustering');