
Commit

added weeks seven and eight
josephmmisiti committed Apr 28, 2011
1 parent fb47d66 commit eda7351
Showing 20 changed files with 324 additions and 0 deletions.
38 changes: 38 additions & 0 deletions week7/faces_demo.m
@@ -0,0 +1,38 @@
%This demo uses the function svd, which is closely related to PCA in that
%it provides a decomposition of a matrix: X = U*S*V', and so
%X'*X = V*S*U'*U*S*V' = V*S^2*V', where V is the matrix whose columns are
%the eigenvectors of X'*X, i.e. a scaled version of the principal
%directions, and the matrix U has columns which are the principal
%components, i.e. the projections of X onto V: U = X*V*inv(S).
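%
%A quick numerical check of this relation (an illustrative sketch, not
%part of the demo itself - any small random matrix will do):
%  A = randn(5,3);
%  [U,S,V] = svd(A,'econ');
%  norm(A*V - U*S)         %~0: the projections of A onto V equal U*S
%  norm(A'*A - V*S^2*V')   %~0: V diagonalises A'*A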

clear
load('olivettifaces.mat');
Selected_Face = 125; %other faces to try: 312, 255

X=faces';
[N_faces,D]=size(X);
mean_face = mean(X);
X = X - repmat(mean_face,N_faces,1);
fprintf('Performing PCA.... stay tuned\n');
[U,S,V]=svd(X,'econ'); %economy-size SVD - avoids forming the full 4096 x 4096 matrix V
subplot(131)
imagesc(reshape(X(Selected_Face,:)+mean_face,sqrt(D),sqrt(D)));
title('Original Image');

recon_err=[];

for i=1:N_faces
X_Reconst=U(Selected_Face,1:i)*S(1:i,1:i)*V(:,1:i)' + mean_face;
subplot(132)
imagesc(reshape(X_Reconst',sqrt(D),sqrt(D)));
title('Reconstructed Image');drawnow;
recon_err = [recon_err;sqrt(mean((X_Reconst - (X(Selected_Face,:) + mean_face) ).^2,2))];
colormap gray
subplot(133)
plot(1:i,recon_err,'LineWidth',3);
title('Reconstruction Error');
pause(0.1)
fprintf('%d:Reconstruction Error = %f\n',i,recon_err(i))
end


Binary file added week7/lect_1.pdf
Binary file not shown.
Binary file added week7/olivettifaces.mat
Binary file not shown.
40 changes: 40 additions & 0 deletions week7/power_pca.m
@@ -0,0 +1,40 @@
function [B,D]=power_pca(C)
%A little routine to compute PCA given a covariance matrix C, using the
%power method with deflation
N = size(C,1);

%preallocate the matrix of eigenvectors and the vector of eigenvalues
B = zeros(N,N);
D = zeros(1,N);

threshold = 1e-3;
Max_Its = 1000;

%loop round all dimensions of the covariance matrix
for n=1:N

%initialise with a random vector and normalise it to unit length
x = randn(N,1);
y = x./sqrt(x'*x);

%monitor convergence
err = 1e20;
its = 1;

%main loop to compute single eigenvector
while (err > threshold) && (its < Max_Its) %&& ensures the loop stops on convergence or at Max_Its
x = C*y;
y_new = x./sqrt(x'*x);

err = sum((y_new - y).^2);
y = y_new;

%current estimate of the eigenvalue - the norm of C*y for unit-norm y
D(n) = sqrt(x'*x);

%increment counter
its = its + 1;
end

%set the column vectors to be the found eigenvectors
B(:,n) = y_new;

%deflate the covariance matrix - remove the found component so the next
%pass converges to the next largest eigenvector/eigenvalue pair
C = C - D(n)*y_new*y_new';
end
D=diag(D);
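
%Example usage (an illustrative sketch, not part of this file's original
%contents - the data here is hypothetical):
%  X = randn(500,4)*randn(4);  %correlated sample data
%  C = cov(X);                 %sample covariance matrix
%  [B,D] = power_pca(C);       %B: eigenvectors (columns), D: diag eigenvalues
%  [V,E] = eig(C);             %compare against MATLAB's built-in eig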
Binary file added week7/wk_7.pdf
Binary file not shown.
Binary file added week7/wk_7_laboratory.pdf
Binary file not shown.
Binary file added week8/kern_km.pdf
Binary file not shown.
19 changes: 19 additions & 0 deletions week8/kernel_func.m
@@ -0,0 +1,19 @@
function K = kernel_func(X1,X2,kernel_,T,p)
%Computes the kernel matrix K between the rows of X1 and X2. kernel_
%selects the kernel type ('gauss' or 'poly'), T is the width/scaling
%parameter and p is the degree of the polynomial kernel.

[N1,d] = size(X1);
[N2,d] = size(X2);

switch kernel_

case 'gauss',
K = exp(-distSqrd(X1,X2,T));

case 'poly',
K = (1+X1*T*X2').^p;
end

function D2=distSqrd(X,Y,T)
%Computes all pairwise squared distances (scaled by T) using the
%expansion ||x - y||^2 = x'Tx + y'Ty - 2x'Ty
nx = size(X,1);
ny = size(Y,1);

D2 = sum((X.^2)*T,2)*ones(1,ny) + ones(nx,1)*sum((Y.^2)*T,2)' - 2*(X*T*Y');
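
%Example usage (a sketch - values and sizes here are hypothetical):
%  X = randn(10,2);
%  Kg = kernel_func(X,X,'gauss',1,1); %10 x 10 Gaussian (RBF) kernel matrix
%  Kp = kernel_func(X,X,'poly',1,2);  %quadratic polynomial kernel matrix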
53 changes: 53 additions & 0 deletions week8/kernel_kmeans.m
@@ -0,0 +1,53 @@
function [z,e] = kernel_kmeans(X,K,Max_Its,kwidth)
%This is a simple implementation of kernel K-means clustering - an
%interesting paper which proposed kernel-based K-means clustering is [1]
%Girolami, M., Mercer Kernel-Based Clustering in Feature Space,
%IEEE Trans. Neural Networks, 13(3), 780-784, 2002.


%Create the kernel matrix.
[N,D]=size(X);
C = kernel_func(X,X,'gauss',kwidth,1);

%initialise the indicator matrix to a random segmentation of the data
Z = zeros(N,K);
for n = 1:N
Z(n,rand_int(K)) = 1;
end

%main loop
for its = 1:Max_Its
%compute the distance of each data point from each cluster mean in
%feature space - note that we never need to compute, store or update a
%mean vector, as we are using the kernel trick - cool eh?
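%In detail (a short derivation added for clarity): with feature map phi
%and cluster mean m_k = (1/Nk)*sum_{n in k} phi(x_n), the squared distance
%  ||phi(x) - m_k||^2 = k(x,x) - (2/Nk)*sum_{n in k} k(x,x_n)
%                     + (1/Nk^2)*sum_{n,m in k} k(x_n,x_m)
%involves only kernel evaluations - exactly the three terms
%diag(C) - 2*C*Z(:,k)/Nk + Z(:,k)'*C*Z(:,k)/Nk^2 computed below.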
for k=1:K
Nk = sum(Z(:,k));
Y(:,k) = diag(C) - 2*C*Z(:,k)./Nk + Z(:,k)'*C*Z(:,k)./(Nk^2);
end

%Now we find the cluster assignment for each point based on the minimum
%distance of the point from the mean centres in feature space using the
%Y matrix of dissimilarities
[i,j]=min(Y,[],2);

%this simply updates the indicator matrix Z, reflecting the new
%allocations of data points to clusters
Z = zeros(N,K);
for n=1:N
Z(n,j(n)) = 1;
end

%compute the overall error
e = sum(sum(Z.*Y))./N;
fprintf('%d Error = %f\n', its, e);
end

%return the clusters that each data point has been allocated to
for n=1:N
z(n) = find(Z(n,:));
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%this is a little utility function which returns a random integer between
%1 & Max_Int.
function u = rand_int(Max_Int)
u=ceil(Max_Int*rand);
64 changes: 64 additions & 0 deletions week8/kmeans.m
@@ -0,0 +1,64 @@
function [M,j,e] = kmeans(X,K,Max_Its)

%This is a simple and naive implementation of the standard K-means
%clustering algorithm for the Machine Learning II course.

%The data matrix X (N x D) is passed as argument.
%The number of clusters K is passed as argument.
%The maximum number of iterations Max_Its is passed as argument.

%The function returns the matrix M (K x D) corresponding to the K mean
%vectors.

%The function returns the cluster (1..K) which each data point has been
%allocated to in the vector j, and the final value of the error
%criterion in e.

[N,D]=size(X); %N - number of data points, D - dimension of the data
I=randperm(N); %a random permutation of the integers 1:N - required
               %to set the initial mean values

M=X(I(1:K),:); %M is the initial K x D matrix of mean values -
               %simply set to the values of K randomly selected data
               %points
Mo = M;

for n=1:Max_Its
%Create the distance matrix, which is N x K, giving the squared distance
%of each data point from the current mean values (of which there are K)
for k=1:K
Dist(:,k) = sum((X - repmat(M(k,:),N,1)).^2,2);
end

%Now we simply find which of the K means each data point is nearest to -
%i.e. the minimum over the K distances for each data point. This
%operation can be achieved in one line of Matlab using the min function.
[i,j]=min(Dist,[],2);

%Now that we have the new allocations of points to clusters, based on
%the minimum distances obtained from the previous operation, we can
%revise our estimate of the position of each mean vector by simply
%taking the mean value of all points which have been allocated to each
%cluster, using the following simple routine.

for k=1:K
if any(j==k) %only update the means of non-empty clusters
M(k,:) = mean(X(j==k,:),1); %mean over rows (dim 1) - safe even for a single-member cluster
end
end

%we create an N x K dimensional indicator matrix - each row has a 1 in
%the column corresponding to the cluster that the data point (row) has
%been allocated to - this is really only required to compute the
%overall error associated with the current partitioning.

Z = zeros(N,K);
for m=1:N
Z(m,j(m)) = 1;
end

%This simply prints the current value of the error criterion which
%K-means is trying to minimise.
e = sum(sum(Z.*Dist)./N);
fprintf('%d Error = %f\n', n, e);
Mo = M;
end
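
%Example usage (an illustrative sketch - the data here is hypothetical;
%note this file shadows any toolbox kmeans on the path):
%  X = [randn(100,2)+2; randn(100,2)-2]; %two well-separated blobs
%  [M,j,e] = kmeans(X,2,20);             %M: means, j: assignments, e: error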
Binary file added week8/lect_1.pdf
Binary file not shown.
Empty file added week8/olivettifaces.mat
Empty file.
Binary file added week8/olivettifaces.mat.crdownload
Binary file not shown.
Binary file added week8/water_lillies.jpg
Binary file added week8/wee_dog.jpg
81 changes: 81 additions & 0 deletions week8/wk8_demo_1.m
@@ -0,0 +1,81 @@
%week_8_demo_1
%Image segmentation. This is a rather nice demo which shows how clustering
%methods can be employed to segment images into self-similar regions.


%segmenting a grayscale image of a face - achieved by clustering the
%pixel values based on gray level
load('olivettifaces.mat');
[H,j]=kmeans(faces(:,1),5,10);

figure
colormap gray
subplot(121)
imagesc(reshape(faces(:,1),64,64));drawnow
subplot(122)
imagesc(reshape(j,64,64));drawnow


%segmenting a jpg image of water lilies - the image is represented such
%that each pixel is a three-dimensional vector in RGB space, so we perform
%pixel clustering based on color values - this is quite a nice image which
%demonstrates that the leaves and flowers of the water lilies can be
%separated from each other and segmented from the background - this is
%due to the uniform colors across each of the leaves and flowers.
clear
X = imread('water_lillies.jpg','jpg');
A = [double(reshape(X(:,:,1),600*800,1))...
double(reshape(X(:,:,2),600*800,1))...
double(reshape(X(:,:,3),600*800,1))];
[H,j,e]=kmeans(A,3,10);
figure
subplot(121)
imagesc(X);drawnow
subplot(122)
imagesc(reshape(j,600,800));drawnow


%this is another very nice example as the dog, water, grass & road can be
%segmented. However this also shows the variability in the solutions
%obtained - for a single run you may or may not get a good segmentation
%into each of the regions of interest. So in the following loop K-means is
%run multiple times, storing the segmentation which yields the smallest
%error - which should correspond to the best segmentation.
clear
X = imread('wee_dog.jpg','jpg');
X=(X(15:end-15,:,:)); %crop image
A = [double(reshape(X(:,:,1),71*100,1))...
double(reshape(X(:,:,2),71*100,1))...
double(reshape(X(:,:,3),71*100,1))];
[H,j,e]=kmeans(A,4,20);
figure
subplot(121)
imagesc(X);drawnow
subplot(122)
imagesc(reshape(j,71,100));drawnow

%here we run the K-means algorithm on the image of the wee dog one
%hundred times. We retain only the segmentation yielding the smallest
%value of the error, and also look at the distribution of the errors
%achieved - quite interesting.

A = A - repmat(mean(A),size(A,1),1);
A = A./repmat(std(A),size(A,1),1);
E=[];
emin = 1e100;
for i=1:100
[H,j,e]=kmeans(A,4,20);
if e < emin
emin = e;  %remember the smallest error seen so far
j_min = j; %and keep the corresponding segmentation
end
E=[E;e];
end
figure
subplot(121)
hist(E)
subplot(122)
imagesc(reshape(j_min,71,100));drawnow




29 changes: 29 additions & 0 deletions week8/wk8_demo_2.m
@@ -0,0 +1,29 @@
%week 8 demo 2
%This script demonstrates a situation where standard K-means will fail:
%when two distinct clusters of data share the same mean value. This is
%achieved by making one cluster of points uniformly distributed within an
%annulus which is centered at the point (0,0). The second cluster
%corresponds to data which has an isotropic Gaussian distribution, also
%centered at (0,0), whose variance is sufficiently small that points in
%this cluster are distinct from those within the annulus.
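%
%For reference, data of this shape could be generated along the following
%lines (an illustrative sketch - the actual contents of wk8_demo_dat.mat
%may have been produced differently):
%  N = 200;
%  th = 2*pi*rand(N,1);
%  r = 2 + 0.2*rand(N,1);        %radii within a thin annulus
%  Xa = [r.*cos(th) r.*sin(th)]; %annulus cluster
%  Xb = 0.3*randn(N,2);          %tight Gaussian cluster at (0,0)
%  X = [Xa; Xb];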

clear
load wk8_demo_dat;
%Run standard K-means clustering assuming K = 2 - the true value
[H,j,e]=kmeans(X,2,30);
subplot(121)
plot(X(find(j==1),1),X(find(j==1),2),'.');
hold
plot(X(find(j==2),1),X(find(j==2),2),'ro');
title('K-Means Clustering');

%Run kernel K-means assuming K = 2; the parameter of the kernel (the
%width for an RBF) is also passed - clearly this has to be selected in
%some reasonable way, and cross-validation is a practical way to achieve
%this.
[j,e] = kernel_kmeans(X,2,30,1);
subplot(122)
plot(X(find(j==1),1),X(find(j==1),2),'.');
hold
plot(X(find(j==2),1),X(find(j==2),2),'ro');
title('Kernel K-Means Clustering');

Binary file added week8/wk8_demo_dat.mat
Binary file not shown.
Binary file added week8/wk_8.pdf
Binary file not shown.
Binary file added week8/wk_8_laboratory.pdf
Binary file not shown.
