git-svn-id: svn+ssh://lumo.ucsd.edu/projects/p1/svnroot/pdollar/toolbox@3792 52fe0c90-79fe-0310-8a18-a0b98ad248f8
pdollar committed Jan 11, 2007
commit d747956, 1 parent c5ade1e
Showing 5 changed files with 298 additions and 307 deletions.
pca.m
@@ -1,108 +1,105 @@
% principal components analysis (alternative to princomp).
% Principal components analysis (alternative to princomp).
%
% A simple dimensionality reduction technique. Use this function to create an orthonormal
% basis for the space R^N. This basis has the property that the coordinates of a vector x
% in R^N are of decreasing importance. Hence instead of using all N coordinates to
% specify the location of x, using only the first k<N still gives a vector xhat that is
% very close to x with high probability.
% A simple dimensionality reduction technique. Use to create an
% orthonormal basis for the points in R^N such that the coordinates of a
% vector x in this basis are of decreasing importance. Instead of
% using all N coordinates to specify the location of x, using only the
% first k<N still gives a vector xhat that is close to x.
%
% Use this function to retrieve the basis U. Use pca_apply.m to retrieve that basis
% coefficients for a novel vector x. Also, use pca_visualize(X,...) for visualization of
% the approximated X.
%
% This function operates on arrays of arbitrary dimension, by first converting the array
% to a vector. That is if X is n dimensional, say of dims d1 x d2 x...x dn-1 x dn. Then
% the first n-1 dimensions of X are comined. That is X is flattened to be 2 dimensional:
% (d1 * d2 * ...* dn-1) x dn.
%
% Once X is converted to 2 dimensions of size Nxn, each column represents a single
% observation, and each row is a different variable. Note that this is the opposite of
% many matlab functions such as princomp. So if X is MxNxK, then X(:,:,i) representes the
% ith observation. This is useful if X is a stack of images (each image will
% automatically get vectorized). Likewise if X is MxNxKxR, then X(:,:,:,i) becomes the
% ith observation, which is useful if X is a collection of videos each of size MxNxK.
%
% If X is very large, it is samlped before running PCA, using randomsample.
% This function operates on arrays of arbitrary dimension, by first
% converting the arrays to vector. If X is m+1 dimensional, say of size
% [d1 x d2 x...x dm x n], then the first m dimensions of X are combined. X
% is flattened to be 2 dimensional: [dxn], with d=prod(di). Once X is
% converted to 2 dimensions of size dxn, each column represents a single
% observation, and each row is a different variable. Note that this is the
% opposite of many matlab functions such as princomp. If X is MxNxn, then
% X(:,:,i) represents the ith observation (useful for stack of n images),
% likewise for n videos X is MxNxKxn. If X is very large, it is sampled
% before running PCA, using randomsample. Use this function to retrieve the
% basis U. Use pca_apply to retrieve that basis coefficients for a novel
% vector x. Use pca_visualize(X,...) for visualization of approximated X.
%
% To calculate residuals:
% residuals = variances / sum(variances);
% residuals = cumsum(residuals); plot( residuals, '- .' )
% residuals = cumsum(vars / sum(vars));
% plot( residuals, '-.' )
%
% USAGE
% [ U, mu, vars ] = pca( X )
%
% INPUTS
% X - n-dim array of size (d1 x d2 x...x dn-1) x dn (treated as dn elements)
% X - [d1 x ... x dm x n], treated as n [d1 x ... x dm] elements
%
% OUTPUTS
% U - 2D array of size (d1 * d2 * ...* dn-1) x r, where each column represents
% - a principal component of X (after X is flattened).
% mu - Array of size d1 x d2 x...x dn-1 which represents the mean of X.
% variances - sorted eigenvalues corresponding to eigenvectors in U
% U - [d x r], d=prod(di), each column is a principal component
% mu - [d1 x ... x dm] mean of X.
% vars - sorted eigenvalues corresponding to eigenvectors in U
%
% EXAMPLE
% load pca_data;
% [ U, mu, variances ] = pca( I3D1(:,:,1:12) );
% [ Y, Xhat, pe ] = pca_apply( I3D1(:,:,1), U, mu, variances, 5 );
% figure(1); im(I3D1(:,:,1)); figure(2); im(Xhat);
% pca_visualize( U, mu, variances, I3D1, 13, [0:12], [], 3 );
% load pca_data;
% [ U, mu, vars ] = pca( I3D1(:,:,1:12) );
% [ Y, Xhat, avsq ] = pca_apply( I3D1(:,:,1), U, mu, vars, 5 );
% figure(1); im(I3D1(:,:,1)); figure(2); im(Xhat);
% pca_visualize( U, mu, vars, I3D1, 13, [0:12], [], 3 );
%
% DATESTAMP
% 01-Jan-2007 1:00pm
% 10-Jan-2007 4:00pm
%
% See also PRINCOMP, PCA_APPLY, PCA_VISUALIZE, VISUALIZE_DATA, RANDOMSAMPLE

% Piotr's Image&Video Toolbox Version 1.03
% Written and maintained by Piotr Dollar pdollar-at-cs.ucsd.edu
% Please email me if you find bugs, or have suggestions or questions!

function [ U, mu, variances ] = pca( X )
function [ U, mu, vars ] = pca( X )

%%% THE FOLLOWING IS USED TO TIME SVD
% t=[]; rs = 100:20:500;
% for r=rs tic; [u,s,v] = svd(rand(r)); t(end+1)=toc; end; plot(rs,t);
% plot(rs,t,'r'); hold('on'); fplot( '10^-6 * .25*(x)^2.75', [1,1000] ); hold('off');
% x=1500; 10^-7 * 2.5*(x)^2.75 / 60 %minutes
%%%
% Will run out of memory if X has too many elements.
% Hence, choose a random sample to run PCA on.
maxmegs = 80;
if( ~isa(X,'double') )
s=whos('X'); nbytes=s.bytes/numel(X);
X = randomsample( X, maxmegs * nbytes/8 );
X = double(X);
else
X = randomsample( X, maxmegs );
end

siz=size(X); nd=ndims(X); d=prod(siz(1:end-1));
inds={':'}; inds=inds(:,ones(1,nd-1));

% Will run out of memory if X has too many elements.
% Hence, choose a random sample to run PCA on.
maxmegs = 80; %$P
if( ~isa(X,'double') )
x1=X(1); s=whos('x1'); nbytes=s.bytes;
X = randomsample( X, maxmegs * nbytes/8 );
X = double(X);
else
X = randomsample( X, maxmegs );
end
% set X to be zero mean
mu = mean( X, nd );
murep = mu( inds{:}, ones(1,siz(end)));
X = X - murep;

siz=size( X ); nd=ndims(X); d=prod(siz(1:end-1));
inds={':'}; inds=inds(:,ones(1,nd-1));

% set X to be zero mean
mu = mean( X, nd );
murep = mu( inds{:}, ones(1,siz(end)));
X = X - murep;
% flatten X
X = reshape(X, d, [] );
[N,n] = size(X);
if(n==1); U=zeros(d,1); vars=0; return; end
X = X ./ sqrt(n-1);

% flatten X
X = reshape(X, d, [] );
[N,n] = size(X);
if (n==1) U = zeros( d, 1 ); variances = 0; return; end
X = X ./ sqrt(n-1);
% get principal components using the SVD
% note: X = U * S * V'; maxrank = min(n-1,N)
if( N>n )
[V,SS,V] = svd( X' * X );
keeplocs = diag(SS) > 1e-30;
SS = SS(keeplocs,keeplocs);
V = V(:,keeplocs);
U = X * (V * SS^-.5);
else
[U,SS,U] = svd( X * X' );
keeplocs = diag(SS) > 1e-30;
SS = SS(keeplocs,keeplocs);
U = U(:,keeplocs);
end

% get principal components using the SVD
% X = U * S * V'; all else follows
% note: maxrank = min( n-1, N )
if (N>n)
[V,Ssq,V] = svd( X' * X );
keeplocs = diag(Ssq) > 1e-30;
Ssq = Ssq(keeplocs,keeplocs);
V = V(:,keeplocs);
U = X * (V * Ssq^-.5);
else
[U,Ssq,U] = svd( X * X' );
keeplocs = diag(Ssq) > 1e-30;
Ssq = Ssq(keeplocs,keeplocs);
U = U(:,keeplocs);
end

% eigenvalues squared
variances = diag(Ssq);
% eigenvalues squared
vars = diag(SS);

%%% THE FOLLOWING IS USED TO TIME SVD
% t=[]; rs = 100:20:500;
% for r=rs tic; [u,s,v] = svd(rand(r)); t(end+1)=toc; end; plot(rs,t);
% plot(rs,t,'r'); hold('on');
% fplot( '10^-6 * .25*(x)^2.75', [1,1000] ); hold('off');
% x=1500; 10^-7 * 2.5*(x)^2.75 / 60 %minutes
%%%
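As a reading aid, and not part of the commit, here is a minimal MATLAB sketch of the two ideas pca.m relies on: flattening an MxNxn stack so that each column is one vectorized observation, and recovering the principal components from the small n x n matrix X'*X when d > n. All names here (IM, Xc, keep, dummy, ...) are illustrative, and the mean is kept in flattened form rather than reshaped back the way pca.m returns it.

% Sketch: flatten a stack of n images, then do PCA via the n x n Gram matrix.
M = 16; N = 16; n = 20;
IM = rand(M, N, n);                    % n observations of size MxN
d  = M * N;
X  = reshape(IM, d, n);                % each column is one vectorized image
mu = mean(X, 2);                       % flattened mean (pca.m reshapes it back)
Xc = (X - repmat(mu, 1, n)) ./ sqrt(n-1);
[dummy, Ssq, V] = svd(Xc' * Xc);       % n x n problem instead of d x d
keep = diag(Ssq) > 1e-30;              % drop numerically zero components
Ssq  = Ssq(keep, keep);  V = V(:, keep);
U    = Xc * (V * Ssq^-.5);             % columns of U are orthonormal PCs
vars = diag(Ssq);                      % sorted variances (squared singular values)
residuals = cumsum(vars / sum(vars));  % cumulative fraction of variance captured

The point of the N>n branch above is exactly this: the eigenvectors V of the small matrix X'*X give the left singular vectors of X via U = X*V*S^-1, so the expensive d x d problem is never formed.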
pca_apply.m
@@ -1,85 +1,74 @@
% Companion function to pca.
%
% Use pca to retrieve the principal components U and the mean mu from a
% set fo vectors X1 via [U,mu,variances] = pca(X1). Then given a new
% vector x, use y = pca_apply( x, U, mu, variances, k ) to get the first k
% coefficients of x in the space spanned by the columns of U.
%
% The input x can be a matrix X, where each column represents a single
% vector in R^N. If X has higher dimension, the first n-1 dimensions are
% used as the variables and the last dimension as an observation -- for
% more information on this see pca.m
% set fo vectors X1 via [U,mu,vars] = pca(X1). Then given a new
% vector x, use y = pca_apply( x, U, mu, vars, k ) to get the first k
% coefficients of x in the space spanned by the columns of U. See pca for
% general information.
%
% This may prove useful:
% siz = size(X); k = 100;
% Uim = reshape( U(:,1:k), [ siz(1:end-1) k ] );
%
% It is also interesting to look at the distribution of the points Y's (their projection
% onto 2D or 3D):
% plot( Y(1,:), Y(2,:), '.' );
% plot3( Y(1,:), Y(2,:), Y(3,:), '.' );
%
% INPUTS
% X - array for which to get PCA coefficients
% U - [returned by pca] -- see pca
% mu - [returned by pca] -- see pca
% variances - [returned by pca] -- see pca
% vars - [returned by pca] -- see pca
% k - number of principal coordinates to approximate X with
%
% OUTPUTS
% Yk - first k coordinates of X in column space of U
% Xhat - approximation of X corresponding to Yk
% pixelerror - measure of squared error per pixel normalized to fall between [0,1]
% avsq - measure of squared error normalized to fall between [0,1]
%
% DATESTAMP
% 29-Nov-2005 2:00pm
% 10-Jan-2007 4:00pm
%
% See also PCA, PCA_APPLY_LARGE, PCA_VISUALIZE

% Piotr's Image&Video Toolbox Version 1.03
% Written and maintained by Piotr Dollar pdollar-at-cs.ucsd.edu
% Please email me if you find bugs, or have suggestions or questions!

function [ Yk, Xhat, avsq, avsq_orig ] = pca_apply( X, U, mu, variances, k )
% Note: the 4 output version of this function is for pca_apply_large

siz = size(X); nd = ndims(X); [N,r] = size(U);
if (N==prod(siz) && ~(nd==2 && siz(2)==1)) siz=[siz, 1]; nd=nd+1; end;
inds = {':'}; inds = inds(:,ones(1,nd-1));
d= prod(siz(1:end-1));
function [ Yk, Xhat, avsq, avsq_orig ] = pca_apply( X, U, mu, vars, k )
siz = size(X); nd = ndims(X); [N,r] = size(U);
if(N==prod(siz) && ~(nd==2 && siz(2)==1)); siz=[siz, 1]; nd=nd+1; end;
inds = {':'}; inds = inds(:,ones(1,nd-1));
d= prod(siz(1:end-1));

% some error checking
if(isa(X,'uint8')); X = double(X); end;
if(k>r); warning(['Only ' int2str(r) '<k comp. available.']); k=r; end;
if(d~=N); error('incorrect size for X or U'); end;

% some error checking
if(isa(X,'uint8')) X = double(X); end;
if( k>r )
warning(['Only ' int2str(r) '<k principal components available.']); k=r; end;
if (d~=N) error('incorrect size for X or U'); end;

% subtract mean, then flatten X
Xorig = X;
murep = mu( inds{:}, ones(1,siz(end)));
X = X - murep;
X = reshape(X, d, [] );
% subtract mean, then flatten X
Xorig = X;
murep = mu( inds{:}, ones(1,siz(end)));
X = X - murep;
X = reshape(X, d, [] );

% Find Yk, the first k coefficients of X in the new basis
k = min( r, k );
Uk = U(:,1:k);
Yk = Uk' * X;
% Find Yk, the first k coefficients of X in the new basis
k = min( r, k );
Uk = U(:,1:k);
Yk = Uk' * X;

% calculate Xhat the approximation of X using the first k princ components
if( nargout>1 )
Xhat = Uk * Yk;
Xhat = reshape( Xhat, siz );
Xhat = Xhat + murep;
end;
% calculate Xhat - the approx of X using the first k princ components
if( nargout>1 )
Xhat = Uk * Yk;
Xhat = reshape( Xhat, siz );
Xhat = Xhat + murep;
end;

% caclulate average value of (Xhat-Xorig).^2 compared to average value of
% X.^2, where X is Xorig without the mean. This is equivalent to what
% fraction of the variance is captured by Xhat.
if( nargout>2 )
avsq = Xhat - Xorig;
avsq = dot(avsq(:),avsq(:));
avsq_orig = dot(X(:),X(:));
if (nargout==3)
avsq = avsq / avsq_orig;
end
end;
% caclulate average value of (Xhat-Xorig).^2 compared to average value
% of X.^2, where X is Xorig without the mean. This is equivalent to
% what fraction of the variance is captured by Xhat.
% Note: the 4 output version of this function is for pca_apply_large
if( nargout>2 )
avsq = Xhat - Xorig;
avsq = dot(avsq(:),avsq(:));
avsq_orig = dot(X(:),X(:));
if (nargout==3)
avsq = avsq / avsq_orig;
end
end;
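Similarly, a minimal sketch, not part of the commit, of what pca_apply computes: project a novel vector onto the first k principal components, reconstruct it, and report the normalized squared error. The data here is synthetic, and U and mu are obtained directly from an economy SVD rather than by calling pca; all variable names are illustrative.

% Sketch: rank-k projection, reconstruction, and normalized error.
d = 100; n = 50; k = 5;
X  = randn(d, n);                          % training observations (columns)
mu = mean(X, 2);
Xc = (X - repmat(mu, 1, n)) ./ sqrt(n-1);
[U, S, V] = svd(Xc, 'econ');               % principal components in columns of U
x  = randn(d, 1);                          % a novel observation
Uk = U(:, 1:k);
Yk   = Uk' * (x - mu);                     % first k coefficients of x
Xhat = Uk * Yk + mu;                       % rank-k approximation of x
avsq = dot(Xhat - x, Xhat - x) / dot(x - mu, x - mu);  % in [0,1], as in pca_apply

Because Uk*Yk is the orthogonal projection of x - mu onto the span of the first k components, avsq is the fraction of the (mean-subtracted) energy of x that the k-term approximation fails to capture, which is why it always falls between 0 and 1.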