standardized, new comments

git-svn-id: svn+ssh://lumo.ucsd.edu/projects/p1/svnroot/pdollar/toolbox@3792 52fe0c90-79fe-0310-8a18-a0b98ad248f8
arctanbell · Jan 11, 2007 · d747956 · d747956
1 parent c5ade1e
commit d747956
Show file tree

Hide file tree

Showing 5 changed files with 298 additions and 307 deletions.
diff --git a/classify/pca.m b/classify/pca.m
@@ -1,108 +1,105 @@
-% principal components analysis (alternative to princomp).
+% Principal components analysis (alternative to princomp).
 %
-% A simple dimensionality reduction technique.  Use this function to create an orthonormal
-% basis for the space R^N.  This basis has the property that the coordinates of a vector x
-% in R^N are of decreasing importance.  Hence instead of using all N coordinates to
-% specify the location of x, using only the first k<N still gives a vector xhat that is
-% very close to x with high probability.  
+% A simple dimensionality reduction technique.  Use to create an
+% orthonormal basis for the points in R^N such that the coordinates of a
+% vector x in this basis are of decreasing importance.  Instead of
+% using all N coordinates to specify the location of x, using only the
+% first k<N still gives a vector xhat that is close to x.
 %
-% Use this function to retrieve the basis U.  Use pca_apply.m to retrieve that basis
-% coefficients for a novel vector x.  Also, use pca_visualize(X,...) for visualization of
-% the approximated X. 
-%
-% This function operates on arrays of arbitrary dimension, by first converting the array
-% to a vector.  That is if X is n dimensional, say of dims d1 x d2 x...x dn-1 x dn.  Then
-% the first n-1 dimensions of X are comined. That is X is flattened to be 2 dimensional:
-% (d1 * d2 * ...* dn-1) x dn.  
-%
-% Once X is converted to 2 dimensions of size Nxn, each column represents a single 
-% observation, and each row is a different variable.  Note that this is the opposite of
-% many matlab functions such as princomp.  So if X is MxNxK, then X(:,:,i) representes the
-% ith observation.  This is useful if X is a stack of images (each image will
-% automatically get vectorized).  Likewise if X is MxNxKxR, then X(:,:,:,i) becomes the
-% ith observation, which is useful if X is a collection of videos each of size MxNxK.
-%
-% If X is very large, it is samlped before running PCA, using randomsample.
+% This function operates on arrays of arbitrary dimension, by first
+% converting the arrays to vector.  If X is m+1 dimensional, say of size
+% [d1 x d2 x...x dm x n], then the first m dimensions of X are combined. X
+% is flattened to be 2 dimensional: [dxn], with d=prod(di). Once X is
+% converted to 2 dimensions of size dxn, each column represents a single
+% observation, and each row is a different variable.  Note that this is the
+% opposite of many matlab functions such as princomp.  If X is MxNxn, then
+% X(:,:,i) represents the ith observation (useful for stack of n images),
+% likewise for n videos X is MxNxKxn. If X is very large, it is sampled
+% before running PCA, using randomsample. Use this function to retrieve the
+% basis U.  Use pca_apply to retrieve that basis coefficients for a novel
+% vector x. Use pca_visualize(X,...) for visualization of approximated X. 
 %
 % To calculate residuals: 
-%   residuals = variances / sum(variances); 
-%   residuals = cumsum(residuals); plot( residuals, '- .' )
+%   residuals = cumsum(vars / sum(vars)); 
+%   plot( residuals, '-.' )
+%
+% USAGE
+%  [ U, mu, vars ] = pca( X )
 %
 % INPUTS
-%   X           - n-dim array of size (d1 x d2 x...x dn-1) x dn (treated as dn elements)
+%  X         - [d1 x ... x dm x n], treated as n [d1 x ... x dm] elements
 %
 % OUTPUTS
-%   U           - 2D array of size (d1 * d2 * ...* dn-1) x r, where each column represents 
-%               - a principal component of X (after X is flattened).
-%   mu          - Array of size d1 x d2 x...x dn-1 which represents the mean of X.
-%   variances   - sorted eigenvalues corresponding to eigenvectors in U 
+%  U         - [d x r], d=prod(di), each column is a principal component
+%  mu        - [d1 x ... x dm] mean of X.
+%  vars      - sorted eigenvalues corresponding to eigenvectors in U 
 %
 % EXAMPLE
-%   load pca_data;  
-%   [ U, mu, variances ] = pca( I3D1(:,:,1:12) );
-%   [ Y, Xhat, pe ] = pca_apply( I3D1(:,:,1), U, mu, variances, 5 );
-%   figure(1); im(I3D1(:,:,1));  figure(2); im(Xhat);
-%   pca_visualize( U, mu, variances, I3D1, 13, [0:12], [], 3 );
+%  load pca_data;  
+%  [ U, mu, vars ] = pca( I3D1(:,:,1:12) );
+%  [ Y, Xhat, avsq ] = pca_apply( I3D1(:,:,1), U, mu, vars, 5 );
+%  figure(1); im(I3D1(:,:,1));  figure(2); im(Xhat);
+%  pca_visualize( U, mu, vars, I3D1, 13, [0:12], [], 3 );
 %
 % DATESTAMP
-%   01-Jan-2007  1:00pm
+%  10-Jan-2007  4:00pm
 %
 % See also PRINCOMP, PCA_APPLY, PCA_VISUALIZE, VISUALIZE_DATA, RANDOMSAMPLE
 
 % Piotr's Image&Video Toolbox      Version 1.03   
 % Written and maintained by Piotr Dollar    pdollar-at-cs.ucsd.edu 
 % Please email me if you find bugs, or have suggestions or questions! 
 
-function [ U, mu, variances ] = pca( X )
+function [ U, mu, vars ] = pca( X )
 
-    %%% THE FOLLOWING IS USED TO TIME SVD
-    % t=[]; rs = 100:20:500; 
-    % for r=rs tic; [u,s,v] = svd(rand(r)); t(end+1)=toc; end; plot(rs,t);
-    % plot(rs,t,'r'); hold('on'); fplot( '10^-6 * .25*(x)^2.75', [1,1000] ); hold('off');
-    % x=1500; 10^-7 * 2.5*(x)^2.75 / 60 %minutes
-    %%%
+  % Will run out of memory if X has too many elements.  
+  % Hence, choose a random sample to run PCA on.
+  maxmegs = 80;
+  if( ~isa(X,'double') )  
+    s=whos('X'); nbytes=s.bytes/numel(X);
+    X = randomsample( X, maxmegs * nbytes/8 );
+    X = double(X);
+  else
+    X = randomsample( X, maxmegs );
+  end
+
+  siz=size(X);  nd=ndims(X);  d=prod(siz(1:end-1));
+  inds={':'};  inds=inds(:,ones(1,nd-1));   
 
-    % Will run out of memory if X has too many elements.  
-    % Hence, choose a random sample to run PCA on.
-    maxmegs = 80; %$P
-    if( ~isa(X,'double') )  
-        x1=X(1); s=whos('x1'); nbytes=s.bytes;  
-        X = randomsample( X, maxmegs * nbytes/8 );
-        X = double(X);
-    else
-        X = randomsample( X, maxmegs );
-    end
+  % set X to be zero mean
+  mu = mean( X, nd );
+  murep = mu( inds{:}, ones(1,siz(end)));
+  X = X - murep;
 
-    siz=size( X );  nd=ndims(X);  d=prod(siz(1:end-1));
-    inds={':'};  inds=inds(:,ones(1,nd-1));   
-
-    % set X to be zero mean
-    mu = mean( X, nd );
-    murep = mu( inds{:}, ones(1,siz(end)));
-    X = X - murep;
+  % flatten X
+  X = reshape(X, d, [] );
+  [N,n] = size(X);
+  if(n==1); U=zeros(d,1); vars=0; return; end
+  X = X ./ sqrt(n-1);
 
-    % flatten X
-    X = reshape(X, d, [] );
-    [N,n] = size(X);
-    if (n==1) U = zeros( d, 1 ); variances = 0; return; end
-    X = X ./ sqrt(n-1);
+  % get principal components using the SVD 
+  % note: X = U * S * V';  maxrank = min(n-1,N)
+  if( N>n )
+    [V,SS,V] = svd( X' * X ); 
+    keeplocs = diag(SS) > 1e-30;
+    SS = SS(keeplocs,keeplocs);
+    V = V(:,keeplocs);
+    U = X * (V * SS^-.5);
+  else
+    [U,SS,U] = svd( X * X' );
+    keeplocs = diag(SS) > 1e-30;
+    SS = SS(keeplocs,keeplocs);
+    U = U(:,keeplocs);
+  end    
 
-    % get principal components using the SVD 
-    %  X = U * S * V'; all else follows
-    %  note: maxrank = min( n-1, N )
-    if (N>n)
-        [V,Ssq,V] = svd( X' * X ); 
-        keeplocs = diag(Ssq) > 1e-30;
-        Ssq = Ssq(keeplocs,keeplocs);
-        V = V(:,keeplocs);
-        U = X * (V * Ssq^-.5);
-    else
-        [U,Ssq,U] = svd( X * X' );
-        keeplocs = diag(Ssq) > 1e-30;
-        Ssq = Ssq(keeplocs,keeplocs);
-        U = U(:,keeplocs);
-    end    
-
-    % eigenvalues squared
-    variances = diag(Ssq);
+  % eigenvalues squared
+  vars = diag(SS);
 
+  %%% THE FOLLOWING IS USED TO TIME SVD
+  % t=[]; rs = 100:20:500; 
+  % for r=rs tic; [u,s,v] = svd(rand(r)); t(end+1)=toc; end; plot(rs,t);
+  % plot(rs,t,'r'); hold('on'); 
+  % fplot( '10^-6 * .25*(x)^2.75', [1,1000] ); hold('off');
+  % x=1500; 10^-7 * 2.5*(x)^2.75 / 60 %minutes
+  %%%
+
diff --git a/classify/pca_apply.m b/classify/pca_apply.m
@@ -1,85 +1,74 @@
 % Companion function to pca.
 %
 % Use pca to retrieve the principal components U and the mean mu from a
-% set fo vectors X1 via [U,mu,variances] = pca(X1).  Then given a new
-% vector x, use y = pca_apply( x, U, mu, variances, k ) to get the first k
-% coefficients of x in the space spanned by the columns of U.
-%
-% The input x can be a matrix X, where each column represents a single
-% vector in R^N.  If X has higher dimension, the first n-1 dimensions are
-% used as the variables and the last dimension as an observation -- for
-% more information on this see pca.m
+% set fo vectors X1 via [U,mu,vars] = pca(X1).  Then given a new
+% vector x, use y = pca_apply( x, U, mu, vars, k ) to get the first k
+% coefficients of x in the space spanned by the columns of U. See pca for
+% general information.
 %
 % This may prove useful:
 %   siz = size(X);  k = 100;
 %   Uim = reshape( U(:,1:k), [ siz(1:end-1) k ]  );
 %
-% It is also interesting to look at the distribution of the points Y's (their projection
-% onto 2D or 3D): 
-%   plot( Y(1,:), Y(2,:), '.' );
-%   plot3( Y(1,:), Y(2,:), Y(3,:), '.' );
-%
 % INPUTS
 %   X           - array for which to get PCA coefficients
 %   U           - [returned by pca] -- see pca
 %   mu          - [returned by pca] -- see pca
-%   variances   - [returned by pca] -- see pca
+%   vars        - [returned by pca] -- see pca
 %   k           - number of principal coordinates to approximate X with
 %
 % OUTPUTS
 %   Yk          - first k coordinates of X in column space of U
 %   Xhat        - approximation of X corresponding to Yk
-%   pixelerror  - measure of squared error per pixel normalized to fall between [0,1]
+%   avsq        - measure of squared error normalized to fall between [0,1]
 %
 % DATESTAMP
-%   29-Nov-2005  2:00pm
+%  10-Jan-2007  4:00pm
 %
 % See also PCA, PCA_APPLY_LARGE, PCA_VISUALIZE
 
 % Piotr's Image&Video Toolbox      Version 1.03   
 % Written and maintained by Piotr Dollar    pdollar-at-cs.ucsd.edu 
 % Please email me if you find bugs, or have suggestions or questions! 
 
-function [ Yk, Xhat, avsq, avsq_orig ] = pca_apply( X, U, mu, variances, k )
-    % Note: the 4 output version of this function is for pca_apply_large
-
-    siz = size(X); nd = ndims(X);  [N,r]  = size(U);
-    if (N==prod(siz) && ~(nd==2 && siz(2)==1)) siz=[siz, 1]; nd=nd+1; end;
-    inds = {':'}; inds = inds(:,ones(1,nd-1));   
-    d= prod(siz(1:end-1));
+function [ Yk, Xhat, avsq, avsq_orig ] = pca_apply( X, U, mu, vars, k )
+  siz = size(X); nd = ndims(X);  [N,r]  = size(U);
+  if(N==prod(siz) && ~(nd==2 && siz(2)==1)); siz=[siz, 1]; nd=nd+1; end;
+  inds = {':'}; inds = inds(:,ones(1,nd-1));   
+  d= prod(siz(1:end-1));
+
+  % some error checking
+  if(isa(X,'uint8')); X = double(X); end;
+  if(k>r); warning(['Only ' int2str(r) '<k comp. available.']); k=r; end;
+  if(d~=N); error('incorrect size for X or U'); end;
 
-    % some error checking
-    if(isa(X,'uint8')) X = double(X); end;
-    if( k>r ) 
-        warning(['Only ' int2str(r) '<k principal components available.']); k=r; end;
-    if (d~=N) error('incorrect size for X or U'); end;
-
-    % subtract mean, then flatten X
-    Xorig = X;
-    murep = mu( inds{:}, ones(1,siz(end)));
-    X = X - murep;
-    X = reshape(X, d, [] );
+  % subtract mean, then flatten X
+  Xorig = X;
+  murep = mu( inds{:}, ones(1,siz(end)));
+  X = X - murep;
+  X = reshape(X, d, [] );
 
-    % Find Yk, the first k coefficients of X in the new basis
-    k = min( r, k );
-    Uk = U(:,1:k);
-    Yk = Uk' * X;
+  % Find Yk, the first k coefficients of X in the new basis
+  k = min( r, k );
+  Uk = U(:,1:k);
+  Yk = Uk' * X;
 
-    % calculate Xhat the approximation of X using the first k princ components
-    if( nargout>1 ) 
-        Xhat = Uk * Yk; 
-        Xhat = reshape( Xhat, siz );
-        Xhat = Xhat + murep;
-    end;
+  % calculate Xhat - the approx of X using the first k princ components
+  if( nargout>1 ) 
+    Xhat = Uk * Yk; 
+    Xhat = reshape( Xhat, siz );
+    Xhat = Xhat + murep;
+  end;
 
-    % caclulate average value of (Xhat-Xorig).^2 compared to average value of
-    % X.^2, where X is Xorig without the mean.  This is equivalent to what
-    % fraction of the variance is captured by Xhat.
-    if( nargout>2 )
-        avsq = Xhat - Xorig; 
-        avsq = dot(avsq(:),avsq(:));
-        avsq_orig = dot(X(:),X(:)); 
-        if (nargout==3)
-            avsq = avsq / avsq_orig;
-        end
-    end;
+  % caclulate average value of (Xhat-Xorig).^2 compared to average value
+  % of X.^2, where X is Xorig without the mean.  This is equivalent to 
+  % what fraction of the variance is captured by Xhat.
+  % Note: the 4 output version of this function is for pca_apply_large
+  if( nargout>2 )
+    avsq = Xhat - Xorig; 
+    avsq = dot(avsq(:),avsq(:));
+    avsq_orig = dot(X(:),X(:)); 
+    if (nargout==3)
+      avsq = avsq / avsq_orig;
+    end
+  end;