INIT repo created and first commits

edwinyi · Sep 14, 2012 · b0b59da · b0b59da
1 parent 57928a4
commit b0b59da
Show file tree

Hide file tree

Showing 68 changed files with 2,264 additions and 0 deletions.
diff --git a/README.md b/README.md
@@ -0,0 +1,13 @@
+Pattern Recognition and Machine Learning
+===========
+
+License
+-------
+Currently Released Under GPLv3
+
+
+Contact
+-------
+sth4nth(CHEN, Mo) [username] at gmail dot com
+
+KuantKid(LI,Wei) [username] at gmail dot com
diff --git a/chapter01/condEntropy.m b/chapter01/condEntropy.m
@@ -0,0 +1,25 @@
+function z = condEntropy (x, y)
+% Compute conditional entropy H(x|y) of two discrete variables x and y.
+% Written by Mo Chen ([email protected]).
+assert(numel(x) == numel(y));
+n = numel(x);
+x = reshape(x,1,n);
+y = reshape(y,1,n);
+
+l = min(min(x),min(y));
+x = x-l+1;
+y = y-l+1;
+k = max(max(x),max(y));
+
+idx = 1:n;
+Mx = sparse(idx,x,1,n,k,n);
+My = sparse(idx,y,1,n,k,n);
+Pxy = nonzeros(Mx'*My/n); %joint distribution of x and y
+Hxy = -dot(Pxy,log2(Pxy+eps));
+
+Py = mean(My,1);
+Hy = -dot(Py,log2(Py+eps));
+
+% conditional entropy H(x|y)
+z = Hxy-Hy;
+
diff --git a/chapter01/entropy.m b/chapter01/entropy.m
@@ -0,0 +1,8 @@
+function z = entropy(x)
+% Compute entropy H(x) of a discrete variable x.
+% Written by Mo Chen ([email protected]).
+n = numel(x);
+x = reshape(x,1,n);
+[u,~,label] = unique(x);
+p = full(mean(sparse(1:n,label,1,n,numel(u),n),1));
+z = -dot(p,log2(p+eps));
diff --git a/chapter01/jointEntropy.m b/chapter01/jointEntropy.m
@@ -0,0 +1,17 @@
+function z = jointEntropy(x, y)
+% Compute joint entropy H(x,y) of two discrete variables x and y.
+% Written by Mo Chen ([email protected]).    
+assert(numel(x) == numel(y));
+n = numel(x);
+x = reshape(x,1,n);
+y = reshape(y,1,n);
+
+l = min(min(x),min(y));
+x = x-l+1;
+y = y-l+1;
+k = max(max(x),max(y));
+
+idx = 1:n;
+p = nonzeros(sparse(idx,x,1,n,k,n)'*sparse(idx,y,1,n,k,n)/n); %joint distribution of x and y
+
+z = -dot(p,log2(p+eps));
diff --git a/chapter01/mutInfo.m b/chapter01/mutInfo.m
@@ -0,0 +1,28 @@
+function z = mutInfo(x, y)
+% Compute mutual information I(x,y) of two discrete variables x and y.
+% Written by Mo Chen ([email protected]).
+assert(numel(x) == numel(y));
+n = numel(x);
+x = reshape(x,1,n);
+y = reshape(y,1,n);
+
+l = min(min(x),min(y));
+x = x-l+1;
+y = y-l+1;
+k = max(max(x),max(y));
+
+idx = 1:n;
+Mx = sparse(idx,x,1,n,k,n);
+My = sparse(idx,y,1,n,k,n);
+Pxy = nonzeros(Mx'*My/n); %joint distribution of x and y
+Hxy = -dot(Pxy,log2(Pxy+eps));
+
+Px = mean(Mx,1);
+Py = mean(My,1);
+
+% entropy of Py and Px
+Hx = -dot(Px,log2(Px+eps));
+Hy = -dot(Py,log2(Py+eps));
+
+% mutual information
+z = Hx + Hy - Hxy;
diff --git a/chapter01/nmi.m b/chapter01/nmi.m
@@ -0,0 +1,31 @@
+function z = nmi(x, y)
+% Compute nomalized mutual information I(x,y)/sqrt(H(x)*H(y)).
+% Written by Michael Chen ([email protected]).
+assert(numel(x) == numel(y));
+n = numel(x);
+x = reshape(x,1,n);
+y = reshape(y,1,n);
+
+l = min(min(x),min(y));
+x = x-l+1;
+y = y-l+1;
+k = max(max(x),max(y));
+
+idx = 1:n;
+Mx = sparse(idx,x,1,n,k,n);
+My = sparse(idx,y,1,n,k,n);
+Pxy = nonzeros(Mx'*My/n); %joint distribution of x and y
+Hxy = -dot(Pxy,log2(Pxy+eps));
+
+Px = mean(Mx,1);
+Py = mean(My,1);
+
+% entropy of Py and Px
+Hx = -dot(Px,log2(Px+eps));
+Hy = -dot(Py,log2(Py+eps));
+
+% mutual information
+MI = Hx + Hy - Hxy;
+
+% normalized mutual information
+z = sqrt((MI/Hx)*(MI/Hy)) ;
diff --git a/chapter01/nvi.m b/chapter01/nvi.m
@@ -0,0 +1,28 @@
+function z = nvi(x, y)
+% Compute nomalized variation information (1-I(x,y)/H(x,y)).
+% Written by Michael Chen ([email protected]).
+assert(numel(x) == numel(y));
+n = numel(x);
+x = reshape(x,1,n);
+y = reshape(y,1,n);
+
+l = min(min(x),min(y));
+x = x-l+1;
+y = y-l+1;
+k = max(max(x),max(y));
+
+idx = 1:n;
+Mx = sparse(idx,x,1,n,k,n);
+My = sparse(idx,y,1,n,k,n);
+Pxy = nonzeros(Mx'*My/n); %joint distribution of x and y
+Hxy = -dot(Pxy,log2(Pxy+eps));
+
+Px = mean(Mx,1);
+Py = mean(My,1);
+
+% entropy of Py and Px
+Hx = -dot(Px,log2(Px+eps));
+Hy = -dot(Py,log2(Py+eps));
+
+% nvi
+z = 2-(Hx+Hy)/Hxy;
diff --git a/chapter01/relatEntropy.m b/chapter01/relatEntropy.m
@@ -0,0 +1,21 @@
+function z = relatEntropy (x, y)
+% Compute relative entropy (a.k.a KL divergence) KL(p(x)||p(y)) of two discrete variables x and y.
+% Written by Mo Chen ([email protected]).    
+assert(numel(x) == numel(y));
+n = numel(x);
+x = reshape(x,1,n);
+y = reshape(y,1,n);
+
+l = min(min(x),min(y));
+x = x-l+1;
+y = y-l+1;
+k = max(max(x),max(y));
+
+idx = 1:n;
+Mx = sparse(idx,x,1,n,k,n);
+My = sparse(idx,y,1,n,k,n);
+Px = mean(Mx,1);
+Py = mean(My,1);
+
+z = -dot(Px,log2(Py+eps)-log2(Px+eps));
+
diff --git a/chapter02/demo.m b/chapter02/demo.m
@@ -0,0 +1 @@
+% TODO: 1) improve precision of logBesseli; 2) test cases for all functions
diff --git a/chapter02/pdfDirichletLn.m b/chapter02/pdfDirichletLn.m
@@ -0,0 +1,13 @@
+function y = pdfDirichletLn(X, a)
+% Compute log pdf of a Dirichlet distribution.
+%   X: d x n data matrix satifying (sum(X,1)==ones(1,n) && X>=0)
+%   a: d x k parameters
+%   y: k x n probability density
+% Written by Mo Chen ([email protected]).
+X = bsxfun(@times,X,1./sum(X,1));
+if size(a,1) == 1
+    a = repmat(a,size(X,1),1);
+end
+c = gammaln(sum(a,1))-sum(gammaln(a),1);
+g = (a-1)'*log(X);
+y = bsxfun(@plus,g,c');
diff --git a/chapter02/pdfGaussLn.m b/chapter02/pdfGaussLn.m
@@ -0,0 +1,36 @@
+function y = pdfGaussLn(X, mu, sigma)
+% Compute log pdf of a Gaussian distribution.
+% Written by Mo Chen ([email protected]).
+
+[d,n] = size(X);
+k = size(mu,2);
+if n == k && size(sigma,1) == 1           
+    X = bsxfun(@times,X-mu,1./sigma);
+    q = dot(X,X,1);  % M distance
+    c = d*log(2*pi)+2*log(sigma);          % normalization constant
+    y = -0.5*(c+q);
+elseif size(sigma,1)==d && size(sigma,2)==d && k==1   % one mu and one dxd sigma
+    X = bsxfun(@minus,X,mu);
+    [R,p]= chol(sigma);
+    if p ~= 0
+        error('ERROR: sigma is not PD.');
+    end
+    Q = R'\X;
+    q = dot(Q,Q,1);  % quadratic term (M distance)
+    c = d*log(2*pi)+2*sum(log(diag(R)));   % normalization constant
+    y = -0.5*(c+q);
+elseif size(sigma,1)==d && size(sigma,2)==k % k mu and k diagonal sigma
+    lambda = 1./sigma;
+    ml = mu.*lambda;
+    q = bsxfun(@plus,X'.^2*lambda-2*X'*ml,dot(mu,ml,1)); % M distance
+    c = d*log(2*pi)+2*sum(log(sigma),1); % normalization constant
+    y = -0.5*bsxfun(@plus,q,c);
+elseif size(sigma,1)==1 && (size(sigma,2)==k || size(sigma,2)==1) % k mu and (k or one) scalar sigma
+    X2 = repmat(dot(X,X,1)',1,k);
+    D = bsxfun(@plus,X2-2*X'*mu,dot(mu,mu,1));
+    q = bsxfun(@times,D,1./sigma);  % M distance
+    c = d*(log(2*pi)+2*log(sigma));          % normalization constant
+    y = -0.5*bsxfun(@plus,q,c);
+else
+    error('Parameters mismatched.');
+end
diff --git a/chapter02/pdfKdeLn.m b/chapter02/pdfKdeLn.m
@@ -0,0 +1,6 @@
+function z = pdfKdeLn (X, Y, sigma)
+% Compute log pdf of kernel density estimator.
+% Written by Mo Chen ([email protected]).
+    D = bsxfun(@plus,full(dot(X,X,1)),full(dot(Y,Y,1))')-full(2*(Y'*X));
+    z = logSumExp(D/(-2*sigma^2),1)-0.5*log(2*pi)-log(sigma*size(Y,2));
+endfunction
diff --git a/chapter02/pdfMnLn.m b/chapter02/pdfMnLn.m
@@ -0,0 +1,11 @@
+function z = pdfMnLn (x, p)
+% Compute log pdf of a multinomial distribution.
+% Written by Mo Chen ([email protected]).    
+    if numel(x) ~= numel(p)
+        n = numel(x);
+        x = reshape(x,1,n);
+        [u,~,label] = unique(x);
+        x = full(sum(sparse(label,1:n,1,n,numel(u),n),2));
+    end
+    z = gammaln(sum(x)+1)-sum(gammaln(x+1))+dot(x,log(p));
+endfunction
diff --git a/chapter02/pdfMvGammaLn.m b/chapter02/pdfMvGammaLn.m
@@ -0,0 +1,10 @@
+function y = pdfMvGammaLn(x,d)
+% Compute logarithm multivariate Gamma function.
+% Gamma_p(x) = pi^(p(p-1)/4) prod_(j=1)^p Gamma(x+(1-j)/2)
+% log Gamma_p(x) = p(p-1)/4 log pi + sum_(j=1)^p log Gamma(x+(1-j)/2)
+% Written by Michael Chen ([email protected]).
+s = size(x);
+x = reshape(x,1,prod(s));
+x = bsxfun(@plus,repmat(x,d,1),(1-(1:d)')/2);
+y = d*(d-1)/4*log(pi)+sum(gammaln(x),1);
+y = reshape(y,s);
diff --git a/chapter02/pdfStLn.m b/chapter02/pdfStLn.m
@@ -0,0 +1,33 @@
+function y = pdfStLn(X, mu, sigma, v)
+% Compute log pdf of a student-t distribution.
+% Written by mo Chen ([email protected]).
+[d,k] = size(mu);
+
+if size(sigma,1)==d && size(sigma,2)==d && k==1
+    [R,p]= cholcov(sigma,0);
+    if p ~= 0
+        error('ERROR: sigma is not SPD.');
+    end
+    X = bsxfun(@minus,X,mu);
+    Q = R'\X;
+    q = dot(Q,Q,1);  % quadratic term (M distance)
+    o = -log(1+q/v)*((v+d)/2);
+    c = gammaln((v+d)/2)-gammaln(v/2)-(d*log(v*pi)+2*sum(log(diag(R))))/2;
+    y = c+o;
+elseif size(sigma,1)==d && size(sigma,2)==k
+    lambda = 1./sigma;
+    ml = mu.*lambda;
+    q = bsxfun(@plus,X'.^2*lambda-2*X'*ml,dot(mu,ml,1)); % M distance
+    o = bsxfun(@times,log(1+bsxfun(@times,q,1./v)),-(v+d)/2);
+    c = gammaln((v+d)/2)-gammaln(v/2)-(d*log(pi*v)+sum(log(sigma),1))/2;
+    y = bsxfun(@plus,o,c);
+elseif size(sigma,1)==1 && size(sigma,2)==k
+    X2 = repmat(dot(X,X,1)',1,k);
+    D = bsxfun(@plus,X2-2*X'*mu,dot(mu,mu,1));
+    q = bsxfun(@times,D,1./sigma);  % M distance
+    o = bsxfun(@times,log(1+bsxfun(@times,q,1./v)),-(v+d)/2);
+    c = gammaln((v+d)/2)-gammaln(v/2)-d*log(pi*v.*sigma)/2;
+    y = bsxfun(@plus,o,c);
+else
+    error('Parameters mismatched.');
+end
diff --git a/chapter02/pdfWishartLn.m b/chapter02/pdfWishartLn.m
@@ -0,0 +1,6 @@
+function y = pdfWishartLn(Sigma, v, W)
+% Compute log pdf of a Wishart distribution.
+% Written by Mo Chen ([email protected]).
+d = length(Sigma);
+B = -0.5*v*logdet(W)-0.5*v*d*log(2)-logmvgamma(0.5*v,d);
+y = B+0.5*(v-d-1)*logdet(Sigma)-0.5*trace(W\Sigma);
diff --git a/chapter02/pdflogVmfLn.m b/chapter02/pdflogVmfLn.m
@@ -0,0 +1,7 @@
+function y = pdflogVmfLn(X, mu, kappa)
+% Compute log pdf of a von Mises-Fisher distribution.
+% Written by Mo Chen ([email protected]).
+d = size(X,1);
+c = (d/2-1)*log(kappa)-(d/2)*log(2*pi)-logbesseli(d/2-1,kappa);
+q = bsxfun(@times,mu,kappa)'*X;
+y = bsxfun(@plus,q,c');
diff --git a/chapter03/demo.m b/chapter03/demo.m
@@ -0,0 +1,41 @@
+% Done
+% demo for chapter 03
+clear; close all;
+n = 100;
+beta = 1e-1;
+X = rand(1,n);
+w = randn;
+b = randn;
+t = w'*X+b+beta*randn(1,n);
+
+x = linspace(min(X)-1,max(X)+1,n);   % test data
+%%
+model = regress(X, t);
+y = linInfer(x, model);
+figure;
+hold on;
+plot(X,t,'o');
+plot(x,y,'r-');
+hold off
+%%
+[model,llh] = regressEbEm(X,t);
+[y, sigma] = linInfer(x,model,t);
+figure;
+hold on;
+plotBand(x,y,2*sigma);
+plot(X,t,'o');
+plot(x,y,'r-');
+hold off
+figure
+plot(llh);
+%%
+[model,llh] = regressEbFp(X,t);
+[y, sigma] = linInfer(x,model,t);
+figure;
+hold on;
+plotBand(x,y,2*sigma);
+plot(X,t,'o');
+plot(x,y,'r-');
+hold off
+figure
+plot(llh);
diff --git a/chapter03/linInfer.m b/chapter03/linInfer.m
@@ -0,0 +1,24 @@
+function [y, sigma, p] = linInfer(X, model, t)
+% Compute linear model reponse y = w'*x+b and likelihood
+% X: d x n data
+% t: 1 x n response
+w = model.w;
+b = model.b;
+y = w'*X+b;
+if nargout > 1
+    beta = model.beta;
+
+    if isfield(model,'V')   % V*V'=inv(S) 3.54
+        X = model.V'*bsxfun(@minus,X,model.xbar);
+        sigma = sqrt(1/beta+dot(X,X,1));   % 3.59
+    else
+        sigma = sqrt(1/beta);
+    end
+
+    if nargin == 3 && nargout == 3
+%         p = exp(logGaussPdf(t,y,sigma));
+        p = exp(-0.5*(((t-y)./sigma).^2+log(2*pi))-log(sigma));
+    end
+end
+
+
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		% TODO: 1) improve precision of logBesseli; 2) test cases for all functions