Skip to content

Commit

Permalink
INIT repo created and first commits
Browse files Browse the repository at this point in the history
  • Loading branch information
weilinear committed Sep 14, 2012
1 parent 57928a4 commit b0b59da
Show file tree
Hide file tree
Showing 68 changed files with 2,264 additions and 0 deletions.
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Pattern Recognition and Machine Learning
===========

License
-------
Currently Released Under GPLv3


Contact
-------
sth4nth(CHEN, Mo) [username] at gmail dot com

KuantKid(LI,Wei) [username] at gmail dot com
25 changes: 25 additions & 0 deletions chapter01/condEntropy.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
function z = condEntropy (x, y)
% Compute conditional entropy H(x|y) of two discrete variables x and y.
% Written by Mo Chen ([email protected]).
assert(numel(x) == numel(y));
n = numel(x);
x = reshape(x,1,n);
y = reshape(y,1,n);

l = min(min(x),min(y));
x = x-l+1;
y = y-l+1;
k = max(max(x),max(y));

idx = 1:n;
Mx = sparse(idx,x,1,n,k,n);
My = sparse(idx,y,1,n,k,n);
Pxy = nonzeros(Mx'*My/n); %joint distribution of x and y
Hxy = -dot(Pxy,log2(Pxy+eps));

Py = mean(My,1);
Hy = -dot(Py,log2(Py+eps));

% conditional entropy H(x|y)
z = Hxy-Hy;

8 changes: 8 additions & 0 deletions chapter01/entropy.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
function z = entropy(x)
% Compute entropy H(x) of a discrete variable x.
% Written by Mo Chen ([email protected]).
n = numel(x);
x = reshape(x,1,n);
[u,~,label] = unique(x);
p = full(mean(sparse(1:n,label,1,n,numel(u),n),1));
z = -dot(p,log2(p+eps));
17 changes: 17 additions & 0 deletions chapter01/jointEntropy.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
function z = jointEntropy(x, y)
% Compute joint entropy H(x,y) of two discrete variables x and y.
% Written by Mo Chen ([email protected]).
assert(numel(x) == numel(y));
n = numel(x);
x = reshape(x,1,n);
y = reshape(y,1,n);

l = min(min(x),min(y));
x = x-l+1;
y = y-l+1;
k = max(max(x),max(y));

idx = 1:n;
p = nonzeros(sparse(idx,x,1,n,k,n)'*sparse(idx,y,1,n,k,n)/n); %joint distribution of x and y

z = -dot(p,log2(p+eps));
28 changes: 28 additions & 0 deletions chapter01/mutInfo.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
function z = mutInfo(x, y)
% Compute mutual information I(x,y) of two discrete variables x and y.
% Written by Mo Chen ([email protected]).
assert(numel(x) == numel(y));
n = numel(x);
x = reshape(x,1,n);
y = reshape(y,1,n);

l = min(min(x),min(y));
x = x-l+1;
y = y-l+1;
k = max(max(x),max(y));

idx = 1:n;
Mx = sparse(idx,x,1,n,k,n);
My = sparse(idx,y,1,n,k,n);
Pxy = nonzeros(Mx'*My/n); %joint distribution of x and y
Hxy = -dot(Pxy,log2(Pxy+eps));

Px = mean(Mx,1);
Py = mean(My,1);

% entropy of Py and Px
Hx = -dot(Px,log2(Px+eps));
Hy = -dot(Py,log2(Py+eps));

% mutual information
z = Hx + Hy - Hxy;
31 changes: 31 additions & 0 deletions chapter01/nmi.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
function z = nmi(x, y)
% Compute nomalized mutual information I(x,y)/sqrt(H(x)*H(y)).
% Written by Michael Chen ([email protected]).
assert(numel(x) == numel(y));
n = numel(x);
x = reshape(x,1,n);
y = reshape(y,1,n);

l = min(min(x),min(y));
x = x-l+1;
y = y-l+1;
k = max(max(x),max(y));

idx = 1:n;
Mx = sparse(idx,x,1,n,k,n);
My = sparse(idx,y,1,n,k,n);
Pxy = nonzeros(Mx'*My/n); %joint distribution of x and y
Hxy = -dot(Pxy,log2(Pxy+eps));

Px = mean(Mx,1);
Py = mean(My,1);

% entropy of Py and Px
Hx = -dot(Px,log2(Px+eps));
Hy = -dot(Py,log2(Py+eps));

% mutual information
MI = Hx + Hy - Hxy;

% normalized mutual information
z = sqrt((MI/Hx)*(MI/Hy)) ;
28 changes: 28 additions & 0 deletions chapter01/nvi.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
function z = nvi(x, y)
% Compute nomalized variation information (1-I(x,y)/H(x,y)).
% Written by Michael Chen ([email protected]).
assert(numel(x) == numel(y));
n = numel(x);
x = reshape(x,1,n);
y = reshape(y,1,n);

l = min(min(x),min(y));
x = x-l+1;
y = y-l+1;
k = max(max(x),max(y));

idx = 1:n;
Mx = sparse(idx,x,1,n,k,n);
My = sparse(idx,y,1,n,k,n);
Pxy = nonzeros(Mx'*My/n); %joint distribution of x and y
Hxy = -dot(Pxy,log2(Pxy+eps));

Px = mean(Mx,1);
Py = mean(My,1);

% entropy of Py and Px
Hx = -dot(Px,log2(Px+eps));
Hy = -dot(Py,log2(Py+eps));

% nvi
z = 2-(Hx+Hy)/Hxy;
21 changes: 21 additions & 0 deletions chapter01/relatEntropy.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
function z = relatEntropy (x, y)
% Compute relative entropy (a.k.a KL divergence) KL(p(x)||p(y)) of two discrete variables x and y.
% Written by Mo Chen ([email protected]).
assert(numel(x) == numel(y));
n = numel(x);
x = reshape(x,1,n);
y = reshape(y,1,n);

l = min(min(x),min(y));
x = x-l+1;
y = y-l+1;
k = max(max(x),max(y));

idx = 1:n;
Mx = sparse(idx,x,1,n,k,n);
My = sparse(idx,y,1,n,k,n);
Px = mean(Mx,1);
Py = mean(My,1);

z = -dot(Px,log2(Py+eps)-log2(Px+eps));

1 change: 1 addition & 0 deletions chapter02/demo.m
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
% TODO: 1) improve precision of logBesseli; 2) test cases for all functions
13 changes: 13 additions & 0 deletions chapter02/pdfDirichletLn.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
function y = pdfDirichletLn(X, a)
% Compute log pdf of a Dirichlet distribution.
% X: d x n data matrix satifying (sum(X,1)==ones(1,n) && X>=0)
% a: d x k parameters
% y: k x n probability density
% Written by Mo Chen ([email protected]).
X = bsxfun(@times,X,1./sum(X,1));
if size(a,1) == 1
a = repmat(a,size(X,1),1);
end
c = gammaln(sum(a,1))-sum(gammaln(a),1);
g = (a-1)'*log(X);
y = bsxfun(@plus,g,c');
36 changes: 36 additions & 0 deletions chapter02/pdfGaussLn.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
function y = pdfGaussLn(X, mu, sigma)
% Compute log pdf of a Gaussian distribution.
% Written by Mo Chen ([email protected]).

[d,n] = size(X);
k = size(mu,2);
if n == k && size(sigma,1) == 1
X = bsxfun(@times,X-mu,1./sigma);
q = dot(X,X,1); % M distance
c = d*log(2*pi)+2*log(sigma); % normalization constant
y = -0.5*(c+q);
elseif size(sigma,1)==d && size(sigma,2)==d && k==1 % one mu and one dxd sigma
X = bsxfun(@minus,X,mu);
[R,p]= chol(sigma);
if p ~= 0
error('ERROR: sigma is not PD.');
end
Q = R'\X;
q = dot(Q,Q,1); % quadratic term (M distance)
c = d*log(2*pi)+2*sum(log(diag(R))); % normalization constant
y = -0.5*(c+q);
elseif size(sigma,1)==d && size(sigma,2)==k % k mu and k diagonal sigma
lambda = 1./sigma;
ml = mu.*lambda;
q = bsxfun(@plus,X'.^2*lambda-2*X'*ml,dot(mu,ml,1)); % M distance
c = d*log(2*pi)+2*sum(log(sigma),1); % normalization constant
y = -0.5*bsxfun(@plus,q,c);
elseif size(sigma,1)==1 && (size(sigma,2)==k || size(sigma,2)==1) % k mu and (k or one) scalar sigma
X2 = repmat(dot(X,X,1)',1,k);
D = bsxfun(@plus,X2-2*X'*mu,dot(mu,mu,1));
q = bsxfun(@times,D,1./sigma); % M distance
c = d*(log(2*pi)+2*log(sigma)); % normalization constant
y = -0.5*bsxfun(@plus,q,c);
else
error('Parameters mismatched.');
end
6 changes: 6 additions & 0 deletions chapter02/pdfKdeLn.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
function z = pdfKdeLn (X, Y, sigma)
% Compute log pdf of kernel density estimator.
% Written by Mo Chen ([email protected]).
D = bsxfun(@plus,full(dot(X,X,1)),full(dot(Y,Y,1))')-full(2*(Y'*X));
z = logSumExp(D/(-2*sigma^2),1)-0.5*log(2*pi)-log(sigma*size(Y,2));
endfunction
11 changes: 11 additions & 0 deletions chapter02/pdfMnLn.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
function z = pdfMnLn (x, p)
% Compute log pdf of a multinomial distribution.
% Written by Mo Chen ([email protected]).
if numel(x) ~= numel(p)
n = numel(x);
x = reshape(x,1,n);
[u,~,label] = unique(x);
x = full(sum(sparse(label,1:n,1,n,numel(u),n),2));
end
z = gammaln(sum(x)+1)-sum(gammaln(x+1))+dot(x,log(p));
endfunction
10 changes: 10 additions & 0 deletions chapter02/pdfMvGammaLn.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
function y = pdfMvGammaLn(x,d)
% Compute logarithm multivariate Gamma function.
% Gamma_p(x) = pi^(p(p-1)/4) prod_(j=1)^p Gamma(x+(1-j)/2)
% log Gamma_p(x) = p(p-1)/4 log pi + sum_(j=1)^p log Gamma(x+(1-j)/2)
% Written by Michael Chen ([email protected]).
s = size(x);
x = reshape(x,1,prod(s));
x = bsxfun(@plus,repmat(x,d,1),(1-(1:d)')/2);
y = d*(d-1)/4*log(pi)+sum(gammaln(x),1);
y = reshape(y,s);
33 changes: 33 additions & 0 deletions chapter02/pdfStLn.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
function y = pdfStLn(X, mu, sigma, v)
% Compute log pdf of a student-t distribution.
% Written by mo Chen ([email protected]).
[d,k] = size(mu);

if size(sigma,1)==d && size(sigma,2)==d && k==1
[R,p]= cholcov(sigma,0);
if p ~= 0
error('ERROR: sigma is not SPD.');
end
X = bsxfun(@minus,X,mu);
Q = R'\X;
q = dot(Q,Q,1); % quadratic term (M distance)
o = -log(1+q/v)*((v+d)/2);
c = gammaln((v+d)/2)-gammaln(v/2)-(d*log(v*pi)+2*sum(log(diag(R))))/2;
y = c+o;
elseif size(sigma,1)==d && size(sigma,2)==k
lambda = 1./sigma;
ml = mu.*lambda;
q = bsxfun(@plus,X'.^2*lambda-2*X'*ml,dot(mu,ml,1)); % M distance
o = bsxfun(@times,log(1+bsxfun(@times,q,1./v)),-(v+d)/2);
c = gammaln((v+d)/2)-gammaln(v/2)-(d*log(pi*v)+sum(log(sigma),1))/2;
y = bsxfun(@plus,o,c);
elseif size(sigma,1)==1 && size(sigma,2)==k
X2 = repmat(dot(X,X,1)',1,k);
D = bsxfun(@plus,X2-2*X'*mu,dot(mu,mu,1));
q = bsxfun(@times,D,1./sigma); % M distance
o = bsxfun(@times,log(1+bsxfun(@times,q,1./v)),-(v+d)/2);
c = gammaln((v+d)/2)-gammaln(v/2)-d*log(pi*v.*sigma)/2;
y = bsxfun(@plus,o,c);
else
error('Parameters mismatched.');
end
6 changes: 6 additions & 0 deletions chapter02/pdfWishartLn.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
function y = pdfWishartLn(Sigma, v, W)
% Compute log pdf of a Wishart distribution.
% Written by Mo Chen ([email protected]).
d = length(Sigma);
B = -0.5*v*logdet(W)-0.5*v*d*log(2)-logmvgamma(0.5*v,d);
y = B+0.5*(v-d-1)*logdet(Sigma)-0.5*trace(W\Sigma);
7 changes: 7 additions & 0 deletions chapter02/pdflogVmfLn.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
function y = pdflogVmfLn(X, mu, kappa)
% Compute log pdf of a von Mises-Fisher distribution.
% Written by Mo Chen ([email protected]).
d = size(X,1);
c = (d/2-1)*log(kappa)-(d/2)*log(2*pi)-logbesseli(d/2-1,kappa);
q = bsxfun(@times,mu,kappa)'*X;
y = bsxfun(@plus,q,c');
41 changes: 41 additions & 0 deletions chapter03/demo.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
% Done
% demo for chapter 03
clear; close all;
n = 100;
beta = 1e-1;
X = rand(1,n);
w = randn;
b = randn;
t = w'*X+b+beta*randn(1,n);

x = linspace(min(X)-1,max(X)+1,n); % test data
%%
model = regress(X, t);
y = linInfer(x, model);
figure;
hold on;
plot(X,t,'o');
plot(x,y,'r-');
hold off
%%
[model,llh] = regressEbEm(X,t);
[y, sigma] = linInfer(x,model,t);
figure;
hold on;
plotBand(x,y,2*sigma);
plot(X,t,'o');
plot(x,y,'r-');
hold off
figure
plot(llh);
%%
[model,llh] = regressEbFp(X,t);
[y, sigma] = linInfer(x,model,t);
figure;
hold on;
plotBand(x,y,2*sigma);
plot(X,t,'o');
plot(x,y,'r-');
hold off
figure
plot(llh);
24 changes: 24 additions & 0 deletions chapter03/linInfer.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
function [y, sigma, p] = linInfer(X, model, t)
% Compute linear model reponse y = w'*x+b and likelihood
% X: d x n data
% t: 1 x n response
w = model.w;
b = model.b;
y = w'*X+b;
if nargout > 1
beta = model.beta;

if isfield(model,'V') % V*V'=inv(S) 3.54
X = model.V'*bsxfun(@minus,X,model.xbar);
sigma = sqrt(1/beta+dot(X,X,1)); % 3.59
else
sigma = sqrt(1/beta);
end

if nargin == 3 && nargout == 3
% p = exp(logGaussPdf(t,y,sigma));
p = exp(-0.5*(((t-y)./sigma).^2+log(2*pi))-log(sigma));
end
end


Loading

0 comments on commit b0b59da

Please sign in to comment.