
Commit

Merge pull request PRML#31 from sth4nth/master
1.0 update
sth4nth committed Mar 7, 2016
2 parents 8bf9f60 + a5f7458 commit 1149629
Showing 35 changed files with 371 additions and 185 deletions.
5 changes: 2 additions & 3 deletions TODO.txt
@@ -1,5 +1,4 @@
TODO:
extract demos
ch08: BP
ch10: EP

chapter05: MLP
chapter08: BP, EP
51 changes: 15 additions & 36 deletions chapter02/logGauss.m
@@ -1,42 +1,21 @@
function y = logGauss(X, mu, sigma)
function y = logGauss(X, mu, Sigma)
% Compute log pdf of a Gaussian distribution.
% Input:
% X: d x n data matrix
% mu: mean of Gaussian
% sigma: variance of Gaussian
% mu: d x 1 mean vector of Gaussian
% Sigma: d x d covariance matrix of Gaussian
% Output:
% y: probability density in logarithm scale y=log p(x)
% y: 1 x n probability density in logarithm scale y=log p(x)
% Written by Mo Chen ([email protected]).

[d,n] = size(X);
k = size(mu,2);
if n == k && size(sigma,1) == 1
X = bsxfun(@times,X-mu,1./sigma);
q = dot(X,X,1); % M distance
c = d*log(2*pi)+2*log(sigma); % normalization constant
y = -0.5*(c+q);
elseif size(sigma,1)==d && size(sigma,2)==d && k==1 % one mu and one dxd sigma
X = bsxfun(@minus,X,mu);
[R,p]= chol(sigma);
if p ~= 0
error('ERROR: sigma is not PD.');
end
Q = R'\X;
q = dot(Q,Q,1); % quadratic term (M distance)
c = d*log(2*pi)+2*sum(log(diag(R))); % normalization constant
y = -0.5*(c+q);
elseif size(sigma,1)==d && size(sigma,2)==k % k mu and k diagonal sigma
lambda = 1./sigma;
ml = mu.*lambda;
q = bsxfun(@plus,X'.^2*lambda-2*X'*ml,dot(mu,ml,1)); % M distance
c = d*log(2*pi)+2*sum(log(sigma),1); % normalization constant
y = -0.5*bsxfun(@plus,q,c);
elseif size(sigma,1)==1 && (size(sigma,2)==k || size(sigma,2)==1) % k mu and (k or one) scalar sigma
X2 = repmat(dot(X,X,1)',1,k);
D = bsxfun(@plus,X2-2*X'*mu,dot(mu,mu,1));
q = bsxfun(@times,D,1./sigma); % M distance
c = d*(log(2*pi)+2*log(sigma)); % normalization constant
y = -0.5*bsxfun(@plus,q,c);
else
error('Parameters are mismatched.');
[d,k] = size(mu);
assert(all(size(Sigma)==d) && k==1) % one mu and one dxd Sigma
X = bsxfun(@minus,X,mu);
[R,p]= chol(Sigma);
if p ~= 0
error('ERROR: Sigma is not PD.');
end
Q = R'\X;
q = dot(Q,Q,1); % quadratic term (M distance)
c = d*log(2*pi)+2*sum(log(diag(R))); % normalization constant
y = -0.5*(c+q);
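A minimal usage sketch for the new logGauss signature (a d x 1 mean and a d x d covariance; the commented cross-check assumes the Statistics Toolbox's mvnpdf is available):
d = 2; n = 5;
X = randn(d, n);                         % d x n data
mu = zeros(d, 1);                        % d x 1 mean vector
Sigma = [2 0.3; 0.3 1];                  % d x d positive-definite covariance
y = logGauss(X, mu, Sigma);              % 1 x n log densities
% max(abs(y - log(mvnpdf(X', mu', Sigma))'))   % optional cross-check, ~0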

6 changes: 6 additions & 0 deletions chapter05/demo.m
@@ -0,0 +1,6 @@
clear; close all;
h = 4;
X = [0 0 1 1;0 1 0 1];
Y = [0 1 1 0];
[model,mse] = mlp(X,Y,h);
plot(mse);
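The commit does not include a prediction routine for the MLP; as a hedged sketch, a forward pass with the learned weights (the same update used inside mlp.m) checks the fit on the XOR data:
Z = X;
for l = 1:numel(model.W)
    Z = sigmoid(model.W{l}'*Z);          % forward step, as in mlp.m
end
round(Z)                                 % should approach Y = [0 1 1 0] once training has converged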
39 changes: 39 additions & 0 deletions chapter05/mlp.m
@@ -0,0 +1,39 @@
function [model, mse] = mlp(X, Y, h)
% Multilayer perceptron
% Input:
% X: d x n data matrix
% Y: p x n response matrix
% h: L x 1 vector specifying the number of hidden nodes in each hidden layer
% Output:
% model: model structure
% mse: mean square error per iteration
% Written by Mo Chen ([email protected]).
h = [size(X,1);h(:);size(Y,1)];
L = numel(h);
W = cell(L-1,1);
for l = 1:L-1
W{l} = randn(h(l),h(l+1));
end
Z = cell(L,1);
Z{1} = X;
eta = 1/size(X,2);
maxiter = 2000;
mse = zeros(1,maxiter);
for iter = 1:maxiter
% forward
for l = 2:L
Z{l} = sigmoid(W{l-1}'*Z{l-1});
end
% backward
E = Y-Z{L};
mse(iter) = mean(E(:).^2);
for l = L-1:-1:1
df = Z{l+1}.*(1-Z{l+1});
dG = df.*E;
dW = Z{l}*dG';
W{l} = W{l}+eta*dW;
E = W{l}*dG;
end
end
mse = mse(1:iter);
model.W = W;
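mlp.m calls a sigmoid helper that is not part of this diff; presumably the toolbox provides one elsewhere. A minimal stand-in, should it be missing:
function y = sigmoid(x)
% Logistic sigmoid, applied elementwise (stand-in only; not part of this commit).
y = 1./(1+exp(-x));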
2 changes: 1 addition & 1 deletion chapter06/demo.m
@@ -59,7 +59,7 @@

Y_kn = knPcaPred(model_kn,Xt);

[U,L,mu,err1] = pca(X,q);
[U,L,mu,mse] = pca(X,q);
Y_lin = U'*bsxfun(@minus,Xt,mu); % projection


7 changes: 5 additions & 2 deletions chapter06/knKmeans.m
@@ -16,17 +16,20 @@
label = ceil(k*rand(1,n));
elseif numel(init)==n
label = init;
k = max(label);
end
if nargin < 3
kn = @knGauss;
end
K = kn(X,X);
last = 0;
while any(label ~= last)
[u,~,label(:)] = unique(label); % remove empty clusters
k = numel(u);
E = sparse(label,1:n,1,k,n,n);
E = spdiags(1./sum(E,2),0,k,k)*E;
T = E*K;
last = label;
[val, label] = max(bsxfun(@minus,T,diag(T*E')/2),[],1);
% [val, label] = max(bsxfun(@minus,2*T,dot(T,E,2)),[],1);
end
energy = trace(K)-2*sum(val);
if nargout == 3
11 changes: 11 additions & 0 deletions chapter08/bpMaxSum.m
@@ -0,0 +1,11 @@
function [z, llh] = bpMaxSum(x, h, E)
% Max-sum loopy belief propagation on a factor graph over discrete random variables
% Input:
% x: 1 x n vector of n observations
% h: 1 x d vector of factors
% E: d x n edge matrix of a bipartite graph to represent the factor graph (d factors, n nodes)
% Output:
% z: 1 x n vector of predicted label
% llh: loglikelihood
% Written by Mo Chen ([email protected]).

10 changes: 10 additions & 0 deletions chapter08/bpSumProd.m
@@ -0,0 +1,10 @@
function [z, llh] = bpSumProd(x, h, E)
% Sum-product loopy belief propagation on a Markov random field with discrete random variables
% Input:
% x: 1 x n vector of n observations
% h: 1 x d vector of factors
% E: d x n edge matrix of a bipartite graph to represent the factor graph (d factors, n nodes)
% Output:
% z: 1 x n vector of predicted label
% llh: loglikelihood
% Written by Mo Chen ([email protected]).
31 changes: 31 additions & 0 deletions chapter08/demo.m
@@ -0,0 +1,31 @@
% demo for ch08

%% Naive Bayes with independent Gaussian
d = 2;
k = 3;
n = 1000;
[X, t] = kmeansRnd(d,k,n);
plotClass(X,t);

m = floor(n/2);
X1 = X(:,1:m);
X2 = X(:,(m+1):end);
t1 = t(1:m);
model = nbGauss(X1,t1);
y2 = nbGaussPred(model,X2);
plotClass(X2,y2);

%% Naive Bayes with independent Bernoulli
close all; clear;
d = 10;
k = 2;
n = 2000;
[X,t,mu] = mixBernRnd(d,k,n);
m = floor(n/2);
X1 = X(:,1:m);
X2 = X(:,(m+1):end);
t1 = t(1:m);
t2 = t((m+1):end);
model = nbBern(X1,t1);
y2 = nbBernPred(model,X2);
err = sum(t2~=y2)/numel(t2);
17 changes: 17 additions & 0 deletions chapter08/nbBern.m
@@ -0,0 +1,17 @@
function model = nbBern(X, t)
% Naive Bayes classifier with independent Bernoulli.
% Input:
% X: d x n data matrix
% t: 1 x n label (1~k)
% Output:
% model: trained model structure
% Written by Mo Chen ([email protected]).
k = max(t);
n = size(X,2);
E = sparse(t,1:n,1,k,n,n);
nk = full(sum(E,2));
w = nk/n;
mu = full(sparse(X)*E'*spdiags(1./nk,0,k,k));

model.mu = mu; % d x k means
model.w = w;
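Note that nbBernPred takes log(mu) and log(1-mu), which diverge if a feature is constant within a class. A hedged variant (not in this commit) would estimate mu with add-one (Laplace) smoothing:
nk1 = full(X*E');                                   % per-class counts of ones, d x k
mu_smooth = bsxfun(@rdivide, nk1 + 1, nk' + 2);     % add-one estimate, strictly inside (0,1)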
15 changes: 15 additions & 0 deletions chapter08/nbBernPred.m
@@ -0,0 +1,15 @@
function y = nbBernPred(model, X)
% Prediction of naive Bayes classifier with independent Bernoulli.
% input:
% model: trained model structure
% X: d x n data matrix
% output:
% y: 1 x n predicted class label
% Written by Mo Chen ([email protected]).
mu = model.mu;
w = model.w;
X = sparse(X);
R = log(mu)'*X+log(1-mu)'*(1-X);
R = bsxfun(@plus,R,log(w));
[~,y] = max(R,[],1);

21 changes: 21 additions & 0 deletions chapter08/nbGauss.m
@@ -0,0 +1,21 @@
function model = nbGauss(X, t)
% Naive Bayes classifier with independent Gaussian: each dimension of the data is
% assumed to follow a 1-d Gaussian distribution with its own mean and variance.
% Input:
% X: d x n data matrix
% t: 1 x n label (1~k)
% Output:
% model: trained model structure
% Written by Mo Chen ([email protected]).
n = size(X,2);
k = max(t);
E = sparse(t,1:n,1,k,n,n);
nk = full(sum(E,2));
w = nk/n;
R = E'*spdiags(1./nk,0,k,k);
mu = X*R;
var = X.^2*R-mu.^2;

model.mu = mu; % d x k means
model.var = var; % d x k variances
model.w = w;
20 changes: 20 additions & 0 deletions chapter08/nbGaussPred.m
@@ -0,0 +1,20 @@
function y = nbGaussPred(model, X)
% Prediction of naive Bayes classifier with independent Gaussian.
% input:
% model: trained model structure
% X: d x n data matrix
% output:
% y: 1 x n predicted class label
% Written by Mo Chen ([email protected]).
mu = model.mu;
var = model.var;
w = model.w;
assert(all(size(mu)==size(var)));
d = size(mu,1);

lambda = 1./var;
ml = mu.*lambda;
M = bsxfun(@plus,lambda'*X.^2-2*ml'*X,dot(mu,ml,1)'); % M distance
c = d*log(2*pi)+2*sum(log(var),1)'; % normalization constant
R = -0.5*bsxfun(@plus,M,c);
[~,y] = max(bsxfun(@times,exp(R),w),[],1);
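The final line exponentiates R, which can underflow to zero for large d; since only the argmax is needed, a hedged alternative is to stay in the log domain, as nbBernPred already does:
[~, y] = max(bsxfun(@plus, R, log(w)), [], 1);      % same maximizer as exp(R).*w, but no underflow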
27 changes: 21 additions & 6 deletions chapter09/demo.m
@@ -21,13 +21,19 @@
y = rvmBinPred(model,X)+1;
figure;
binPlot(model,X,y);
%% kmeans
% kmeans
close all; clear;
d = 2;
k = 3;
n = 500;
d = 20;
k = 6;
n = 5000;
[X,label] = kmeansRnd(d,k,n);
y = kmeans(X,k);
tic;
y = kmeans_(X,k);
toc
tic
y = kmeans(X',k);
toc
y = kmedoids(X,k);
plotClass(X,label);
figure;
plotClass(X,y);
@@ -67,4 +73,13 @@
figure;
plotClass(X,z);
figure;
plot(llh);
plot(llh);

%% Bernoulli Mixture via EM
close all; clear;
d = 2;
k = 3;
n = 5000;
[X,z,mu] = mixBernRnd(d,k,n);
[label,model,llh] = mixBernEm(X,k);
plot(llh);
7 changes: 4 additions & 3 deletions chapter09/kmeans.m
@@ -14,14 +14,15 @@
label = ceil(k*rand(1,n));
elseif numel(init)==n
label = init;
k = max(label);
end
last = 0;
while any(label ~= last)
[u,~,label(:)] = unique(label); % remove empty clusters
k = numel(u);
E = sparse(1:n,label,1,n,k,n); % transform label into indicator matrix
m = X*(E*spdiags(1./sum(E,1)',0,k,k)); % compute m of each cluster
m = X*(E*spdiags(1./sum(E,1)',0,k,k)); % compute centers
last = label;
[val,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); % assign samples to the nearest centers
[val,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); % assign labels
end
energy = dot(X(:),X(:))-2*sum(val);
model.means = m;
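The assignment line relies on the identity argmin_k ||x - m_k||^2 = argmax_k (m_k'*x - ||m_k||^2/2), valid because ||x||^2 does not depend on k. A quick numerical check (a sketch, not part of the commit):
x = randn(3,1); m = randn(3,5);
[~, k1] = min(sum(bsxfun(@minus, m, x).^2, 1));     % nearest center, computed directly
[~, k2] = max(x'*m - dot(m,m,1)/2);                 % identity used in kmeans.m
isequal(k1, k2)                                     % true (up to ties)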
8 changes: 4 additions & 4 deletions chapter09/kmeansRnd.m
@@ -1,4 +1,4 @@
function [X, z, center] = kmeansRnd(d, k, n)
function [X, z, mu] = kmeansRnd(d, k, n)
% Generate samples from a Gaussian mixture distribution with common variances (kmeans model).
% Input:
% d: dimension of data
@@ -7,7 +7,7 @@
% Output:
% X: d x n data matrix
% z: 1 x n response variable
% center: d x k centers of clusters
% mu: d x k centers of clusters
% Written by Mo Chen ([email protected]).
alpha = 1;
beta = nthroot(k,d); % in a volume of x^d there are k points: x^d=k
@@ -16,5 +16,5 @@
w = dirichletRnd(alpha,ones(1,k)/k);
z = discreteRnd(w,n);
E = full(sparse(z,1:n,1,k,n,n));
center = randn(d,k)*beta;
X = X+center*E;
mu = randn(d,k)*beta;
X = X+mu*E;
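A hedged usage sketch (plotClass is used the same way in the chapter08 and chapter09 demos in this commit):
[X, z, mu] = kmeansRnd(2, 3, 500);                  % 2-d data, 3 clusters, 500 samples
plotClass(X, z);                                    % color points by their true cluster z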
29 changes: 29 additions & 0 deletions chapter09/kmedoids.m
@@ -0,0 +1,29 @@
function [label, energy, index] = kmedoids(X, init)
% Perform k-medoids clustering.
% Input:
% X: d x n data matrix
% init: k number of clusters or label (1 x n vector)
% Output:
% label: 1 x n cluster label
% energy: optimization target value
% index: index of medoids
% Written by Mo Chen ([email protected]).
[d,n] = size(X);
if numel(init)==1
k = init;
label = ceil(k*rand(1,n));
elseif numel(init)==n
label = init;
end
X = bsxfun(@minus,X,mean(X,2)); % reduce chance of numerical problems
v = dot(X,X,1);
D = bsxfun(@plus,v,v')-2*(X'*X); % Euclidean distance matrix
D(sub2ind([n,n],1:n,1:n)) = 0; % reduce chance of numerical problems (D is n x n)
last = 0;
while any(label ~= last)
[u,~,label(:)] = unique(label); % remove empty clusters
[~, index] = min(D*sparse(1:n,label,1,n,numel(u),n),[],1); % find k medoids
last = label;
[val, label] = min(D(index,:),[],1); % assign labels
end
energy = sum(val);
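A hedged usage sketch mirroring the kmeans call in chapter09/demo.m; the third output marks which samples serve as medoids:
[X, z] = kmeansRnd(2, 3, 500);
[label, energy, index] = kmedoids(X, 3);
plotClass(X, label);
hold on; plot(X(1,index), X(2,index), 'k+', 'MarkerSize', 12); hold off;   % highlight the medoids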