From d971fd815b25bd1b481d641e7c624d106af30127 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Wed, 2 Dec 2015 15:36:58 +0800 Subject: [PATCH 001/149] add demo to chapter01 and clean up the code --- chapter01/condEntropy.m | 1 + chapter01/demo.m | 38 ++++++++++++++++++++++++++++++++++++++ chapter01/entropy.m | 1 + chapter01/jointEntropy.m | 1 + chapter01/mutInfo.m | 1 + chapter01/nmi.m | 3 ++- chapter01/nvi.m | 3 ++- chapter01/relatEntropy.m | 1 + helper/isequalf.m | 8 ++++++++ helper/plotBand.m | 1 + helper/rndKCluster.m | 2 +- helper/spread.m | 2 +- 12 files changed, 58 insertions(+), 4 deletions(-) create mode 100644 chapter01/demo.m create mode 100644 helper/isequalf.m diff --git a/chapter01/condEntropy.m b/chapter01/condEntropy.m index bc7d016..df8ec94 100644 --- a/chapter01/condEntropy.m +++ b/chapter01/condEntropy.m @@ -1,5 +1,6 @@ function z = condEntropy (x, y) % Compute conditional entropy H(x|y) of two discrete variables x and y. +% x, y: two vectors of integers of the same length % Written by Mo Chen (sth4nth@gmail.com). assert(numel(x) == numel(y)); n = numel(x); diff --git a/chapter01/demo.m b/chapter01/demo.m new file mode 100644 index 0000000..042afc8 --- /dev/null +++ b/chapter01/demo.m @@ -0,0 +1,38 @@ + +k = 10; % variable range +n = 100; % number of variables + +x = ceil(k*rand(1,n)); +y = ceil(k*rand(1,n)); + +% x = randi(k,1,n); % need statistics toolbox +% y = randi(k,1,n); + +%% entropy H(x), H(y) +Hx = entropy(x); +Hy = entropy(y); +%% joint entropy H(x,y) +Hxy = jointEntropy(x, y); +%% conditional entropy H(x|y) +Hx_y = condEntropy(x, y); +%% mutual information I(x,y) +Ixy = mutInfo(x, y); +%% relative entropy (KL divergence) KL(p(x)|p(y)) +Dxy = relatEntropy(x, y); +%% normalized mutual information I_n(x,y) +nIxy = nmi(x, y); +%% nomalized variation information I_v(x,y) +vIxy = nvi(x, y); +%% H(x|y) = H(x,y)-H(y) +isequalf(Hx_y, Hxy-Hy) +%% I(x,y) = H(x)-H(x|y) +isequalf(Ixy, Hx-Hx_y) +%% I(x,y) = H(x)+H(y)-H(x,y) +isequalf(Ixy, Hx+Hy-Hxy) +%% I_n(x,y) = I(x,y)/sqrt(H(x)*H(y)) +isequalf(nIxy, Ixy/sqrt(Hx*Hy)) +%% I_v(x,y) = (1-I(x,y)/H(x,y)) +isequalf(vIxy, 1-Ixy/Hxy) + + + diff --git a/chapter01/entropy.m b/chapter01/entropy.m index 4f63c2c..4ba1596 100644 --- a/chapter01/entropy.m +++ b/chapter01/entropy.m @@ -1,5 +1,6 @@ function z = entropy(x) % Compute entropy H(x) of a discrete variable x. +% x: a vectors of integers % Written by Mo Chen (sth4nth@gmail.com). n = numel(x); x = reshape(x,1,n); diff --git a/chapter01/jointEntropy.m b/chapter01/jointEntropy.m index cc3cd40..33d58f2 100644 --- a/chapter01/jointEntropy.m +++ b/chapter01/jointEntropy.m @@ -1,5 +1,6 @@ function z = jointEntropy(x, y) % Compute joint entropy H(x,y) of two discrete variables x and y. +% x, y: two vectors of integers of the same length % Written by Mo Chen (sth4nth@gmail.com). assert(numel(x) == numel(y)); n = numel(x); diff --git a/chapter01/mutInfo.m b/chapter01/mutInfo.m index ff6c5f8..d3d306d 100644 --- a/chapter01/mutInfo.m +++ b/chapter01/mutInfo.m @@ -1,5 +1,6 @@ function z = mutInfo(x, y) % Compute mutual information I(x,y) of two discrete variables x and y. +% x, y: two vectors of integers of the same length % Written by Mo Chen (sth4nth@gmail.com). assert(numel(x) == numel(y)); n = numel(x); diff --git a/chapter01/nmi.m b/chapter01/nmi.m index ee82942..580876e 100644 --- a/chapter01/nmi.m +++ b/chapter01/nmi.m @@ -1,6 +1,7 @@ function z = nmi(x, y) % Compute nomalized mutual information I(x,y)/sqrt(H(x)*H(y)). -% Written by Michael Chen (sth4nth@gmail.com). 
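
The identities that the new chapter01 demo checks all reduce to counting. A quick standalone sanity check, assuming chapter01/ and helper/ from this patch are on the MATLAB path:

    k = 10; n = 1000;
    x = ceil(k*rand(1,n));    % discrete variable with values in 1..k
    y = ceil(k*rand(1,n));
    % chain rule: H(x|y) = H(x,y) - H(y)
    isequalf(condEntropy(x,y), jointEntropy(x,y) - entropy(y))
    % I(x,y) = H(x) + H(y) - H(x,y)
    isequalf(mutInfo(x,y), entropy(x) + entropy(y) - jointEntropy(x,y))

Both checks should print ans = 1.
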
+% x, y: two vectors of integers of the same length
+% Written by Mo Chen (sth4nth@gmail.com).
 assert(numel(x) == numel(y));
 n = numel(x);
 x = reshape(x,1,n);
diff --git a/chapter01/nvi.m b/chapter01/nvi.m
index e3608b6..0da226b 100644
--- a/chapter01/nvi.m
+++ b/chapter01/nvi.m
@@ -1,6 +1,7 @@
 function z = nvi(x, y)
 % Compute nomalized variation information (1-I(x,y)/H(x,y)).
-% Written by Michael Chen (sth4nth@gmail.com).
+% x, y: two vectors of integers of the same length
+% Written by Mo Chen (sth4nth@gmail.com).
 assert(numel(x) == numel(y));
 n = numel(x);
 x = reshape(x,1,n);
diff --git a/chapter01/relatEntropy.m b/chapter01/relatEntropy.m
index 79c2fbd..64e8d60 100644
--- a/chapter01/relatEntropy.m
+++ b/chapter01/relatEntropy.m
@@ -1,5 +1,6 @@
 function z = relatEntropy (x, y)
 % Compute relative entropy (a.k.a KL divergence) KL(p(x)||p(y)) of two discrete variables x and y.
+% x, y: two vectors of integers of the same length
 % Written by Mo Chen (sth4nth@gmail.com).
 assert(numel(x) == numel(y));
 n = numel(x);
diff --git a/helper/isequalf.m b/helper/isequalf.m
new file mode 100644
index 0000000..4e4192f
--- /dev/null
+++ b/helper/isequalf.m
@@ -0,0 +1,8 @@
+function z = isequalf(x, y, tol)
+% Determine whether two float number x and y are equal up to precision tol
+% Written by Mo Chen (sth4nth@gmail.com).
+if nargin < 3
+    tol = 1e-8;
+end
+z = abs(x-y) < tol;

From: Mo Chen <sth4nth@gmail.com>
Date: Wed, 2 Dec 2015 16:40:07 +0800
Subject: [PATCH 002/149] fix demo in chapter03

---
 chapter01/demo.m     | 26 ++++++++++++++------------
 chapter03/demo.m     | 25 +++++++++++++++----------
 chapter03/linRnd.asv | 19 +++++++++++++++++++
 chapter03/linRnd.m   | 20 ++++++++++++++++++++
 4 files changed, 68 insertions(+), 22 deletions(-)
 create mode 100644 chapter03/linRnd.asv
 create mode 100644 chapter03/linRnd.m

diff --git a/chapter01/demo.m b/chapter01/demo.m
index 042afc8..5bfbc4f 100644
--- a/chapter01/demo.m
+++ b/chapter01/demo.m
@@ -1,4 +1,6 @@
-
+% Done
+% demo for information theory toolbox
+clear;
 k = 10; % variable range
 n = 100; % number of variables
 
@@ -12,27 +14,27 @@
 Hx = entropy(x);
 Hy = entropy(y);
 %% joint entropy H(x,y)
-Hxy = jointEntropy(x, y);
+Hxy = jointEntropy(x,y);
 %% conditional entropy H(x|y)
-Hx_y = condEntropy(x, y);
+Hx_y = condEntropy(x,y);
 %% mutual information I(x,y)
-Ixy = mutInfo(x, y);
+Ixy = mutInfo(x,y);
 %% relative entropy (KL divergence) KL(p(x)|p(y))
-Dxy = relatEntropy(x, y);
+Dxy = relatEntropy(x,y);
 %% normalized mutual information I_n(x,y)
-nIxy = nmi(x, y);
+nIxy = nmi(x,y);
 %% nomalized variation information I_v(x,y)
-vIxy = nvi(x, y);
+vIxy = nvi(x,y);
 %% H(x|y) = H(x,y)-H(y)
-isequalf(Hx_y, Hxy-Hy)
+isequalf(Hx_y,Hxy-Hy)
 %% I(x,y) = H(x)-H(x|y)
-isequalf(Ixy, Hx-Hx_y)
+isequalf(Ixy,Hx-Hx_y)
 %% I(x,y) = H(x)+H(y)-H(x,y)
-isequalf(Ixy, Hx+Hy-Hxy)
+isequalf(Ixy,Hx+Hy-Hxy)
 %% I_n(x,y) = I(x,y)/sqrt(H(x)*H(y))
-isequalf(nIxy, Ixy/sqrt(Hx*Hy))
+isequalf(nIxy,Ixy/sqrt(Hx*Hy))
 %% I_v(x,y) = (1-I(x,y)/H(x,y))
-isequalf(vIxy, 1-Ixy/Hxy)
 
 
+isequalf(vIxy,1-Ixy/Hxy)
diff --git a/chapter03/demo.m b/chapter03/demo.m
index 2d0182f..bfe905d 100644
--- a/chapter03/demo.m
+++ b/chapter03/demo.m
@@ -1,15 +1,20 @@
 % Done
 % demo for chapter 03
 clear; close all;
-n = 100;
-beta = 1e-1;
-X = rand(1,n);
-w = randn;
-b = randn;
-t = w'*X+b+beta*randn(1,n);
-x = linspace(min(X)-1,max(X)+1,n); % test data
+% n = 100;
+% beta = 1e-1;
+% X = rand(1,n);
+% w = randn;
+% b = randn;
+% t = w'*X+b+beta*randn(1,n);
+% x = linspace(min(X)-1,max(X)+1,n); % test data
+
+d = 1;
+n = 200;
+[X,t,model] = linRnd(d,n);
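
Why a fuzzy comparison helper at all: the quantities compared in demo.m are accumulated in floating point, so exact == tests would fail sporadically. A minimal illustration, using only the isequalf introduced above:

    0.1 + 0.2 == 0.3            % false: 0.1 and 0.2 have no exact binary representation
    isequalf(0.1 + 0.2, 0.3)    % true: |x-y| < 1e-8 with the default tolerance
    isequalf(1, 1.1, 0.2)       % true: the tolerance can be widened per call
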
+plotBand(X,t,2*model.sigma); %% -model = regress(X, t); +model = linReg(X,t); y = linInfer(x, model); figure; hold on; @@ -18,7 +23,7 @@ hold off pause %% -[model,llh] = regressEbEm(X,t); +[model,llh] = linRegEbEm(X,t); [y, sigma] = linInfer(x,model,t); figure; hold on; @@ -30,7 +35,7 @@ plot(llh); pause %% -[model,llh] = regressEbFp(X,t); +[model,llh] = linRegEbFp(X,t); [y, sigma] = linInfer(x,model,t); figure; hold on; diff --git a/chapter03/linRnd.asv b/chapter03/linRnd.asv new file mode 100644 index 0000000..7f10a73 --- /dev/null +++ b/chapter03/linRnd.asv @@ -0,0 +1,19 @@ +function [X, t, model] = linRnd(d, n, prior) +% Generate a data from a linear model p(t|w,x)=G(w'x+w0,sigma), where w and w0 are +% generated from G(0, +% d: dimension of data +% n: number of data +% prior: a structure specify the prior + +if nargin < 3 + sigma = 1; +else + sigma = prior.sigma; +end +X = rand(d,n); +w = randn(d,1); +w0 = randn(1,1); +epsilon = sigma^2*randn(1,n); +t = w'*X+w0+ +model.w = w; +model.w0 = w0; \ No newline at end of file diff --git a/chapter03/linRnd.m b/chapter03/linRnd.m new file mode 100644 index 0000000..4540677 --- /dev/null +++ b/chapter03/linRnd.m @@ -0,0 +1,20 @@ +function [X, t, model] = linRnd(d, n, prior) +% Generate a data from a linear model p(t|w,x)=G(w'x+w0,sigma), where w and w0 are +% generated from G(0,1) +% d: dimension of data +% n: number of data +% prior: a structure specify the prior + +if nargin < 3 + sigma = 1; +else + sigma = prior.sigma; +end +X = rand(d,n); +w = randn(d,1); +w0 = randn(1,1); +epsilon = sigma^2*randn(1,n); +t = w'*X+w0+epsilon; +model.w = w; +model.w0 = w0; +model.sigma = sigma; \ No newline at end of file From 9be8ea7c3be03d266149aca9d33d06e40247f519 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Thu, 3 Dec 2015 15:49:42 +0800 Subject: [PATCH 003/149] add comment --- chapter03/linReg.m | 2 +- chapter03/linRegEbEm.m | 12 ++++++------ chapter03/linRegEbFp.m | 15 ++++++++------- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/chapter03/linReg.m b/chapter03/linReg.m index f282d58..9352395 100644 --- a/chapter03/linReg.m +++ b/chapter03/linReg.m @@ -1,5 +1,5 @@ function model = linReg(X, t, lambda) -% Fit linear regression model t=w'x+w0 +% Fit linear regression model y=w'x+w0 % X: d x n data % t: 1 x n response % Written by Mo Chen (sth4nth@gmail.com). diff --git a/chapter03/linRegEbEm.m b/chapter03/linRegEbEm.m index 690feb4..ab3e22a 100644 --- a/chapter03/linRegEbEm.m +++ b/chapter03/linRegEbEm.m @@ -1,5 +1,5 @@ function [model, llh] = linRegEbEm(X, t, alpha, beta) -% Fit empirical Bayesian linear model with EM +% Fit empirical Bayesian linear model with EM (p.448 chapter 9.3.4) % X: d x n data % t: 1 x n response % Written by Mo Chen (sth4nth@gmail.com). 
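
Taken together with linRnd from the previous patch, the intended round trip for these empirical Bayes fitters is short. A sketch following chapter03/demo.m (linPred is introduced a couple of patches later in this series; earlier revisions call it linInfer):

    d = 1; n = 200;
    [X, t] = linRnd(d, n);            % synthetic data from a random linear-Gaussian model
    [model, llh] = linRegEbEm(X, t);  % alpha and beta re-estimated by EM
    plot(llh, '-o');                  % the log evidence should increase at every iteration
    [y, sigma] = linPred(model, X);   % predictive mean and standard deviation
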
@@ -25,23 +25,23 @@ llh = -inf(1,maxiter+1); for iter = 2:maxiter A = beta*C; - A(dg) = A(dg)+alpha; + A(dg) = A(dg)+alpha; % 3.81 U = chol(A); V = U\I; - w = beta*(V*(V'*Xt)); + w = beta*(V*(V'*Xt)); % 3.84 w2 = dot(w,w); err = sum((t-w'*X).^2); logdetA = 2*sum(log(diag(U))); - llh(iter) = 0.5*(d*log(alpha)+n*log(beta)-alpha*w2-beta*err-logdetA-n*log(2*pi)); + llh(iter) = 0.5*(d*log(alpha)+n*log(beta)-alpha*w2-beta*err-logdetA-n*log(2*pi)); % 3.86 if abs(llh(iter)-llh(iter-1)) < tol*abs(llh(iter-1)); break; end trS = dot(V(:),V(:)); alpha = d/(w2+trS); % 9.63 - gamma = d-alpha*trS; - beta = n/(err+gamma/beta); + gamma = d-alpha*trS; % 9.64 + beta = n/(err+gamma/beta); % 9.68 end w0 = tbar-dot(w,xbar); diff --git a/chapter03/linRegEbFp.m b/chapter03/linRegEbFp.m index 038f03f..f2bc86e 100644 --- a/chapter03/linRegEbFp.m +++ b/chapter03/linRegEbFp.m @@ -1,5 +1,6 @@ function [model, llh] = linRegEbFp(X, t, alpha, beta) % Fit empirical Bayesian linear model with Mackay fixed point method +% (p.168) % X: d x n data % t: 1 x n response % Written by Mo Chen (sth4nth@gmail.com). @@ -25,22 +26,22 @@ llh = -inf(1,maxiter+1); for iter = 2:maxiter A = beta*C; - A(dg) = A(dg)+alpha; + A(dg) = A(dg)+alpha; % 3.81 U = chol(A); V = U\I; - w = beta*(V*(V'*Xt)); + w = beta*(V*(V'*Xt)); % 3.84 w2 = dot(w,w); - err = sum((t-w'*X).^2); + err = sum((t-w'*X).^2); logdetA = 2*sum(log(diag(U))); - llh(iter) = 0.5*(d*log(alpha)+n*log(beta)-alpha*w2-beta*err-logdetA-n*log(2*pi)); + llh(iter) = 0.5*(d*log(alpha)+n*log(beta)-alpha*w2-beta*err-logdetA-n*log(2*pi)); % 3.86 if abs(llh(iter)-llh(iter-1)) < tol*abs(llh(iter-1)); break; end trS = dot(V(:),V(:)); - gamma = d-alpha*trS; - alpha = gamma/w2; - beta = (n-gamma)/err; + gamma = d-alpha*trS; % 3.91 + alpha = gamma/w2; % 3.92 + beta = (n-gamma)/err; % 3.95 end w0 = tbar-dot(w,xbar); From 8e03be875ca5364eb3b10057d589028706b850d7 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Thu, 3 Dec 2015 15:51:02 +0800 Subject: [PATCH 004/149] add plotting and data generating functions --- chapter03/linInfer.m | 22 ---------------------- chapter03/linPlot.m | 7 +++++++ chapter03/linPredict.m | 20 ++++++++++++++++++++ chapter03/linRnd.asv | 19 ------------------- 4 files changed, 27 insertions(+), 41 deletions(-) delete mode 100644 chapter03/linInfer.m create mode 100644 chapter03/linPlot.m create mode 100644 chapter03/linPredict.m delete mode 100644 chapter03/linRnd.asv diff --git a/chapter03/linInfer.m b/chapter03/linInfer.m deleted file mode 100644 index d355cfe..0000000 --- a/chapter03/linInfer.m +++ /dev/null @@ -1,22 +0,0 @@ -function [y, sigma, p] = linInfer(X, model, t) -% Compute linear model reponse y = w'*x+b and likelihood -% X: d x n data -% t: 1 x n response -% Written by Mo Chen (sth4nth@gmail.com). 
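
For reference, the quantity accumulated in llh by both empirical Bayes routines is PRML's evidence function; the code line tagged % 3.86 is, in the book's notation (M = d weights, N = n points, A from the line tagged % 3.81),

    \ln p(\mathbf{t}\mid\alpha,\beta) = \tfrac{M}{2}\ln\alpha + \tfrac{N}{2}\ln\beta - E(\mathbf{m}_N) - \tfrac{1}{2}\ln|\mathbf{A}| - \tfrac{N}{2}\ln(2\pi)

with E(m_N) = (beta/2)*err + (alpha/2)*w2 in the variables of the code.
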
-w = model.w; -b = model.w0; -y = w'*X+b; -if nargout > 1 - beta = model.beta; - if isfield(model,'V') % V*V'=inv(S) 3.54 - X = model.V'*bsxfun(@minus,X,model.xbar); - sigma = sqrt(1/beta+dot(X,X,1)); % 3.59 - else - sigma = sqrt(1/beta); - end - if nargin == 3 && nargout == 3 - p = exp(-0.5*(((t-y)./sigma).^2+log(2*pi))-log(sigma)); - end -end - - diff --git a/chapter03/linPlot.m b/chapter03/linPlot.m new file mode 100644 index 0000000..8f8cf8d --- /dev/null +++ b/chapter03/linPlot.m @@ -0,0 +1,7 @@ +function linPlot(model, range) +n = 100; +x = linspace(range(1),range(2),n); +y = lin; +plot(x,y,'r-'); +end + diff --git a/chapter03/linPredict.m b/chapter03/linPredict.m new file mode 100644 index 0000000..61b0655 --- /dev/null +++ b/chapter03/linPredict.m @@ -0,0 +1,20 @@ +function [y, sigma, p] = linPredict(model, X, t) +% Compute linear model reponse y = w'*X+w0 and likelihood +% model: trained model structure +% X: d x n testing data +% t (optional): 1 x n testing response +% Written by Mo Chen (sth4nth@gmail.com). +w = model.w; +w0 = model.w0; +y = w'*X+w0; +if nargout == 3 + beta = model.beta; + if isfield(model,'V') % V*V'=inv(S) 3.54 + U = model.V'*bsxfun(@minus,X,model.xbar); + sigma = sqrt(1/beta+dot(U,U,1)); % 3.59 + else + sigma = sqrt(1/beta); + end + p = exp(logGauss(t,y,sigma)); +% p2 = exp(-0.5*(((t-y)./sigma).^2+log(2*pi))-log(sigma)); +end \ No newline at end of file diff --git a/chapter03/linRnd.asv b/chapter03/linRnd.asv deleted file mode 100644 index 7f10a73..0000000 --- a/chapter03/linRnd.asv +++ /dev/null @@ -1,19 +0,0 @@ -function [X, t, model] = linRnd(d, n, prior) -% Generate a data from a linear model p(t|w,x)=G(w'x+w0,sigma), where w and w0 are -% generated from G(0, -% d: dimension of data -% n: number of data -% prior: a structure specify the prior - -if nargin < 3 - sigma = 1; -else - sigma = prior.sigma; -end -X = rand(d,n); -w = randn(d,1); -w0 = randn(1,1); -epsilon = sigma^2*randn(1,n); -t = w'*X+w0+ -model.w = w; -model.w0 = w0; \ No newline at end of file From ed9e167c03727b440e41a705ae94791a9346005c Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Thu, 3 Dec 2015 18:12:21 +0800 Subject: [PATCH 005/149] linPlot and linRnd are not finished. 
linPred needs more test --- chapter03/demo.m | 29 ++++++++-------- chapter03/demo.m~ | 48 +++++++++++++++++++++++++++ chapter03/linPlot.m | 16 ++++++--- chapter03/linPlot.m~ | 19 +++++++++++ chapter03/{linPredict.m => linPred.m} | 8 ++--- chapter03/linRnd.m | 21 +++++------- chapter03/test.m | 4 +++ 7 files changed, 110 insertions(+), 35 deletions(-) create mode 100644 chapter03/demo.m~ create mode 100644 chapter03/linPlot.m~ rename chapter03/{linPredict.m => linPred.m} (76%) create mode 100644 chapter03/test.m diff --git a/chapter03/demo.m b/chapter03/demo.m index bfe905d..72efe2b 100644 --- a/chapter03/demo.m +++ b/chapter03/demo.m @@ -1,21 +1,21 @@ % Done % demo for chapter 03 clear; close all; -% n = 100; -% beta = 1e-1; -% X = rand(1,n); -% w = randn; -% b = randn; -% t = w'*X+b+beta*randn(1,n); -% x = linspace(min(X)-1,max(X)+1,n); % test data +n = 100; +beta = 1e-1; +X = rand(1,n); +w = randn; +b = randn; +t = w'*X+b+beta*randn(1,n); +x = linspace(min(X)-1,max(X)+1,n); % test data -d = 1; -n = 200; -[X,t,model] = linRnd(d,n); -plotBand(X,t,2*model.sigma); +% d = 1; +% n = 200; +% [X,t,model] = linRnd(d,n); +% plotBand(X,t,2*model.sigma); %% model = linReg(X,t); -y = linInfer(x, model); +y = linPred(model,x); figure; hold on; plot(X,t,'o'); @@ -24,7 +24,7 @@ pause %% [model,llh] = linRegEbEm(X,t); -[y, sigma] = linInfer(x,model,t); +[y, sigma] = linPred(model,x,t); figure; hold on; plotBand(x,y,2*sigma); @@ -33,10 +33,11 @@ hold off figure plot(llh); +linPlot(model,x,t) pause %% [model,llh] = linRegEbFp(X,t); -[y, sigma] = linInfer(x,model,t); +[y, sigma] = linPred(model,x,t); figure; hold on; plotBand(x,y,2*sigma); diff --git a/chapter03/demo.m~ b/chapter03/demo.m~ new file mode 100644 index 0000000..bb2f733 --- /dev/null +++ b/chapter03/demo.m~ @@ -0,0 +1,48 @@ +% Done +% demo for chapter 03 +clear; close all; +n = 100; +beta = 1e-1; +X = rand(1,n); +w = randn; +b = randn; +t = w'*X+b+beta*randn(1,n); +x = linspace(min(X)-1,max(X)+1,n); % test data + +% d = 1; +% n = 200; +% [X,t,model] = linRnd(d,n); +% plotBand(X,t,2*model.sigma); +%% +model = linReg(X,t); +y = linPred(model,x); +figure; +hold on; +plot(X,t,'o'); +plot(x,y,'r-'); +hold off +pause +%% +[model,llh] = linRegEbEm(X,t); + +[y, sigma] = linPred(model,x,t); +figure; +hold on; +plotBand(x,y,2*sigma); +plot(X,t,'o'); +plot(x,y,'r-'); +hold off +figure +plot(llh); +pause +%% +[model,llh] = linRegEbFp(X,t); +[y, sigma] = linPred(model,x,t); +figure; +hold on; +plotBand(x,y,2*sigma); +plot(X,t,'o'); +plot(x,y,'r-'); +hold off +figure +plot(llh); \ No newline at end of file diff --git a/chapter03/linPlot.m b/chapter03/linPlot.m index 8f8cf8d..8f7f195 100644 --- a/chapter03/linPlot.m +++ b/chapter03/linPlot.m @@ -1,7 +1,13 @@ -function linPlot(model, range) -n = 100; -x = linspace(range(1),range(2),n); -y = lin; +function linPlot(model, x, t) +color = [255,228,225]/255; %pink +[y,sigma] = linPred(model,x,t); +h = 2*sigma; + +figure; +hold on; +x = x(:); +y = y(:); +fill([x;flipud(x)],[y+h;flipud(y-h)],color); plot(x,y,'r-'); -end +hold off diff --git a/chapter03/linPlot.m~ b/chapter03/linPlot.m~ new file mode 100644 index 0000000..ee56703 --- /dev/null +++ b/chapter03/linPlot.m~ @@ -0,0 +1,19 @@ +function linPlot(model, x, t) +color = [255,228,225]/255; %pink + +x = x(:); +y = y(:); +h = h(:); + +n = length(x); +xl = min(x); +xu = max(x); + +[y,sigma,p] = linPred(model,x,t); +h = 2*sigma; + + +plot(x,y,'r-'); +fill([x;flipud(x)],[y+h;flipud(y-h)],color); +end + diff --git a/chapter03/linPredict.m b/chapter03/linPred.m 
similarity index 76% rename from chapter03/linPredict.m rename to chapter03/linPred.m index 61b0655..1789ac1 100644 --- a/chapter03/linPredict.m +++ b/chapter03/linPred.m @@ -1,4 +1,4 @@ -function [y, sigma, p] = linPredict(model, X, t) +function [y, sigma, p] = linPred(model, X, t) % Compute linear model reponse y = w'*X+w0 and likelihood % model: trained model structure % X: d x n testing data @@ -7,7 +7,7 @@ w = model.w; w0 = model.w0; y = w'*X+w0; -if nargout == 3 +if nargin == 3 beta = model.beta; if isfield(model,'V') % V*V'=inv(S) 3.54 U = model.V'*bsxfun(@minus,X,model.xbar); @@ -16,5 +16,5 @@ sigma = sqrt(1/beta); end p = exp(logGauss(t,y,sigma)); -% p2 = exp(-0.5*(((t-y)./sigma).^2+log(2*pi))-log(sigma)); -end \ No newline at end of file +% p = exp(-0.5*(((t-y)./sigma).^2+log(2*pi))-log(sigma)); +end diff --git a/chapter03/linRnd.m b/chapter03/linRnd.m index 4540677..b5999eb 100644 --- a/chapter03/linRnd.m +++ b/chapter03/linRnd.m @@ -1,20 +1,17 @@ -function [X, t, model] = linRnd(d, n, prior) -% Generate a data from a linear model p(t|w,x)=G(w'x+w0,sigma), where w and w0 are -% generated from G(0,1) +function [X, t, model] = linRnd(d, n) +% Generate a data from a linear model p(t|w,x)=G(w'x+w0,sigma), sigma=sqrt(1/beta) +% where w and w0 are generated from Gauss(0,1), +% beta is generated from Gamma(1,1), +% X is generated form [0,1] % d: dimension of data % n: number of data -% prior: a structure specify the prior - -if nargin < 3 - sigma = 1; -else - sigma = prior.sigma; -end +beta = gamrnd(1,1); % need statistcs toolbox X = rand(d,n); w = randn(d,1); w0 = randn(1,1); -epsilon = sigma^2*randn(1,n); +epsilon = randn(1,n)/beta; t = w'*X+w0+epsilon; + model.w = w; model.w0 = w0; -model.sigma = sigma; \ No newline at end of file +model.beta = beta; \ No newline at end of file diff --git a/chapter03/test.m b/chapter03/test.m new file mode 100644 index 0000000..af3b2b0 --- /dev/null +++ b/chapter03/test.m @@ -0,0 +1,4 @@ +d = 1; +n = 200; +[X, t, model] = linRnd(d, n); +linPlot(model, X, t); From 8768953b1c192763a215a7560c854854f766827a Mon Sep 17 00:00:00 2001 From: sth4nth Date: Fri, 4 Dec 2015 00:55:43 +0800 Subject: [PATCH 006/149] fix chapter03 --- README.md | 7 +++++-- chapter01/nmi.m | 5 ++++- chapter01/nvi.m | 2 +- chapter03/demo.m | 47 +++++++++++-------------------------------- chapter03/demo.m~ | 48 -------------------------------------------- chapter03/linPlot.m | 15 ++++++++------ chapter03/linPlot.m~ | 19 ------------------ chapter03/linPred.m | 8 +++++--- chapter03/linReg.m | 3 ++- chapter03/linRnd.m | 6 +++--- chapter03/test.m | 4 ---- helper/plotBand.m | 2 +- 12 files changed, 42 insertions(+), 124 deletions(-) delete mode 100644 chapter03/demo.m~ delete mode 100644 chapter03/linPlot.m~ delete mode 100644 chapter03/test.m diff --git a/README.md b/README.md index 5dd3654..b416d95 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,10 @@ Pattern Recognition and Machine Learning =========== +This package contains the matlab implementation of the algorithms described in the book: +Pattern Recognition and Machine Learning by C. 
Bishop (http://research.microsoft.com/en-us/um/people/cmbishop/prml/) + + License ------- Currently Released Under GPLv3 @@ -8,6 +12,5 @@ Currently Released Under GPLv3 Contact ------- -sth4nth(CHEN, Mo) [username] at gmail dot com +sth4nth at gmail dot com -KuantKid(LI,Wei) [username] at gmail dot com diff --git a/chapter01/nmi.m b/chapter01/nmi.m index 580876e..45f6538 100644 --- a/chapter01/nmi.m +++ b/chapter01/nmi.m @@ -1,5 +1,5 @@ function z = nmi(x, y) -% Compute nomalized mutual information I(x,y)/sqrt(H(x)*H(y)). +% Compute normalized mutual information I(x,y)/sqrt(H(x)*H(y)). % x, y: two vectors of integers of the same length % Written by Mo Chen (sth4nth@gmail.com). assert(numel(x) == numel(y)); @@ -18,6 +18,8 @@ Pxy = nonzeros(Mx'*My/n); %joint distribution of x and y Hxy = -dot(Pxy,log2(Pxy)); + +% hacking, to elimative the 0log0 issue Px = nonzeros(mean(Mx,1)); Py = nonzeros(mean(My,1)); @@ -31,3 +33,4 @@ % normalized mutual information z = sqrt((MI/Hx)*(MI/Hy)); z = max(0,z); + diff --git a/chapter01/nvi.m b/chapter01/nvi.m index 0da226b..5bfc89b 100644 --- a/chapter01/nvi.m +++ b/chapter01/nvi.m @@ -1,5 +1,5 @@ function z = nvi(x, y) -% Compute nomalized variation information (1-I(x,y)/H(x,y)). +% Compute normalized variation information (1-I(x,y)/H(x,y)). % x, y: two vectors of integers of the same length % Written by Mo Chen (sth4nth@gmail.com). assert(numel(x) == numel(y)); diff --git a/chapter03/demo.m b/chapter03/demo.m index 72efe2b..8c7587e 100644 --- a/chapter03/demo.m +++ b/chapter03/demo.m @@ -1,48 +1,25 @@ % Done % demo for chapter 03 clear; close all; -n = 100; -beta = 1e-1; -X = rand(1,n); -w = randn; -b = randn; -t = w'*X+b+beta*randn(1,n); -x = linspace(min(X)-1,max(X)+1,n); % test data - -% d = 1; -% n = 200; -% [X,t,model] = linRnd(d,n); -% plotBand(X,t,2*model.sigma); +d = 1; +n = 200; +[x,t,model] = linRnd(d,n); +linPlot(model,x,t); %% -model = linReg(X,t); -y = linPred(model,x); -figure; -hold on; -plot(X,t,'o'); -plot(x,y,'r-'); -hold off +model = linReg(x,t); +linPlot(model,x,t); +fprintf('Press any key to continue. \n'); pause %% -[model,llh] = linRegEbEm(X,t); -[y, sigma] = linPred(model,x,t); +[model,llh] = linRegEbEm(x,t); +linPlot(model,x,t); figure; -hold on; -plotBand(x,y,2*sigma); -plot(X,t,'o'); -plot(x,y,'r-'); -hold off -figure plot(llh); -linPlot(model,x,t) +fprintf('Press any key to continue. 
\n'); pause %% -[model,llh] = linRegEbFp(X,t); +[model,llh] = linRegEbFp(x,t); [y, sigma] = linPred(model,x,t); +linPlot(model,x,t); figure; -hold on; -plotBand(x,y,2*sigma); -plot(X,t,'o'); -plot(x,y,'r-'); -hold off -figure plot(llh); \ No newline at end of file diff --git a/chapter03/demo.m~ b/chapter03/demo.m~ deleted file mode 100644 index bb2f733..0000000 --- a/chapter03/demo.m~ +++ /dev/null @@ -1,48 +0,0 @@ -% Done -% demo for chapter 03 -clear; close all; -n = 100; -beta = 1e-1; -X = rand(1,n); -w = randn; -b = randn; -t = w'*X+b+beta*randn(1,n); -x = linspace(min(X)-1,max(X)+1,n); % test data - -% d = 1; -% n = 200; -% [X,t,model] = linRnd(d,n); -% plotBand(X,t,2*model.sigma); -%% -model = linReg(X,t); -y = linPred(model,x); -figure; -hold on; -plot(X,t,'o'); -plot(x,y,'r-'); -hold off -pause -%% -[model,llh] = linRegEbEm(X,t); - -[y, sigma] = linPred(model,x,t); -figure; -hold on; -plotBand(x,y,2*sigma); -plot(X,t,'o'); -plot(x,y,'r-'); -hold off -figure -plot(llh); -pause -%% -[model,llh] = linRegEbFp(X,t); -[y, sigma] = linPred(model,x,t); -figure; -hold on; -plotBand(x,y,2*sigma); -plot(X,t,'o'); -plot(x,y,'r-'); -hold off -figure -plot(llh); \ No newline at end of file diff --git a/chapter03/linPlot.m b/chapter03/linPlot.m index 8f7f195..dab60c3 100644 --- a/chapter03/linPlot.m +++ b/chapter03/linPlot.m @@ -1,13 +1,16 @@ function linPlot(model, x, t) +% Plot linear function and data +% X: 1xn data +% t: 1xn response +% Written by Mo Chen (sth4nth@gmail.com). color = [255,228,225]/255; %pink -[y,sigma] = linPred(model,x,t); -h = 2*sigma; - +[x,idx] = sort(x); +t = t(idx); +[y,s] = linPred(model,x); figure; hold on; -x = x(:); -y = y(:); -fill([x;flipud(x)],[y+h;flipud(y-h)],color); +fill([x,fliplr(x)],[y+s,fliplr(y-s)],color); +plot(x,t,'o'); plot(x,y,'r-'); hold off diff --git a/chapter03/linPlot.m~ b/chapter03/linPlot.m~ deleted file mode 100644 index ee56703..0000000 --- a/chapter03/linPlot.m~ +++ /dev/null @@ -1,19 +0,0 @@ -function linPlot(model, x, t) -color = [255,228,225]/255; %pink - -x = x(:); -y = y(:); -h = h(:); - -n = length(x); -xl = min(x); -xu = max(x); - -[y,sigma,p] = linPred(model,x,t); -h = 2*sigma; - - -plot(x,y,'r-'); -fill([x;flipud(x)],[y+h;flipud(y-h)],color); -end - diff --git a/chapter03/linPred.m b/chapter03/linPred.m index 1789ac1..ffa283f 100644 --- a/chapter03/linPred.m +++ b/chapter03/linPred.m @@ -7,7 +7,7 @@ w = model.w; w0 = model.w0; y = w'*X+w0; -if nargin == 3 +if nargout > 1 beta = model.beta; if isfield(model,'V') % V*V'=inv(S) 3.54 U = model.V'*bsxfun(@minus,X,model.xbar); @@ -15,6 +15,8 @@ else sigma = sqrt(1/beta); end - p = exp(logGauss(t,y,sigma)); -% p = exp(-0.5*(((t-y)./sigma).^2+log(2*pi))-log(sigma)); + if nargin == 3 && nargout == 3 + p = exp(logGauss(t,y,sigma)); +% p = exp(-0.5*(((t-y)./sigma).^2+log(2*pi))-log(sigma)); + end end diff --git a/chapter03/linReg.m b/chapter03/linReg.m index 9352395..05d4dc9 100644 --- a/chapter03/linReg.m +++ b/chapter03/linReg.m @@ -21,6 +21,7 @@ U = chol(S); w = U\(U'\(X*t')); % 3.15 & 3.28 w0 = tbar-dot(w,xbar); % 3.19 - +beta = 1/mean((t-w'*X).^2); % 3.21 model.w = w; model.w0 = w0; +model.beta = beta; diff --git a/chapter03/linRnd.m b/chapter03/linRnd.m index b5999eb..6f24444 100644 --- a/chapter03/linRnd.m +++ b/chapter03/linRnd.m @@ -5,12 +5,12 @@ % X is generated form [0,1] % d: dimension of data % n: number of data -beta = gamrnd(1,1); % need statistcs toolbox +beta = gamrnd(10,10); % need statistcs toolbox X = rand(d,n); w = randn(d,1); w0 = randn(1,1); -epsilon = randn(1,n)/beta; 
-t = w'*X+w0+epsilon; +err = randn(1,n)/sqrt(beta); +t = w'*X+w0+err; model.w = w; model.w0 = w0; diff --git a/chapter03/test.m b/chapter03/test.m deleted file mode 100644 index af3b2b0..0000000 --- a/chapter03/test.m +++ /dev/null @@ -1,4 +0,0 @@ -d = 1; -n = 200; -[X, t, model] = linRnd(d, n); -linPlot(model, X, t); diff --git a/helper/plotBand.m b/helper/plotBand.m index b675bac..228d03b 100644 --- a/helper/plotBand.m +++ b/helper/plotBand.m @@ -7,4 +7,4 @@ function plotBand(x, y, h, color) x = x(:); y = y(:); h = h(:); -fill([x;flipud(x)],[y+h;flipud(y-h)],color); \ No newline at end of file +fill([x;flipud(x)]',[y+h;flipud(y-h)]',color); \ No newline at end of file From 137e85b7018f7cbe1f216974d7b9abe333cf180e Mon Sep 17 00:00:00 2001 From: sth4nth Date: Fri, 4 Dec 2015 00:57:29 +0800 Subject: [PATCH 007/149] remove plotband helper function --- helper/plotBand.m | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 helper/plotBand.m diff --git a/helper/plotBand.m b/helper/plotBand.m deleted file mode 100644 index 228d03b..0000000 --- a/helper/plotBand.m +++ /dev/null @@ -1,10 +0,0 @@ -function plotBand(x, y, h, color) -% plot a band with bandwidth h around y -% Written by Mo Chen (sth4nth@gmail.com). -if nargin < 4 - color = [255,228,225]/255; %pink -end -x = x(:); -y = y(:); -h = h(:); -fill([x;flipud(x)]',[y+h;flipud(y-h)]',color); \ No newline at end of file From 222203f41c3b1cebf7d1403fd10859879911b58c Mon Sep 17 00:00:00 2001 From: sth4nth Date: Fri, 4 Dec 2015 01:04:25 +0800 Subject: [PATCH 008/149] initial fix for chapter04 --- chapter04/logitReg.m | 2 +- chapter04/mnReg.m | 2 +- chapter04/{optLogitNewton.m => optNewton.m} | 4 ++-- chapter04/sigmoid.m | 1 + chapter04/softmax.m | 2 +- 5 files changed, 6 insertions(+), 5 deletions(-) rename chapter04/{optLogitNewton.m => optNewton.m} (85%) diff --git a/chapter04/logitReg.m b/chapter04/logitReg.m index 13856f0..fc487a9 100644 --- a/chapter04/logitReg.m +++ b/chapter04/logitReg.m @@ -1,5 +1,5 @@ function [w, llh] = logitReg(X, t, lambda) -% logistic regression for binary classification (Bernoulli likelihood) +% Logistic regression for binary classification (Bernoulli likelihood) % Written by Mo Chen (sth4nth@gmail.com). if nargin < 3 lambda = 1e-2; diff --git a/chapter04/mnReg.m b/chapter04/mnReg.m index 465444b..5acc6f7 100644 --- a/chapter04/mnReg.m +++ b/chapter04/mnReg.m @@ -1,5 +1,5 @@ function [model, llh] = mnReg(X, t, lambda, method) -% multinomial regression for multiclass problem (Multinomial likelihood) +% Multinomial regression for multiclass problem (Multinomial likelihood) % Written by Mo Chen (sth4nth@gmail.com). if nargin < 4 method = 1; diff --git a/chapter04/optLogitNewton.m b/chapter04/optNewton.m similarity index 85% rename from chapter04/optLogitNewton.m rename to chapter04/optNewton.m index 1cc3b7b..7cd0c7c 100644 --- a/chapter04/optLogitNewton.m +++ b/chapter04/optNewton.m @@ -1,5 +1,5 @@ -function [w, llh, U] = optLogitNewton(X, t, lambda, w) -% logistic regression for binary classification (Bernoulli likelihood) +function [w, llh, U] = optNewton(X, t, lambda, w) +% Newton-Raphson (second order) opitimzation method % Written by Mo Chen (sth4nth@gmail.com). [d,n] = size(X); tol = 1e-4; diff --git a/chapter04/sigmoid.m b/chapter04/sigmoid.m index b20383d..0113793 100644 --- a/chapter04/sigmoid.m +++ b/chapter04/sigmoid.m @@ -1,3 +1,4 @@ function y = sigmoid(x) +% Sigmod function % Written by Mo Chen (sth4nth@gmail.com). 
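
What optNewton does on each pass, stripped of the Cholesky bookkeeping (a sketch for orientation, not the repository code; it assumes X is d x n, t is a 1 x n vector of 0/1 labels, w the current weights, and lambda the ridge penalty):

    y = 1./(1+exp(-w'*X));                        % predictions, sigmoid of the activations
    r = y.*(1-y);                                 % IRLS weights (PRML 4.98)
    H = bsxfun(@times,X,r)*X' + lambda*eye(d);    % regularized Hessian (PRML 4.97)
    g = X*(y-t)' + lambda*w;                      % gradient of penalized neg. log-likelihood (4.96)
    w = w - H\g;                                  % Newton-Raphson step (4.92)
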
y = 1./(1+exp(-x)); \ No newline at end of file diff --git a/chapter04/softmax.m b/chapter04/softmax.m index e3b76a3..e7ab72e 100644 --- a/chapter04/softmax.m +++ b/chapter04/softmax.m @@ -1,5 +1,5 @@ function s = softmax(x, dim) -% Compute softmax +% Softmax function % By default dim = 1 (columns). % Written by Mo Chen (sth4nth@gmail.com). if nargin == 1, From a31e4bc8f333142ab10fe600b9c5c9d2b653b82e Mon Sep 17 00:00:00 2001 From: sth4nth Date: Fri, 4 Dec 2015 01:05:29 +0800 Subject: [PATCH 009/149] minor fix comment --- chapter03/linRnd.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chapter03/linRnd.m b/chapter03/linRnd.m index 6f24444..c1e7539 100644 --- a/chapter03/linRnd.m +++ b/chapter03/linRnd.m @@ -1,5 +1,5 @@ function [X, t, model] = linRnd(d, n) -% Generate a data from a linear model p(t|w,x)=G(w'x+w0,sigma), sigma=sqrt(1/beta) +% Generate data from a linear model p(t|w,x)=G(w'x+w0,sigma), sigma=sqrt(1/beta) % where w and w0 are generated from Gauss(0,1), % beta is generated from Gamma(1,1), % X is generated form [0,1] From 597756831683d1e7ea05e30cff2f25cb99723a51 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Mon, 7 Dec 2015 11:35:31 +0800 Subject: [PATCH 010/149] fix up chapter04 --- chapter04/classPlot.m | 5 +++++ chapter04/demo.m | 32 ++++++++++++++++---------------- chapter04/fda.m | 17 ++++++++++------- chapter04/logitReg.m | 2 +- 4 files changed, 32 insertions(+), 24 deletions(-) create mode 100644 chapter04/classPlot.m diff --git a/chapter04/classPlot.m b/chapter04/classPlot.m new file mode 100644 index 0000000..78d92d0 --- /dev/null +++ b/chapter04/classPlot.m @@ -0,0 +1,5 @@ +function classPlot(X, y) +% Plot 2d classification data +% X: 2xn data matrix +% y: 1xn label + diff --git a/chapter04/demo.m b/chapter04/demo.m index b842886..5e816d0 100644 --- a/chapter04/demo.m +++ b/chapter04/demo.m @@ -16,20 +16,20 @@ contour(x1,x2,y,1); hold off; %% -% clear; close all; -% k = 4; -% n = 1000; -% [X,t] = rndKCluster(2,k,n); -% -% [x1,x2] = meshgrid(linspace(min(X(1,:)),max(X(1,:)),n), linspace(min(X(2,:)),max(X(2,:)),n)); -% [model, llh] = classLogitMul(X,t, 1e-4, 1); -% plot(llh); -% figure; -% spread(X,t); +clear; close all; +k = 4; +n = 1000; +[X,t] = rndKCluster(2,k,n); + +[x1,x2] = meshgrid(linspace(min(X(1,:)),max(X(1,:)),n), linspace(min(X(2,:)),max(X(2,:)),n)); +[model, llh] = mnReg(X,t, 1e-4, 1); +plot(llh); +figure; +spread(X,t); -% W = model.W; -% y = w(1)*x1+w(2)*x2+w(3); -% -% hold on; -% contour(x1,x2,t,1); -% hold off; \ No newline at end of file +W = model.W; +y = w(1)*x1+w(2)*x2+w(3); + +hold on; +contour(x1,x2,t,1); +hold off; \ No newline at end of file diff --git a/chapter04/fda.m b/chapter04/fda.m index db1ea8d..5e4d8e5 100644 --- a/chapter04/fda.m +++ b/chapter04/fda.m @@ -1,23 +1,26 @@ -function U = fda(X, y, d) +function U = fda(X, t, d) % Fisher (linear) discriminant analysis +% X: dxn data matrix +% t: 1xn label +% d: target dimension % Written by Mo Chen (sth4nth@gmail.com). 
n = size(X,2); -k = max(y); +k = max(t); -E = sparse(1:n,y,true,n,k,n); % transform label into indicator matrix +E = sparse(1:n,t,true,n,k,n); % transform label into indicator matrix nk = full(sum(E)); m = mean(X,2); Xo = bsxfun(@minus,X,m); -St = (Xo*Xo')/n; +St = (Xo*Xo')/n; % 4.43 mk = bsxfun(@times,X*E,1./nk); mo = bsxfun(@minus,mk,m); mo = bsxfun(@times,mo,sqrt(nk/n)); -Sb = mo*mo'; -% Sw = St-Sb; +Sb = mo*mo'; % 4.46 +% Sw = St-Sb; % 4.45 -[U,A] = eig(Sb,St,'chol'); +[U,A] = eig(Sb,St,'chol'); [~,idx] = sort(diag(A),'descend'); U = U(:,idx(1:d)); diff --git a/chapter04/logitReg.m b/chapter04/logitReg.m index fc487a9..1688660 100644 --- a/chapter04/logitReg.m +++ b/chapter04/logitReg.m @@ -5,4 +5,4 @@ lambda = 1e-2; end X = [X; ones(1,size(X,2))]; -[w, llh] = optLogitNewton(X, t, lambda, zeros(size(X,1),1)); \ No newline at end of file +[w, llh] = optNewton(X, t, lambda, zeros(size(X,1),1)); \ No newline at end of file From ec6c8b673d6289f10871cb502f8c6fd7dc22b26d Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Mon, 7 Dec 2015 15:14:46 +0800 Subject: [PATCH 011/149] fix help --- chapter03/linPred.m | 4 ++-- chapter03/linReg.m | 4 ++-- chapter03/linRegEbEm.m | 4 ++-- chapter03/linRegEbFp.m | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/chapter03/linPred.m b/chapter03/linPred.m index ffa283f..afce03a 100644 --- a/chapter03/linPred.m +++ b/chapter03/linPred.m @@ -1,8 +1,8 @@ function [y, sigma, p] = linPred(model, X, t) % Compute linear model reponse y = w'*X+w0 and likelihood % model: trained model structure -% X: d x n testing data -% t (optional): 1 x n testing response +% X: d x n testing data +% t (optional): 1 x n testing response % Written by Mo Chen (sth4nth@gmail.com). w = model.w; w0 = model.w0; diff --git a/chapter03/linReg.m b/chapter03/linReg.m index 05d4dc9..225be82 100644 --- a/chapter03/linReg.m +++ b/chapter03/linReg.m @@ -1,7 +1,7 @@ function model = linReg(X, t, lambda) % Fit linear regression model y=w'x+w0 -% X: d x n data -% t: 1 x n response +% X: d x n data +% t: 1 x n response % Written by Mo Chen (sth4nth@gmail.com). if nargin < 3 lambda = 0; diff --git a/chapter03/linRegEbEm.m b/chapter03/linRegEbEm.m index ab3e22a..06bb093 100644 --- a/chapter03/linRegEbEm.m +++ b/chapter03/linRegEbEm.m @@ -1,7 +1,7 @@ function [model, llh] = linRegEbEm(X, t, alpha, beta) % Fit empirical Bayesian linear model with EM (p.448 chapter 9.3.4) -% X: d x n data -% t: 1 x n response +% X: d x n data +% t: 1 x n response % Written by Mo Chen (sth4nth@gmail.com). if nargin < 3 alpha = 0.02; diff --git a/chapter03/linRegEbFp.m b/chapter03/linRegEbFp.m index f2bc86e..ead773d 100644 --- a/chapter03/linRegEbFp.m +++ b/chapter03/linRegEbFp.m @@ -1,8 +1,8 @@ function [model, llh] = linRegEbFp(X, t, alpha, beta) % Fit empirical Bayesian linear model with Mackay fixed point method % (p.168) -% X: d x n data -% t: 1 x n response +% X: d x n data +% t: 1 x n response % Written by Mo Chen (sth4nth@gmail.com). 
if nargin < 3 alpha = 0.02; From 7d4a2fbc77d7780015dfa1145be01e5496ea55e5 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Mon, 7 Dec 2015 15:18:49 +0800 Subject: [PATCH 012/149] chapter04 is not finished --- chapter04/binPlot.m | 13 +++++++ chapter04/classPlot.m | 5 --- chapter04/classPlot.m~ | 15 ++++++++ chapter04/demo.m | 38 ++++++++++---------- chapter04/logitPred.m | 24 +++++++++++++ chapter04/logitReg.m | 48 ++++++++++++++++++++++++-- chapter04/{optNewton.m => logitReg.m~} | 26 ++++++++++---- chapter04/mulPlot.m | 7 ++++ 8 files changed, 142 insertions(+), 34 deletions(-) create mode 100644 chapter04/binPlot.m delete mode 100644 chapter04/classPlot.m create mode 100644 chapter04/classPlot.m~ create mode 100644 chapter04/logitPred.m rename chapter04/{optNewton.m => logitReg.m~} (56%) create mode 100644 chapter04/mulPlot.m diff --git a/chapter04/binPlot.m b/chapter04/binPlot.m new file mode 100644 index 0000000..77b062a --- /dev/null +++ b/chapter04/binPlot.m @@ -0,0 +1,13 @@ +function binPlot(model, X, t) +% Plot binary classification result for 2d data +% X: 2xn data matrix +% y: 1xn label + +w = model.w; +w0 = model.w0; +figure; +spread(X,t); +y = w'*X+w0; +hold on; +contour(X(1,:),X(2,:),y,1); +hold off; \ No newline at end of file diff --git a/chapter04/classPlot.m b/chapter04/classPlot.m deleted file mode 100644 index 78d92d0..0000000 --- a/chapter04/classPlot.m +++ /dev/null @@ -1,5 +0,0 @@ -function classPlot(X, y) -% Plot 2d classification data -% X: 2xn data matrix -% y: 1xn label - diff --git a/chapter04/classPlot.m~ b/chapter04/classPlot.m~ new file mode 100644 index 0000000..63847be --- /dev/null +++ b/chapter04/classPlot.m~ @@ -0,0 +1,15 @@ +function classPlot(model, X, t) +% Plot 2d classification data +% X: 2xn data matrix +% y: 1xn label + +w = model.w; +w0 = model.w0; +figure; +spread(X,t); + +y = w'X+w0; + +hold on; +contour(X[1,x2,y,1); +hold off; \ No newline at end of file diff --git a/chapter04/demo.m b/chapter04/demo.m index 5e816d0..182d5b7 100644 --- a/chapter04/demo.m +++ b/chapter04/demo.m @@ -5,31 +5,31 @@ [X,t] = rndKCluster(2,k,n); [x1,x2] = meshgrid(linspace(min(X(1,:)),max(X(1,:)),n), linspace(min(X(2,:)),max(X(2,:)),n)); -[w, llh] = logitReg(X,t-1,0.0001); +[w, llh] = logitReg(X,t-1,0); plot(llh); figure; spread(X,t); -y = w(1)*x1+w(2)*x2; +y = w(1)*x1+w(2)*x2+w(3); hold on; contour(x1,x2,y,1); hold off; %% -clear; close all; -k = 4; -n = 1000; -[X,t] = rndKCluster(2,k,n); - -[x1,x2] = meshgrid(linspace(min(X(1,:)),max(X(1,:)),n), linspace(min(X(2,:)),max(X(2,:)),n)); -[model, llh] = mnReg(X,t, 1e-4, 1); -plot(llh); -figure; -spread(X,t); - -W = model.W; -y = w(1)*x1+w(2)*x2+w(3); - -hold on; -contour(x1,x2,t,1); -hold off; \ No newline at end of file +% clear; close all; +% k = 4; +% n = 1000; +% [X,t] = rndKCluster(2,k,n); +% +% [x1,x2] = meshgrid(linspace(min(X(1,:)),max(X(1,:)),n), linspace(min(X(2,:)),max(X(2,:)),n)); +% [model, llh] = mnReg(X,t, 1e-4, 1); +% plot(llh); +% figure; +% spread(X,t); +% +% W = model.W; +% y = w(1)*x1+w(2)*x2+w(3); +% +% hold on; +% contour(x1,x2,t,1); +% hold off; \ No newline at end of file diff --git a/chapter04/logitPred.m b/chapter04/logitPred.m new file mode 100644 index 0000000..a6e8c20 --- /dev/null +++ b/chapter04/logitPred.m @@ -0,0 +1,24 @@ +function [ output_args ] = logitPred(model, X, t ) +% Prodict the label for binary lgoistic regression model +% model: trained model structure +% X: d x n testing data +% t (optional): 1 x n testing label +% Written by Mo Chen (sth4nth@gmail.com). 
+w = model.w; +w0 = model.w0; +y = w'*X+w0; +% if nargout > 1 +% beta = model.beta; +% if isfield(model,'V') % V*V'=inv(S) 3.54 +% U = model.V'*bsxfun(@minus,X,model.xbar); +% sigma = sqrt(1/beta+dot(U,U,1)); % 3.59 +% else +% sigma = sqrt(1/beta); +% end +% if nargin == 3 && nargout == 3 +% p = exp(logGauss(t,y,sigma)); +% % p = exp(-0.5*(((t-y)./sigma).^2+log(2*pi))-log(sigma)); +% end +% end + + diff --git a/chapter04/logitReg.m b/chapter04/logitReg.m index 1688660..b5f8aae 100644 --- a/chapter04/logitReg.m +++ b/chapter04/logitReg.m @@ -1,8 +1,50 @@ function [w, llh] = logitReg(X, t, lambda) -% Logistic regression for binary classification (Bernoulli likelihood) +% Logistic regression for binary classification optimized by Newton-Raphson +% method. +% X: dxn data matrix +% t: dx1 label (0/1) +% lambda: regularization parameter % Written by Mo Chen (sth4nth@gmail.com). if nargin < 3 - lambda = 1e-2; + lambda = 0; end X = [X; ones(1,size(X,2))]; -[w, llh] = optNewton(X, t, lambda, zeros(size(X,1),1)); \ No newline at end of file +[d,n] = size(X); + + +tol = 1e-4; +maxiter = 100; +llh = -inf(1,maxiter); + +idx = (1:d)'; +dg = sub2ind([d,d],idx,idx); +h = ones(1,n); +h(t==0) = -1; +w = rand(d,1); +z = w'*X; +for iter = 2:maxiter + y = sigmoid(z); + r = y.*(1-y); % 4.89 + Xw = bsxfun(@times, X, sqrt(r)); + H = Xw*Xw'; % 4.95 + H(dg) = H(dg)+lambda; + U = chol(H); + g = X*(y-t)'+lambda.*w; + p = -U\(U'\g); + wo = w; + while true % line search + w = wo+p; + z = w'*X; + llh(iter) = -sum(log1pexp(-h.*z))-0.5*sum(lambda.*w.^2); + incr = llh(iter)-llh(iter-1); + if incr < 0 + p = p/2; + else + break; + end + end + if incr < tol; break; end +end +llh = llh(2:iter); +% model.w = w(1:(end-1)); +% model.w0 = w(end); \ No newline at end of file diff --git a/chapter04/optNewton.m b/chapter04/logitReg.m~ similarity index 56% rename from chapter04/optNewton.m rename to chapter04/logitReg.m~ index 7cd0c7c..6dd5a15 100644 --- a/chapter04/optNewton.m +++ b/chapter04/logitReg.m~ @@ -1,7 +1,17 @@ -function [w, llh, U] = optNewton(X, t, lambda, w) -% Newton-Raphson (second order) opitimzation method +function [model, llh] = logitReg(X, t, lambda) +% Logistic regression for binary classification optimized by Newton-Raphson +% method. +% X: dxn data matrix +% t: dx1 label (0/1) +% lambda: regularization parameter % Written by Mo Chen (sth4nth@gmail.com). 
+if nargin < 3 + lambda = 1e-2; +end +X = [X; ones(1,size(X,2))]; [d,n] = size(X); + + tol = 1e-4; maxiter = 100; llh = -inf(1,maxiter); @@ -10,11 +20,13 @@ dg = sub2ind([d,d],idx,idx); h = ones(1,n); h(t==0) = -1; +w = rand(d,1); z = w'*X; for iter = 2:maxiter y = sigmoid(z); - Xw = bsxfun(@times, X, sqrt(y.*(1-y))); - H = Xw*Xw'; + r = y.*(1-y); % 4.89 + Xw = bsxfun(@times, X, sqrt(r)); + H = Xw*Xw'; % 4.95 H(dg) = H(dg)+lambda; U = chol(H); g = X*(y-t)'+lambda.*w; @@ -31,8 +43,8 @@ break; end end - if progress < tol - break - end + if progress < tol; break; end end llh = llh(2:iter); +model.w = w(1:(end-1)); +model.w0 \ No newline at end of file diff --git a/chapter04/mulPlot.m b/chapter04/mulPlot.m new file mode 100644 index 0000000..0e321e7 --- /dev/null +++ b/chapter04/mulPlot.m @@ -0,0 +1,7 @@ +function [ output_args ] = mulPlot( input_args ) +%MULPLOT Summary of this function goes here +% Detailed explanation goes here + + +end + From cb1701cac55b679e83b0de73f899b28017e76ee0 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Mon, 7 Dec 2015 16:47:06 +0800 Subject: [PATCH 013/149] clean up logitReg.m --- chapter04/logitReg.m | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/chapter04/logitReg.m b/chapter04/logitReg.m index b5f8aae..2407999 100644 --- a/chapter04/logitReg.m +++ b/chapter04/logitReg.m @@ -1,4 +1,4 @@ -function [w, llh] = logitReg(X, t, lambda) +function [model, llh] = logitReg(X, t, lambda) % Logistic regression for binary classification optimized by Newton-Raphson % method. % X: dxn data matrix @@ -23,28 +23,28 @@ w = rand(d,1); z = w'*X; for iter = 2:maxiter - y = sigmoid(z); - r = y.*(1-y); % 4.89 + y = sigmoid(z); % 4.87 + r = y.*(1-y); % 4.98 Xw = bsxfun(@times, X, sqrt(r)); - H = Xw*Xw'; % 4.95 + H = Xw*Xw'; % 4.97 H(dg) = H(dg)+lambda; U = chol(H); - g = X*(y-t)'+lambda.*w; + g = X*(y-t)'+lambda.*w; % 4.96 p = -U\(U'\g); - wo = w; - while true % line search + wo = w; % 4.92 + w = wo+p; + z = w'*X; + llh(iter) = -sum(log1pexp(-h.*z))-0.5*sum(lambda.*w.^2); % 4.89 + incr = llh(iter)-llh(iter-1); + while incr < 0 % line search + p = p/2; w = wo+p; z = w'*X; llh(iter) = -sum(log1pexp(-h.*z))-0.5*sum(lambda.*w.^2); incr = llh(iter)-llh(iter-1); - if incr < 0 - p = p/2; - else - break; - end end if incr < tol; break; end end llh = llh(2:iter); -% model.w = w(1:(end-1)); -% model.w0 = w(end); \ No newline at end of file +model.w = w(1:(end-1)); +model.w0 = w(end); \ No newline at end of file From 956f7f23386b3e0cac5c84653faecfd000554c39 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Mon, 7 Dec 2015 19:09:55 +0800 Subject: [PATCH 014/149] fix logistic regression --- chapter04/classPlot.m~ | 15 ------------- chapter04/demo.m | 14 +++++++----- chapter04/logitPred.m | 23 ++++++------------- chapter04/logitReg.m | 31 ++++++++++++-------------- chapter04/logitReg.m~ | 50 ------------------------------------------ chapter04/mnReg.m | 34 ++++++++++++---------------- 6 files changed, 43 insertions(+), 124 deletions(-) delete mode 100644 chapter04/classPlot.m~ delete mode 100644 chapter04/logitReg.m~ diff --git a/chapter04/classPlot.m~ b/chapter04/classPlot.m~ deleted file mode 100644 index 63847be..0000000 --- a/chapter04/classPlot.m~ +++ /dev/null @@ -1,15 +0,0 @@ -function classPlot(model, X, t) -% Plot 2d classification data -% X: 2xn data matrix -% y: 1xn label - -w = model.w; -w0 = model.w0; -figure; -spread(X,t); - -y = w'X+w0; - -hold on; -contour(X[1,x2,y,1); -hold off; \ No newline at end of file diff --git a/chapter04/demo.m 
b/chapter04/demo.m index 182d5b7..f4f1a44 100644 --- a/chapter04/demo.m +++ b/chapter04/demo.m @@ -5,30 +5,32 @@ [X,t] = rndKCluster(2,k,n); [x1,x2] = meshgrid(linspace(min(X(1,:)),max(X(1,:)),n), linspace(min(X(2,:)),max(X(2,:)),n)); -[w, llh] = logitReg(X,t-1,0); +[model, llh] = logitReg(X,t-1,0); +w = model.w; +w0 = model.w0; plot(llh); figure; spread(X,t); -y = w(1)*x1+w(2)*x2+w(3); +y = w(1)*x1+w(2)*x2+w0; hold on; contour(x1,x2,y,1); hold off; %% % clear; close all; -% k = 4; -% n = 1000; +% k = 3; +% n = 200; % [X,t] = rndKCluster(2,k,n); % % [x1,x2] = meshgrid(linspace(min(X(1,:)),max(X(1,:)),n), linspace(min(X(2,:)),max(X(2,:)),n)); -% [model, llh] = mnReg(X,t, 1e-4, 1); +% [model, llh] = mnReg(X,t, 1e-4,2); % plot(llh); % figure; % spread(X,t); % % W = model.W; -% y = w(1)*x1+w(2)*x2+w(3); +% % y = w(1)*x1+w(2)*x2+w(3); % % hold on; % contour(x1,x2,t,1); diff --git a/chapter04/logitPred.m b/chapter04/logitPred.m index a6e8c20..d42977d 100644 --- a/chapter04/logitPred.m +++ b/chapter04/logitPred.m @@ -1,4 +1,4 @@ -function [ output_args ] = logitPred(model, X, t ) +function [y, p] = logitPred(model, X) % Prodict the label for binary lgoistic regression model % model: trained model structure % X: d x n testing data @@ -6,19 +6,10 @@ % Written by Mo Chen (sth4nth@gmail.com). w = model.w; w0 = model.w0; -y = w'*X+w0; -% if nargout > 1 -% beta = model.beta; -% if isfield(model,'V') % V*V'=inv(S) 3.54 -% U = model.V'*bsxfun(@minus,X,model.xbar); -% sigma = sqrt(1/beta+dot(U,U,1)); % 3.59 -% else -% sigma = sqrt(1/beta); -% end -% if nargin == 3 && nargout == 3 -% p = exp(logGauss(t,y,sigma)); -% % p = exp(-0.5*(((t-y)./sigma).^2+log(2*pi))-log(sigma)); -% end -% end - +a = w'*X+w0; +y = a > 0; +h = ones(1,n); +h(~y) = -1; +p = exp(-sum(log1pexp(-h.*a))); +% (p > 0.5)==y diff --git a/chapter04/logitReg.m b/chapter04/logitReg.m index 2407999..119e0f3 100644 --- a/chapter04/logitReg.m +++ b/chapter04/logitReg.m @@ -6,24 +6,21 @@ % lambda: regularization parameter % Written by Mo Chen (sth4nth@gmail.com). if nargin < 3 - lambda = 0; + lambda = 1e-2; end X = [X; ones(1,size(X,2))]; [d,n] = size(X); - - tol = 1e-4; maxiter = 100; llh = -inf(1,maxiter); - idx = (1:d)'; dg = sub2ind([d,d],idx,idx); h = ones(1,n); h(t==0) = -1; -w = rand(d,1); -z = w'*X; +w = zeros(d,1); +a = w'*X; for iter = 2:maxiter - y = sigmoid(z); % 4.87 + y = sigmoid(a); % 4.87 r = y.*(1-y); % 4.98 Xw = bsxfun(@times, X, sqrt(r)); H = Xw*Xw'; % 4.97 @@ -32,17 +29,17 @@ g = X*(y-t)'+lambda.*w; % 4.96 p = -U\(U'\g); wo = w; % 4.92 - w = wo+p; - z = w'*X; - llh(iter) = -sum(log1pexp(-h.*z))-0.5*sum(lambda.*w.^2); % 4.89 + w = wo+p; + a = w'*X; + llh(iter) = -sum(log1pexp(-h.*a))-0.5*sum(lambda.*w.^2); % 4.89 incr = llh(iter)-llh(iter-1); - while incr < 0 % line search - p = p/2; - w = wo+p; - z = w'*X; - llh(iter) = -sum(log1pexp(-h.*z))-0.5*sum(lambda.*w.^2); - incr = llh(iter)-llh(iter-1); - end +% while incr < 0 % line search +% p = p/2; +% w = wo+p; +% a = w'*X; +% llh(iter) = -sum(log1pexp(-h.*a))-0.5*sum(lambda.*w.^2); +% incr = llh(iter)-llh(iter-1); +% end if incr < tol; break; end end llh = llh(2:iter); diff --git a/chapter04/logitReg.m~ b/chapter04/logitReg.m~ deleted file mode 100644 index 6dd5a15..0000000 --- a/chapter04/logitReg.m~ +++ /dev/null @@ -1,50 +0,0 @@ -function [model, llh] = logitReg(X, t, lambda) -% Logistic regression for binary classification optimized by Newton-Raphson -% method. 
-% X: dxn data matrix -% t: dx1 label (0/1) -% lambda: regularization parameter -% Written by Mo Chen (sth4nth@gmail.com). -if nargin < 3 - lambda = 1e-2; -end -X = [X; ones(1,size(X,2))]; -[d,n] = size(X); - - -tol = 1e-4; -maxiter = 100; -llh = -inf(1,maxiter); - -idx = (1:d)'; -dg = sub2ind([d,d],idx,idx); -h = ones(1,n); -h(t==0) = -1; -w = rand(d,1); -z = w'*X; -for iter = 2:maxiter - y = sigmoid(z); - r = y.*(1-y); % 4.89 - Xw = bsxfun(@times, X, sqrt(r)); - H = Xw*Xw'; % 4.95 - H(dg) = H(dg)+lambda; - U = chol(H); - g = X*(y-t)'+lambda.*w; - p = -U\(U'\g); - wo = w; - while true % line search - w = wo+p; - z = w'*X; - llh(iter) = -sum(log1pexp(-h.*z))-0.5*sum(lambda.*w.^2); - progress = llh(iter)-llh(iter-1); - if progress < 0 - p = p/2; - else - break; - end - end - if progress < tol; break; end -end -llh = llh(2:iter); -model.w = w(1:(end-1)); -model.w0 \ No newline at end of file diff --git a/chapter04/mnReg.m b/chapter04/mnReg.m index 5acc6f7..bccee8d 100644 --- a/chapter04/mnReg.m +++ b/chapter04/mnReg.m @@ -1,21 +1,15 @@ -function [model, llh] = mnReg(X, t, lambda, method) +function [model, llh] = mnReg(X, t, lambda) % Multinomial regression for multiclass problem (Multinomial likelihood) % Written by Mo Chen (sth4nth@gmail.com). -if nargin < 4 - method = 1; -end if nargin < 3 lambda = 1e-4; end X = [X; ones(1,size(X,2))]; -if method == 1 - [W, llh] = newton(X, t, lambda); -else - [W, llh] = newtonBlock(X, t, lambda); -end +[W, llh] = newtonRaphson(X, t, lambda); +% [W, llh] = newtonBlock(X, t, lambda); model.W = W; -function [W, llh] = newton(X, t, lambda) +function [W, llh] = newtonRaphson(X, t, lambda) [d,n] = size(X); k = max(t); tol = 1e-4; @@ -28,21 +22,21 @@ W = zeros(d,k); HT = zeros(d,k,d,k); for iter = 2:maxiter - Z = W'*X; - logY = bsxfun(@minus,Z,logsumexp(Z,1)); - llh(iter) = dot(T(:),logY(:))-0.5*lambda*dot(W(:),W(:)); + A = W'*X; % 4.105 + logY = bsxfun(@minus,A,logsumexp(A,1)); % 4.104 + llh(iter) = dot(T(:),logY(:))-0.5*lambda*dot(W(:),W(:)); % 4.108 if abs(llh(iter)-llh(iter-1)) < tol; break; end Y = exp(logY); for i = 1:k for j = 1:k r = Y(i,:).*((i==j)-Y(j,:)); % r has negative value, so cannot use sqrt - HT(:,i,:,j) = bsxfun(@times,X,r)*X'; + HT(:,i,:,j) = bsxfun(@times,X,r)*X'; % 4.110 end end - G = X*(Y-T)'+lambda*W; + G = X*(Y-T)'+lambda*W; % 4.96 H = reshape(HT,dk,dk); H(dg) = H(dg)+lambda; - W(:) = W(:)-H\G(:); + W(:) = W(:)-H\G(:); % 4.92 end llh = llh(2:iter); @@ -56,8 +50,8 @@ llh = -inf(1,maxiter); T = sparse(t,1:n,1,k,n,n); W = zeros(d,k); -Z = W'*X; -logY = bsxfun(@minus,Z,logsumexp(Z,1)); +A = W'*X; +logY = bsxfun(@minus,A,logsumexp(A,1)); for iter = 2:maxiter for j = 1:k Y = exp(logY); @@ -66,8 +60,8 @@ H(dg) = H(dg)+lambda; g = X*(Y(j,:)-T(j,:))'+lambda*W(:,j); W(:,j) = W(:,j)-H\g; - Z(j,:) = W(:,j)'*X; - logY = bsxfun(@minus,Z,logsumexp(Z,1)); % must be here to renormalize + A(j,:) = W(:,j)'*X; + logY = bsxfun(@minus,A,logsumexp(A,1)); % must be here to renormalize end llh(iter) = dot(T(:),logY(:))-0.5*lambda*dot(W(:),W(:)); if abs(llh(iter)-llh(iter-1)) < tol; break; end From e5840ba5388028993cd7ec8618cd9db5d6c5d01b Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Mon, 7 Dec 2015 19:23:25 +0800 Subject: [PATCH 015/149] logitPred.m is not finished --- chapter04/logitPred.m | 2 +- chapter06/demo.m | 4 ++-- chapter06/{knInfer.m => knPred.m} | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) rename chapter06/{knInfer.m => knPred.m} (71%) diff --git a/chapter04/logitPred.m b/chapter04/logitPred.m index d42977d..a9df226 100644 --- 
a/chapter04/logitPred.m +++ b/chapter04/logitPred.m @@ -8,7 +8,7 @@ w0 = model.w0; a = w'*X+w0; y = a > 0; -h = ones(1,n); +h = ones(1,size(X,2)); h(~y) = -1; p = exp(-sum(log1pexp(-h.*a))); diff --git a/chapter06/demo.m b/chapter06/demo.m index 558b3a8..800356e 100755 --- a/chapter06/demo.m +++ b/chapter06/demo.m @@ -8,8 +8,8 @@ x = linspace(min(X)-1,max(X)+1,n); % test data %% -model = regressKn(X,t,1e-4,@knGauss); -y = knInfer(x,model); +model = knReg(X,t,1e-4,@knGauss); +y = knPred(x,model); figure; hold on; % plotBand(x,y,2*sigma); diff --git a/chapter06/knInfer.m b/chapter06/knPred.m similarity index 71% rename from chapter06/knInfer.m rename to chapter06/knPred.m index e26dfd5..6f41bd2 100755 --- a/chapter06/knInfer.m +++ b/chapter06/knPred.m @@ -1,5 +1,5 @@ -function [y, sigma2, p] = knInfer(x, model, t) -% inference for kernel model +function [y, sigma2, p] = knPred(x, model, t) +% Prediction for kernel regression model % Written by Mo Chen (sth4nth@gmail.com). kn = model.kn; a = model.a; From c007a905056cfe027e5301fedc16f5b4e71b9121 Mon Sep 17 00:00:00 2001 From: sth4nth Date: Mon, 7 Dec 2015 20:51:39 +0800 Subject: [PATCH 016/149] rename --- chapter11/dirichletRnd.m | 10 ++++++++++ chapter11/{rndDiscrete.m => discreteRnd.m} | 2 +- chapter11/{rndGauss.m => gaussRnd.m} | 0 chapter11/{rndKmeans.m => kmeansRnd.m} | 2 +- chapter11/rndDirichlet.m | 5 ----- helper/rndKCluster.m | 12 ------------ 6 files changed, 12 insertions(+), 19 deletions(-) create mode 100644 chapter11/dirichletRnd.m rename chapter11/{rndDiscrete.m => discreteRnd.m} (86%) rename chapter11/{rndGauss.m => gaussRnd.m} (100%) rename chapter11/{rndKmeans.m => kmeansRnd.m} (86%) delete mode 100644 chapter11/rndDirichlet.m delete mode 100644 helper/rndKCluster.m diff --git a/chapter11/dirichletRnd.m b/chapter11/dirichletRnd.m new file mode 100644 index 0000000..4fd737b --- /dev/null +++ b/chapter11/dirichletRnd.m @@ -0,0 +1,10 @@ +function x = dirichletRnd(a, m) +% Sampling from a Dirichlet distribution. +% a: k dimensional vector +% m: k dimensional mean vector +% Written by Mo Chen (sth4nth@gmail.com). +if nargin == 2 + a = a*m; +end +x = gamrnd(a,1); +x = x/sum(x); diff --git a/chapter11/rndDiscrete.m b/chapter11/discreteRnd.m similarity index 86% rename from chapter11/rndDiscrete.m rename to chapter11/discreteRnd.m index 117aa8b..4148f35 100644 --- a/chapter11/rndDiscrete.m +++ b/chapter11/discreteRnd.m @@ -1,4 +1,4 @@ -function x = rndDiscrete(p, n) +function x = discreteRnd(p, n) % Sampling from a discrete distribution (multinomial). % Written by Mo Chen (sth4nth@gmail.com). if nargin == 1 diff --git a/chapter11/rndGauss.m b/chapter11/gaussRnd.m similarity index 100% rename from chapter11/rndGauss.m rename to chapter11/gaussRnd.m diff --git a/chapter11/rndKmeans.m b/chapter11/kmeansRnd.m similarity index 86% rename from chapter11/rndKmeans.m rename to chapter11/kmeansRnd.m index 3287bda..d06847b 100644 --- a/chapter11/rndKmeans.m +++ b/chapter11/kmeansRnd.m @@ -1,4 +1,4 @@ -function [X, z, center] = rndKmeans(d, k, n) +function [X, z, center] = kmeansRnd(d, k, n) % Sampling from a Gaussian mixture distribution with common variances (kmeans model). % Written by Michael Chen (sth4nth@gmail.com). a = 1; diff --git a/chapter11/rndDirichlet.m b/chapter11/rndDirichlet.m deleted file mode 100644 index ddc6d84..0000000 --- a/chapter11/rndDirichlet.m +++ /dev/null @@ -1,5 +0,0 @@ -function x = rndDirichlet(a) -% Sampling from a Dirichlet distribution. -% Written by Mo Chen (sth4nth@gmail.com). 
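
These samplers slot together: a Dirichlet draw gives mixing proportions on the simplex, and a discrete draw turns them into labels, which is exactly how kmeansRnd below uses them. A minimal sketch (gamrnd needs the Statistics Toolbox, as the comments in this series already note):

    k = 3; n = 1000;
    w = dirichletRnd(ones(1,k));   % flat Dirichlet: uniform over mixing proportions
    z = discreteRnd(w, n);         % n labels in 1..k drawn with probabilities w
    hist(z, 1:k);                  % empirical counts should track n*w
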
-x = gamrnd(a,1); -x = x/sum(x); \ No newline at end of file diff --git a/helper/rndKCluster.m b/helper/rndKCluster.m deleted file mode 100644 index 7d1a076..0000000 --- a/helper/rndKCluster.m +++ /dev/null @@ -1,12 +0,0 @@ -function [X, z, center] = rndKCluster(d, k, n) -% Sampling from a Gaussian mixture distribution with common variances (kmeans model). -% Written by Mo Chen (sth4nth@gmail.com). -a = 1; -b = 6*nthroot(k,d); - -X = randn(d,n); -w = rndDirichlet(ones(k,a)); -z = rndDiscrete(w,n); -E = full(sparse(z,1:n,1,k,n,n)); -center = rand(d,k)*b; -X = X+center*E; \ No newline at end of file From ddced0a788d0ef19f4e54c18ea4c6f1dd12d8f3e Mon Sep 17 00:00:00 2001 From: sth4nth Date: Mon, 7 Dec 2015 22:08:12 +0800 Subject: [PATCH 017/149] add logitPred.m --- chapter04/demo.m | 4 +++- chapter04/logitPred.m | 8 ++------ chapter11/kmeansRnd.m | 4 ++-- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/chapter04/demo.m b/chapter04/demo.m index f4f1a44..62350f4 100644 --- a/chapter04/demo.m +++ b/chapter04/demo.m @@ -2,10 +2,12 @@ clear; close all; k = 2; n = 1000; -[X,t] = rndKCluster(2,k,n); +[X,t] = kmeansRnd(2,k,n); [x1,x2] = meshgrid(linspace(min(X(1,:)),max(X(1,:)),n), linspace(min(X(2,:)),max(X(2,:)),n)); [model, llh] = logitReg(X,t-1,0); +[y,p] = logitPred(model,X); + w = model.w; w0 = model.w0; plot(llh); diff --git a/chapter04/logitPred.m b/chapter04/logitPred.m index a9df226..a51359f 100644 --- a/chapter04/logitPred.m +++ b/chapter04/logitPred.m @@ -6,10 +6,6 @@ % Written by Mo Chen (sth4nth@gmail.com). w = model.w; w0 = model.w0; -a = w'*X+w0; -y = a > 0; -h = ones(1,size(X,2)); -h(~y) = -1; -p = exp(-sum(log1pexp(-h.*a))); +p = exp(-log1pexp(w'*X+w0)); +y = (p>0.5)+0; -% (p > 0.5)==y diff --git a/chapter11/kmeansRnd.m b/chapter11/kmeansRnd.m index d06847b..f30ff9f 100644 --- a/chapter11/kmeansRnd.m +++ b/chapter11/kmeansRnd.m @@ -5,8 +5,8 @@ b = 6*nthroot(k,d); X = randn(d,n); -w = rndDirichlet(ones(k,a)); -z = rndDiscrete(w,n); +w = dirichletRnd(ones(k,a)); +z = discreteRnd(w,n); E = full(sparse(z,1:n,1,k,n,n)); center = rand(d,k)*b; X = X+center*E; \ No newline at end of file From 60bbe9317176ca579937db10301232cd589a1fc6 Mon Sep 17 00:00:00 2001 From: sth4nth Date: Mon, 7 Dec 2015 22:15:01 +0800 Subject: [PATCH 018/149] add mnPred.m --- chapter04/logitPred.m | 5 ++--- chapter04/mnPred.m | 10 ++++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) create mode 100644 chapter04/mnPred.m diff --git a/chapter04/logitPred.m b/chapter04/logitPred.m index a51359f..2beeec1 100644 --- a/chapter04/logitPred.m +++ b/chapter04/logitPred.m @@ -1,8 +1,7 @@ function [y, p] = logitPred(model, X) -% Prodict the label for binary lgoistic regression model -% model: trained model structure +% Prodict the label for binary logistic regression model +% model: trained model structure % X: d x n testing data -% t (optional): 1 x n testing label % Written by Mo Chen (sth4nth@gmail.com). w = model.w; w0 = model.w0; diff --git a/chapter04/mnPred.m b/chapter04/mnPred.m new file mode 100644 index 0000000..065752e --- /dev/null +++ b/chapter04/mnPred.m @@ -0,0 +1,10 @@ +function [y, P] = mnPred(model, X) +% Prodict the label for multiclass (multinomial) logistic regression model +% model: trained model structure +% X: d x n testing data +% Written by Mo Chen (sth4nth@gmail.com). 
+W = model.W; +X = [X; ones(1,size(X,2))]; +A = W'*X; +P = exp(bsxfun(@minus,A,logsumexp(A,1))); +[~, y] = max(P,[],1); \ No newline at end of file From 634825c5dfc3abffc988710b1c526b8947234f88 Mon Sep 17 00:00:00 2001 From: sth4nth Date: Tue, 8 Dec 2015 00:04:59 +0800 Subject: [PATCH 019/149] initial fix for chapter06 --- chapter04/TODO.txt | 3 +++ chapter04/binPlot.m | 3 +-- chapter04/multiPlot.m | 12 ++++++++++++ 3 files changed, 16 insertions(+), 2 deletions(-) create mode 100644 chapter04/TODO.txt create mode 100644 chapter04/multiPlot.m diff --git a/chapter04/TODO.txt b/chapter04/TODO.txt new file mode 100644 index 0000000..b4b9cac --- /dev/null +++ b/chapter04/TODO.txt @@ -0,0 +1,3 @@ +binPlot +multiPlot +demo diff --git a/chapter04/binPlot.m b/chapter04/binPlot.m index 77b062a..b486a1f 100644 --- a/chapter04/binPlot.m +++ b/chapter04/binPlot.m @@ -1,8 +1,7 @@ function binPlot(model, X, t) % Plot binary classification result for 2d data % X: 2xn data matrix -% y: 1xn label - +% t: 1xn label w = model.w; w0 = model.w0; figure; diff --git a/chapter04/multiPlot.m b/chapter04/multiPlot.m new file mode 100644 index 0000000..efeaaf2 --- /dev/null +++ b/chapter04/multiPlot.m @@ -0,0 +1,12 @@ +function multiPlot(model, X, t) +% Plot binary classification result for 2d data +% X: 2xn data matrix +% t: 1xn label +W = model.W; +X = [X; ones(1,size(X,2))]; +figure; +spread(X,t); +y = W'*X; +hold on; +contour(X(1,:),X(2,:),y,1); +hold off; \ No newline at end of file From 001c62d30088752d0cfa34cc8e596ad29af20117 Mon Sep 17 00:00:00 2001 From: sth4nth Date: Tue, 8 Dec 2015 00:05:36 +0800 Subject: [PATCH 020/149] initial fix for chapter06 --- TODO.txt | 6 +----- chapter04/mulPlot.m | 7 ------- chapter06/demo.m | 2 +- chapter06/knCenterize.m | 3 +++ chapter06/knPred.m | 2 +- chapter06/knReg.m | 3 +++ 6 files changed, 9 insertions(+), 14 deletions(-) delete mode 100644 chapter04/mulPlot.m diff --git a/TODO.txt b/TODO.txt index 0797d30..419dac6 100644 --- a/TODO.txt +++ b/TODO.txt @@ -2,8 +2,4 @@ TODO: derive simpler bound for vb and improve vb functions chapter 10 fix llh for rvm cd viterbi normalize update - -Other: -Add plot function -Add inference function for classification, mixture models -Add unit test \ No newline at end of file +Add predict for mixture models diff --git a/chapter04/mulPlot.m b/chapter04/mulPlot.m deleted file mode 100644 index 0e321e7..0000000 --- a/chapter04/mulPlot.m +++ /dev/null @@ -1,7 +0,0 @@ -function [ output_args ] = mulPlot( input_args ) -%MULPLOT Summary of this function goes here -% Detailed explanation goes here - - -end - diff --git a/chapter06/demo.m b/chapter06/demo.m index 800356e..a3e292e 100755 --- a/chapter06/demo.m +++ b/chapter06/demo.m @@ -9,7 +9,7 @@ x = linspace(min(X)-1,max(X)+1,n); % test data %% model = knReg(X,t,1e-4,@knGauss); -y = knPred(x,model); +y = knPred(model, x); figure; hold on; % plotBand(x,y,2*sigma); diff --git a/chapter06/knCenterize.m b/chapter06/knCenterize.m index d5201c5..2be4f87 100755 --- a/chapter06/knCenterize.m +++ b/chapter06/knCenterize.m @@ -1,5 +1,8 @@ function Kc = knCenterize(kn, X, Xt) % Centerize the data in the kernel space +% kn: kernel function +% X: dxn data matrix of which the center is computed +% Xt(option): dxn test data to be centerized by the center of X % Written by Mo Chen (sth4nth@gmail.com). 
K = kn(X,X); mK = mean(K); diff --git a/chapter06/knPred.m b/chapter06/knPred.m index 6f41bd2..825f18f 100755 --- a/chapter06/knPred.m +++ b/chapter06/knPred.m @@ -1,4 +1,4 @@ -function [y, sigma2, p] = knPred(x, model, t) +function [y, sigma2, p] = knPred(model, x, t) % Prediction for kernel regression model % Written by Mo Chen (sth4nth@gmail.com). kn = model.kn; diff --git a/chapter06/knReg.m b/chapter06/knReg.m index f36edbf..8e53c32 100644 --- a/chapter06/knReg.m +++ b/chapter06/knReg.m @@ -4,6 +4,9 @@ if nargin < 4 kn = @knGauss; end +if nargin < 3 + lambda = 1e-2; +end K = knCenterize(kn,X); tbar = mean(t); U = chol(K+lambda*eye(size(X,2))); From 60f470096ccde5a6babb5b8670569af6096fc501 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 8 Dec 2015 15:25:53 +0800 Subject: [PATCH 021/149] chapter04 is done for now --- chapter04/TODO.txt | 5 ++--- chapter04/binPlot.m | 23 +++++++++++++++++----- chapter04/demo.m | 42 ++++++++++------------------------------ chapter04/multiPlot.m | 12 ------------ chapter06/knCenterize.m | 2 +- chapter06/knKmeans.m | 25 ++++++++++++++++++++++++ chapter06/knKmeansPred.m | 7 +++++++ chapter06/knPred.m | 13 ------------- chapter06/knRegPred.m | 26 +++++++++++++++++++++++++ 9 files changed, 89 insertions(+), 66 deletions(-) delete mode 100644 chapter04/multiPlot.m create mode 100755 chapter06/knKmeans.m create mode 100644 chapter06/knKmeansPred.m delete mode 100755 chapter06/knPred.m create mode 100755 chapter06/knRegPred.m diff --git a/chapter04/TODO.txt b/chapter04/TODO.txt index b4b9cac..c98c787 100644 --- a/chapter04/TODO.txt +++ b/chapter04/TODO.txt @@ -1,3 +1,2 @@ -binPlot -multiPlot -demo +multiPlot: plot multclass decison boundary + diff --git a/chapter04/binPlot.m b/chapter04/binPlot.m index b486a1f..96bfd17 100644 --- a/chapter04/binPlot.m +++ b/chapter04/binPlot.m @@ -2,11 +2,24 @@ function binPlot(model, X, t) % Plot binary classification result for 2d data % X: 2xn data matrix % t: 1xn label +assert(size(X,1) == 2); w = model.w; w0 = model.w0; -figure; -spread(X,t); -y = w'*X+w0; +xi = min(X,[],2); +xa = max(X,[],2); +[x1,x2] = meshgrid(linspace(xi(1),xa(1)), linspace(xi(2),xa(2))); + +color = 'brgmcyk'; +m = length(color); +figure(gcf); +axis equal +clf; hold on; -contour(X(1,:),X(2,:),y,1); -hold off; \ No newline at end of file +view(2); +for i = 1:max(t) + idc = t==i; + scatter(X(1,idc),X(2,idc),36,color(mod(i-1,m)+1)); +end +y = w0+w(1)*x1+w(2)*x2; +contour(x1,x2,y,[-0 0]); +hold off; diff --git a/chapter04/demo.m b/chapter04/demo.m index 62350f4..6799428 100644 --- a/chapter04/demo.m +++ b/chapter04/demo.m @@ -1,39 +1,17 @@ - +% clear; close all; k = 2; n = 1000; [X,t] = kmeansRnd(2,k,n); - -[x1,x2] = meshgrid(linspace(min(X(1,:)),max(X(1,:)),n), linspace(min(X(2,:)),max(X(2,:)),n)); [model, llh] = logitReg(X,t-1,0); -[y,p] = logitPred(model,X); - -w = model.w; -w0 = model.w0; plot(llh); -figure; -spread(X,t); - -y = w(1)*x1+w(2)*x2+w0; - -hold on; -contour(x1,x2,y,1); -hold off; +binPlot(model,X,t) +pause %% -% clear; close all; -% k = 3; -% n = 200; -% [X,t] = rndKCluster(2,k,n); -% -% [x1,x2] = meshgrid(linspace(min(X(1,:)),max(X(1,:)),n), linspace(min(X(2,:)),max(X(2,:)),n)); -% [model, llh] = mnReg(X,t, 1e-4,2); -% plot(llh); -% figure; -% spread(X,t); -% -% W = model.W; -% % y = w(1)*x1+w(2)*x2+w(3); -% -% hold on; -% contour(x1,x2,t,1); -% hold off; \ No newline at end of file +clear +k = 3; +n = 1000; +[X,t] = kmeansRnd(2,k,n); +[model, llh] = mnReg(X,t); +y = mnPred(model,X); +spread(X,y) diff --git a/chapter04/multiPlot.m 
b/chapter04/multiPlot.m deleted file mode 100644 index efeaaf2..0000000 --- a/chapter04/multiPlot.m +++ /dev/null @@ -1,12 +0,0 @@ -function multiPlot(model, X, t) -% Plot binary classification result for 2d data -% X: 2xn data matrix -% t: 1xn label -W = model.W; -X = [X; ones(1,size(X,2))]; -figure; -spread(X,t); -y = W'*X; -hold on; -contour(X(1,:),X(2,:),y,1); -hold off; \ No newline at end of file diff --git a/chapter06/knCenterize.m b/chapter06/knCenterize.m index 2be4f87..e9caecd 100755 --- a/chapter06/knCenterize.m +++ b/chapter06/knCenterize.m @@ -2,7 +2,7 @@ % Centerize the data in the kernel space % kn: kernel function % X: dxn data matrix of which the center is computed -% Xt(option): dxn test data to be centerized by the center of X +% Xt(optional): dxn test data to be centerized by the center of X % Written by Mo Chen (sth4nth@gmail.com). K = kn(X,X); mK = mean(K); diff --git a/chapter06/knKmeans.m b/chapter06/knKmeans.m new file mode 100755 index 0000000..112732a --- /dev/null +++ b/chapter06/knKmeans.m @@ -0,0 +1,25 @@ +function [label, energy, model] = knKmeans(X, k, kn) +% Perform kernel k-means clustering. +% K: nxn kernel matrix +% k: number of cluster +% Reference: Kernel Methods for Pattern Analysis +% by John Shawe-Taylor, Nello Cristianini +% Written by Mo Chen (sth4nth@gmail.com). +K = kn(X,X); +n = size(X,2); +label = ceil(k*rand(1,n)); +last = 0; +while any(label ~= last) + E = sparse(label,1:n,1,k,n,n); + E = bsxfun(@times,E,1./sum(E,2)); + T = E*K; + Z = repmat(diag(T*E'),1,n)-2*T; + last = label; + [val, label] = min(Z,[],1); +end +energy = sum(val)+trace(K); +if nargout == 3 + model.X = X; + model.kn = kn; + model.label = label; +end \ No newline at end of file diff --git a/chapter06/knKmeansPred.m b/chapter06/knKmeansPred.m new file mode 100644 index 0000000..cfee0ac --- /dev/null +++ b/chapter06/knKmeansPred.m @@ -0,0 +1,7 @@ +function [ output_args ] = knKmeansPred( input_args ) +%KNKMEANSPRED Summary of this function goes here +% Detailed explanation goes here + + +end + diff --git a/chapter06/knPred.m b/chapter06/knPred.m deleted file mode 100755 index 825f18f..0000000 --- a/chapter06/knPred.m +++ /dev/null @@ -1,13 +0,0 @@ -function [y, sigma2, p] = knPred(model, x, t) -% Prediction for kernel regression model -% Written by Mo Chen (sth4nth@gmail.com). -kn = model.kn; -a = model.a; -X = model.X; -tbar = model.tbar; -y = a'*knCenterize(kn,X,x)+tbar; -if nargin == 3 - sigma2 = 1/beta+dot(X,X,1); % 3.59 - p = exp(((t-y).^2/sigma2+log(2*pi*sigma2))/(-2)); -end - diff --git a/chapter06/knRegPred.m b/chapter06/knRegPred.m new file mode 100755 index 0000000..54ed3c6 --- /dev/null +++ b/chapter06/knRegPred.m @@ -0,0 +1,26 @@ +function [y, sigma, p] = knRegPred(model, x, t) +% Prediction for kernel regression model +% Written by Mo Chen (sth4nth@gmail.com). 
+kn = model.kn; +a = model.a; +X = model.X; +tbar = model.tbar; +y = a'*knCenterize(kn,X,x)+tbar; +if nargin == 3 + sigma = sqrt(1/beta+dot(X,X,1)); % 3.59 + p = exp(((t-y).^2/sigma2+log(2*pi*sigma2))/(-2)); +end + +% if nargout > 1 +% beta = model.beta; +% if isfield(model,'V') % V*V'=inv(S) 3.54 +% U = model.V'*bsxfun(@minus,X,model.xbar); +% sigma = sqrt(1/beta+dot(U,U,1)); % 3.59 +% else +% sigma = sqrt(1/beta); +% end +% if nargin == 3 && nargout == 3 +% p = exp(logGauss(t,y,sigma)); +% % p = exp(-0.5*(((t-y)./sigma).^2+log(2*pi))-log(sigma)); +% end +% end From 9f0cec679bc4c826a567af0672e9b1ea0ffcfeea Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 8 Dec 2015 17:20:20 +0800 Subject: [PATCH 022/149] tweak linPlot.m --- chapter03/linPlot.m | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/chapter03/linPlot.m b/chapter03/linPlot.m index dab60c3..26fda56 100644 --- a/chapter03/linPlot.m +++ b/chapter03/linPlot.m @@ -1,16 +1,16 @@ -function linPlot(model, x, t) +function linPlot(model, X, t) % Plot linear function and data % X: 1xn data % t: 1xn response % Written by Mo Chen (sth4nth@gmail.com). color = [255,228,225]/255; %pink -[x,idx] = sort(x); -t = t(idx); +% [x,idx] = sort(x); +x = linspace(min(X),max(X)); [y,s] = linPred(model,x); figure; hold on; fill([x,fliplr(x)],[y+s,fliplr(y-s)],color); -plot(x,t,'o'); +plot(X,t,'o'); plot(x,y,'r-'); hold off From 00f5827d3d472ee938ed967c641fd96d021c4ab8 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 8 Dec 2015 17:40:25 +0800 Subject: [PATCH 023/149] remove empty cluster check due to the fk unique bug --- chapter09/kmeans.m | 3 --- 1 file changed, 3 deletions(-) diff --git a/chapter09/kmeans.m b/chapter09/kmeans.m index 1937259..5e94fde 100644 --- a/chapter09/kmeans.m +++ b/chapter09/kmeans.m @@ -7,11 +7,8 @@ last = 0; label = ceil(k*rand(1,n)); % random initialization while any(label ~= last) - [u,~,label] = unique(label); % remove empty clusters - k = length(u); E = sparse(1:n,label,1,n,k,n); % transform label into indicator matrix m = X*(E*spdiags(1./sum(E,1)',0,k,k)); % compute m of each cluster last = label; [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); % assign samples to the nearest centers end -[~,~,label] = unique(label); \ No newline at end of file From 6b6487f7494b5f47c9d05b8796d9cf1aa3af9b4c Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 8 Dec 2015 18:02:08 +0800 Subject: [PATCH 024/149] add variance estimation in knReg.m --- chapter06/knReg.m | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/chapter06/knReg.m b/chapter06/knReg.m index 8e53c32..a616494 100644 --- a/chapter06/knReg.m +++ b/chapter06/knReg.m @@ -1,5 +1,5 @@ function model = knReg(X, t, lambda, kn) -% Gaussian process for regression +% Gaussian process (kernel) regression % Written by Mo Chen (sth4nth@gmail.com). 
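+% The dual coefficients a solve (K + lambda*I)*a = t - mean(t), computed below via the Cholesky factor of the regularized kernel matrix (6.62, 6.68).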
if nargin < 4 kn = @knGauss; @@ -9,10 +9,13 @@ end K = knCenterize(kn,X); tbar = mean(t); -U = chol(K+lambda*eye(size(X,2))); -a = U\(U'\(t(:)-tbar)); +U = chol(K+lambda*eye(size(X,2))); % 6.62 +a = U\(U'\(t(:)-tbar)); % 6.68 +y = a'*K+tbar; +beta = 1/mean((t-y).^2); % 3.21 model.kn = kn; model.a = a; model.X = X; -model.tbar = tbar; \ No newline at end of file +model.tbar = tbar; +model.beta = beta; \ No newline at end of file From 4563c202c79947ce6d564bd8b809a659edc2b9cd Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 8 Dec 2015 18:12:47 +0800 Subject: [PATCH 025/149] more comment --- chapter03/linRegEbEm.m | 6 +++--- chapter03/linRegEbFp.m | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/chapter03/linRegEbEm.m b/chapter03/linRegEbEm.m index 06bb093..5f71d5f 100644 --- a/chapter03/linRegEbEm.m +++ b/chapter03/linRegEbEm.m @@ -25,9 +25,9 @@ llh = -inf(1,maxiter+1); for iter = 2:maxiter A = beta*C; - A(dg) = A(dg)+alpha; % 3.81 + A(dg) = A(dg)+alpha; % 3.81 3.54 U = chol(A); - V = U\I; + V = U\I; % A=inv(S) w = beta*(V*(V'*Xt)); % 3.84 w2 = dot(w,w); @@ -37,7 +37,7 @@ llh(iter) = 0.5*(d*log(alpha)+n*log(beta)-alpha*w2-beta*err-logdetA-n*log(2*pi)); % 3.86 if abs(llh(iter)-llh(iter-1)) < tol*abs(llh(iter-1)); break; end - trS = dot(V(:),V(:)); + trS = dot(V(:),V(:)); % A=inv(S) alpha = d/(w2+trS); % 9.63 gamma = d-alpha*trS; % 9.64 diff --git a/chapter03/linRegEbFp.m b/chapter03/linRegEbFp.m index ead773d..170ef34 100644 --- a/chapter03/linRegEbFp.m +++ b/chapter03/linRegEbFp.m @@ -26,9 +26,9 @@ llh = -inf(1,maxiter+1); for iter = 2:maxiter A = beta*C; - A(dg) = A(dg)+alpha; % 3.81 + A(dg) = A(dg)+alpha; % 3.81 3.54 U = chol(A); - V = U\I; + V = U\I; % A=inv(S) w = beta*(V*(V'*Xt)); % 3.84 w2 = dot(w,w); @@ -38,7 +38,7 @@ llh(iter) = 0.5*(d*log(alpha)+n*log(beta)-alpha*w2-beta*err-logdetA-n*log(2*pi)); % 3.86 if abs(llh(iter)-llh(iter-1)) < tol*abs(llh(iter-1)); break; end - trS = dot(V(:),V(:)); + trS = dot(V(:),V(:)); % A=inv(S) gamma = d-alpha*trS; % 3.91 alpha = gamma/w2; % 3.92 beta = (n-gamma)/err; % 3.95 From c7e060cce0e3c91aead3ce709b797bb125ff118a Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 8 Dec 2015 19:12:00 +0800 Subject: [PATCH 026/149] modify model parameter passed between functions --- chapter03/linPred.m | 8 +++++--- chapter03/linReg.m | 3 +++ chapter03/linRegEbEm.m | 3 ++- chapter03/linRegEbFp.m | 3 ++- 4 files changed, 12 insertions(+), 5 deletions(-) diff --git a/chapter03/linPred.m b/chapter03/linPred.m index afce03a..c159a54 100644 --- a/chapter03/linPred.m +++ b/chapter03/linPred.m @@ -9,9 +9,11 @@ y = w'*X+w0; if nargout > 1 beta = model.beta; - if isfield(model,'V') % V*V'=inv(S) 3.54 - U = model.V'*bsxfun(@minus,X,model.xbar); - sigma = sqrt(1/beta+dot(U,U,1)); % 3.59 + if isfield(model,'U') % 3.54 + U = model.U; + Xo = bsxfun(@minus,X,model.xbar); + XU = U'\Xo; + sigma = sqrt(1/beta+dot(XU,XU,1)); else sigma = sqrt(1/beta); end diff --git a/chapter03/linReg.m b/chapter03/linReg.m index 225be82..077b2ae 100644 --- a/chapter03/linReg.m +++ b/chapter03/linReg.m @@ -22,6 +22,9 @@ w = U\(U'\(X*t')); % 3.15 & 3.28 w0 = tbar-dot(w,xbar); % 3.19 beta = 1/mean((t-w'*X).^2); % 3.21 + model.w = w; model.w0 = w0; +%% optional for bayesian probabilistic inference purpose model.beta = beta; +model.U = U; diff --git a/chapter03/linRegEbEm.m b/chapter03/linRegEbEm.m index 5f71d5f..b0e06a5 100644 --- a/chapter03/linRegEbEm.m +++ b/chapter03/linRegEbEm.m @@ -48,7 +48,8 @@ llh = llh(2:iter); model.w0 = w0; model.w = w; +%% optional for bayesian 
probabilistic inference purpose model.alpha = alpha; model.beta = beta; model.xbar = xbar; -model.V = V; +model.U = U; diff --git a/chapter03/linRegEbFp.m b/chapter03/linRegEbFp.m index 170ef34..4caebc3 100644 --- a/chapter03/linRegEbFp.m +++ b/chapter03/linRegEbFp.m @@ -48,7 +48,8 @@ llh = llh(2:iter); model.w0 = w0; model.w = w; +%% optional for bayesian probabilistic inference purpose model.alpha = alpha; model.beta = beta; model.xbar = xbar; -model.V = V; \ No newline at end of file +model.U = U; \ No newline at end of file From 99f79de9ad2acafec75b9ba04edeaedcfd6562e2 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 8 Dec 2015 19:13:15 +0800 Subject: [PATCH 027/149] knRegPred knRegPlot knKmeanPred are not finished --- chapter06/demo.m | 22 +++++++--------------- chapter06/knKmeans.m | 10 +++++----- chapter06/knKmeansPred.m | 7 ------- chapter06/knRegPlot.m | 16 ++++++++++++++++ chapter06/knRegPred.m | 39 +++++++++++++++++++-------------------- 5 files changed, 47 insertions(+), 47 deletions(-) delete mode 100644 chapter06/knKmeansPred.m create mode 100644 chapter06/knRegPlot.m diff --git a/chapter06/demo.m b/chapter06/demo.m index a3e292e..a9d0d07 100755 --- a/chapter06/demo.m +++ b/chapter06/demo.m @@ -1,21 +1,13 @@ clear; close all; + n = 100; -beta = 1e-1; -X = rand(1,n); -w = randn; -b = randn; -t = w'*X+b+beta*randn(1,n); +x = linspace(0,2*pi,n); % test data +t = sin(x)+rand(1,n)/2; -x = linspace(min(X)-1,max(X)+1,n); % test data -%% -model = knReg(X,t,1e-4,@knGauss); -y = knPred(model, x); +model = knReg(x,t,1e-4,@knGauss); +y = knRegPred(model, x); figure; hold on; -% plotBand(x,y,2*sigma); -plot(X,t,'o'); +plot(x,t,'o'); plot(x,y,'r-'); -hold off -% figure -% plot(llh); -axis equal \ No newline at end of file + diff --git a/chapter06/knKmeans.m b/chapter06/knKmeans.m index 112732a..bf86519 100755 --- a/chapter06/knKmeans.m +++ b/chapter06/knKmeans.m @@ -18,8 +18,8 @@ [val, label] = min(Z,[],1); end energy = sum(val)+trace(K); -if nargout == 3 - model.X = X; - model.kn = kn; - model.label = label; -end \ No newline at end of file +% if nargout == 3 +% model.X = X; +% model.kn = kn; +% model.label = label; +% end \ No newline at end of file diff --git a/chapter06/knKmeansPred.m b/chapter06/knKmeansPred.m deleted file mode 100644 index cfee0ac..0000000 --- a/chapter06/knKmeansPred.m +++ /dev/null @@ -1,7 +0,0 @@ -function [ output_args ] = knKmeansPred( input_args ) -%KNKMEANSPRED Summary of this function goes here -% Detailed explanation goes here - - -end - diff --git a/chapter06/knRegPlot.m b/chapter06/knRegPlot.m new file mode 100644 index 0000000..8ffd379 --- /dev/null +++ b/chapter06/knRegPlot.m @@ -0,0 +1,16 @@ +function knRegPlot(model, X, t) +% Plot linear function and data +% X: 1xn data +% t: 1xn response +% Written by Mo Chen (sth4nth@gmail.com). +color = [255,228,225]/255; %pink +% [x,idx] = sort(x); +x = linspace(min(X),max(X)); +[y,s] = knRegPred(model,x); +figure; +hold on; +fill([x,fliplr(x)],[y+s,fliplr(y-s)],color); +plot(X,t,'o'); +plot(x,y,'r-'); +hold off + diff --git a/chapter06/knRegPred.m b/chapter06/knRegPred.m index 54ed3c6..1d7e058 100755 --- a/chapter06/knRegPred.m +++ b/chapter06/knRegPred.m @@ -1,26 +1,25 @@ -function [y, sigma, p] = knRegPred(model, x, t) -% Prediction for kernel regression model +function [y, sigma, p] = knRegPred(model, Xt, t) +% Prediction for Gaussian Process (kernel) regression model % Written by Mo Chen (sth4nth@gmail.com). 
kn = model.kn; a = model.a; X = model.X; tbar = model.tbar; -y = a'*knCenterize(kn,X,x)+tbar; -if nargin == 3 - sigma = sqrt(1/beta+dot(X,X,1)); % 3.59 - p = exp(((t-y).^2/sigma2+log(2*pi*sigma2))/(-2)); -end +y = a'*knCenterize(kn,X,Xt)+tbar; +if nargout > 1 + beta = model.beta; + if isfield(model,'U') + U = model.U; + Xo = bsxfun(@minus,X,model.xbar); + XU = U'\Xo; + sigma = sqrt(1/beta+dot(XU,XU,1)); + + sigma = sqrt(c-k'*C^-1*k); + else + sigma = sqrt(1/beta); % 6.67 + end + if nargin == 3 && nargout == 3 + p = exp(logGauss(t,y,sigma)); + end -% if nargout > 1 -% beta = model.beta; -% if isfield(model,'V') % V*V'=inv(S) 3.54 -% U = model.V'*bsxfun(@minus,X,model.xbar); -% sigma = sqrt(1/beta+dot(U,U,1)); % 3.59 -% else -% sigma = sqrt(1/beta); -% end -% if nargin == 3 && nargout == 3 -% p = exp(logGauss(t,y,sigma)); -% % p = exp(-0.5*(((t-y)./sigma).^2+log(2*pi))-log(sigma)); -% end -% end +end From 36535a36fb827255c4ef4367c9dc58a5a0e520ac Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 15 Dec 2015 13:25:52 +0800 Subject: [PATCH 028/149] add polynomial kernel --- chapter06/knPoly.m | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 chapter06/knPoly.m diff --git a/chapter06/knPoly.m b/chapter06/knPoly.m new file mode 100644 index 0000000..5cfd0bd --- /dev/null +++ b/chapter06/knPoly.m @@ -0,0 +1,20 @@ +function K = knPoly(X, Y, o, c) +% Polynomial kernel k(x,y)=(x'y+c)^o +% X,Y: data matrix +% o: order +% c: constant +% Written by Mo Chen (sth4nth@gmail.com). +if nargin < 4 + c = 0; +end + +if nargin < 3 + o = 3; +end + +if nargin < 2 || isempty(Y) + K = (dot(X,X,1)+c).^o; % norm in kernel space +else + K = (X'*Y+c).^o; +end + From 0129687bb358bd898b142ff85e55a562eb7ee1af Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 15 Dec 2015 13:27:09 +0800 Subject: [PATCH 029/149] modify kernels to include norm --- chapter06/knGauss.m | 9 +++++++-- chapter06/knLin.m | 9 +++++++++ chapter06/knLinear.m | 5 ----- 3 files changed, 16 insertions(+), 7 deletions(-) create mode 100644 chapter06/knLin.m delete mode 100644 chapter06/knLinear.m diff --git a/chapter06/knGauss.m b/chapter06/knGauss.m index 35f0d2b..c893b16 100755 --- a/chapter06/knGauss.m +++ b/chapter06/knGauss.m @@ -5,5 +5,10 @@ s = 1; end -D = bsxfun(@plus,dot(X,X,1)',dot(Y,Y,1))-2*(X'*Y); -K = exp(D/(-2*s^2)); +if nargin < 2 || isempty(Y) + K = ones(1,size(X,2)); % norm in kernel space +else + D = bsxfun(@plus,dot(X,X,1)',dot(Y,Y,1))-2*(X'*Y); + K = exp(D/(-2*s^2)); +end + diff --git a/chapter06/knLin.m b/chapter06/knLin.m new file mode 100644 index 0000000..37533fb --- /dev/null +++ b/chapter06/knLin.m @@ -0,0 +1,9 @@ +function K = knLin(X, Y) +% Linear kernel (inner product) +% Written by Mo Chen (sth4nth@gmail.com). + +if nargin < 2 || isempty(Y) + K = dot(X,X,1); % norm in kernel space +else + K = X'*Y; +end diff --git a/chapter06/knLinear.m b/chapter06/knLinear.m deleted file mode 100644 index f362dea..0000000 --- a/chapter06/knLinear.m +++ /dev/null @@ -1,5 +0,0 @@ -function K = knLinear(X, Y) -% Linear kernel (inner product) -% Written by Mo Chen (sth4nth@gmail.com). 
-K = X'*Y; - From 3a1c49a51703441a4803c7d932d6598edd9a7195 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 15 Dec 2015 13:37:05 +0800 Subject: [PATCH 030/149] change interface of knCenter.m --- chapter06/knCenter.m | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 chapter06/knCenter.m diff --git a/chapter06/knCenter.m b/chapter06/knCenter.m new file mode 100644 index 0000000..71d4d26 --- /dev/null +++ b/chapter06/knCenter.m @@ -0,0 +1,17 @@ +function Kc = knCenter(kn, X, X1, X2) +% Centerize the data in the kernel space +% kn: kernel function +% X: dxn data matrix of which the center in the kernel space is computed +% X1, X2: dxn1 and dxn2 data matrix. the kernel k(x1,x2) is computed +% where the origin in the kernel space is the center of X +% Written by Mo Chen (sth4nth@gmail.com). +K = kn(X,X); +mK = mean(K); +mmK = mean(mK); +if nargin == 2 % compute the pairwise centerized version of the kernel of X. eq knCenter(kn,X,X,X) + Kc = K+mmK-bsxfun(@plus,mK',mK); % Kc = K-M*K-K*M+M*K*M; where M = ones(n,n)/n; +elseif nargin == 3 % compute the norms (k(x,x)) of X1 w.r.t. the center of X as the origin. eq diag(knCenter(kn,X,X1,X1)) + Kc = kn(X1)+mmK-2*mean(kn(X,X1)); +elseif nargin == 4 % compute the kernel of X1 and X2 w.r.t. the center of X as the origin + Kc = kn(X1,X2)+mmK-bsxfun(@plus,mean(kn(X,X1))',mean(kn(X,X2))); +end From e0824ae12bceb5df264e698634855e0a4f8a2d93 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 15 Dec 2015 13:37:59 +0800 Subject: [PATCH 031/149] remove old knCenterize.m --- chapter06/knCenterize.m | 17 ----------------- 1 file changed, 17 deletions(-) delete mode 100755 chapter06/knCenterize.m diff --git a/chapter06/knCenterize.m b/chapter06/knCenterize.m deleted file mode 100755 index e9caecd..0000000 --- a/chapter06/knCenterize.m +++ /dev/null @@ -1,17 +0,0 @@ -function Kc = knCenterize(kn, X, Xt) -% Centerize the data in the kernel space -% kn: kernel function -% X: dxn data matrix of which the center is computed -% Xt(optional): dxn test data to be centerized by the center of X -% Written by Mo Chen (sth4nth@gmail.com). -K = kn(X,X); -mK = mean(K); -mmK = mean(mK); -if nargin < 3 - % Kc = K-M*K-K*M+M*K*M; where M = ones(n,n)/n; - Kc = K+mmK-bsxfun(@plus,mK,mK'); -else - Kt = kn(X,Xt); - mKt = mean(Kt); - Kc = Kt+mmK-bsxfun(@plus,mKt,mK'); -end \ No newline at end of file From 1656914548771abbe21c8d887e2eb27ca0359a02 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 15 Dec 2015 13:51:55 +0800 Subject: [PATCH 032/149] add helper function maxabsdiff.m --- helper/maxabsdiff.m | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 helper/maxabsdiff.m diff --git a/helper/maxabsdiff.m b/helper/maxabsdiff.m new file mode 100644 index 0000000..c59abac --- /dev/null +++ b/helper/maxabsdiff.m @@ -0,0 +1,4 @@ +function z = maxabsdiff(x, y) +% Written by Mo Chen (sth4nth@gmail.com). 
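+% Maximum absolute difference z = max(|x - y|) over all elements of two equally sized arrays; used to test numerical equivalence in the demos.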
+z = max(abs(x(:)-y(:))); + From 8888afcb2e281436a846c14eba35ebe54c9ccfdf Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 15 Dec 2015 14:01:03 +0800 Subject: [PATCH 033/149] change knReg.m to use knCenter.m --- chapter06/knReg.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chapter06/knReg.m b/chapter06/knReg.m index a616494..1f24fe2 100644 --- a/chapter06/knReg.m +++ b/chapter06/knReg.m @@ -7,7 +7,7 @@ if nargin < 3 lambda = 1e-2; end -K = knCenterize(kn,X); +K = knCenter(kn,X); tbar = mean(t); U = chol(K+lambda*eye(size(X,2))); % 6.62 a = U\(U'\(t(:)-tbar)); % 6.68 From 82530f69deb4c8515c952f2ec0ba3832d939a6ac Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 15 Dec 2015 14:31:37 +0800 Subject: [PATCH 034/149] fix linReg.m --- chapter03/linReg.m | 1 + 1 file changed, 1 insertion(+) diff --git a/chapter03/linReg.m b/chapter03/linReg.m index 077b2ae..2b9c7b9 100644 --- a/chapter03/linReg.m +++ b/chapter03/linReg.m @@ -25,6 +25,7 @@ model.w = w; model.w0 = w0; +model.xbar = xbar; %% optional for bayesian probabilistic inference purpose model.beta = beta; model.U = U; From 8ec5aed649ecc70df913a46a0524c492f5cd15fc Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 15 Dec 2015 15:14:45 +0800 Subject: [PATCH 035/149] knRegPred.m is not finished --- chapter03/linPred.m | 23 +++++++++++------------ chapter06/demo.m | 31 +++++++++++++++++++++++++------ chapter06/knPca.m | 6 ++++++ chapter06/knReg.m | 3 ++- chapter06/knRegPred.m | 22 ++++++++-------------- 5 files changed, 52 insertions(+), 33 deletions(-) create mode 100644 chapter06/knPca.m diff --git a/chapter03/linPred.m b/chapter03/linPred.m index c159a54..1ebc439 100644 --- a/chapter03/linPred.m +++ b/chapter03/linPred.m @@ -7,18 +7,17 @@ w = model.w; w0 = model.w0; y = w'*X+w0; + if nargout > 1 beta = model.beta; - if isfield(model,'U') % 3.54 - U = model.U; - Xo = bsxfun(@minus,X,model.xbar); - XU = U'\Xo; - sigma = sqrt(1/beta+dot(XU,XU,1)); - else - sigma = sqrt(1/beta); - end - if nargin == 3 && nargout == 3 - p = exp(logGauss(t,y,sigma)); -% p = exp(-0.5*(((t-y)./sigma).^2+log(2*pi))-log(sigma)); - end + U = model.U; % 3.54 + Xo = bsxfun(@minus,X,model.xbar); + XU = U'\Xo; + sigma = sqrt(1/beta+dot(XU,XU,1)); end + +if nargin == 3 && nargout == 3 + p = exp(logGauss(t,y,sigma)); +% p = exp(-0.5*(((t-y)./sigma).^2+log(2*pi))-log(sigma)); +end + diff --git a/chapter06/demo.m b/chapter06/demo.m index a9d0d07..dac4237 100755 --- a/chapter06/demo.m +++ b/chapter06/demo.m @@ -4,10 +4,29 @@ x = linspace(0,2*pi,n); % test data t = sin(x)+rand(1,n)/2; -model = knReg(x,t,1e-4,@knGauss); -y = knRegPred(model, x); -figure; -hold on; -plot(x,t,'o'); -plot(x,y,'r-'); +% model = knReg(x,t,1e-4,@knGauss); +% y = knRegPred(model, x); +% figure; +% hold on; +% plot(x,t,'o'); +% plot(x,y,'r-'); + +%% test case for kernel regression +lambda = 1e-4; +model_kn = knReg(x,t,lambda,@knLin); +model_lin = linReg(x,t,lambda); + +[y_kn, s_kn] = knRegPred(model_kn, x); +[y_lin, s_lin] = linPred(model_lin,x); + +maxabsdiff(y_kn,y_lin) +maxabsdiff(s_kn,s_lin) +%% test case for knCenter +% kn = @knGauss; +% X=rand(2,100); +% X1=rand(2,10); +% X2=rand(2,5); +% +% isequalf(knCenter(kn,X,X1),diag(knCenter(kn,X,X1,X1))) +% isequalf(knCenter(kn,X),knCenter(kn,X,X,X)); \ No newline at end of file diff --git a/chapter06/knPca.m b/chapter06/knPca.m new file mode 100644 index 0000000..35aa93a --- /dev/null +++ b/chapter06/knPca.m @@ -0,0 +1,6 @@ +function [ output_args ] = knPca( input_args ) +% Kernel PCA + + +end + diff --git a/chapter06/knReg.m 
b/chapter06/knReg.m index 1f24fe2..d0ae640 100644 --- a/chapter06/knReg.m +++ b/chapter06/knReg.m @@ -18,4 +18,5 @@ model.a = a; model.X = X; model.tbar = tbar; -model.beta = beta; \ No newline at end of file +model.beta = beta; +model.U = U; \ No newline at end of file diff --git a/chapter06/knRegPred.m b/chapter06/knRegPred.m index 1d7e058..a2de44c 100755 --- a/chapter06/knRegPred.m +++ b/chapter06/knRegPred.m @@ -5,21 +5,15 @@ a = model.a; X = model.X; tbar = model.tbar; -y = a'*knCenterize(kn,X,Xt)+tbar; +y = a'*knCenter(kn,X,X,Xt)+tbar; + if nargout > 1 beta = model.beta; - if isfield(model,'U') - U = model.U; - Xo = bsxfun(@minus,X,model.xbar); - XU = U'\Xo; - sigma = sqrt(1/beta+dot(XU,XU,1)); - - sigma = sqrt(c-k'*C^-1*k); - else - sigma = sqrt(1/beta); % 6.67 - end - if nargin == 3 && nargout == 3 - p = exp(logGauss(t,y,sigma)); - end + U = model.U; + sigma = sqrt(knCenter(kn,X,Xt)+1/beta-dot(XU,XU,1)); end + +if nargin == 3 && nargout == 3 + p = exp(logGauss(t,y,sigma)); +end \ No newline at end of file From 35ca9c1cd8cc4bc3beb09d2e3f64718e0f9209dd Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 15 Dec 2015 15:23:46 +0800 Subject: [PATCH 036/149] init commit knPca.m --- chapter06/knPca.m | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/chapter06/knPca.m b/chapter06/knPca.m index 35aa93a..898b6c1 100644 --- a/chapter06/knPca.m +++ b/chapter06/knPca.m @@ -1,6 +1,19 @@ -function [ output_args ] = knPca( input_args ) +function [ R, Z, err ] = knPca( X, d, kn ) % Kernel PCA - - +if nargin < 3 + kn = @knGauss; end +K = kn(X,X); +K = knCenter(K); +[V,A] = eig(K); +[A,idx] = sort(diag(A),'descend'); +V = V(:,idx(1:d))'; +A = A(1:d); +R = bsxfun(@times,V,1./sqrt(A)); +if nargout > 1 + Z = bsxfun(@times,V,sqrt(A)); +end +if nargout > 2 + err = diag(K)'-sum(Z.^2,1); +end \ No newline at end of file From c9bb14650ef07c90b99562dd127ee74f8b9e624f Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 15 Dec 2015 15:42:23 +0800 Subject: [PATCH 037/149] knRegPred.m: sigma need fix --- chapter06/knKmeansPred.m | 7 +++++++ chapter06/knPcaPred.m | 7 +++++++ chapter06/knRegPred.asv | 21 +++++++++++++++++++++ chapter06/knRegPred.m | 7 ++++--- 4 files changed, 39 insertions(+), 3 deletions(-) create mode 100644 chapter06/knKmeansPred.m create mode 100644 chapter06/knPcaPred.m create mode 100644 chapter06/knRegPred.asv diff --git a/chapter06/knKmeansPred.m b/chapter06/knKmeansPred.m new file mode 100644 index 0000000..cfee0ac --- /dev/null +++ b/chapter06/knKmeansPred.m @@ -0,0 +1,7 @@ +function [ output_args ] = knKmeansPred( input_args ) +%KNKMEANSPRED Summary of this function goes here +% Detailed explanation goes here + + +end + diff --git a/chapter06/knPcaPred.m b/chapter06/knPcaPred.m new file mode 100644 index 0000000..9213cf2 --- /dev/null +++ b/chapter06/knPcaPred.m @@ -0,0 +1,7 @@ +function [ output_args ] = knPcaPred( input_args ) +%KNPCAPRED Summary of this function goes here +% Detailed explanation goes here + + +end + diff --git a/chapter06/knRegPred.asv b/chapter06/knRegPred.asv new file mode 100644 index 0000000..2cb4d4f --- /dev/null +++ b/chapter06/knRegPred.asv @@ -0,0 +1,21 @@ +function [y, sigma, p] = knRegPred(model, Xt, t) +% Prediction for Gaussian Process (kernel) regression model +% Written by Mo Chen (sth4nth@gmail.com). 
+kn = model.kn; +a = model.a; +X = model.X; +tbar = model.tbar; +y = a'*knCenter(kn,X,X,Xt)+tbar; + +if nargout > 1 + beta = model.beta; + U = model.U; + + XU = U + + sigma = sqrt(knCenter(kn,X,Xt)+1/beta-dot(XU,XU,1)); +end + +if nargin == 3 && nargout == 3 + p = exp(logGauss(t,y,sigma)); +end \ No newline at end of file diff --git a/chapter06/knRegPred.m b/chapter06/knRegPred.m index a2de44c..5a7802c 100755 --- a/chapter06/knRegPred.m +++ b/chapter06/knRegPred.m @@ -5,13 +5,14 @@ a = model.a; X = model.X; tbar = model.tbar; -y = a'*knCenter(kn,X,X,Xt)+tbar; +Kt = knCenter(kn,X,X,Xt); +y = a'*Kt+tbar; if nargout > 1 beta = model.beta; U = model.U; - - sigma = sqrt(knCenter(kn,X,Xt)+1/beta-dot(XU,XU,1)); + XU = U'\Kt; + sigma = sqrt(knCenter(kn,X,Xt)+1/beta-dot(XU,XU,1)); % not right end if nargin == 3 && nargout == 3 From f01ce4f8f897694b88d3e2f06f507fdf13f68675 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 15 Dec 2015 20:11:15 +0800 Subject: [PATCH 038/149] delete temp file --- chapter06/knRegPred.asv | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 chapter06/knRegPred.asv diff --git a/chapter06/knRegPred.asv b/chapter06/knRegPred.asv deleted file mode 100644 index 2cb4d4f..0000000 --- a/chapter06/knRegPred.asv +++ /dev/null @@ -1,21 +0,0 @@ -function [y, sigma, p] = knRegPred(model, Xt, t) -% Prediction for Gaussian Process (kernel) regression model -% Written by Mo Chen (sth4nth@gmail.com). -kn = model.kn; -a = model.a; -X = model.X; -tbar = model.tbar; -y = a'*knCenter(kn,X,X,Xt)+tbar; - -if nargout > 1 - beta = model.beta; - U = model.U; - - XU = U - - sigma = sqrt(knCenter(kn,X,Xt)+1/beta-dot(XU,XU,1)); -end - -if nargin == 3 && nargout == 3 - p = exp(logGauss(t,y,sigma)); -end \ No newline at end of file From dff456abf26b65e5d7028b1eb4a47a9643ca377b Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Wed, 16 Dec 2015 16:30:55 +0800 Subject: [PATCH 039/149] fix probability pred bug --- chapter03/linPred.asv | 28 ++++++++++++++++++++++++++++ chapter03/linPred.m | 4 ++-- chapter03/linReg.asv | 33 +++++++++++++++++++++++++++++++++ chapter03/linReg.m | 20 +++++++++++--------- 4 files changed, 74 insertions(+), 11 deletions(-) create mode 100644 chapter03/linPred.asv create mode 100644 chapter03/linReg.asv diff --git a/chapter03/linPred.asv b/chapter03/linPred.asv new file mode 100644 index 0000000..8528729 --- /dev/null +++ b/chapter03/linPred.asv @@ -0,0 +1,28 @@ +function [y, sigma, p] = linPred(model, X, t) +% Compute linear model reponse y = w'*X+w0 and likelihood +% model: trained model structure +% X: d x n testing data +% t (optional): 1 x n testing response +% Written by Mo Chen (sth4nth@gmail.com). 
+w = model.w; +w0 = model.w0; +y = w'*X+w0; +%% probability prediction +if nargout > 1 + alpha = model.alpha; + C = model.C; + S = beta + + + beta = model.beta; + U = model.U; % 3.54 + Xo = bsxfun(@minus,X,model.xbar); + XU = U'\Xo; + sigma = sqrt(1/beta+dot(XU,XU,1)); %3.59 +end + +if nargin == 3 && nargout == 3 + p = exp(logGauss(t,y,sigma)); +% p = exp(-0.5*(((t-y)./sigma).^2+log(2*pi))-log(sigma)); +end + diff --git a/chapter03/linPred.m b/chapter03/linPred.m index 1ebc439..719e5a0 100644 --- a/chapter03/linPred.m +++ b/chapter03/linPred.m @@ -7,13 +7,13 @@ w = model.w; w0 = model.w0; y = w'*X+w0; - +%% probability prediction if nargout > 1 beta = model.beta; U = model.U; % 3.54 Xo = bsxfun(@minus,X,model.xbar); XU = U'\Xo; - sigma = sqrt(1/beta+dot(XU,XU,1)); + sigma = sqrt((1+dot(XU,XU,1))/beta); %3.59 end if nargin == 3 && nargout == 3 diff --git a/chapter03/linReg.asv b/chapter03/linReg.asv new file mode 100644 index 0000000..d57dd01 --- /dev/null +++ b/chapter03/linReg.asv @@ -0,0 +1,33 @@ +function model = linReg(X, t, lambda) +% Fit linear regression model y=w'x+w0 +% X: d x n data +% t: 1 x n response +% Written by Mo Chen (sth4nth@gmail.com). +if nargin < 3 + lambda = 0; +end +d = size(X,1); +idx = (1:d)'; +dg = sub2ind([d,d],idx,idx); +xbar = mean(X,2); +tbar = mean(t,2); + +X = bsxfun(@minus,X,xbar); +t = bsxfun(@minus,t,tbar); + +C = X*X'; +C(dg) = C(dg)+lambda; % 3.54 C=inv(S)/beta +% w = C\(X*t'); +U = chol(C); +w = U\(U'\(X*t')); % 3.15 & 3.28 +w0 = tbar-dot(w,xbar); % 3.19 + +model.w = w; +model.w0 = w0; +model.xbar = xbar; +%% for probability prediction +beta = 1/mean((t-w'*X).^2); % 3.21 +% alpha = lambda*beta; % lambda=a/b P.153 3.55 +% model.alpha = alpha; +model.beta = beta; +model.U = U; diff --git a/chapter03/linReg.m b/chapter03/linReg.m index 2b9c7b9..f177a42 100644 --- a/chapter03/linReg.m +++ b/chapter03/linReg.m @@ -7,25 +7,27 @@ lambda = 0; end d = size(X,1); +idx = (1:d)'; +dg = sub2ind([d,d],idx,idx); + xbar = mean(X,2); tbar = mean(t,2); - X = bsxfun(@minus,X,xbar); t = bsxfun(@minus,t,tbar); -S = X*X'; -idx = (1:d)'; -dg = sub2ind([d,d],idx,idx); -S(dg) = S(dg)+lambda; -% w = S\(X*t'); -U = chol(S); +C = X*X'; +C(dg) = C(dg)+lambda; % 3.54 C=inv(S)/beta +% w = C\(X*t'); +U = chol(C); w = U\(U'\(X*t')); % 3.15 & 3.28 w0 = tbar-dot(w,xbar); % 3.19 -beta = 1/mean((t-w'*X).^2); % 3.21 model.w = w; model.w0 = w0; model.xbar = xbar; -%% optional for bayesian probabilistic inference purpose +%% for probability prediction +beta = 1/mean((t-w'*X).^2); % 3.21 +% alpha = lambda*beta; % lambda=a/b P.153 3.55 +% model.alpha = alpha; model.beta = beta; model.U = U; From f77fb114ed0a7508482bffca706f642596cda10f Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Wed, 16 Dec 2015 16:31:32 +0800 Subject: [PATCH 040/149] knReg.m knRegPred.m are finished --- chapter06/demo.m | 9 +++++++-- chapter06/knReg.m | 7 +++++-- chapter06/knRegPred.m | 8 ++++++-- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/chapter06/demo.m b/chapter06/demo.m index dac4237..984bf10 100755 --- a/chapter06/demo.m +++ b/chapter06/demo.m @@ -17,11 +17,16 @@ model_kn = knReg(x,t,lambda,@knLin); model_lin = linReg(x,t,lambda); -[y_kn, s_kn] = knRegPred(model_kn, x); -[y_lin, s_lin] = linPred(model_lin,x); +idx = 1:2:n; +xt = x(:,idx); +tt = t(idx); +[y_lin, s_lin,p_lin] = linPred(model_lin,xt,tt); +[y_kn, s_kn,p_kn] = knRegPred(model_kn, xt,tt); + maxabsdiff(y_kn,y_lin) maxabsdiff(s_kn,s_lin) +maxabsdiff(p_kn,p_lin) %% test case for knCenter % kn = @knGauss; % X=rand(2,100); diff --git 
a/chapter06/knReg.m b/chapter06/knReg.m index d0ae640..92ffc1f 100644 --- a/chapter06/knReg.m +++ b/chapter06/knReg.m @@ -11,12 +11,15 @@ tbar = mean(t); U = chol(K+lambda*eye(size(X,2))); % 6.62 a = U\(U'\(t(:)-tbar)); % 6.68 -y = a'*K+tbar; -beta = 1/mean((t-y).^2); % 3.21 model.kn = kn; model.a = a; model.X = X; model.tbar = tbar; +%% for probability prediction +y = a'*K+tbar; +beta = 1/mean((t-y).^2); % 3.21 +alpha = lambda*beta; % lambda=a/b P.153 3.55 +model.alpha = alpha; model.beta = beta; model.U = U; \ No newline at end of file diff --git a/chapter06/knRegPred.m b/chapter06/knRegPred.m index 5a7802c..6d2c247 100755 --- a/chapter06/knRegPred.m +++ b/chapter06/knRegPred.m @@ -1,5 +1,8 @@ function [y, sigma, p] = knRegPred(model, Xt, t) % Prediction for Gaussian Process (kernel) regression model +% model: trained model structure +% X: d x n testing data +% t (optional): 1 x n testing response % Written by Mo Chen (sth4nth@gmail.com). kn = model.kn; a = model.a; @@ -7,12 +10,13 @@ tbar = model.tbar; Kt = knCenter(kn,X,X,Xt); y = a'*Kt+tbar; - +%% probability prediction if nargout > 1 + alpha = model.alpha; beta = model.beta; U = model.U; XU = U'\Kt; - sigma = sqrt(knCenter(kn,X,Xt)+1/beta-dot(XU,XU,1)); % not right + sigma = sqrt(1/beta+(knCenter(kn,X,Xt)-dot(XU,XU,1))/alpha); % not right end if nargin == 3 && nargout == 3 From 4e2d376dbb442ce6da5f542ded5d3d85daacfb90 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Wed, 16 Dec 2015 16:32:54 +0800 Subject: [PATCH 041/149] remove comment --- chapter06/knRegPred.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chapter06/knRegPred.m b/chapter06/knRegPred.m index 6d2c247..373500f 100755 --- a/chapter06/knRegPred.m +++ b/chapter06/knRegPred.m @@ -16,7 +16,7 @@ beta = model.beta; U = model.U; XU = U'\Kt; - sigma = sqrt(1/beta+(knCenter(kn,X,Xt)-dot(XU,XU,1))/alpha); % not right + sigma = sqrt(1/beta+(knCenter(kn,X,Xt)-dot(XU,XU,1))/alpha); end if nargin == 3 && nargout == 3 From 7a188799565cab6c6b835a5293f2ffcc6bdaabc2 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Wed, 16 Dec 2015 17:31:08 +0800 Subject: [PATCH 042/149] remove temp files --- chapter03/linPred.asv | 28 -------------------- chapter03/linReg.asv | 33 ------------------------ chapter06/demo.m | 60 +++++++++++++++++++++++-------------------- 3 files changed, 32 insertions(+), 89 deletions(-) delete mode 100644 chapter03/linPred.asv delete mode 100644 chapter03/linReg.asv diff --git a/chapter03/linPred.asv b/chapter03/linPred.asv deleted file mode 100644 index 8528729..0000000 --- a/chapter03/linPred.asv +++ /dev/null @@ -1,28 +0,0 @@ -function [y, sigma, p] = linPred(model, X, t) -% Compute linear model reponse y = w'*X+w0 and likelihood -% model: trained model structure -% X: d x n testing data -% t (optional): 1 x n testing response -% Written by Mo Chen (sth4nth@gmail.com). 
-w = model.w; -w0 = model.w0; -y = w'*X+w0; -%% probability prediction -if nargout > 1 - alpha = model.alpha; - C = model.C; - S = beta - - - beta = model.beta; - U = model.U; % 3.54 - Xo = bsxfun(@minus,X,model.xbar); - XU = U'\Xo; - sigma = sqrt(1/beta+dot(XU,XU,1)); %3.59 -end - -if nargin == 3 && nargout == 3 - p = exp(logGauss(t,y,sigma)); -% p = exp(-0.5*(((t-y)./sigma).^2+log(2*pi))-log(sigma)); -end - diff --git a/chapter03/linReg.asv b/chapter03/linReg.asv deleted file mode 100644 index d57dd01..0000000 --- a/chapter03/linReg.asv +++ /dev/null @@ -1,33 +0,0 @@ -function model = linReg(X, t, lambda) -% Fit linear regression model y=w'x+w0 -% X: d x n data -% t: 1 x n response -% Written by Mo Chen (sth4nth@gmail.com). -if nargin < 3 - lambda = 0; -end -d = size(X,1); -idx = (1:d)'; -dg = sub2ind([d,d],idx,idx); -xbar = mean(X,2); -tbar = mean(t,2); - -X = bsxfun(@minus,X,xbar); -t = bsxfun(@minus,t,tbar); - -C = X*X'; -C(dg) = C(dg)+lambda; % 3.54 C=inv(S)/beta -% w = C\(X*t'); -U = chol(C); -w = U\(U'\(X*t')); % 3.15 & 3.28 -w0 = tbar-dot(w,xbar); % 3.19 - -model.w = w; -model.w0 = w0; -model.xbar = xbar; -%% for probability prediction -beta = 1/mean((t-w'*X).^2); % 3.21 -% alpha = lambda*beta; % lambda=a/b P.153 3.55 -% model.alpha = alpha; -model.beta = beta; -model.U = U; diff --git a/chapter06/demo.m b/chapter06/demo.m index 984bf10..b018e1f 100755 --- a/chapter06/demo.m +++ b/chapter06/demo.m @@ -4,34 +4,38 @@ x = linspace(0,2*pi,n); % test data t = sin(x)+rand(1,n)/2; -% model = knReg(x,t,1e-4,@knGauss); -% y = knRegPred(model, x); -% figure; -% hold on; -% plot(x,t,'o'); -% plot(x,y,'r-'); +model = knReg(x,t,1e-4,@knGauss); +y = knRegPred(model, x); +figure; +hold on; +plot(x,t,'o'); +plot(x,y,'r-'); -%% test case for kernel regression -lambda = 1e-4; -model_kn = knReg(x,t,lambda,@knLin); -model_lin = linReg(x,t,lambda); - -idx = 1:2:n; -xt = x(:,idx); -tt = t(idx); -[y_lin, s_lin,p_lin] = linPred(model_lin,xt,tt); -[y_kn, s_kn,p_kn] = knRegPred(model_kn, xt,tt); - - -maxabsdiff(y_kn,y_lin) -maxabsdiff(s_kn,s_lin) -maxabsdiff(p_kn,p_lin) -%% test case for knCenter -% kn = @knGauss; -% X=rand(2,100); -% X1=rand(2,10); -% X2=rand(2,5); +%% kernel regression with linear kernel is linear regression +% clear; close all; +% n = 100; +% x = linspace(0,2*pi,n); % test data +% t = sin(x)+rand(1,n)/2; +% lambda = 1e-4; +% model_kn = knReg(x,t,lambda,@knLin); +% model_lin = linReg(x,t,lambda); +% +% idx = 1:2:n; +% xt = x(:,idx); +% tt = t(idx); +% +% [y_kn, sigma_kn,p_kn] = knRegPred(model_kn,xt,tt); +% [y_lin, sigma_lin,p_lin] = linPred(model_lin,xt,tt); % -% isequalf(knCenter(kn,X,X1),diag(knCenter(kn,X,X1,X1))) -% isequalf(knCenter(kn,X),knCenter(kn,X,X,X)); \ No newline at end of file +% maxabsdiff(y_kn,y_lin) +% maxabsdiff(sigma_kn,sigma_lin) +% maxabsdiff(p_kn,p_lin) +% %% test case for knCenter +% % kn = @knGauss; +% % X=rand(2,100); +% % X1=rand(2,10); +% % X2=rand(2,5); +% % +% % isequalf(knCenter(kn,X,X1),diag(knCenter(kn,X,X1,X1))) +% % isequalf(knCenter(kn,X),knCenter(kn,X,X,X)); \ No newline at end of file From b461826e85481b1e6130b4b0fdd2f03a179ea057 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Wed, 16 Dec 2015 23:59:04 +0800 Subject: [PATCH 043/149] knKmeansPred is not finished --- chapter03/linPred.m | 2 +- chapter06/knKmeans.m | 10 +++++----- chapter06/knKmeansPred.m | 17 +++++++++++++---- chapter06/knRegPred.m | 2 +- 4 files changed, 20 insertions(+), 11 deletions(-) diff --git a/chapter03/linPred.m b/chapter03/linPred.m index 719e5a0..692ef17 100644 --- 
a/chapter03/linPred.m +++ b/chapter03/linPred.m @@ -1,6 +1,6 @@ function [y, sigma, p] = linPred(model, X, t) % Compute linear model reponse y = w'*X+w0 and likelihood -% model: trained model structure +% model: trained model structure % X: d x n testing data % t (optional): 1 x n testing response % Written by Mo Chen (sth4nth@gmail.com). diff --git a/chapter06/knKmeans.m b/chapter06/knKmeans.m index bf86519..7b38c9c 100755 --- a/chapter06/knKmeans.m +++ b/chapter06/knKmeans.m @@ -18,8 +18,8 @@ [val, label] = min(Z,[],1); end energy = sum(val)+trace(K); -% if nargout == 3 -% model.X = X; -% model.kn = kn; -% model.label = label; -% end \ No newline at end of file +if nargout == 3 + model.X = X; + model.label = label; + model.kn = kn; +end \ No newline at end of file diff --git a/chapter06/knKmeansPred.m b/chapter06/knKmeansPred.m index cfee0ac..b4b2cd5 100644 --- a/chapter06/knKmeansPred.m +++ b/chapter06/knKmeansPred.m @@ -1,7 +1,16 @@ -function [ output_args ] = knKmeansPred( input_args ) -%KNKMEANSPRED Summary of this function goes here -% Detailed explanation goes here +function label = knKmeansPred(model, Xt) +% Prediction for kernel kmeans clusterng +% model: trained model structure +% Xt: d x n testing data +% Written by Mo Chen (sth4nth@gmail.com). +X = model.X; +t = model.label; +kn = model.kn; +n = size(X,2); +k = max(t); -end +E = sparse(t,1:n,1,k,n,n); +E = bsxfun(@times,E,1./sum(E,2)); +[val, label] = min(Z,[],1); diff --git a/chapter06/knRegPred.m b/chapter06/knRegPred.m index 373500f..31c7c31 100755 --- a/chapter06/knRegPred.m +++ b/chapter06/knRegPred.m @@ -1,6 +1,6 @@ function [y, sigma, p] = knRegPred(model, Xt, t) % Prediction for Gaussian Process (kernel) regression model -% model: trained model structure +% model: trained model structure % X: d x n testing data % t (optional): 1 x n testing response % Written by Mo Chen (sth4nth@gmail.com). 
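The missing Z in knKmeansPred.m above is the kernelized point-to-centroid distance: for a cluster mean mu_c = (1/|c|)*sum_{i in c} phi(x_i), expanding gives ||phi(x) - mu_c||^2 = k(x,x) - (2/|c|)*sum_{i in c} k(x,x_i) + (1/|c|^2)*sum_{i,j in c} k(x_i,x_j). The k(x,x) term is constant across clusters and can be dropped; with the row-normalized indicator matrix E the remaining two terms are exactly -2*E*kn(X,Xt) and diag(E*kn(X,X)*E'), which the patches below fill in.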
From 73708764bf80d1c348868ba7cce8b6e3e3a59b1e Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Thu, 17 Dec 2015 05:28:12 +0800 Subject: [PATCH 044/149] knKmeansPred.m is not finished --- chapter06/demo.m | 36 ++++++++++++++++++++++-------------- chapter06/knKmeansPred.m | 5 +++-- functions/sigmoid.m | 2 -- 3 files changed, 25 insertions(+), 18 deletions(-) delete mode 100644 functions/sigmoid.m diff --git a/chapter06/demo.m b/chapter06/demo.m index b018e1f..1e14f3e 100755 --- a/chapter06/demo.m +++ b/chapter06/demo.m @@ -1,17 +1,25 @@ -clear; close all; - -n = 100; -x = linspace(0,2*pi,n); % test data -t = sin(x)+rand(1,n)/2; - -model = knReg(x,t,1e-4,@knGauss); -y = knRegPred(model, x); -figure; -hold on; -plot(x,t,'o'); -plot(x,y,'r-'); - - +% clear; close all; +% +% n = 100; +% x = linspace(0,2*pi,n); % test data +% t = sin(x)+rand(1,n)/2; +% +% model = knReg(x,t,1e-4,@knGauss); +% y = knRegPred(model, x); +% figure; +% hold on; +% plot(x,t,'o'); +% plot(x,y,'r-'); +d=2; +k=3; +n=1000; +[X,y]=kmeansRnd(d,k,n); +spread(X,y); +[label, energy, model] = knKmeans(X, k, @knLin); +idx = 1:2:n; +Xt = X(:,idx); +label = knKmeansPred(model, Xt); +spread(Xt,label) %% kernel regression with linear kernel is linear regression % clear; close all; % n = 100; diff --git a/chapter06/knKmeansPred.m b/chapter06/knKmeansPred.m index b4b2cd5..eb3dca3 100644 --- a/chapter06/knKmeansPred.m +++ b/chapter06/knKmeansPred.m @@ -12,5 +12,6 @@ E = sparse(t,1:n,1,k,n,n); E = bsxfun(@times,E,1./sum(E,2)); - -[val, label] = min(Z,[],1); +T = E*kn(X,Xt); +Z = repmat(diag(T*E'),1,n)-2*T; +[~, label] = min(Z,[],1); diff --git a/functions/sigmoid.m b/functions/sigmoid.m deleted file mode 100644 index 7fcbbcf..0000000 --- a/functions/sigmoid.m +++ /dev/null @@ -1,2 +0,0 @@ -function y = sigmoid(x) -y = 1./(1+exp(-x)); \ No newline at end of file From 72322e9b91a9a594c493a476386840b7f85a7fb1 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 18 Dec 2015 00:17:02 +0800 Subject: [PATCH 045/149] energy of knkmeans and kmeans are not equal --- README.md | 6 ++++++ chapter06/demo.m | 21 +++++++++++---------- chapter06/knKmeans.m | 25 +++++++++++++++++--------- chapter06/knKmeansPred.m | 4 +--- chapter09/kmeans.m | 15 +++++++++++---- 5 files changed, 46 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index b416d95..520c807 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,12 @@ Pattern Recognition and Machine Learning This package contains the matlab implementation of the algorithms described in the book: Pattern Recognition and Machine Learning by C. Bishop (http://research.microsoft.com/en-us/um/people/cmbishop/prml/) +The goals of the code are as follows: +1)clean. the code is kept as clean as possible; there is little nasty guarding code to distract the reader's attention, so that the core of each algorithm is easy to spot. +2)efficient. matlab vectorization tricks are used as much as possible to make the functions fast; many are even comparable with C implementations. Usually, the functions in this package are orders of magnitude faster than the matlab builtin functions that provide the same functionality (such as kmeans). If anyone can find a matlab implementation that is faster than my code, I am happy to do further optimization. +3)robust. many numerical stability techniques are applied to avoid numerical underflow and overflow, which often happen when dealing with high dimensional data. +4)easy to learn. 
the code is heavily commented, and the reference formulas in the PRML book are indicated for the corresponding code lines. +5)practical. the package is designed not only for users to learn the algorithms in the book, but also to facilitate ML research. Many functions in this package are already among the top downloads in the Matlab file exchange and are widely used. License ------- diff --git a/chapter06/demo.m b/chapter06/demo.m index 1e14f3e..120536d 100755 --- a/chapter06/demo.m +++ b/chapter06/demo.m @@ -10,16 +10,17 @@ % hold on; % plot(x,t,'o'); % plot(x,y,'r-'); -d=2; -k=3; -n=1000; -[X,y]=kmeansRnd(d,k,n); -spread(X,y); -[label, energy, model] = knKmeans(X, k, @knLin); -idx = 1:2:n; -Xt = X(:,idx); -label = knKmeansPred(model, Xt); -spread(Xt,label) +d = 2; +k = 3; +n = 500; +[X,y] = kmeansRnd(d,k,n); +label = ceil(k*rand(1,n)); +[y1,en1] = knKmeans(X, label, @knLin); +[y2,en2] = kmeans(X,label); +% idx = 1:2:n; +% Xt = X(:,idx); +% label = knKmeansPred(model, Xt); +% spread(Xt,label) %% kernel regression with linear kernel is linear regression % clear; close all; % n = 100; diff --git a/chapter06/knKmeans.m b/chapter06/knKmeans.m index 7b38c9c..76bdb1e 100755 --- a/chapter06/knKmeans.m +++ b/chapter06/knKmeans.m @@ -1,25 +1,34 @@ -function [label, energy, model] = knKmeans(X, k, kn) +function [label, energy, model] = knKmeans(X, init, kn) % Perform kernel k-means clustering. % K: nxn kernel matrix % k: number of cluster % Reference: Kernel Methods for Pattern Analysis % by John Shawe-Taylor, Nello Cristianini % Written by Mo Chen (sth4nth@gmail.com). +n = size(X,2); +if numel(init)==1 + k = init; + label = ceil(k*rand(1,n)); +elseif numel(init)==n + label = init; + k = max(label); +end K = kn(X,X); -n = size(X,2); -label = ceil(k*rand(1,n)); last = 0; while any(label ~= last) E = sparse(label,1:n,1,k,n,n); - E = bsxfun(@times,E,1./sum(E,2)); + E = spdiags(1./sum(E,2),0,k,k)*E; T = E*K; - Z = repmat(diag(T*E'),1,n)-2*T; last = label; - [val, label] = min(Z,[],1); + [val, label] = max(bsxfun(@minus,2*T,diag(T*E')),[],1); +% [val, label] = max(bsxfun(@minus,2*T,dot(T,E,2)),[],1); end -energy = sum(val)+trace(K); +energy = trace(K)-sum(val); % not consist with kmeans if nargout == 3 model.X = X; model.label = label; model.kn = kn; -end \ No newline at end of file +end +% TODO: +% 2) test equivalency with kmeans +% 3) test predict diff --git a/chapter06/knKmeansPred.m b/chapter06/knKmeansPred.m index eb3dca3..309ebf9 100644 --- a/chapter06/knKmeansPred.m +++ b/chapter06/knKmeansPred.m @@ -9,9 +9,7 @@ n = size(X,2); k = max(t); - E = sparse(t,1:n,1,k,n,n); E = bsxfun(@times,E,1./sum(E,2)); -T = E*kn(X,Xt); -Z = repmat(diag(T*E'),1,n)-2*T; +Z = bsxfun(@plus,-2*E*kn(X,Xt),diag(E*kn(X,X)*E')); [~, label] = min(Z,[],1); diff --git a/chapter09/kmeans.m b/chapter09/kmeans.m index 5e94fde..5a51be5 100644 --- a/chapter09/kmeans.m +++ b/chapter09/kmeans.m @@ -1,14 +1,21 @@ -function label = kmeans(X, k) -% Perform k-means clustering. +function [label, energy] = kmeans(X, init) +% Perform k-means clustering. % X: d x n data matrix % k: number of seeds % Written by Mo Chen (sth4nth@gmail.com).
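+% init: either the number of clusters k, or a 1 x n vector of initial labels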
n = size(X,2); +if numel(init)==1 + k = init; + label = ceil(k*rand(1,n)); +elseif numel(init)==n + label = init; + k = max(label); +end last = 0; -label = ceil(k*rand(1,n)); % random initialization while any(label ~= last) E = sparse(1:n,label,1,n,k,n); % transform label into indicator matrix m = X*(E*spdiags(1./sum(E,1)',0,k,k)); % compute m of each cluster last = label; - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); % assign samples to the nearest centers + [val,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); % assign samples to the nearest centers end +energy = dot(X(:),X(:))-sum(val); % not consist with knKmeans \ No newline at end of file From adffeff4587f5d0ddd1a6792d03862355d5150d8 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 18 Dec 2015 11:32:42 +0800 Subject: [PATCH 046/149] kmeans and knKmeans return energy now. equivalency is verified --- chapter06/knKmeans.m | 4 ++-- chapter09/kmeans.m | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/chapter06/knKmeans.m b/chapter06/knKmeans.m index 76bdb1e..45870ba 100755 --- a/chapter06/knKmeans.m +++ b/chapter06/knKmeans.m @@ -20,10 +20,10 @@ E = spdiags(1./sum(E,2),0,k,k)*E; T = E*K; last = label; - [val, label] = max(bsxfun(@minus,2*T,diag(T*E')),[],1); + [val, label] = max(bsxfun(@minus,T,diag(T*E')/2),[],1); % [val, label] = max(bsxfun(@minus,2*T,dot(T,E,2)),[],1); end -energy = trace(K)-sum(val); % not consist with kmeans +energy = trace(K)-2*sum(val); % not consist with kmeans if nargout == 3 model.X = X; model.label = label; diff --git a/chapter09/kmeans.m b/chapter09/kmeans.m index 5a51be5..158faf2 100644 --- a/chapter09/kmeans.m +++ b/chapter09/kmeans.m @@ -18,4 +18,4 @@ last = label; [val,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); % assign samples to the nearest centers end -energy = dot(X(:),X(:))-sum(val); % not consist with knKmeans \ No newline at end of file +energy = dot(X(:),X(:))-2*sum(val); % not consist with knKmeans \ No newline at end of file From 0f5fc002c6b7211006fec015331bd611a7a4b99f Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 18 Dec 2015 11:58:45 +0800 Subject: [PATCH 047/149] add prediction for kmeans and knKmeans --- chapter06/demo.m | 38 ++++++++++++++++++++++++++++++-------- chapter06/knKmeansPred.m | 7 ++++--- chapter09/kmeans.m | 5 +++-- chapter09/kmeansPred.m | 7 +++++++ functions/pdist2.m | 6 ++++++ 5 files changed, 50 insertions(+), 13 deletions(-) create mode 100644 chapter09/kmeansPred.m create mode 100644 functions/pdist2.m diff --git a/chapter06/demo.m b/chapter06/demo.m index 120536d..73b9570 100755 --- a/chapter06/demo.m +++ b/chapter06/demo.m @@ -21,6 +21,28 @@ % Xt = X(:,idx); % label = knKmeansPred(model, Xt); % spread(Xt,label) + +%% kernel kmeans with linear kernel is kmeans +d = 2; +k = 3; +n = 500; +[X,y] = kmeansRnd(d,k,n); +init = ceil(k*rand(1,n)); +[y_kn,en_kn,model_kn] = knKmeans(X,init,@knLin); +[y_lin,en_lin,model_lin] = kmeans(X,init); + +idx = 1:2:n; +Xt = X(:,idx); + +[t_kn,ent_kn] = knKmeansPred(model_kn, Xt); +[t_lin,ent_lin] = kmeansPred(model_lin, Xt); + +maxabsdiff(y_kn,y_lin) +maxabsdiff(en_kn,en_lin) + +maxabsdiff(t_kn,t_lin) +maxabsdiff(ent_kn,ent_lin) + %% kernel regression with linear kernel is linear regression % clear; close all; % n = 100; @@ -40,11 +62,11 @@ % maxabsdiff(y_kn,y_lin) % maxabsdiff(sigma_kn,sigma_lin) % maxabsdiff(p_kn,p_lin) -% %% test case for knCenter -% % kn = @knGauss; -% % X=rand(2,100); -% % X1=rand(2,10); -% % X2=rand(2,5); -% % -% % isequalf(knCenter(kn,X,X1),diag(knCenter(kn,X,X1,X1))) 
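% % isequalf(knCenter(kn,X,X1,X2),knCenter(kn,X,X2,X1)')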
-% % isequalf(knCenter(kn,X),knCenter(kn,X,X,X)); \ No newline at end of file
+%% test case for knCenter
+% kn = @knGauss;
+% X=rand(2,100);
+% X1=rand(2,10);
+% X2=rand(2,5);
+%
+% isequalf(knCenter(kn,X,X1),diag(knCenter(kn,X,X1,X1)))
+% isequalf(knCenter(kn,X),knCenter(kn,X,X,X)); \ No newline at end of file
diff --git a/chapter06/knKmeansPred.m b/chapter06/knKmeansPred.m
index 309ebf9..2458d58 100644
--- a/chapter06/knKmeansPred.m
+++ b/chapter06/knKmeansPred.m
@@ -1,4 +1,4 @@
-function label = knKmeansPred(model, Xt)
+function [label, energy] = knKmeansPred(model, Xt)
 % Prediction for kernel kmeans clustering
 % model: trained model structure
 % Xt: d x n testing data
@@ -11,5 +11,6 @@
 k = max(t);
 E = sparse(t,1:n,1,k,n,n);
 E = bsxfun(@times,E,1./sum(E,2));
-Z = bsxfun(@plus,-2*E*kn(X,Xt),diag(E*kn(X,X)*E'));
-[~, label] = min(Z,[],1);
+Z = bsxfun(@minus,E*kn(X,Xt),diag(E*kn(X,X)*E')/2);
+[val, label] = max(Z,[],1);
+energy = sum(kn(Xt))-2*sum(val);
diff --git a/chapter09/kmeans.m b/chapter09/kmeans.m
index 158faf2..c23e046 100644
--- a/chapter09/kmeans.m
+++ b/chapter09/kmeans.m
@@ -1,4 +1,4 @@
-function [label, energy] = kmeans(X, init)
+function [label, energy, model] = kmeans(X, init)
 % Perform k-means clustering.
 % X: d x n data matrix
 % k: number of seeds
@@ -18,4 +18,5 @@
 last = label;
 [val,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); % assign samples to the nearest centers
 end
-energy = dot(X(:),X(:))-2*sum(val); % not consist with knKmeans \ No newline at end of file
+energy = dot(X(:),X(:))-2*sum(val); % not consist with knKmeans
+model.means = m; \ No newline at end of file
diff --git a/chapter09/kmeansPred.m b/chapter09/kmeansPred.m
new file mode 100644
index 0000000..73a55b8
--- /dev/null
+++ b/chapter09/kmeansPred.m
@@ -0,0 +1,7 @@
+function [label, energy] = kmeansPred(model, Xt)
+% Prediction for kmeans clustering
+% model: trained model structure
+% Xt: d x n testing data
+% Written by Mo Chen (sth4nth@gmail.com).
+[val,label] = min(pdist2(model.means, Xt));
+energy = sum(val); \ No newline at end of file
diff --git a/functions/pdist2.m b/functions/pdist2.m
new file mode 100644
index 0000000..3814d05
--- /dev/null
+++ b/functions/pdist2.m
@@ -0,0 +1,6 @@
+function D = pdist2(X1, X2)
+% Pairwise squared Euclidean distance between two sample sets
+% X1, X2: dxn1 dxn2 sample matrices
+% Written by Mo Chen (sth4nth@gmail.com).
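% The one-line implementation that follows relies on the expansion
%   ||x1 - x2||^2 = x1'*x1 + x2'*x2 - 2*x1'*x2
% applied to all column pairs at once, so D(i,j) is the squared distance
% between X1(:,i) and X2(:,j). A quick sanity check against an explicit
% computation (a sketch; the sizes are arbitrary):
%   X1 = randn(3,4); X2 = randn(3,5);
%   D = pdist2(X1,X2);
%   isequalf(D(2,3), sum((X1(:,2)-X2(:,3)).^2))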
+D = bsxfun(@plus,dot(X2,X2,1),dot(X1,X1,1)')-2*(X1'*X2); + From 4eea4adbb237ef55899120ed41e58d8435bb3c6f Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 18 Dec 2015 12:00:02 +0800 Subject: [PATCH 048/149] remove comment --- chapter06/knKmeans.m | 5 +---- chapter09/kmeans.m | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/chapter06/knKmeans.m b/chapter06/knKmeans.m index 45870ba..558d2f5 100755 --- a/chapter06/knKmeans.m +++ b/chapter06/knKmeans.m @@ -23,12 +23,9 @@ [val, label] = max(bsxfun(@minus,T,diag(T*E')/2),[],1); % [val, label] = max(bsxfun(@minus,2*T,dot(T,E,2)),[],1); end -energy = trace(K)-2*sum(val); % not consist with kmeans +energy = trace(K)-2*sum(val); if nargout == 3 model.X = X; model.label = label; model.kn = kn; end -% TODO: -% 2) test equivalency with kmeans -% 3) test predict diff --git a/chapter09/kmeans.m b/chapter09/kmeans.m index c23e046..0ce2cf2 100644 --- a/chapter09/kmeans.m +++ b/chapter09/kmeans.m @@ -18,5 +18,5 @@ last = label; [val,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); % assign samples to the nearest centers end -energy = dot(X(:),X(:))-2*sum(val); % not consist with knKmeans +energy = dot(X(:),X(:))-2*sum(val); model.means = m; \ No newline at end of file From 1d8a8d873526ffece47288f17d4f0b866d02d006 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 18 Dec 2015 12:09:54 +0800 Subject: [PATCH 049/149] knRegPlot is finished --- chapter03/demo.m | 6 ++--- chapter03/linRnd.m | 8 ++---- chapter06/demo.m | 66 ++++++++++++++++++---------------------------- 3 files changed, 29 insertions(+), 51 deletions(-) diff --git a/chapter03/demo.m b/chapter03/demo.m index 8c7587e..70fdcb6 100644 --- a/chapter03/demo.m +++ b/chapter03/demo.m @@ -3,20 +3,18 @@ clear; close all; d = 1; n = 200; -[x,t,model] = linRnd(d,n); -linPlot(model,x,t); +[x,t] = linRnd(d,n); %% model = linReg(x,t); linPlot(model,x,t); fprintf('Press any key to continue. \n'); -pause %% [model,llh] = linRegEbEm(x,t); linPlot(model,x,t); figure; plot(llh); fprintf('Press any key to continue. 
\n'); -pause + %% [model,llh] = linRegEbFp(x,t); [y, sigma] = linPred(model,x,t); diff --git a/chapter03/linRnd.m b/chapter03/linRnd.m index c1e7539..9005c94 100644 --- a/chapter03/linRnd.m +++ b/chapter03/linRnd.m @@ -1,4 +1,4 @@ -function [X, t, model] = linRnd(d, n) +function [X, t] = linRnd(d, n) % Generate data from a linear model p(t|w,x)=G(w'x+w0,sigma), sigma=sqrt(1/beta) % where w and w0 are generated from Gauss(0,1), % beta is generated from Gamma(1,1), @@ -10,8 +10,4 @@ w = randn(d,1); w0 = randn(1,1); err = randn(1,n)/sqrt(beta); -t = w'*X+w0+err; - -model.w = w; -model.w0 = w0; -model.beta = beta; \ No newline at end of file +t = w'*X+w0+err; \ No newline at end of file diff --git a/chapter06/demo.m b/chapter06/demo.m index 73b9570..5e3810d 100755 --- a/chapter06/demo.m +++ b/chapter06/demo.m @@ -1,47 +1,31 @@ -% clear; close all; -% -% n = 100; -% x = linspace(0,2*pi,n); % test data -% t = sin(x)+rand(1,n)/2; +clear; close all; + +n = 100; +x = linspace(0,2*pi,n); % test data +t = sin(x)+rand(1,n)/2; + +model = knReg(x,t,1e-4,@knGauss); +knRegPlot(model,x,t); +%% kernel kmeans with linear kernel is kmeans +% d = 2; +% k = 3; +% n = 500; +% [X,y] = kmeansRnd(d,k,n); +% init = ceil(k*rand(1,n)); +% [y_kn,en_kn,model_kn] = knKmeans(X,init,@knLin); +% [y_lin,en_lin,model_lin] = kmeans(X,init); % -% model = knReg(x,t,1e-4,@knGauss); -% y = knRegPred(model, x); -% figure; -% hold on; -% plot(x,t,'o'); -% plot(x,y,'r-'); -d = 2; -k = 3; -n = 500; -[X,y] = kmeansRnd(d,k,n); -label = ceil(k*rand(1,n)); -[y1,en1] = knKmeans(X, label, @knLin); -[y2,en2] = kmeans(X,label); % idx = 1:2:n; % Xt = X(:,idx); -% label = knKmeansPred(model, Xt); -% spread(Xt,label) - -%% kernel kmeans with linear kernel is kmeans -d = 2; -k = 3; -n = 500; -[X,y] = kmeansRnd(d,k,n); -init = ceil(k*rand(1,n)); -[y_kn,en_kn,model_kn] = knKmeans(X,init,@knLin); -[y_lin,en_lin,model_lin] = kmeans(X,init); - -idx = 1:2:n; -Xt = X(:,idx); - -[t_kn,ent_kn] = knKmeansPred(model_kn, Xt); -[t_lin,ent_lin] = kmeansPred(model_lin, Xt); - -maxabsdiff(y_kn,y_lin) -maxabsdiff(en_kn,en_lin) - -maxabsdiff(t_kn,t_lin) -maxabsdiff(ent_kn,ent_lin) +% +% [t_kn,ent_kn] = knKmeansPred(model_kn, Xt); +% [t_lin,ent_lin] = kmeansPred(model_lin, Xt); +% +% maxabsdiff(y_kn,y_lin) +% maxabsdiff(en_kn,en_lin) +% +% maxabsdiff(t_kn,t_lin) +% maxabsdiff(ent_kn,ent_lin) %% kernel regression with linear kernel is linear regression % clear; close all; From 4af6301e646f7654e80f71af983ac4eeefb46303 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 18 Dec 2015 13:34:26 +0800 Subject: [PATCH 050/149] pca --- chapter06/demo.m | 3 +++ chapter06/knKmeans.m | 2 +- chapter06/knPca.m | 6 +++++- chapter06/knPcaPred.m | 10 ++++++---- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/chapter06/demo.m b/chapter06/demo.m index 5e3810d..3aeff77 100755 --- a/chapter06/demo.m +++ b/chapter06/demo.m @@ -46,6 +46,9 @@ % maxabsdiff(y_kn,y_lin) % maxabsdiff(sigma_kn,sigma_lin) % maxabsdiff(p_kn,p_lin) +%% kernel PCA with linear kernel is PCA +% clear; close all; +% n = 100; %% test case for knCenter % kn = @knGauss; % X=rand(2,100); diff --git a/chapter06/knKmeans.m b/chapter06/knKmeans.m index 558d2f5..0a052b4 100755 --- a/chapter06/knKmeans.m +++ b/chapter06/knKmeans.m @@ -1,7 +1,7 @@ function [label, energy, model] = knKmeans(X, init, kn) % Perform kernel k-means clustering. 
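% Usage sketch for the updated interface (per the doc line below, init may be
% either a cluster count or an initial labeling; kmeansRnd and knGauss are
% assumed to be on the path). Note the first comment line kept below still
% says "K: nxn kernel matrix" although the function now takes the data X:
%   [X,~] = kmeansRnd(2,3,300);
%   label = knKmeans(X, 3, @knGauss);                    % init = number of clusters
%   label = knKmeans(X, ceil(3*rand(1,300)), @knGauss);  % init = initial labels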
% K: nxn kernel matrix -% k: number of cluster +% init: either number of clusters or initial label % Reference: Kernel Methods for Pattern Analysis % by John Shawe-Taylor, Nello Cristianini % Written by Mo Chen (sth4nth@gmail.com). diff --git a/chapter06/knPca.m b/chapter06/knPca.m index 898b6c1..50e15b6 100644 --- a/chapter06/knPca.m +++ b/chapter06/knPca.m @@ -1,5 +1,9 @@ -function [ R, Z, err ] = knPca( X, d, kn ) +function [ R, Z, err, model] = knPca(X, d, kn) % Kernel PCA +% X: dxn data matrix +% d: target dimension +% kn: kernel function +% Written by Mo Chen (sth4nth@gmail.com). if nargin < 3 kn = @knGauss; end diff --git a/chapter06/knPcaPred.m b/chapter06/knPcaPred.m index 9213cf2..46bd9da 100644 --- a/chapter06/knPcaPred.m +++ b/chapter06/knPcaPred.m @@ -1,7 +1,9 @@ -function [ output_args ] = knPcaPred( input_args ) -%KNPCAPRED Summary of this function goes here -% Detailed explanation goes here +function X = knPcaPred(model, Xt) +% Prediction for kernel PCA +% model: trained model structure +% X: d x n testing data +% t (optional): 1 x n testing response +% Written by Mo Chen (sth4nth@gmail.com). -end From b7580260bf1bd3d850dad1d664add695f434d4e2 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 18 Dec 2015 18:02:06 +0800 Subject: [PATCH 051/149] pca and knpca not finished --- chapter06/knPca.m | 8 ++++---- chapter06/knRegPred.m | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/chapter06/knPca.m b/chapter06/knPca.m index 50e15b6..119770c 100644 --- a/chapter06/knPca.m +++ b/chapter06/knPca.m @@ -1,7 +1,7 @@ -function [ R, Z, err, model] = knPca(X, d, kn) +function [ R, Z, err, model] = knPca(X, p, kn) % Kernel PCA % X: dxn data matrix -% d: target dimension +% p: target dimension % kn: kernel function % Written by Mo Chen (sth4nth@gmail.com). if nargin < 3 @@ -12,8 +12,8 @@ K = knCenter(K); [V,A] = eig(K); [A,idx] = sort(diag(A),'descend'); -V = V(:,idx(1:d))'; -A = A(1:d); +V = V(:,idx(1:p))'; +A = A(1:p); R = bsxfun(@times,V,1./sqrt(A)); if nargout > 1 Z = bsxfun(@times,V,sqrt(A)); diff --git a/chapter06/knRegPred.m b/chapter06/knRegPred.m index 31c7c31..416d5d3 100755 --- a/chapter06/knRegPred.m +++ b/chapter06/knRegPred.m @@ -1,7 +1,7 @@ function [y, sigma, p] = knRegPred(model, Xt, t) % Prediction for Gaussian Process (kernel) regression model % model: trained model structure -% X: d x n testing data +% Xt: d x n testing data % t (optional): 1 x n testing response % Written by Mo Chen (sth4nth@gmail.com). kn = model.kn; From ba6dd0e85f091114a081c4c7801a914686707f54 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 18 Dec 2015 18:11:29 +0800 Subject: [PATCH 052/149] pca and pcaPred are rewritten (not finished). --- chapter06/pca.m | 13 +++++++++++++ chapter06/pcaPred.m | 6 ++++++ chapter06/pcaPred.m~ | 6 ++++++ chapter12/pca.m~ | 40 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 65 insertions(+) create mode 100644 chapter06/pca.m create mode 100644 chapter06/pcaPred.m create mode 100644 chapter06/pcaPred.m~ create mode 100644 chapter12/pca.m~ diff --git a/chapter06/pca.m b/chapter06/pca.m new file mode 100644 index 0000000..31e0ecf --- /dev/null +++ b/chapter06/pca.m @@ -0,0 +1,13 @@ +function model = pca( X, p ) +% Principal component analysis +% Written by Mo Chen (sth4nth@gmail.com). 
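% A note on the steps below (equation numbers refer to PRML): the data is
% centered, the sample covariance S = Xo*Xo'/n is formed (12.3), and its p
% leading eigenvectors are kept (12.5). eig returns eigenvalues in no
% guaranteed order, hence the explicit descending sort before truncation.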
+ +Xo = bsxfun(@minus,X,mean(X,2)); +S = Xo*Xo'/size(X,2); % 12.3 +[U,A] = eig(S); % 12.5 +[A,idx] = sort(diag(A),'descend'); +U = U(:,idx(1:p)); +A = A(1:p); + +model.U = U; +model.A = A; \ No newline at end of file diff --git a/chapter06/pcaPred.m b/chapter06/pcaPred.m new file mode 100644 index 0000000..681b688 --- /dev/null +++ b/chapter06/pcaPred.m @@ -0,0 +1,6 @@ +function = pcaPred( model, Xt ) +% Prediction for PCA: project future data to principal subspace +% model: trained model structure +% Xt: d x n testing data +% Written by Mo Chen (sth4nth@gmail.com). + diff --git a/chapter06/pcaPred.m~ b/chapter06/pcaPred.m~ new file mode 100644 index 0000000..681b688 --- /dev/null +++ b/chapter06/pcaPred.m~ @@ -0,0 +1,6 @@ +function = pcaPred( model, Xt ) +% Prediction for PCA: project future data to principal subspace +% model: trained model structure +% Xt: d x n testing data +% Written by Mo Chen (sth4nth@gmail.com). + diff --git a/chapter12/pca.m~ b/chapter12/pca.m~ new file mode 100644 index 0000000..dfe6561 --- /dev/null +++ b/chapter12/pca.m~ @@ -0,0 +1,40 @@ +function [V, A] = pca(X, p) +% Perform standard PCA (spectral method). +% X: d x n data matrix +% p: dimension of target space (p>=1) or ratio (0= min(d,n) % full pca + [V,A] = svd(X,'econ'); + A = diag(A).^2; +elseif d <= n % covariance based pca + [V,A] = eigs(X*X',p,'la',opts); + A = diag(A); +elseif d > n % inner product based pca + [U,A] = eigs(X'*X,p,'la',opts); + A = diag(A); + V = X*bsxfun(@times,U,1./sqrt(A)'); +end + From a22dee3f79bb11a2ff47e81a824330a25ede1368 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 19 Dec 2015 00:35:46 +0800 Subject: [PATCH 053/149] fix function name --- chapter11/gaussRnd.m | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chapter11/gaussRnd.m b/chapter11/gaussRnd.m index 6693403..ee88648 100644 --- a/chapter11/gaussRnd.m +++ b/chapter11/gaussRnd.m @@ -1,4 +1,4 @@ -function x = rndGauss(mu,Sigma,n) +function x = gaussRnd(mu,Sigma,n) % Sampling from a Gaussian distribution. % Written by Mo Chen (sth4nth@gmail.com). if nargin == 2 From b9e8d149947f2e6503d26a3870cfe8cff9bc38e9 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 19 Dec 2015 00:36:27 +0800 Subject: [PATCH 054/149] pca and pred --- chapter06/pca.m | 15 ++++++++------- chapter06/pcaPred.m | 11 ++++++++++- chapter06/pcaPred.m~ | 6 ------ 3 files changed, 18 insertions(+), 14 deletions(-) delete mode 100644 chapter06/pcaPred.m~ diff --git a/chapter06/pca.m b/chapter06/pca.m index 31e0ecf..5c8a625 100644 --- a/chapter06/pca.m +++ b/chapter06/pca.m @@ -1,13 +1,14 @@ -function model = pca( X, p ) +function model = pca(X, p) % Principal component analysis % Written by Mo Chen (sth4nth@gmail.com). - -Xo = bsxfun(@minus,X,mean(X,2)); +xbar = mean(X,2); +Xo = bsxfun(@minus,X,xbar); S = Xo*Xo'/size(X,2); % 12.3 -[U,A] = eig(S); % 12.5 -[A,idx] = sort(diag(A),'descend'); +[U,L] = eig(S); % 12.5 +[L,idx] = sort(diag(L),'descend'); U = U(:,idx(1:p)); -A = A(1:p); +L = L(1:p); +model.xbar = xbar; model.U = U; -model.A = A; \ No newline at end of file +model.L = L; diff --git a/chapter06/pcaPred.m b/chapter06/pcaPred.m index 681b688..73c0cba 100644 --- a/chapter06/pcaPred.m +++ b/chapter06/pcaPred.m @@ -1,6 +1,15 @@ -function = pcaPred( model, Xt ) +function Y = pcaPred( model, Xt, opt) % Prediction for PCA: project future data to principal subspace % model: trained model structure % Xt: d x n testing data % Written by Mo Chen (sth4nth@gmail.com). 
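% The body below projects test points onto the principal subspace,
%   Y = U'*(Xt - xbar),
% and, when opt.whiten is set (opt is assumed to be a struct with a logical
% whiten field), rescales component k by 1/sqrt(L(k)) so the projected
% coordinates have roughly unit variance in each direction.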
+xbar = model.xbar; +U = model.U; +Y = U'*bsxfun(@minus,Xt,xbar); +if nargin == 3 && opt.whiten + L = model.L; + Y = bsxfun(@times,Y,1./sqrt(L)); +end + + diff --git a/chapter06/pcaPred.m~ b/chapter06/pcaPred.m~ deleted file mode 100644 index 681b688..0000000 --- a/chapter06/pcaPred.m~ +++ /dev/null @@ -1,6 +0,0 @@ -function = pcaPred( model, Xt ) -% Prediction for PCA: project future data to principal subspace -% model: trained model structure -% Xt: d x n testing data -% Written by Mo Chen (sth4nth@gmail.com). - From 9f083e0bc8d9583f3eb4a0a8392db76926add6f8 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sat, 19 Dec 2015 00:37:14 +0800 Subject: [PATCH 055/149] knPcaPred is not finished. need test --- chapter06/demo.m | 11 +++++++++-- chapter06/knPca.m | 21 ++++++++++----------- chapter06/knPcaPred.m | 7 ++++++- 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/chapter06/demo.m b/chapter06/demo.m index 3aeff77..bbee3d9 100755 --- a/chapter06/demo.m +++ b/chapter06/demo.m @@ -47,8 +47,15 @@ % maxabsdiff(sigma_kn,sigma_lin) % maxabsdiff(p_kn,p_lin) %% kernel PCA with linear kernel is PCA -% clear; close all; -% n = 100; +clear; close all; +d = 10; +p = 2; +n = 500; +X = randn(d,n); + +model = pca(X,p); +Y = pcaPred(model,X); + %% test case for knCenter % kn = @knGauss; % X=rand(2,100); diff --git a/chapter06/knPca.m b/chapter06/knPca.m index 119770c..1675a25 100644 --- a/chapter06/knPca.m +++ b/chapter06/knPca.m @@ -1,4 +1,4 @@ -function [ R, Z, err, model] = knPca(X, p, kn) +function model = knPca(X, p, kn) % Kernel PCA % X: dxn data matrix % p: target dimension @@ -7,17 +7,16 @@ if nargin < 3 kn = @knGauss; end - K = kn(X,X); K = knCenter(K); -[V,A] = eig(K); -[A,idx] = sort(diag(A),'descend'); -V = V(:,idx(1:p))'; -A = A(1:p); -R = bsxfun(@times,V,1./sqrt(A)); +[V,L] = eig(K); +[L,idx] = sort(diag(L),'descend'); +V = V(:,idx(1:p)); +L = L(1:p)'; +U = bsxfun(@times,V,1./sqrt(L)); if nargout > 1 - Z = bsxfun(@times,V,sqrt(A)); + Z = bsxfun(@times,V,sqrt(L)); end -if nargout > 2 - err = diag(K)'-sum(Z.^2,1); -end \ No newline at end of file +model.V = V; +model.L = L; +model.X = X; \ No newline at end of file diff --git a/chapter06/knPcaPred.m b/chapter06/knPcaPred.m index 46bd9da..7d7e89e 100644 --- a/chapter06/knPcaPred.m +++ b/chapter06/knPcaPred.m @@ -1,9 +1,14 @@ -function X = knPcaPred(model, Xt) +function X = knPcaPred(model, Xt, opt) % Prediction for kernel PCA % model: trained model structure % X: d x n testing data % t (optional): 1 x n testing response % Written by Mo Chen (sth4nth@gmail.com). 
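% For kernel PCA, the projection of test data takes the form
%   Y = diag(1./sqrt(L)) * V' * Kc,
% where Kc is the train-test kernel centered with respect to the training
% set; the completed body in the next commit computes Kc via knCenter.
% (The version below was committed unfinished, as the commit message says.)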
+U = model.U; +L = model.L; +if nargin == 3 && opt.whiten + Y = ; +end From bb00b6dfe4bbd2de7010a352506ba77c12d36c3e Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sun, 20 Dec 2015 22:48:48 +0800 Subject: [PATCH 056/149] chapter06 is finished --- chapter06/demo.m | 54 +++++++++++++++++------------- chapter06/knPca.m | 11 +++--- chapter06/knPcaPred.m | 11 +++--- chapter06/pca.m | 14 -------- chapter12/pca.m | 48 ++++++++------------------ chapter12/pca.m~ | 40 ---------------------- {chapter06 => chapter12}/pcaPred.m | 0 7 files changed, 55 insertions(+), 123 deletions(-) delete mode 100644 chapter06/pca.m delete mode 100644 chapter12/pca.m~ rename {chapter06 => chapter12}/pcaPred.m (100%) diff --git a/chapter06/demo.m b/chapter06/demo.m index bbee3d9..c16fd32 100755 --- a/chapter06/demo.m +++ b/chapter06/demo.m @@ -1,31 +1,10 @@ clear; close all; - n = 100; x = linspace(0,2*pi,n); % test data t = sin(x)+rand(1,n)/2; model = knReg(x,t,1e-4,@knGauss); knRegPlot(model,x,t); -%% kernel kmeans with linear kernel is kmeans -% d = 2; -% k = 3; -% n = 500; -% [X,y] = kmeansRnd(d,k,n); -% init = ceil(k*rand(1,n)); -% [y_kn,en_kn,model_kn] = knKmeans(X,init,@knLin); -% [y_lin,en_lin,model_lin] = kmeans(X,init); -% -% idx = 1:2:n; -% Xt = X(:,idx); -% -% [t_kn,ent_kn] = knKmeansPred(model_kn, Xt); -% [t_lin,ent_lin] = kmeansPred(model_lin, Xt); -% -% maxabsdiff(y_kn,y_lin) -% maxabsdiff(en_kn,en_lin) -% -% maxabsdiff(t_kn,t_lin) -% maxabsdiff(ent_kn,ent_lin) %% kernel regression with linear kernel is linear regression % clear; close all; @@ -46,6 +25,27 @@ % maxabsdiff(y_kn,y_lin) % maxabsdiff(sigma_kn,sigma_lin) % maxabsdiff(p_kn,p_lin) +%% kernel kmeans with linear kernel is kmeans +% clear; close all; +% d = 2; +% k = 3; +% n = 500; +% [X,y] = kmeansRnd(d,k,n); +% init = ceil(k*rand(1,n)); +% [y_kn,en_kn,model_kn] = knKmeans(X,init,@knLin); +% [y_lin,en_lin,model_lin] = kmeans(X,init); +% +% idx = 1:2:n; +% Xt = X(:,idx); +% +% [t_kn,ent_kn] = knKmeansPred(model_kn, Xt); +% [t_lin,ent_lin] = kmeansPred(model_lin, Xt); +% +% maxabsdiff(y_kn,y_lin) +% maxabsdiff(en_kn,en_lin) +% +% maxabsdiff(t_kn,t_lin) +% maxabsdiff(ent_kn,ent_lin) %% kernel PCA with linear kernel is PCA clear; close all; d = 10; @@ -53,10 +53,18 @@ n = 500; X = randn(d,n); -model = pca(X,p); -Y = pcaPred(model,X); +model_lin = pca(X,p); +model_kn = knPca(X,p,@knLin); + +idx = 1:2:n; +Xt = X(:,idx); +Y_lin = pcaPred(model_lin,Xt); +Y_kn = knPcaPred(model_kn,Xt); + +R = Y_lin/Y_kn % the results are equivalent up to a rotation. %% test case for knCenter +% clear; close all; % kn = @knGauss; % X=rand(2,100); % X1=rand(2,10); diff --git a/chapter06/knPca.m b/chapter06/knPca.m index 1675a25..57718c7 100644 --- a/chapter06/knPca.m +++ b/chapter06/knPca.m @@ -7,16 +7,13 @@ if nargin < 3 kn = @knGauss; end -K = kn(X,X); -K = knCenter(K); +K = knCenter(kn,X); [V,L] = eig(K); [L,idx] = sort(diag(L),'descend'); V = V(:,idx(1:p)); -L = L(1:p)'; -U = bsxfun(@times,V,1./sqrt(L)); -if nargout > 1 - Z = bsxfun(@times,V,sqrt(L)); -end +L = L(1:p); + +model.kn = kn; model.V = V; model.L = L; model.X = X; \ No newline at end of file diff --git a/chapter06/knPcaPred.m b/chapter06/knPcaPred.m index 7d7e89e..0e61735 100644 --- a/chapter06/knPcaPred.m +++ b/chapter06/knPcaPred.m @@ -1,14 +1,15 @@ -function X = knPcaPred(model, Xt, opt) +function Y = knPcaPred(model, Xt, opt) % Prediction for kernel PCA % model: trained model structure % X: d x n testing data % t (optional): 1 x n testing response % Written by Mo Chen (sth4nth@gmail.com). 
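% knCenter(kn,X,X,Xt), used below, evaluates the kernel between training and
% test points after centering in the feature space of the training set
% (cf. the knCenter test case in chapter06/demo.m); without this centering,
% test projections would be inconsistent with the centered training embedding.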
- -U = model.U; +kn = model.kn; +V = model.V; L = model.L; - +X = model.X; +Y = bsxfun(@times,V'*knCenter(kn,X,X,Xt),1./sqrt(L)); if nargin == 3 && opt.whiten - Y = ; + Y = bsxfun(@times,Y,1./sqrt(L)); end diff --git a/chapter06/pca.m b/chapter06/pca.m deleted file mode 100644 index 5c8a625..0000000 --- a/chapter06/pca.m +++ /dev/null @@ -1,14 +0,0 @@ -function model = pca(X, p) -% Principal component analysis -% Written by Mo Chen (sth4nth@gmail.com). -xbar = mean(X,2); -Xo = bsxfun(@minus,X,xbar); -S = Xo*Xo'/size(X,2); % 12.3 -[U,L] = eig(S); % 12.5 -[L,idx] = sort(diag(L),'descend'); -U = U(:,idx(1:p)); -L = L(1:p); - -model.xbar = xbar; -model.U = U; -model.L = L; diff --git a/chapter12/pca.m b/chapter12/pca.m index ae59981..6adb7de 100644 --- a/chapter12/pca.m +++ b/chapter12/pca.m @@ -1,36 +1,16 @@ -function [V, A] = pca(X, p) -% Perform standard PCA (spectral method). -% X: d x n data matrix -% p: dimension of target space (p>=1) or ratio (0= min(d,n) % full pca - [V,A] = svd(X,'econ'); - A = diag(A).^2; -elseif d <= n % covariance based pca - [V,A] = eigs(X*X',p,'la',opts); - A = diag(A); -elseif d > n % inner product based pca - [U,A] = eigs(X'*X,p,'la',opts); - A = diag(A); - V = X*bsxfun(@times,U,1./sqrt(A)'); -end +xbar = mean(X,2); +Xo = bsxfun(@minus,X,xbar); +S = Xo*Xo'/size(X,2); % 12.3 +[U,L] = eig(S); % 12.5 +[L,idx] = sort(diag(L),'descend'); +U = U(:,idx(1:p)); +L = L(1:p); +model.xbar = xbar; +model.U = U; +model.L = L; diff --git a/chapter12/pca.m~ b/chapter12/pca.m~ deleted file mode 100644 index dfe6561..0000000 --- a/chapter12/pca.m~ +++ /dev/null @@ -1,40 +0,0 @@ -function [V, A] = pca(X, p) -% Perform standard PCA (spectral method). -% X: d x n data matrix -% p: dimension of target space (p>=1) or ratio (0= min(d,n) % full pca - [V,A] = svd(X,'econ'); - A = diag(A).^2; -elseif d <= n % covariance based pca - [V,A] = eigs(X*X',p,'la',opts); - A = diag(A); -elseif d > n % inner product based pca - [U,A] = eigs(X'*X,p,'la',opts); - A = diag(A); - V = X*bsxfun(@times,U,1./sqrt(A)'); -end - diff --git a/chapter06/pcaPred.m b/chapter12/pcaPred.m similarity index 100% rename from chapter06/pcaPred.m rename to chapter12/pcaPred.m From 41a978cc5aa108160a61664b8bcd8a927215b8a5 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sun, 20 Dec 2015 23:42:17 +0800 Subject: [PATCH 057/149] todo for sparse coding --- chapter07/demo.m | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/chapter07/demo.m b/chapter07/demo.m index f7650cb..c25c099 100755 --- a/chapter07/demo.m +++ b/chapter07/demo.m @@ -1,3 +1,8 @@ +% TODO: +% 1) sparse visualization +% 2) sparse data/demos +% 3) fix coordinate descent + % clear; close all; % %% regression @@ -35,7 +40,7 @@ [model,llh] = rvmRegEbCd(X,t); figure plot(llh); -[y, sigma] = linInfer(x,model,t); +[y, sigma] = linPred(x,model,t); figure; hold on; plotBand(x,y,2*sigma); From f1ec3611950e9dfafc019c4d1f125e617433d79e Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 25 Dec 2015 10:40:50 +0800 Subject: [PATCH 058/149] rename functions and add comments --- chapter07/rvmBinEbEm.m | 37 +++++++++++++++++++++++++++++++++++++ chapter07/rvmBinEbFp.m | 37 +++++++++++++++++++++++++++++++++++++ chapter07/rvmRegEbCd.m | 5 +++-- chapter07/rvmRegEbEm.m | 4 ++-- chapter07/rvmRegEbFp.m | 4 ++-- chapter07/rvmRegEbFpSvd.m | 4 ++-- 6 files changed, 83 insertions(+), 8 deletions(-) create mode 100644 chapter07/rvmBinEbEm.m create mode 100644 chapter07/rvmBinEbFp.m diff --git a/chapter07/rvmBinEbEm.m b/chapter07/rvmBinEbEm.m new file mode 100644 index 
0000000..f8d4b26 --- /dev/null +++ b/chapter07/rvmBinEbEm.m @@ -0,0 +1,37 @@ +function [model, llh] = rvmBinEbEm(X, t, alpha) +% Relevance Vector Machine (ARD sparse prior) for binary classification +% training by empirical bayesian (type II ML) using standard EM update +% Written by Mo Chen (sth4nth@gmail.com). +if nargin < 3 + alpha = 1; +end +n = size(X,2); +X = [X;ones(1,n)]; +d = size(X,1); +alpha = alpha*ones(d,1); +weight = zeros(d,1); + +tol = 1e-4; +maxiter = 100; +llh = -inf(1,maxiter); +infinity = 1e+10; +for iter = 2:maxiter + used = alpha < infinity; + a = alpha(used); + w = weight(used); + [w,energy,U] = optLogitNewton(X(used,:),t,a,w); + w2 = w.^2; + llh(iter) = energy(end)+0.5*(sum(log(a))-2*sum(log(diag(U)))-dot(a,w2)-n*log(2*pi)); % 7.114 + if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end + V = inv(U); + dgS = dot(V,V,2); + alpha(used) = 1./(w2+dgS); % 9.67 + weight(used) = w; +end +llh = llh(2:iter); + +model.used = used; +model.w = w; % nonzero elements of weight +model.a = a; % nonzero elements of alpha +model.weght = weight; +model.alpha = alpha; diff --git a/chapter07/rvmBinEbFp.m b/chapter07/rvmBinEbFp.m new file mode 100644 index 0000000..992d620 --- /dev/null +++ b/chapter07/rvmBinEbFp.m @@ -0,0 +1,37 @@ +function [model, llh] = rvmBinEbFp(X, t, alpha) +% Relevance Vector Machine (ARD sparse prior) for binary classification +% training by empirical bayesian (type II ML) using fix point update (Mackay update) +% Written by Mo Chen (sth4nth@gmail.com). +if nargin < 3 + alpha = 1; +end +n = size(X,2); +X = [X;ones(1,n)]; +d = size(X,1); +alpha = alpha*ones(d,1); +weight = zeros(d,1); + +tol = 1e-4; +maxiter = 100; +llh = -inf(1,maxiter); +infinity = 1e+10; +for iter = 2:maxiter + used = alpha < infinity; + a = alpha(used); + w = weight(used); + [w,energy,U] = optLogitNewton(X(used,:),t,a,w); + w2 = w.^2; + llh(iter) = energy(end)+0.5*(sum(log(a))-2*sum(log(diag(U)))-dot(a,w2)-n*log(2*pi)); % 7.114 + if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end + V = inv(U); + dgS = dot(V,V,2); + alpha(used) = (1-a.*dgS)./w2; % 7.89 & 7.87 + weight(used) = w; +end +llh = llh(2:iter); + +model.used = used; +model.w = w; % nonzero elements of weight +model.a = a; % nonzero elements of alpha +model.weght = weight; +model.alpha = alpha; diff --git a/chapter07/rvmRegEbCd.m b/chapter07/rvmRegEbCd.m index 84ed8c1..c1b493c 100644 --- a/chapter07/rvmRegEbCd.m +++ b/chapter07/rvmRegEbCd.m @@ -1,7 +1,8 @@ function [model, llh] = rvmRegEbCd(X, t) % TODO: llh not increasing. verify with sparse high dimensional data -% Sparse Bayesian Regression (RVM) using Coordinate Descent -% reference: +% Relevance Vector Machine (ARD sparse prior) for regression +% training by empirical bayesian (type II ML) using Coordinate Descent +% reference: (Fast RVM) % Tipping and Faul. Fast marginal likelihood maximisation for sparse Bayesian models. AISTATS 2003. % Written by Mo Chen (sth4nth@gmail.com). [d,n] = size(X); diff --git a/chapter07/rvmRegEbEm.m b/chapter07/rvmRegEbEm.m index df2973e..361e726 100644 --- a/chapter07/rvmRegEbEm.m +++ b/chapter07/rvmRegEbEm.m @@ -1,6 +1,6 @@ function [model, llh] = rvmRegEbEm(X, t, alpha, beta) -% Relevance Vector Machine regression training by empirical bayesian (ARD) -% using standard EM update +% Relevance Vector Machine (ARD sparse prior) for regression +% training by empirical bayesian (type II ML) using standard EM update % Written by Mo Chen (sth4nth@gmail.com). 
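% In the EM view of evidence maximization used here, the E-step computes the
% posterior mean and covariance of the weights, and the M-step re-estimates
% each ARD precision as alpha_j = 1/E[w_j^2] (eq. 9.67 in the code) together
% with the noise precision beta; weights whose alpha exceeds the cutoff are
% pruned, which is what produces a sparse solution.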
if nargin < 3 alpha = 0.02; diff --git a/chapter07/rvmRegEbFp.m b/chapter07/rvmRegEbFp.m index 8603bd8..564a59e 100644 --- a/chapter07/rvmRegEbFp.m +++ b/chapter07/rvmRegEbFp.m @@ -1,6 +1,6 @@ function [model, llh] = rvmRegEbFp(X, t, alpha, beta) -% Relevance Vector Machine regression training by empirical bayesian (ARD) -% using fix point update (Mackay update) +% Relevance Vector Machine (ARD sparse prior) for regression +% training by empirical bayesian (type II ML) using fix point update (Mackay update) % Written by Mo Chen (sth4nth@gmail.com). if nargin < 3 alpha = 0.02; diff --git a/chapter07/rvmRegEbFpSvd.m b/chapter07/rvmRegEbFpSvd.m index 3ded162..dda1a35 100644 --- a/chapter07/rvmRegEbFpSvd.m +++ b/chapter07/rvmRegEbFpSvd.m @@ -1,6 +1,6 @@ function [model, llh] = rvmRegEbFpSvd(X, t, alpha, beta) -% Relevance Vector Machine regression training by empirical bayesian (ARD) -% using fix point update (Mackay update) with SVD +% Relevance Vector Machine (ARD sparse prior) for regression +% training by empirical bayesian (type II ML) using fix point update (Mackay update) with SVD % Written by Mo Chen (sth4nth@gmail.com). if nargin < 3 alpha = 0.02; From cb99dd5a9aaa7a78e69367597431bf4642a42bf7 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 25 Dec 2015 10:53:49 +0800 Subject: [PATCH 059/149] remove old functions and add Pred function stub --- chapter07/demo.m | 1 + chapter07/rvmBinPred.m | 7 +++++++ chapter07/rvmEbEm.m | 37 ------------------------------------- chapter07/rvmEbFp.m | 37 ------------------------------------- chapter07/rvmRegPred.m | 7 +++++++ 5 files changed, 15 insertions(+), 74 deletions(-) create mode 100644 chapter07/rvmBinPred.m delete mode 100644 chapter07/rvmEbEm.m delete mode 100644 chapter07/rvmEbFp.m create mode 100644 chapter07/rvmRegPred.m diff --git a/chapter07/demo.m b/chapter07/demo.m index c25c099..972a092 100755 --- a/chapter07/demo.m +++ b/chapter07/demo.m @@ -2,6 +2,7 @@ % 1) sparse visualization % 2) sparse data/demos % 3) fix coordinate descent +% 4) sparse prediction for regression and classification % clear; close all; % diff --git a/chapter07/rvmBinPred.m b/chapter07/rvmBinPred.m new file mode 100644 index 0000000..7fcd026 --- /dev/null +++ b/chapter07/rvmBinPred.m @@ -0,0 +1,7 @@ +function [ output_args ] = rvmBinPred( input_args ) +%RVMBINPRED Summary of this function goes here +% Detailed explanation goes here + + +end + diff --git a/chapter07/rvmEbEm.m b/chapter07/rvmEbEm.m deleted file mode 100644 index 6e3c206..0000000 --- a/chapter07/rvmEbEm.m +++ /dev/null @@ -1,37 +0,0 @@ -function [model, llh] = rvmEbEm(X, t, alpha) -% Relevance Vector Machine classification training by empirical bayesian (ARD) -% using standard EM update -% Written by Mo Chen (sth4nth@gmail.com). 
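% The binary RVM trainers deleted here (re-added above under the rvmBin*
% names with identical bodies) use a Laplace approximation: the weight mode
% comes from optLogitNewton, and the log evidence (eq. 7.114) is evaluated
% at that mode, with log|A| = 2*sum(log(diag(U))) read off the Cholesky
% factor of the Hessian.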
-if nargin < 3 - alpha = 1; -end -n = size(X,2); -X = [X;ones(1,n)]; -d = size(X,1); -alpha = alpha*ones(d,1); -weight = zeros(d,1); - -tol = 1e-4; -maxiter = 100; -llh = -inf(1,maxiter); -infinity = 1e+10; -for iter = 2:maxiter - used = alpha < infinity; - a = alpha(used); - w = weight(used); - [w,energy,U] = optLogitNewton(X(used,:),t,a,w); - w2 = w.^2; - llh(iter) = energy(end)+0.5*(sum(log(a))-2*sum(log(diag(U)))-dot(a,w2)-n*log(2*pi)); % 7.114 - if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end - V = inv(U); - dgS = dot(V,V,2); - alpha(used) = 1./(w2+dgS); % 9.67 - weight(used) = w; -end -llh = llh(2:iter); - -model.used = used; -model.w = w; % nonzero elements of weight -model.a = a; % nonzero elements of alpha -model.weght = weight; -model.alpha = alpha; diff --git a/chapter07/rvmEbFp.m b/chapter07/rvmEbFp.m deleted file mode 100644 index b30cfdb..0000000 --- a/chapter07/rvmEbFp.m +++ /dev/null @@ -1,37 +0,0 @@ -function [model, llh] = rvmEbFp(X, t, alpha) -% Relevance Vector Machine classification training by empirical bayesian (ARD) -% using fix point update (Mackay update) -% Written by Mo Chen (sth4nth@gmail.com). -if nargin < 3 - alpha = 1; -end -n = size(X,2); -X = [X;ones(1,n)]; -d = size(X,1); -alpha = alpha*ones(d,1); -weight = zeros(d,1); - -tol = 1e-4; -maxiter = 100; -llh = -inf(1,maxiter); -infinity = 1e+10; -for iter = 2:maxiter - used = alpha < infinity; - a = alpha(used); - w = weight(used); - [w,energy,U] = optLogitNewton(X(used,:),t,a,w); - w2 = w.^2; - llh(iter) = energy(end)+0.5*(sum(log(a))-2*sum(log(diag(U)))-dot(a,w2)-n*log(2*pi)); % 7.114 - if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end - V = inv(U); - dgS = dot(V,V,2); - alpha(used) = (1-a.*dgS)./w2; % 7.89 & 7.87 - weight(used) = w; -end -llh = llh(2:iter); - -model.used = used; -model.w = w; % nonzero elements of weight -model.a = a; % nonzero elements of alpha -model.weght = weight; -model.alpha = alpha; diff --git a/chapter07/rvmRegPred.m b/chapter07/rvmRegPred.m new file mode 100644 index 0000000..1dfd6dc --- /dev/null +++ b/chapter07/rvmRegPred.m @@ -0,0 +1,7 @@ +function [ output_args ] = rvmRegPred( input_args ) +%RVMREGPRED Summary of this function goes here +% Detailed explanation goes here + + +end + From ab2203e4381e4d8f386c70014583d1ab4a476f1c Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 25 Dec 2015 11:33:36 +0800 Subject: [PATCH 060/149] rename functions --- chapter04/demo.m | 6 +++--- chapter04/{logitReg.m => logitBin.m} | 2 +- chapter04/{logitPred.m => logitBinPred.m} | 2 +- chapter04/{mnReg.m => logitMn.m} | 2 +- chapter04/{mnPred.m => logitMnPred.m} | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) rename chapter04/{logitReg.m => logitBin.m} (96%) rename chapter04/{logitPred.m => logitBinPred.m} (85%) rename chapter04/{mnReg.m => logitMn.m} (97%) rename chapter04/{mnPred.m => logitMnPred.m} (89%) diff --git a/chapter04/demo.m b/chapter04/demo.m index 6799428..3bbe477 100644 --- a/chapter04/demo.m +++ b/chapter04/demo.m @@ -3,7 +3,7 @@ k = 2; n = 1000; [X,t] = kmeansRnd(2,k,n); -[model, llh] = logitReg(X,t-1,0); +[model, llh] = logitBin(X,t-1,0); plot(llh); binPlot(model,X,t) pause @@ -12,6 +12,6 @@ k = 3; n = 1000; [X,t] = kmeansRnd(2,k,n); -[model, llh] = mnReg(X,t); -y = mnPred(model,X); +[model, llh] = logitMn(X,t); +y = logitMnPred(model,X); spread(X,y) diff --git a/chapter04/logitReg.m b/chapter04/logitBin.m similarity index 96% rename from chapter04/logitReg.m rename to chapter04/logitBin.m index 119e0f3..620df7c 100644 --- a/chapter04/logitReg.m +++ 
b/chapter04/logitBin.m
@@ -1,4 +1,4 @@
-function [model, llh] = logitReg(X, t, lambda)
+function [model, llh] = logitBin(X, t, lambda)
 % Logistic regression for binary classification optimized by Newton-Raphson
 % method.
 % X: dxn data matrix
diff --git a/chapter04/logitPred.m b/chapter04/logitBinPred.m
similarity index 85%
rename from chapter04/logitPred.m
rename to chapter04/logitBinPred.m
index 2beeec1..59ca680 100644
--- a/chapter04/logitPred.m
+++ b/chapter04/logitBinPred.m
@@ -1,4 +1,4 @@
-function [y, p] = logitPred(model, X)
+function [y, p] = logitBinPred(model, X)
 % Predict the label for binary logistic regression model
 % model: trained model structure
 % X: d x n testing data
diff --git a/chapter04/mnReg.m b/chapter04/logitMn.m
similarity index 97%
rename from chapter04/mnReg.m
rename to chapter04/logitMn.m
index bccee8d..da07f72 100644
--- a/chapter04/mnReg.m
+++ b/chapter04/logitMn.m
@@ -1,4 +1,4 @@
-function [model, llh] = mnReg(X, t, lambda)
+function [model, llh] = logitMn(X, t, lambda)
 % Multinomial regression for multiclass problem (Multinomial likelihood)
 % Written by Mo Chen (sth4nth@gmail.com).
 if nargin < 3
diff --git a/chapter04/mnPred.m b/chapter04/logitMnPred.m
similarity index 89%
rename from chapter04/mnPred.m
rename to chapter04/logitMnPred.m
index 065752e..d0fe03d 100644
--- a/chapter04/mnPred.m
+++ b/chapter04/logitMnPred.m
@@ -1,4 +1,4 @@
-function [y, P] = mnPred(model, X)
+function [y, P] = logitMnPred(model, X)
 % Predict the label for multiclass (multinomial) logistic regression model
 % model: trained model structure
 % X: d x n testing data
From 632ca3d40d3f8ea3c671773ecab6d6154d3938b0 Mon Sep 17 00:00:00 2001
From: Mo Chen
Date: Fri, 25 Dec 2015 11:48:19 +0800
Subject: [PATCH 061/149] reorganize dir

---
 .gitignore          |  1 +
 chapter04/TODO.txt  |  2 +-
 common/besseliLn.m  | 10 ++++++++++
 common/isequalf.m   |  9 +++++++++
 common/log1pexp.m   |  7 +++++++
 common/logsumexp.m  | 18 ++++++++++++++++++
 common/maxabsdiff.m |  4 ++++
 common/normalize.m  | 11 +++++++++++
 common/pdist2.m     |  6 ++++++
 common/plotClass.m  | 37 +++++++++++++++++++++++++++++++++++++
 10 files changed, 104 insertions(+), 1 deletion(-)
 create mode 100644 .gitignore
 create mode 100644 common/besseliLn.m
 create mode 100644 common/isequalf.m
 create mode 100644 common/log1pexp.m
 create mode 100644 common/logsumexp.m
 create mode 100644 common/maxabsdiff.m
 create mode 100644 common/normalize.m
 create mode 100644 common/pdist2.m
 create mode 100644 common/plotClass.m
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7632c10
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+reference/*
diff --git a/chapter04/TODO.txt b/chapter04/TODO.txt
index c98c787..f826511 100644
--- a/chapter04/TODO.txt
+++ b/chapter04/TODO.txt
@@ -1,2 +1,2 @@
-multiPlot: plot multclass decison boundary
+multiPlot: plot multiclass decision boundary
diff --git a/common/besseliLn.m b/common/besseliLn.m
new file mode 100644
index 0000000..b489938
--- /dev/null
+++ b/common/besseliLn.m
@@ -0,0 +1,10 @@
+function y = besseliLn(nu,x)
+% Compute logarithm of besseli function (modified Bessel function of first kind).
+% Written by Mo Chen (mochen80@gmail.com).
+% TODO: improve precision using the method in
+% Clustering on the Unit Hypersphere using von Mises-Fisher Distributions. A. Banerjee, I. S. Dhillon, J. Ghosh, and S.
Sra +[v,ierr] = besseli(nu,x); +if any(ierr ~= 0) || any(v == Inf) + error('ERROR: logbesseli'); +end +y = log(v); diff --git a/common/isequalf.m b/common/isequalf.m new file mode 100644 index 0000000..c7300c6 --- /dev/null +++ b/common/isequalf.m @@ -0,0 +1,9 @@ +function z = isequalf(x, y, tol) +% Determine whether two float number x and y are equal up to precision tol +% Written by Mo Chen (sth4nth@gmail.com). +if nargin < 3 + tol = 1e-8; +end +assert(all(size(x)==size(y))); +z = max(abs(x(:)-y(:))) Date: Fri, 25 Dec 2015 11:55:27 +0800 Subject: [PATCH 062/149] remove depercate dirs --- functions/besseliLn.m | 10 ---------- functions/log1pexp.m | 7 ------- functions/logsumexp.m | 18 ------------------ functions/normalize.m | 11 ----------- functions/pdist2.m | 6 ------ helper/isequalf.m | 8 -------- helper/maxabsdiff.m | 4 ---- helper/spread.m | 37 ------------------------------------- 8 files changed, 101 deletions(-) delete mode 100644 functions/besseliLn.m delete mode 100644 functions/log1pexp.m delete mode 100644 functions/logsumexp.m delete mode 100644 functions/normalize.m delete mode 100644 functions/pdist2.m delete mode 100644 helper/isequalf.m delete mode 100644 helper/maxabsdiff.m delete mode 100644 helper/spread.m diff --git a/functions/besseliLn.m b/functions/besseliLn.m deleted file mode 100644 index b489938..0000000 --- a/functions/besseliLn.m +++ /dev/null @@ -1,10 +0,0 @@ -function y = besseliLn(nu,x) -% Compute logarithm of besseli function (modified Bessel function of first kind). -% Written by Mo Chen (mochen80@gmail.com). -% TODO: improve precision using the method in -% Clustering on the Unit Hypersphere using von Mises-Fisher Distributions. A. Banerjee, I. S. Dhillon, J. Ghosh, and S. Sra -[v,ierr] = besseli(nu,x); -if any(ierr ~= 0) || any(v == Inf) - error('ERROR: logbesseli'); -end -y = log(v); diff --git a/functions/log1pexp.m b/functions/log1pexp.m deleted file mode 100644 index 8a6f018..0000000 --- a/functions/log1pexp.m +++ /dev/null @@ -1,7 +0,0 @@ -function y = log1pexp(x) -% accurately compute y = log(1+exp(x)) -% reference: Accurately Computing log(1-exp(|a|)) Martin Machler -seed = 33.3; -y = x; -idx = x Date: Fri, 25 Dec 2015 16:20:01 +0800 Subject: [PATCH 063/149] add two helper functions --- common/standardize.m | 12 ++++++++++++ common/unitize.m | 11 +++++++++++ 2 files changed, 23 insertions(+) create mode 100644 common/standardize.m create mode 100644 common/unitize.m diff --git a/common/standardize.m b/common/standardize.m new file mode 100644 index 0000000..5dcfd0e --- /dev/null +++ b/common/standardize.m @@ -0,0 +1,12 @@ +function [Y, s] = standardize(X) +% Unitize the vectors to be unit length +% By default dim = 1 (columns). +% Written by Michael Chen (sth4nth@gmail.com). +if nargin == 1, + % Determine which dimension sum will use + dim = find(size(x)~=1,1); + if isempty(dim), dim = 1; end +end +X = bsxfun(@minux,X,mean(X,2)); +s = sqrt(mean(sum(X.^2,1))); +Y = X/s; \ No newline at end of file diff --git a/common/unitize.m b/common/unitize.m new file mode 100644 index 0000000..582cd7e --- /dev/null +++ b/common/unitize.m @@ -0,0 +1,11 @@ +function [Y, s] = unitize(X, dim) +% Unitize the vectors to be unit length +% By default dim = 1 (columns). +% Written by Michael Chen (sth4nth@gmail.com). 
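% Note on the dim-detection branch below: as committed it inspects size(x)
% with a lowercase x while the argument is X, so the one-argument call
% errors; standardize.m above has the same branch and additionally calls
% bsxfun(@minux,...) instead of bsxfun(@minus,...). A corrected sketch of
% the intended logic:
%   if nargin == 1
%       dim = find(size(X)~=1,1);
%       if isempty(dim), dim = 1; end
%   end
%   s = sqrt(sum(X.^2,dim));
%   Y = bsxfun(@times,X,1./s);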
+if nargin == 1, + % Determine which dimension sum will use + dim = find(size(x)~=1,1); + if isempty(dim), dim = 1; end +end +s = sqrt(sum(X.^2,dim)); +Y = bsxfun(@times,X,1./s); \ No newline at end of file From 63081d8d86ea4406974a73044b2e8c93ffc8c664 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 25 Dec 2015 22:29:52 +0800 Subject: [PATCH 064/149] rvm has bugs --- chapter07/demo.m | 89 +++++++++++++++++++++++++++++++----------- chapter07/rvmRegEbEm.m | 10 ++--- 2 files changed, 70 insertions(+), 29 deletions(-) diff --git a/chapter07/demo.m b/chapter07/demo.m index 972a092..6f1e7af 100755 --- a/chapter07/demo.m +++ b/chapter07/demo.m @@ -4,17 +4,62 @@ % 3) fix coordinate descent % 4) sparse prediction for regression and classification +% 5) need to be extensively tested on high dimensional data (inlucding +% chapter03 Bayesian linear regression) + + + % clear; close all; % + +%% sparse signal recovery demo +clear; close all; + +d = 512; % signal length +k = 20; % number of spikes +n = 100; % number of measurements +% +% random +/- 1 signal +x = zeros(d,1); +q = randperm(d); +x(q(1:k)) = sign(randn(k,1)); + +% projection matrix +A = unitize(randn(d,n),1); +% noisy observations +sigma = 0.005; +e = sigma*randn(1,n); +y = x'*A + e; + + +[model,llh] = rvmRegEbEm(A,y); +plot(llh); + +% solve by BCS +tic; +[weights,used,sigma2,errbars] = BCS_fast_rvm(A,y,initsigma2,1e-8); +t_BCS = toc; +fprintf(1,'BCS number of nonzero weights: %d\d',length(used)); +x_BCS = zeros(d,1); err = zeros(d,1); +x_BCS(used) = weights; err(used) = errbars; + + +E_BCS = norm(x-x_BCS)/norm(x); + +figure +subplot(3,1,1); plot(x); axis([1 d -max(abs(x))-0.2 max(abs(x))+0.2]); title(['(a) Original Signal']); +subplot(3,1,3); errorbar(x_BCS,err); axis([1 d -max(abs(x))-0.2 max(abs(x))+0.2]); title(['(c) Reconstruction with BCS, n=' num2str(n)]); box on; + +disp(['BCS: ||I_hat-I||/||I|| = ' num2str(E_BCS) ', time = ' num2str(t_BCS) ' secs']); %% regression -n = 100; -beta = 1e-1; -X = rand(1,n); -w = randn; -b = randn; -t = w'*X+b+beta*randn(1,n); - -x = linspace(min(X)-1,max(X)+1,n); % test data +% d = 100; +% beta = 1e-1; +% X = rand(1,d); +% w = randn; +% b = randn; +% t = w'*X+b+beta*randn(1,d); + +% x = linspace(min(X)-1,max(X)+1,d); % test data %% % [model,llh] = rvmRegEbFp(X,t); % figure @@ -38,23 +83,23 @@ % plot(x,y,'r-'); % hold off %% -[model,llh] = rvmRegEbCd(X,t); -figure -plot(llh); -[y, sigma] = linPred(x,model,t); -figure; -hold on; -plotBand(x,y,2*sigma); -plot(X,t,'o'); -plot(x,y,'r-'); -hold off +% [model,llh] = rvmRegEbCd(X,t); +% figure +% plot(llh); +% [y, sigma] = linPred(x,model,t); +% figure; +% hold on; +% plotBand(x,y,2*sigma); +% plot(X,t,'o'); +% plot(x,y,'r-'); +% hold off %% classification -% k = 2; +% n = 2; % d = 2; -% n = 1000; -% [X,t] = rndKCluster(d,k,n); -% [x1,x2] = meshgrid(linspace(min(X(1,:)),max(X(1,:)),n), linspace(min(X(2,:)),max(X(2,:)),n)); +% d = 1000; +% [X,t] = rndKCluster(d,n,d); +% [x1,x2] = meshgrid(linspace(min(X(1,:)),max(X(1,:)),d), linspace(min(X(2,:)),max(X(2,:)),d)); %% % [model, llh] = rvmEbFp(X,t-1); diff --git a/chapter07/rvmRegEbEm.m b/chapter07/rvmRegEbEm.m index 361e726..3204ac7 100644 --- a/chapter07/rvmRegEbEm.m +++ b/chapter07/rvmRegEbEm.m @@ -19,18 +19,14 @@ Xt = X*t'; tol = 1e-4; -maxiter = 100; +maxiter = 500; llh = -inf(1,maxiter+1); -idx = (1:d)'; -dg = sub2ind([d,d],idx,idx); - -infinity = 1e+10; +infinity = 1e+8; for iter = 2 : maxiter used = alpha < infinity; d = sum(used); alphaUsed = alpha(used); - S = beta*XX(used,used); - S(dg) = 
S(dg)+alphaUsed; + S = beta*XX(used,used)+diag(alphaUsed); U = chol(S); V = U\eye(d); w = beta*(V*(V'*Xt(used))); % 7.82 From b406341bef22bf28e21c48e5936090f7accd46d0 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Mon, 28 Dec 2015 10:18:33 +0800 Subject: [PATCH 065/149] Bayesian regression need rewrite include sparse and linear --- chapter07/rvmRegEbEm.asv | 63 ++++++++++++++++++++++++++++++++++++++++ chapter07/rvmRegEbEm.m | 61 +++++++++++++++++++------------------- 2 files changed, 93 insertions(+), 31 deletions(-) create mode 100644 chapter07/rvmRegEbEm.asv diff --git a/chapter07/rvmRegEbEm.asv b/chapter07/rvmRegEbEm.asv new file mode 100644 index 0000000..3ef7323 --- /dev/null +++ b/chapter07/rvmRegEbEm.asv @@ -0,0 +1,63 @@ +function [model, llh] = rvmRegEbEm(X, t, alpha, beta) +% Relevance Vector Machine (ARD sparse prior) for regression +% training by empirical bayesian (type II ML) using standard EM update +% Written by Mo Chen (sth4nth@gmail.com). +if nargin < 3 + alpha = 0.02; + beta = 0.5; +end +% xbar = mean(X,2); +% tbar = mean(t,2); +% X = bsxfun(@minus,X,xbar); +% t = bsxfun(@minus,t,tbar); + +n = size(X,2); +X = [X;ones(1,n)]; +d = size(X,1); + + + + +% XX = X*X'; +% Xt = X*t'; + +alpha = alpha*ones(d,1); +tol = 1e-4; +maxiter = 500; +llh = -inf(1,maxiter+1); +infinity = 1e+8; +global_index = 1:d; +for iter = 2 : maxiter + local_index = alpha < infinity; + + + + +% alphaUsed = alpha(used); +% S = beta*XX(used,used)+diag(alphaUsed); +% U = chol(S); +% V = U\eye(d); +% w = beta*(V*(V'*Xt(used))); % 7.82 + w = beta*(S\Xt); + w2 = w.^2; + err = sum((t-w'*X(used,:)).^2); + + logdetS = -2*sum(log(diag(V))); + llh(iter) = 0.5*(sum(log(alphaUsed))+n*log(beta)-beta*err-logdetS-dot(alphaUsed,w2)-n*log(2*pi)); + if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end + + dgS = dot(V,V,2); + alpha = 1./(w2+dgS); % 9.67 + + gamma = 1-alphaUsed.*dgS; % 7.89 + beta = n/(err+sum(gamma)/beta); % 9.68 +end +llh = llh(2:iter); + +w0 = tbar-dot(w,xbar(used)); + +model.used = used; +model.w0 = w0; +model.w = w; +model.alpha = alpha; +model.beta = beta; diff --git a/chapter07/rvmRegEbEm.m b/chapter07/rvmRegEbEm.m index 3204ac7..545ef5f 100644 --- a/chapter07/rvmRegEbEm.m +++ b/chapter07/rvmRegEbEm.m @@ -6,48 +6,47 @@ alpha = 0.02; beta = 0.5; end -[d,n] = size(X); -alpha = alpha*ones(d,1); - -xbar = mean(X,2); -tbar = mean(t,2); - -X = bsxfun(@minus,X,xbar); -t = bsxfun(@minus,t,tbar); +% xbar = mean(X,2); +% tbar = mean(t,2); +% X = bsxfun(@minus,X,xbar); +% t = bsxfun(@minus,t,tbar); +n = size(X,2); +X = [X;ones(1,n)]; +d = size(X,1); -XX = X*X'; -Xt = X*t'; +% XX = X*X'; +% Xt = X*t'; +alpha = alpha*ones(d,1); tol = 1e-4; maxiter = 500; llh = -inf(1,maxiter+1); -infinity = 1e+8; +infinity = 1e8; +index = 1:d; for iter = 2 : maxiter - used = alpha < infinity; - d = sum(used); - alphaUsed = alpha(used); - S = beta*XX(used,used)+diag(alphaUsed); - U = chol(S); - V = U\eye(d); - w = beta*(V*(V'*Xt(used))); % 7.82 - w2 = w.^2; - err = sum((t-w'*X(used,:)).^2); + nz = alpha < infinity; % nonzeros + index = index(nz); + alpha = alpha(nz); + X = X(nz,:); - logdetS = -2*sum(log(diag(V))); - llh(iter) = 0.5*(sum(log(alphaUsed))+n*log(beta)-beta*err-logdetS-dot(alphaUsed,w2)-n*log(2*pi)); - if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end - - dgS = dot(V,V,2); - alpha = 1./(w2+dgS); % 9.67 - - gamma = 1-alphaUsed.*dgS; % 7.89 - beta = n/(err+sum(gamma)/beta); % 9.68 + S = inv(beta*(X*X')+diag(alpha)); + % E-step + w = beta*S*X*t'; % E[w] % 7.82 + w2 = m.^2+diag(S); % E[w^2] + e = 
sum((t-m'*X).^2); + +% logdetS = -2*sum(log(diag(V))); +% llh(iter) = 0.5*(sum(log(alpha))+n*log(beta)-beta*e-logdetS-dot(alpha,w2)-n*log(2*pi)); +% if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end + + % M-step + alpha = 1./w2; % 9.67 + beta = n/(e+sum(w2)); % 9.68 is wrong end llh = llh(2:iter); -w0 = tbar-dot(w,xbar(used)); -model.used = used; +model.index = index; model.w0 = w0; model.w = w; model.alpha = alpha; From 5eb1c1033c4120f915f0cc96849c4610eeef26b7 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 29 Dec 2015 11:07:22 +0800 Subject: [PATCH 066/149] minor fix --- chapter03/linRegEbFp.m | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chapter03/linRegEbFp.m b/chapter03/linRegEbFp.m index 4caebc3..f2ece00 100644 --- a/chapter03/linRegEbFp.m +++ b/chapter03/linRegEbFp.m @@ -21,7 +21,7 @@ idx = (1:d)'; dg = sub2ind([d,d],idx,idx); I = eye(d); -tol = 1e-4; +tol = 1e-8; maxiter = 100; llh = -inf(1,maxiter+1); for iter = 2:maxiter @@ -39,7 +39,7 @@ if abs(llh(iter)-llh(iter-1)) < tol*abs(llh(iter-1)); break; end trS = dot(V(:),V(:)); % A=inv(S) - gamma = d-alpha*trS; % 3.91 + gamma = d-alpha*trS; % 3.91 9.64 alpha = gamma/w2; % 3.92 beta = (n-gamma)/err; % 3.95 end From 1b4ccd692c0a8c980ef9600305421f2ab3125514 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 29 Dec 2015 20:35:58 +0800 Subject: [PATCH 067/149] move regression functions using EM to chapter09 --- chapter03/demo.m | 30 ++++++------- chapter03/linRegEbFp.m | 4 +- chapter07/rvmRegEbEm.asv | 63 --------------------------- {chapter03 => chapter09}/linRegEbEm.m | 0 {chapter07 => chapter09}/rvmBinEbEm.m | 0 {chapter07 => chapter09}/rvmRegEbEm.m | 5 +-- 6 files changed, 19 insertions(+), 83 deletions(-) delete mode 100644 chapter07/rvmRegEbEm.asv rename {chapter03 => chapter09}/linRegEbEm.m (100%) rename {chapter07 => chapter09}/rvmBinEbEm.m (100%) rename {chapter07 => chapter09}/rvmRegEbEm.m (94%) diff --git a/chapter03/demo.m b/chapter03/demo.m index 70fdcb6..54ac3a5 100644 --- a/chapter03/demo.m +++ b/chapter03/demo.m @@ -1,23 +1,23 @@ % Done % demo for chapter 03 clear; close all; -d = 1; -n = 200; +d = 1000; +n = 2000; [x,t] = linRnd(d,n); %% -model = linReg(x,t); -linPlot(model,x,t); -fprintf('Press any key to continue. \n'); +% model = linReg(x,t); +% linPlot(model,x,t); +% fprintf('Press any key to continue. \n'); %% -[model,llh] = linRegEbEm(x,t); -linPlot(model,x,t); -figure; -plot(llh); -fprintf('Press any key to continue. \n'); +[model1,llh1] = linRegEbEm(x,t); +% linPlot(model,x,t); +% figure; +% plot(llh); +% fprintf('Press any key to continue. 
\n'); %% -[model,llh] = linRegEbFp(x,t); -[y, sigma] = linPred(model,x,t); -linPlot(model,x,t); -figure; -plot(llh); \ No newline at end of file +[model2,llh2] = linRegEbFp(x,t); +% [y, sigma] = linPred(model,x,t); +% % linPlot(model,x,t); +% figure; +% plot(llh); \ No newline at end of file diff --git a/chapter03/linRegEbFp.m b/chapter03/linRegEbFp.m index 4caebc3..f2ece00 100644 --- a/chapter03/linRegEbFp.m +++ b/chapter03/linRegEbFp.m @@ -21,7 +21,7 @@ idx = (1:d)'; dg = sub2ind([d,d],idx,idx); I = eye(d); -tol = 1e-4; +tol = 1e-8; maxiter = 100; llh = -inf(1,maxiter+1); for iter = 2:maxiter @@ -39,7 +39,7 @@ if abs(llh(iter)-llh(iter-1)) < tol*abs(llh(iter-1)); break; end trS = dot(V(:),V(:)); % A=inv(S) - gamma = d-alpha*trS; % 3.91 + gamma = d-alpha*trS; % 3.91 9.64 alpha = gamma/w2; % 3.92 beta = (n-gamma)/err; % 3.95 end diff --git a/chapter07/rvmRegEbEm.asv b/chapter07/rvmRegEbEm.asv deleted file mode 100644 index 3ef7323..0000000 --- a/chapter07/rvmRegEbEm.asv +++ /dev/null @@ -1,63 +0,0 @@ -function [model, llh] = rvmRegEbEm(X, t, alpha, beta) -% Relevance Vector Machine (ARD sparse prior) for regression -% training by empirical bayesian (type II ML) using standard EM update -% Written by Mo Chen (sth4nth@gmail.com). -if nargin < 3 - alpha = 0.02; - beta = 0.5; -end -% xbar = mean(X,2); -% tbar = mean(t,2); -% X = bsxfun(@minus,X,xbar); -% t = bsxfun(@minus,t,tbar); - -n = size(X,2); -X = [X;ones(1,n)]; -d = size(X,1); - - - - -% XX = X*X'; -% Xt = X*t'; - -alpha = alpha*ones(d,1); -tol = 1e-4; -maxiter = 500; -llh = -inf(1,maxiter+1); -infinity = 1e+8; -global_index = 1:d; -for iter = 2 : maxiter - local_index = alpha < infinity; - - - - -% alphaUsed = alpha(used); -% S = beta*XX(used,used)+diag(alphaUsed); -% U = chol(S); -% V = U\eye(d); -% w = beta*(V*(V'*Xt(used))); % 7.82 - w = beta*(S\Xt); - w2 = w.^2; - err = sum((t-w'*X(used,:)).^2); - - logdetS = -2*sum(log(diag(V))); - llh(iter) = 0.5*(sum(log(alphaUsed))+n*log(beta)-beta*err-logdetS-dot(alphaUsed,w2)-n*log(2*pi)); - if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end - - dgS = dot(V,V,2); - alpha = 1./(w2+dgS); % 9.67 - - gamma = 1-alphaUsed.*dgS; % 7.89 - beta = n/(err+sum(gamma)/beta); % 9.68 -end -llh = llh(2:iter); - -w0 = tbar-dot(w,xbar(used)); - -model.used = used; -model.w0 = w0; -model.w = w; -model.alpha = alpha; -model.beta = beta; diff --git a/chapter03/linRegEbEm.m b/chapter09/linRegEbEm.m similarity index 100% rename from chapter03/linRegEbEm.m rename to chapter09/linRegEbEm.m diff --git a/chapter07/rvmBinEbEm.m b/chapter09/rvmBinEbEm.m similarity index 100% rename from chapter07/rvmBinEbEm.m rename to chapter09/rvmBinEbEm.m diff --git a/chapter07/rvmRegEbEm.m b/chapter09/rvmRegEbEm.m similarity index 94% rename from chapter07/rvmRegEbEm.m rename to chapter09/rvmRegEbEm.m index 545ef5f..70bf7e3 100644 --- a/chapter07/rvmRegEbEm.m +++ b/chapter09/rvmRegEbEm.m @@ -18,13 +18,12 @@ % Xt = X*t'; alpha = alpha*ones(d,1); -tol = 1e-4; +tol = 1e-8; maxiter = 500; llh = -inf(1,maxiter+1); -infinity = 1e8; index = 1:d; for iter = 2 : maxiter - nz = alpha < infinity; % nonzeros + nz = 1./alpha > tol ; % nonzeros index = index(nz); alpha = alpha(nz); X = X(nz,:); From 7bf85c8b6d45271788ccb333337fe29974ef4c17 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 29 Dec 2015 20:37:24 +0800 Subject: [PATCH 068/149] fix Bayesian regressions using Mackay update --- chapter03/demo.m | 23 +++++++++----------- chapter03/linRegEbFp.m | 39 ++++++++++++++++----------------- chapter07/demo.m | 46 
++++++++++++++++++++++----------------- chapter07/rvmBinEbFp.m | 2 +- chapter07/rvmRegEbFp.m | 49 ++++++++++++++++++++++-------------------- 5 files changed, 81 insertions(+), 78 deletions(-) diff --git a/chapter03/demo.m b/chapter03/demo.m index 54ac3a5..a4192e5 100644 --- a/chapter03/demo.m +++ b/chapter03/demo.m @@ -1,23 +1,20 @@ % Done % demo for chapter 03 clear; close all; -d = 1000; -n = 2000; +d = 1; +n = 200; [x,t] = linRnd(d,n); %% % model = linReg(x,t); % linPlot(model,x,t); -% fprintf('Press any key to continue. \n'); %% -[model1,llh1] = linRegEbEm(x,t); -% linPlot(model,x,t); -% figure; +% [model1,llh1] = linRegEbEm(x,t); % plot(llh); -% fprintf('Press any key to continue. \n'); - -%% -[model2,llh2] = linRegEbFp(x,t); -% [y, sigma] = linPred(model,x,t); -% % linPlot(model,x,t); % figure; -% plot(llh); \ No newline at end of file +% linPlot(model,x,t); +%% +[model,llh] = linRegEbFp(x,t); +[y, sigma] = linPred(model,x,t); +plot(llh); +figure; +linPlot(model,x,t); diff --git a/chapter03/linRegEbFp.m b/chapter03/linRegEbFp.m index f2ece00..196ec2d 100644 --- a/chapter03/linRegEbFp.m +++ b/chapter03/linRegEbFp.m @@ -16,39 +16,36 @@ X = bsxfun(@minus,X,xbar); t = bsxfun(@minus,t,tbar); -C = X*X'; +XX = X*X'; Xt = X*t'; -idx = (1:d)'; -dg = sub2ind([d,d],idx,idx); -I = eye(d); -tol = 1e-8; -maxiter = 100; -llh = -inf(1,maxiter+1); + +tol = 1e-4; +maxiter = 200; +llh = -inf(1,maxiter); for iter = 2:maxiter - A = beta*C; - A(dg) = A(dg)+alpha; % 3.81 3.54 + A = beta*XX+diag(alpha); % 3.81 3.54 U = chol(A); - V = U\I; % A=inv(S) - w = beta*(V*(V'*Xt)); % 3.84 - w2 = dot(w,w); - err = sum((t-w'*X).^2); + m = beta*(U\(U'\Xt)); % 3.84 + m2 = dot(m,m); + e = sum((t-m'*X).^2); logdetA = 2*sum(log(diag(U))); - llh(iter) = 0.5*(d*log(alpha)+n*log(beta)-alpha*w2-beta*err-logdetA-n*log(2*pi)); % 3.86 + llh(iter) = 0.5*(d*log(alpha)+n*log(beta)-alpha*m2-beta*e-logdetA-n*log(2*pi)); % 3.86 if abs(llh(iter)-llh(iter-1)) < tol*abs(llh(iter-1)); break; end - - trS = dot(V(:),V(:)); % A=inv(S) + + V = inv(U); % A=inv(S) + trS = dot(V(:),V(:)); gamma = d-alpha*trS; % 3.91 9.64 - alpha = gamma/w2; % 3.92 - beta = (n-gamma)/err; % 3.95 + alpha = gamma/m2; % 3.92 + beta = (n-gamma)/e; % 3.95 end -w0 = tbar-dot(w,xbar); +w0 = tbar-dot(m,xbar); llh = llh(2:iter); model.w0 = w0; -model.w = w; -%% optional for bayesian probabilistic inference purpose +model.w = m; +%% optional for bayesian probabilistic prediction purpose model.alpha = alpha; model.beta = beta; model.xbar = xbar; diff --git a/chapter07/demo.m b/chapter07/demo.m index 6f1e7af..75f1c8d 100755 --- a/chapter07/demo.m +++ b/chapter07/demo.m @@ -1,6 +1,4 @@ % TODO: -% 1) sparse visualization -% 2) sparse data/demos % 3) fix coordinate descent % 4) sparse prediction for regression and classification @@ -32,25 +30,33 @@ y = x'*A + e; -[model,llh] = rvmRegEbEm(A,y); +[model,llh] = rvmRegEbFp(A,y); plot(llh); - -% solve by BCS -tic; -[weights,used,sigma2,errbars] = BCS_fast_rvm(A,y,initsigma2,1e-8); -t_BCS = toc; -fprintf(1,'BCS number of nonzero weights: %d\d',length(used)); -x_BCS = zeros(d,1); err = zeros(d,1); -x_BCS(used) = weights; err(used) = errbars; - - -E_BCS = norm(x-x_BCS)/norm(x); - -figure -subplot(3,1,1); plot(x); axis([1 d -max(abs(x))-0.2 max(abs(x))+0.2]); title(['(a) Original Signal']); -subplot(3,1,3); errorbar(x_BCS,err); axis([1 d -max(abs(x))-0.2 max(abs(x))+0.2]); title(['(c) Reconstruction with BCS, n=' num2str(n)]); box on; - -disp(['BCS: ||I_hat-I||/||I|| = ' num2str(E_BCS) ', time = ' num2str(t_BCS) ' secs']); +m = 
zeros(d,1); +m(model.index) = model.w; + +h = max(abs(x))+0.2; +x_range = [1,d]; +y_range = [-h,+h]; +figure; +subplot(2,1,1);plot(x); axis([x_range,y_range]); title('Original Signal'); +subplot(2,1,2);plot(m); axis([x_range,y_range]); title('Recovery Signal'); +% % solve by BCS +% tic; +% [weights,used,sigma2,errbars] = BCS_fast_rvm(A,y,initsigma2,1e-8); +% t_BCS = toc; +% fprintf(1,'BCS number of nonzero weights: %d\d',length(used)); +% x_BCS = zeros(d,1); err = zeros(d,1); +% x_BCS(used) = weights; err(used) = errbars; +% +% +% E_BCS = norm(x-x_BCS)/norm(x); +% +% figure +% subplot(3,1,1); plot(x); axis([1 d -max(abs(x))-0.2 max(abs(x))+0.2]); title(['(a) Original Signal']); +% subplot(3,1,3); errorbar(x_BCS,err); axis([1 d -max(abs(x))-0.2 max(abs(x))+0.2]); title(['(c) Reconstruction with BCS, n=' num2str(n)]); box on; +% +% disp(['BCS: ||I_hat-I||/||I|| = ' num2str(E_BCS) ', time = ' num2str(t_BCS) ' secs']); %% regression % d = 100; % beta = 1e-1; diff --git a/chapter07/rvmBinEbFp.m b/chapter07/rvmBinEbFp.m index 992d620..8130663 100644 --- a/chapter07/rvmBinEbFp.m +++ b/chapter07/rvmBinEbFp.m @@ -11,7 +11,7 @@ alpha = alpha*ones(d,1); weight = zeros(d,1); -tol = 1e-4; +tol = 1e-3; maxiter = 100; llh = -inf(1,maxiter); infinity = 1e+10; diff --git a/chapter07/rvmRegEbFp.m b/chapter07/rvmRegEbFp.m index 564a59e..1da8b77 100644 --- a/chapter07/rvmRegEbFp.m +++ b/chapter07/rvmRegEbFp.m @@ -18,40 +18,43 @@ XX = X*X'; Xt = X*t'; -tol = 1e-4; -maxiter = 100; +tol = 1e-3; +maxiter = 200; llh = -inf(1,maxiter); - -infinity = 1e+10; -for iter = 2 : maxiter - used = alpha < infinity; - d = sum(used); - alphaUsed = alpha(used); - S = beta*XX(used,used); - idx = (1:d)'; - dg = sub2ind([d,d],idx,idx); - S(dg) = S(dg)+alphaUsed; - U = chol(S); - V = U\eye(d); - w = beta*(V*(V'*Xt(used))); % 7.82 +index = 1:d; +for iter = 2:maxiter + % remove zeros + nz = 1./alpha > tol; % nonzeros + index = index(nz); + alpha = alpha(nz); + XX = XX(nz,nz); + Xt = Xt(nz); + X = X(nz,:); + + U = chol(beta*XX+diag(alpha)); % 7.83 + w = beta*(U\(U'\Xt)); % 7.82 w2 = w.^2; - err = sum((t-w'*X(used,:)).^2); + e = sum((t-w'*X).^2); - logdetS = -2*sum(log(diag(V))); - llh(iter) = 0.5*(sum(log(alphaUsed))+n*log(beta)-beta*err-logdetS-dot(alphaUsed,w2)-n*log(2*pi)); % 3.86 + logdetS = 2*sum(log(diag(U))); + llh(iter) = 0.5*(sum(log(alpha))+n*log(beta)-beta*e-logdetS-dot(alpha,w2)-n*log(2*pi)); % 3.86 if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end + V = inv(U); dgSigma = dot(V,V,2); - gamma = 1-alphaUsed.*dgSigma; % 7.89 - alpha(used) = gamma./w2; % 7.87 - beta = (n-sum(gamma))/err; % 7.88 + gamma = 1-alpha.*dgSigma; % 7.89 + alpha = gamma./w2; % 7.87 + beta = (n-sum(gamma))/e; % 7.88 end llh = llh(2:iter); -w0 = tbar-dot(w,xbar(used)); +w0 = tbar-dot(w,xbar(nz)); -model.used = used; +model.index = index; model.w0 = w0; model.w = w; model.alpha = alpha; model.beta = beta; +%% optional for bayesian probabilistic prediction purpose +model.xbar = xbar; +model.U = U; \ No newline at end of file From 3d1476ffe08cd592cf0fa50cb354083180552d0d Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 29 Dec 2015 20:38:16 +0800 Subject: [PATCH 069/149] minor fix --- chapter03/linReg.m | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/chapter03/linReg.m b/chapter03/linReg.m index f177a42..40e9aae 100644 --- a/chapter03/linReg.m +++ b/chapter03/linReg.m @@ -15,10 +15,10 @@ X = bsxfun(@minus,X,xbar); t = bsxfun(@minus,t,tbar); -C = X*X'; -C(dg) = C(dg)+lambda; % 3.54 C=inv(S)/beta -% w = C\(X*t'); -U = 
chol(C); +XX = X*X'; +XX(dg) = XX(dg)+lambda; % 3.54 XX=inv(S)/beta +% w = XX\(X*t'); +U = chol(XX); w = U\(U'\(X*t')); % 3.15 & 3.28 w0 = tbar-dot(w,xbar); % 3.19 From 8c81658ee7c8313c201cf11f65d9036d692bea22 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 29 Dec 2015 20:47:35 +0800 Subject: [PATCH 070/149] remove SVD based algorithm for Mackay update --- chapter07/rvmRegEbFp.m | 16 ++++++-------- chapter07/rvmRegEbFpSvd.m | 46 --------------------------------------- 2 files changed, 7 insertions(+), 55 deletions(-) delete mode 100644 chapter07/rvmRegEbFpSvd.m diff --git a/chapter07/rvmRegEbFp.m b/chapter07/rvmRegEbFp.m index 1da8b77..06e4d55 100644 --- a/chapter07/rvmRegEbFp.m +++ b/chapter07/rvmRegEbFp.m @@ -32,27 +32,25 @@ X = X(nz,:); U = chol(beta*XX+diag(alpha)); % 7.83 - w = beta*(U\(U'\Xt)); % 7.82 - w2 = w.^2; - e = sum((t-w'*X).^2); + m = beta*(U\(U'\Xt)); % 7.82 + m2 = m.^2; + e = sum((t-m'*X).^2); logdetS = 2*sum(log(diag(U))); - llh(iter) = 0.5*(sum(log(alpha))+n*log(beta)-beta*e-logdetS-dot(alpha,w2)-n*log(2*pi)); % 3.86 + llh(iter) = 0.5*(sum(log(alpha))+n*log(beta)-beta*e-logdetS-dot(alpha,m2)-n*log(2*pi)); % 3.86 if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end V = inv(U); dgSigma = dot(V,V,2); gamma = 1-alpha.*dgSigma; % 7.89 - alpha = gamma./w2; % 7.87 + alpha = gamma./m2; % 7.87 beta = (n-sum(gamma))/e; % 7.88 end llh = llh(2:iter); -w0 = tbar-dot(w,xbar(nz)); - model.index = index; -model.w0 = w0; -model.w = w; +model.w0 = tbar-dot(m,xbar(nz)); +model.w = m; model.alpha = alpha; model.beta = beta; %% optional for bayesian probabilistic prediction purpose diff --git a/chapter07/rvmRegEbFpSvd.m b/chapter07/rvmRegEbFpSvd.m deleted file mode 100644 index dda1a35..0000000 --- a/chapter07/rvmRegEbFpSvd.m +++ /dev/null @@ -1,46 +0,0 @@ -function [model, llh] = rvmRegEbFpSvd(X, t, alpha, beta) -% Relevance Vector Machine (ARD sparse prior) for regression -% training by empirical bayesian (type II ML) using fix point update (Mackay update) with SVD -% Written by Mo Chen (sth4nth@gmail.com). 
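Both the Cholesky version kept in rvmRegEbFp.m and the SVD variant being deleted here iterate the same Mackay fixed-point re-estimates; the SVD code only obtained the evidence quantities from the singular values of X instead of a Cholesky factor. A minimal self-contained sketch of one evidence iteration, with names (alpha, beta, m, gamma) following linRegEbFp.m above and random data that is illustrative only:

d = 5; n = 100;
X = randn(d,n); t = randn(1,n);            % toy data, assumed centered
alpha = 0.02; beta = 0.5;                  % initial hyperparameters
for iter = 1:10
    U = chol(beta*(X*X')+alpha*eye(d));    % A = U'*U, PRML 3.81
    m = beta*(U\(U'\(X*t')));              % posterior mean, PRML 3.84
    V = inv(U);                            % inv(A) = V*V'
    gamma = d-alpha*dot(V(:),V(:));        % effective parameters, PRML 3.91
    alpha = gamma/dot(m,m);                % PRML 3.92
    beta = (n-gamma)/sum((t-m'*X).^2);     % PRML 3.95
end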
-if nargin < 3 - alpha = 0.02; - beta = 0.5; -end -[d,n] = size(X); -alpha = alpha*ones(d,1); - -xbar = mean(X,2); -tbar = mean(t,2); - -X = bsxfun(@minus,X,xbar); -t = bsxfun(@minus,t,tbar); - -[U,S] = svd(X,'econ'); % X=U*S*V' -s = diag(S).^2; -UXt = U'*(X*t'); - -maxiter = 100; -llh = -inf(1,maxiter+1); -tol = 1e-2; -for iter = 2 : maxiter - h = s+alpha/beta; - m = U*(UXt./h); - m2 = m.^2; - err = sum((t-m'*X).^2); - - logdetS = sum(log(beta*h)); - llh(iter) = 0.5*(sum(log(alpha))+n*log(beta)-beta*err-logdetS-dot(alpha,m2)-n*log(2*pi)); - if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end - - V = bsxfun(@times,U,1./sqrt(h)); - dgS = dot(V,V,2); - gamma = 1-alpha.*dgS; % 7.89 - alpha = gamma./m2; % 7.87 - beta = (n-sum(gamma))/err; % 7.88 -end -llh = llh(2:iter); -m0 = tbar-dot(m,xbar); -model.w0 = m0; -model.w = m; -model.alpha = alpha; -model.beta = beta; From 6021e44d7819c5dcc3b01d8d94f81827f1aedc1c Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 29 Dec 2015 20:58:01 +0800 Subject: [PATCH 071/149] add rvmRegPred.m --- chapter07/demo.m | 3 +++ chapter07/rvmRegEbFp.m | 2 +- chapter07/rvmRegPred.m | 26 ++++++++++++++++++++++---- 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/chapter07/demo.m b/chapter07/demo.m index 75f1c8d..b96c5cf 100755 --- a/chapter07/demo.m +++ b/chapter07/demo.m @@ -41,6 +41,9 @@ figure; subplot(2,1,1);plot(x); axis([x_range,y_range]); title('Original Signal'); subplot(2,1,2);plot(m); axis([x_range,y_range]); title('Recovery Signal'); + + +[y, sigma] = rvmRegPred(model,A); % % solve by BCS % tic; % [weights,used,sigma2,errbars] = BCS_fast_rvm(A,y,initsigma2,1e-8); diff --git a/chapter07/rvmRegEbFp.m b/chapter07/rvmRegEbFp.m index 06e4d55..4020777 100644 --- a/chapter07/rvmRegEbFp.m +++ b/chapter07/rvmRegEbFp.m @@ -54,5 +54,5 @@ model.alpha = alpha; model.beta = beta; %% optional for bayesian probabilistic prediction purpose -model.xbar = xbar; +model.xbar = xbar(index); model.U = U; \ No newline at end of file diff --git a/chapter07/rvmRegPred.m b/chapter07/rvmRegPred.m index 1dfd6dc..f10e869 100644 --- a/chapter07/rvmRegPred.m +++ b/chapter07/rvmRegPred.m @@ -1,7 +1,25 @@ -function [ output_args ] = rvmRegPred( input_args ) -%RVMREGPRED Summary of this function goes here -% Detailed explanation goes here - +function [y, sigma, p] = rvmRegPred(model, X, t) +% Compute linear model reponse y = w'*X+w0 and likelihood for sparse model +% model: trained model structure +% X: d x n testing data +% t (optional): 1 x n testing response +% Written by Mo Chen (sth4nth@gmail.com). 
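For reference, the probabilistic branch implemented below follows PRML 3.58-3.59: the predictive density at a test point is Gaussian with mean w'*x+w0 and variance 1/beta plus a quadratic form in the posterior covariance, which the stored Cholesky factor U turns into a pair of triangular solves. A usage sketch, where Xtest and ttest are hypothetical test arrays with the same row dimension as the training data:

[y, sigma] = rvmRegPred(model, Xtest);            % predictive mean and std
[y, sigma, p] = rvmRegPred(model, Xtest, ttest);  % also the density at ttest
errorbar(1:numel(y), y, 2*sigma);                 % rough 95% predictive band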
+index = model.index; +w = model.w; +w0 = model.w0; +X = X(index,:); +y = w'*X+w0; +%% probability prediction +if nargout > 1 + beta = model.beta; + U = model.U; % 3.54 + Xo = bsxfun(@minus,X,model.xbar); + XU = U'\Xo; + sigma = sqrt((1+dot(XU,XU,1))/beta); %3.59 end +if nargin == 3 && nargout == 3 + p = exp(logGauss(t,y,sigma)); +% p = exp(-0.5*(((t-y)./sigma).^2+log(2*pi))-log(sigma)); +end From 695138bbf50e4da4140af0524c829f03b5d08cc9 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 29 Dec 2015 21:17:34 +0800 Subject: [PATCH 072/149] move todo to demo --- chapter04/demo.m | 1 + 1 file changed, 1 insertion(+) diff --git a/chapter04/demo.m b/chapter04/demo.m index 3bbe477..04406b6 100644 --- a/chapter04/demo.m +++ b/chapter04/demo.m @@ -1,3 +1,4 @@ +% TODO: multiPlot: plot multiclass decison boundary % clear; close all; k = 2; From eb1f617c386dd4e1e9aba278b21fcb5a957bafea Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 29 Dec 2015 21:18:03 +0800 Subject: [PATCH 073/149] rvmBinEbFp.m need test --- chapter04/TODO.txt | 2 -- chapter07/demo.m | 7 +---- chapter07/rvmBinEbFp.m | 70 +++++++++++++++++++++++++++++++++--------- 3 files changed, 57 insertions(+), 22 deletions(-) delete mode 100644 chapter04/TODO.txt diff --git a/chapter04/TODO.txt b/chapter04/TODO.txt deleted file mode 100644 index f826511..0000000 --- a/chapter04/TODO.txt +++ /dev/null @@ -1,2 +0,0 @@ -multiPlot: plot multiclass decison boundary - diff --git a/chapter07/demo.m b/chapter07/demo.m index b96c5cf..7f3e117 100755 --- a/chapter07/demo.m +++ b/chapter07/demo.m @@ -1,11 +1,6 @@ % TODO: % 3) fix coordinate descent -% 4) sparse prediction for regression and classification - -% 5) need to be extensively tested on high dimensional data (inlucding -% chapter03 Bayesian linear regression) - - +% 4) sparse predictionand classification % clear; close all; % diff --git a/chapter07/rvmBinEbFp.m b/chapter07/rvmBinEbFp.m index 8130663..cd36cb1 100644 --- a/chapter07/rvmBinEbFp.m +++ b/chapter07/rvmBinEbFp.m @@ -9,29 +9,71 @@ X = [X;ones(1,n)]; d = size(X,1); alpha = alpha*ones(d,1); -weight = zeros(d,1); +m = zeros(d,1); tol = 1e-3; maxiter = 100; llh = -inf(1,maxiter); -infinity = 1e+10; +index = 1:d; for iter = 2:maxiter - used = alpha < infinity; - a = alpha(used); - w = weight(used); - [w,energy,U] = optLogitNewton(X(used,:),t,a,w); - w2 = w.^2; - llh(iter) = energy(end)+0.5*(sum(log(a))-2*sum(log(diag(U)))-dot(a,w2)-n*log(2*pi)); % 7.114 + % remove zeros + nz = 1./alpha > tol; % nonzeros + index = index(nz); + alpha = alpha(nz); + X = X(nz,:); + m = m(nz); + + [m,e,U] = logitNewton(X,t,alpha,m); + + m2 = m.^2; + llh(iter) = e(end)+0.5*(sum(log(alpha))-2*sum(log(diag(U)))-dot(alpha,m2)-n*log(2*pi)); % 7.114 if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end V = inv(U); dgS = dot(V,V,2); - alpha(used) = (1-a.*dgS)./w2; % 7.89 & 7.87 - weight(used) = w; + alpha = (1-alpha.*dgS)./m2; % 7.89 & 7.87 end llh = llh(2:iter); -model.used = used; -model.w = w; % nonzero elements of weight -model.a = a; % nonzero elements of alpha -model.weght = weight; +model.index = index; +model.w = m; model.alpha = alpha; + + +function [w, llh, U] = logitNewton(X, t, lambda, w) +% logistic regression for binary classification (Bernoulli likelihood) +% Written by Mo Chen (sth4nth@gmail.com). 
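The helper whose body follows is a damped Newton (IRLS) loop on the alpha-penalized Bernoulli log-likelihood. One step, written out schematically in the notation of PRML 4.96-4.97 (X already carries the appended bias row; lambda is the vector of prior precisions):

y = sigmoid(w'*X);                        % predicted probabilities, PRML 4.87
g = X*(y-t)' + lambda.*w;                 % penalized gradient, PRML 4.96
H = X*diag(y.*(1-y))*X' + diag(lambda);   % penalized Hessian, PRML 4.97
w = w - H\g;                              % Newton step, halved until llh improves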
+[d,n] = size(X); +tol = 1e-4; +maxiter = 100; +llh = -inf(1,maxiter); + +idx = (1:d)'; +dg = sub2ind([d,d],idx,idx); +h = ones(1,n); +h(t==0) = -1; +z = w'*X; +for iter = 2:maxiter + y = sigmoid(z); + Xw = bsxfun(@times, X, sqrt(y.*(1-y))); + H = Xw*Xw'; + H(dg) = H(dg)+lambda; + U = chol(H); + g = X*(y-t)'+lambda.*w; + p = -U\(U'\g); + wo = w; + while true % line search + w = wo+p; + z = w'*X; + llh(iter) = -sum(log1pexp(-h.*z))-0.5*sum(lambda.*w.^2); + progress = llh(iter)-llh(iter-1); + if progress < 0 + p = p/2; + else + break; + end + end + if progress < tol + break + end +end +llh = llh(2:iter); From c37e77e8493dfd65060c48838452ff49cbe41248 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 29 Dec 2015 22:17:17 +0800 Subject: [PATCH 074/149] change API of logitBin related functions to support rvm functions in chapter07 --- chapter04/binPlot.m | 3 +-- chapter04/demo.m | 17 +++++++++-------- chapter04/logitBin.m | 5 ++--- chapter04/logitBinPred.m | 4 ++-- 4 files changed, 14 insertions(+), 15 deletions(-) diff --git a/chapter04/binPlot.m b/chapter04/binPlot.m index 96bfd17..ff147d8 100644 --- a/chapter04/binPlot.m +++ b/chapter04/binPlot.m @@ -4,7 +4,6 @@ function binPlot(model, X, t) % t: 1xn label assert(size(X,1) == 2); w = model.w; -w0 = model.w0; xi = min(X,[],2); xa = max(X,[],2); [x1,x2] = meshgrid(linspace(xi(1),xa(1)), linspace(xi(2),xa(2))); @@ -20,6 +19,6 @@ function binPlot(model, X, t) idc = t==i; scatter(X(1,idc),X(2,idc),36,color(mod(i-1,m)+1)); end -y = w0+w(1)*x1+w(2)*x2; +y = w(1)*x1+w(2)*x2+w(3); contour(x1,x2,y,[-0 0]); hold off; diff --git a/chapter04/demo.m b/chapter04/demo.m index 04406b6..2056cd9 100644 --- a/chapter04/demo.m +++ b/chapter04/demo.m @@ -6,13 +6,14 @@ [X,t] = kmeansRnd(2,k,n); [model, llh] = logitBin(X,t-1,0); plot(llh); -binPlot(model,X,t) +y = logitBinPred(model,X)+1; +binPlot(model,X,y) pause %% -clear -k = 3; -n = 1000; -[X,t] = kmeansRnd(2,k,n); -[model, llh] = logitMn(X,t); -y = logitMnPred(model,X); -spread(X,y) +% clear +% k = 3; +% n = 1000; +% [X,t] = kmeansRnd(2,k,n); +% [model, llh] = logitMn(X,t); +% y = logitMnPred(model,X); +% plotClass(X,y) diff --git a/chapter04/logitBin.m b/chapter04/logitBin.m index 620df7c..32299ab 100644 --- a/chapter04/logitBin.m +++ b/chapter04/logitBin.m @@ -1,4 +1,4 @@ -function [model, llh] = logitBin(X, t, lambda) +function [model, llh] = logitBin(X, t, lambda, w) % Logistic regression for binary classification optimized by Newton-Raphson % method. % X: dxn data matrix @@ -43,5 +43,4 @@ if incr < tol; break; end end llh = llh(2:iter); -model.w = w(1:(end-1)); -model.w0 = w(end); \ No newline at end of file +model.w = w; diff --git a/chapter04/logitBinPred.m b/chapter04/logitBinPred.m index 59ca680..7ac8c2e 100644 --- a/chapter04/logitBinPred.m +++ b/chapter04/logitBinPred.m @@ -3,8 +3,8 @@ % model: trained model structure % X: d x n testing data % Written by Mo Chen (sth4nth@gmail.com). 
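With this API change the bias is folded into the weight vector (training appends a ones row to X), so callers never handle w0 separately. A usage sketch mirroring chapter04/demo.m (kmeansRnd is the toolbox's Gaussian-cluster sampler):

[X, t] = kmeansRnd(2, 2, 1000);    % 2-D data, 2 clusters, labels in {1,2}
model = logitBin(X, t-1, 0);       % train on {0,1} labels, lambda = 0
y = logitBinPred(model, X) + 1;    % predicted labels back in {1,2}
binPlot(model, X, y);              % decision boundary over the data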
+X = [X;ones(1,size(X,2))]; w = model.w; -w0 = model.w0; -p = exp(-log1pexp(w'*X+w0)); +p = exp(-log1pexp(w'*X)); y = (p>0.5)+0; From b08a372069f98504a9c06a34c959873f460465c4 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 29 Dec 2015 22:19:49 +0800 Subject: [PATCH 075/149] fix rvmBin and add rvmBinPred --- chapter07/demo.m | 119 +++++++++++++++-------------------------- chapter07/rvmBinEbFp.m | 51 +++++++++--------- chapter07/rvmBinPred.m | 18 ++++--- 3 files changed, 79 insertions(+), 109 deletions(-) diff --git a/chapter07/demo.m b/chapter07/demo.m index 7f3e117..8107daf 100755 --- a/chapter07/demo.m +++ b/chapter07/demo.m @@ -1,60 +1,43 @@ % TODO: -% 3) fix coordinate descent -% 4) sparse predictionand classification +% 3) fix coordinate descent rvm (llh not increase) +% 4) need test rvm classification for high dim data % clear; close all; % %% sparse signal recovery demo -clear; close all; - -d = 512; % signal length -k = 20; % number of spikes -n = 100; % number of measurements -% -% random +/- 1 signal -x = zeros(d,1); -q = randperm(d); -x(q(1:k)) = sign(randn(k,1)); - -% projection matrix -A = unitize(randn(d,n),1); -% noisy observations -sigma = 0.005; -e = sigma*randn(1,n); -y = x'*A + e; - - -[model,llh] = rvmRegEbFp(A,y); -plot(llh); -m = zeros(d,1); -m(model.index) = model.w; - -h = max(abs(x))+0.2; -x_range = [1,d]; -y_range = [-h,+h]; -figure; -subplot(2,1,1);plot(x); axis([x_range,y_range]); title('Original Signal'); -subplot(2,1,2);plot(m); axis([x_range,y_range]); title('Recovery Signal'); - - -[y, sigma] = rvmRegPred(model,A); -% % solve by BCS -% tic; -% [weights,used,sigma2,errbars] = BCS_fast_rvm(A,y,initsigma2,1e-8); -% t_BCS = toc; -% fprintf(1,'BCS number of nonzero weights: %d\d',length(used)); -% x_BCS = zeros(d,1); err = zeros(d,1); -% x_BCS(used) = weights; err(used) = errbars; +% clear; close all; % +% d = 512; % signal length +% k = 20; % number of spikes +% n = 100; % number of measurements +% % +% % random +/- 1 signal +% x = zeros(d,1); +% q = randperm(d); +% x(q(1:k)) = sign(randn(k,1)); % -% E_BCS = norm(x-x_BCS)/norm(x); +% % projection matrix +% A = unitize(randn(d,n),1); +% % noisy observations +% sigma = 0.005; +% e = sigma*randn(1,n); +% y = x'*A + e; % -% figure -% subplot(3,1,1); plot(x); axis([1 d -max(abs(x))-0.2 max(abs(x))+0.2]); title(['(a) Original Signal']); -% subplot(3,1,3); errorbar(x_BCS,err); axis([1 d -max(abs(x))-0.2 max(abs(x))+0.2]); title(['(c) Reconstruction with BCS, n=' num2str(n)]); box on; % -% disp(['BCS: ||I_hat-I||/||I|| = ' num2str(E_BCS) ', time = ' num2str(t_BCS) ' secs']); +% [model,llh] = rvmRegEbFp(A,y); +% plot(llh); +% m = zeros(d,1); +% m(model.index) = model.w; +% +% h = max(abs(x))+0.2; +% x_range = [1,d]; +% y_range = [-h,+h]; +% figure; +% subplot(2,1,1);plot(x); axis([x_range,y_range]); title('Original Signal'); +% subplot(2,1,2);plot(m); axis([x_range,y_range]); title('Recovery Signal'); +% +% [y, sigma] = rvmRegPred(model,A); %% regression % d = 100; % beta = 1e-1; @@ -99,34 +82,16 @@ % hold off %% classification -% n = 2; -% d = 2; -% d = 1000; -% [X,t] = rndKCluster(d,n,d); -% [x1,x2] = meshgrid(linspace(min(X(1,:)),max(X(1,:)),d), linspace(min(X(2,:)),max(X(2,:)),d)); +clear; close all +k = 2; +d = 2; +n = 1000; +[X,t] = kmeansRnd(d,k,n); +[x1,x2] = meshgrid(linspace(min(X(1,:)),max(X(1,:)),n), linspace(min(X(2,:)),max(X(2,:)),n)); + +[model, llh] = rvmBinEbFp(X,t-1); +plot(llh); +y = rvmBinPred(model,X)+1; +figure; +binPlot(model,X,y); -%% -% [model, llh] = rvmEbFp(X,t-1); -% figure -% plot(llh); -% 
figure; -% spread(X,t); -% -% w = zeros(3,1); -% w(model.used) = model.w; -% y = w(1)*x1+w(2)*x2+w(3); -% hold on; -% contour(x1,x2,y,1); -% hold off; -%% -% [model, llh] = rvmEbEm(X,t-1); -% figure -% plot(llh); -% figure; -% spread(X,t); -% -% w = model.w; -% y = w(1)*x1+w(2)*x2+w(3); -% hold on; -% contour(x1,x2,y,1); -% hold off; \ No newline at end of file diff --git a/chapter07/rvmBinEbFp.m b/chapter07/rvmBinEbFp.m index cd36cb1..48b5753 100644 --- a/chapter07/rvmBinEbFp.m +++ b/chapter07/rvmBinEbFp.m @@ -11,7 +11,7 @@ alpha = alpha*ones(d,1); m = zeros(d,1); -tol = 1e-3; +tol = 1e-4; maxiter = 100; llh = -inf(1,maxiter); index = 1:d; @@ -23,11 +23,12 @@ X = X(nz,:); m = m(nz); - [m,e,U] = logitNewton(X,t,alpha,m); + [m,e,U] = logitBin(X,t,alpha,m); m2 = m.^2; llh(iter) = e(end)+0.5*(sum(log(alpha))-2*sum(log(diag(U)))-dot(alpha,m2)-n*log(2*pi)); % 7.114 - if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end + if abs(llh(iter)-llh(iter-1)) < tol*abs(llh(iter-1)); break; end + V = inv(U); dgS = dot(V,V,2); alpha = (1-alpha.*dgS)./m2; % 7.89 & 7.87 @@ -39,41 +40,41 @@ model.alpha = alpha; -function [w, llh, U] = logitNewton(X, t, lambda, w) -% logistic regression for binary classification (Bernoulli likelihood) -% Written by Mo Chen (sth4nth@gmail.com). +function [w, llh, U] = logitBin(X, t, lambda, w) +% Logistic regression [d,n] = size(X); tol = 1e-4; maxiter = 100; llh = -inf(1,maxiter); - idx = (1:d)'; dg = sub2ind([d,d],idx,idx); h = ones(1,n); h(t==0) = -1; -z = w'*X; +a = w'*X; for iter = 2:maxiter - y = sigmoid(z); - Xw = bsxfun(@times, X, sqrt(y.*(1-y))); - H = Xw*Xw'; + y = sigmoid(a); % 4.87 + r = y.*(1-y); % 4.98 + Xw = bsxfun(@times, X, sqrt(r)); + H = Xw*Xw'; % 4.97 H(dg) = H(dg)+lambda; U = chol(H); - g = X*(y-t)'+lambda.*w; + g = X*(y-t)'+lambda.*w; % 4.96 p = -U\(U'\g); - wo = w; - while true % line search + wo = w; % 4.92 + w = wo+p; + a = w'*X; + llh(iter) = -sum(log1pexp(-h.*a))-0.5*sum(lambda.*w.^2); % 4.89 + incr = llh(iter)-llh(iter-1); + while incr < 0 % line search + p = p/2; w = wo+p; - z = w'*X; - llh(iter) = -sum(log1pexp(-h.*z))-0.5*sum(lambda.*w.^2); - progress = llh(iter)-llh(iter-1); - if progress < 0 - p = p/2; - else - break; - end - end - if progress < tol - break + a = w'*X; + llh(iter) = -sum(log1pexp(-h.*a))-0.5*sum(lambda.*w.^2); + incr = llh(iter)-llh(iter-1); end + if incr < tol; break; end end llh = llh(2:iter); + + + diff --git a/chapter07/rvmBinPred.m b/chapter07/rvmBinPred.m index 7fcd026..e2781e2 100644 --- a/chapter07/rvmBinPred.m +++ b/chapter07/rvmBinPred.m @@ -1,7 +1,11 @@ -function [ output_args ] = rvmBinPred( input_args ) -%RVMBINPRED Summary of this function goes here -% Detailed explanation goes here - - -end - +function [y, p] = rvmBinPred(model, X) +% Prodict the label for binary logistic regression model +% model: trained model structure +% X: d x n testing data +% Written by Mo Chen (sth4nth@gmail.com). 
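The prediction below consults only the surviving basis functions: test inputs keep the original row dimension and are reduced by model.index internally after the bias row is appended. A usage sketch following the demo above (t holds labels in {1,2}):

[y, p] = rvmBinPred(model, X);     % y in {0,1}, p = sigmoid(w'*x)
trainErr = mean(y+1 ~= t);         % error against the {1,2} labels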
+index = model.index; +X = [X;ones(1,size(X,2))]; +X = X(index,:); +w = model.w; +p = exp(-log1pexp(w'*X)); +y = (p>0.5)+0; From 1c61e119bd9b3de7e4b017b1bc3edb3d3b773dee Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 29 Dec 2015 22:23:57 +0800 Subject: [PATCH 076/149] add comment --- chapter07/rvmBinEbFp.m | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/chapter07/rvmBinEbFp.m b/chapter07/rvmBinEbFp.m index 48b5753..921c696 100644 --- a/chapter07/rvmBinEbFp.m +++ b/chapter07/rvmBinEbFp.m @@ -23,15 +23,15 @@ X = X(nz,:); m = m(nz); - [m,e,U] = logitBin(X,t,alpha,m); + [m,e,U] = logitBin(X,t,alpha,m); % 7.110 ~ 7.113 m2 = m.^2; - llh(iter) = e(end)+0.5*(sum(log(alpha))-2*sum(log(diag(U)))-dot(alpha,m2)-n*log(2*pi)); % 7.114 + llh(iter) = e(end)+0.5*(sum(log(alpha))-2*sum(log(diag(U)))-dot(alpha,m2)-n*log(2*pi)); % 7.114 & 7.118 if abs(llh(iter)-llh(iter-1)) < tol*abs(llh(iter-1)); break; end V = inv(U); dgS = dot(V,V,2); - alpha = (1-alpha.*dgS)./m2; % 7.89 & 7.87 + alpha = (1-alpha.*dgS)./m2; % 7.89 & 7.87 & 7.116 end llh = llh(2:iter); From ef88722484c1e1ab35d7f435afc91a802fef1674 Mon Sep 17 00:00:00 2001 From: chenmo Date: Thu, 31 Dec 2015 13:38:35 +0800 Subject: [PATCH 077/149] minor modify logsumexp --- common/logsumexp.m | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/common/logsumexp.m b/common/logsumexp.m index ea231fa..c7b3aa2 100644 --- a/common/logsumexp.m +++ b/common/logsumexp.m @@ -8,11 +8,10 @@ if isempty(dim), dim = 1; end end -% subtract the largest in each column +% subtract the largest in each dim y = max(x,[],dim); -x = bsxfun(@minus,x,y); -s = y + log(sum(exp(x),dim)); -i = find(~isfinite(y)); -if ~isempty(i) +s = y+log(sum(exp(bsxfun(@minus,x,y)),dim)); % TODO: use log1p +i = isinf(y); +if any(i(:)) s(i) = y(i); -end +end \ No newline at end of file From 5be1f4ec3bd243a7ea65e55427406af4bcfb53a3 Mon Sep 17 00:00:00 2001 From: chenmo Date: Thu, 31 Dec 2015 13:44:15 +0800 Subject: [PATCH 078/149] delete never used log1pexp function --- common/log1pexp.m | 7 ------- 1 file changed, 7 deletions(-) delete mode 100644 common/log1pexp.m diff --git a/common/log1pexp.m b/common/log1pexp.m deleted file mode 100644 index 8a6f018..0000000 --- a/common/log1pexp.m +++ /dev/null @@ -1,7 +0,0 @@ -function y = log1pexp(x) -% accurately compute y = log(1+exp(x)) -% reference: Accurately Computing log(1-exp(|a|)) Martin Machler -seed = 33.3; -y = x; -idx = x<seed; -y(idx) = log1p(exp(x(idx))); Date: Thu, 31 Dec 2015 13:59:05 +0800 Subject: [PATCH 079/149] readme.md --- chapter07/demo.m | 62 +++++++++--------- chapter07/rvmRegEbCd.m | 37 +++++++---- chapter07/rvmRegEbCd.m~ | 138 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 195 insertions(+), 42 deletions(-) create mode 100644 chapter07/rvmRegEbCd.m~ diff --git a/chapter07/demo.m b/chapter07/demo.m index 8107daf..5a8afeb 100755 --- a/chapter07/demo.m +++ b/chapter07/demo.m @@ -1,6 +1,4 @@ % TODO: -% 3) fix coordinate descent rvm (llh not increase) -% 4) need test rvm classification for high dim data % clear; close all; % %% sparse signal recovery demo clear; close all; d = 512; % signal length k = 20; % number of spikes n = 100; % number of measurements % % random +/- 1 signal x = zeros(d,1); q = randperm(d); x(q(1:k)) = sign(randn(k,1)); % projection matrix A = 
unitize(randn(d,n),1); +% noisy observations +sigma = 0.005; +e = sigma*randn(1,n); +y = x'*A + e; +[model,llh] = rvmRegEbCd(A,y); +plot(llh); % % % [model,llh] = rvmRegEbFp(A,y); @@ -45,7 +47,7 @@ % w = randn; % b = randn; % t = w'*X+b+beta*randn(1,d); - +% % x = linspace(min(X)-1,max(X)+1,d); % test data %% % [model,llh] = rvmRegEbFp(X,t); @@ -82,16 +84,16 @@ % hold off %% classification -clear; close all -k = 2; -d = 2; -n = 1000; -[X,t] = kmeansRnd(d,k,n); -[x1,x2] = meshgrid(linspace(min(X(1,:)),max(X(1,:)),n), linspace(min(X(2,:)),max(X(2,:)),n)); - -[model, llh] = rvmBinEbFp(X,t-1); -plot(llh); -y = rvmBinPred(model,X)+1; -figure; -binPlot(model,X,y); +% clear; close all +% k = 2; +% d = 2; +% n = 1000; +% [X,t] = kmeansRnd(d,k,n); +% [x1,x2] = meshgrid(linspace(min(X(1,:)),max(X(1,:)),n), linspace(min(X(2,:)),max(X(2,:)),n)); +% +% [model, llh] = rvmBinEbFp(X,t-1); +% plot(llh); +% y = rvmBinPred(model,X)+1; +% figure; +% binPlot(model,X,y); diff --git a/chapter07/rvmRegEbCd.m b/chapter07/rvmRegEbCd.m index c1b493c..2c74783 100644 --- a/chapter07/rvmRegEbCd.m +++ b/chapter07/rvmRegEbCd.m @@ -11,7 +11,7 @@ X = bsxfun(@minus,X,xbar); t = bsxfun(@minus,t,tbar); -beta = 1/(0.1*var(t))^2; % beta = 1/sigma^2 +beta = 1/mean(t.^2); % beta = 1/sigma^2 alpha = inf(d,1); S = beta*dot(X,X,2); Q = beta*(X*t'); @@ -20,8 +20,8 @@ Phi = zeros(0,n); dim = zeros(0,1); -maxiter = 100; -tol = 1e-2; +maxiter = 100*d; +tol = 1e-4; llh = -inf(1,maxiter); iAct = zeros(d,3); iUse = false(d,1); @@ -30,30 +30,42 @@ theta = q.^2-s; iNew = theta>0; + assert(any(iNew)); % debug + iUpd = (iNew & iUse); % update iAdd = (iNew ~= iUpd); % add iDel = (iUse ~= iUpd); % del + + iAct(:,1) = iAdd; + iAct(:,2) = iDel; + iAct(:,3) = iUpd; + + assert(isempty(setdiff(find(any(iAct,2)),union(find(iNew),find(iUse))))); % debug - % find the next alpha that maximizes the marginal likilihood - tllh = -inf(d,1); % trial (temptoray) likelihood + % find the next dimension j that maximizes the marginal likilihood + tllh = -inf(d,1); % trial likelihood if any(iAdd) tllh(iAdd) = (Q(iAdd).^2-S(iAdd))./S(iAdd)+log(S(iAdd)./(Q(iAdd).^2)); end if any(iDel) tllh(iDel) = Q(iDel).^2./(S(iDel)-alpha(iDel))-log1p1(-S(iDel)./alpha(iDel)); end - if any(iUpd) + if any(iUpd) % bug newAlpha = s(iUpd).^2./theta(iUpd); oldAlpha = alpha(iUpd); - delta = 1./newAlpha-1./oldAlpha; + delta = 1./oldAlpha-1./newAlpha; tllh(iUpd) = Q(iUpd).^2.*delta./(S(iUpd).*delta+1)-log1p(S(iUpd).*delta); end + if ~isreal(tllh) + dd = []; + for i=1:d + if ~isreal(tllh(i)) + dd = [dd,i]; + end + end + end [llh(iter),j] = max(tllh); - if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end - - iAct(:,1) = iAdd; - iAct(:,2) = iDel; - iAct(:,3) = iUpd; +% if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end % update parameters switch find(iAct(j,:)) @@ -112,6 +124,7 @@ S = S+kappa*v.^2; % eq (35) Q = Q+kappa*mu_j*v; end + iUse = accumarray(dim,true,[d,1],@(x) x); % from Wei Li (pretty cool!) s = S; q = Q; % p.353 Execrcies 7.17 alphaS = alpha(iUse)-S(iUse); diff --git a/chapter07/rvmRegEbCd.m~ b/chapter07/rvmRegEbCd.m~ new file mode 100644 index 0000000..57dfa21 --- /dev/null +++ b/chapter07/rvmRegEbCd.m~ @@ -0,0 +1,138 @@ +function [model, llh] = rvmRegEbCd(X, t) +% TODO: llh not increasing. verify with sparse high dimensional data +% Relevance Vector Machine (ARD sparse prior) for regression +% training by empirical bayesian (type II ML) using Coordinate Descent +% reference: (Fast RVM) +% Tipping and Faul. 
Fast marginal likelihood maximisation for sparse Bayesian models. AISTATS 2003. +% Written by Mo Chen (sth4nth@gmail.com). +[d,n] = size(X); +xbar = mean(X,2); +tbar = mean(t,2); +X = bsxfun(@minus,X,xbar); +t = bsxfun(@minus,t,tbar); + +beta = 1/mean(t.^2); % beta = 1/sigma^2 +alpha = inf(d,1); +S = beta*dot(X,X,2); +Q = beta*(X*t'); +Sigma = zeros(0,0); +mu = zeros(0,1); +Phi = zeros(0,n); +dim = zeros(0,1); + +maxiter = 100*d; +tol = 1e-4; +llh = -inf(1,maxiter); +iAct = zeros(d,3); +iUse = false(d,1); +s = S; q = Q; +for iter = 2:maxiter + theta = q.^2-s; + iNew = theta>0; + + assert(any(iNew)); % debug + + iUpd = (iNew & iUse); % update + iAdd = (iNew ~= iUpd); % add + iDel = (iUse ~= iUpd); % del + + iAct(:,1) = iAdd; + iAct(:,2) = iDel; + iAct(:,3) = iUpd; + + assert(isempty(setdiff(find(any(iAct,2)),union(find(iNew),iUse)))); % debug + + % find the next dimension j that maximizes the marginal likilihood + tllh = -inf(d,1); % trial likelihood + if any(iAdd) + tllh(iAdd) = (Q(iAdd).^2-S(iAdd))./S(iAdd)+log(S(iAdd)./(Q(iAdd).^2)); + end + if any(iDel) + tllh(iDel) = Q(iDel).^2./(S(iDel)-alpha(iDel))-log1p1(-S(iDel)./alpha(iDel)); + end + if any(iUpd) + newAlpha = s(iUpd).^2./theta(iUpd); + oldAlpha = alpha(iUpd); + delta = 1./newAlpha-1./oldAlpha; + tllh(iUpd) = Q(iUpd).^2.*delta./(S(iUpd).*delta+1)-log1p(S(iUpd).*delta); + end + [llh(iter),j] = max(tllh); +% if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end + + % update parameters + try + switch find(iAct(j,:)) + case 1 % Add + alpha(j) = s(j)^2/theta(j); + Sigma_jj = 1/(alpha(j)+S(j)); + mu_j = Sigma_jj*Q(j); + phi_j = X(j,:); + + v = beta*Sigma*(Phi*phi_j'); % temporary vector for common part + off = -beta*Sigma_jj*v; + Sigma = [Sigma+Sigma_jj*(v*v'), off; off', Sigma_jj]; + mu = [mu-mu_j*v; mu_j]; + + e_j = phi_j-v'*Phi; + v = beta*X*e_j'; + S = S-Sigma_jj*v.^2; + Q = Q-mu_j*v; + + dim = [dim;j]; %#ok + case 2 % del + idx = (dim==j); + alpha(j) = inf; + Sigma_j = Sigma(:,idx); + Sigma_jj = Sigma(idx,idx); + mu_j = mu(idx); + + mu(idx) = []; + Sigma(:,idx) = []; + Sigma(idx,:) = []; + + kappa = 1/Sigma_jj; + Sigma = Sigma-kappa*(Sigma_j*Sigma_j'); % eq (33) + mu = mu-kappa*mu_j*Sigma_j; % eq (34) + + v = beta*X*(Phi'*Sigma_j); + S = S+kappa*v.^2; % eq (35) + Q = Q+kappa*mu_j*v; + + dim(idx) = []; + case 3 % update: + idx = (dim==j); + newAlpha = s(j)^2/theta(j); + oldAlpha = alpha(j); + alpha(j) = newAlpha; + + Sigma_j = Sigma(:,idx); + Sigma_jj = Sigma(idx,idx); + mu_j = mu(idx); + + kappa = 1/(Sigma_jj+1/(newAlpha-oldAlpha)); + Sigma = Sigma-kappa*(Sigma_j*Sigma_j'); % eq (33) + mu = mu-kappa*mu_j*Sigma_j; % eq (34) + + v = beta*X*(Phi'*Sigma_j); + S = S+kappa*v.^2; % eq (35) + Q = Q+kappa*mu_j*v; + end + catch + disp(''); + end + iUse = accumarray(dim,true,[d,1],@(x) x); % from Wei Li (pretty cool!) 
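The accumarray call above merely materializes a logical membership mask from the list of active dimensions; assuming dim holds unique indices in 1..d, it is equivalent to the plainer:

iUse = false(d,1);
iUse(dim) = true;     % mark the currently active basis functions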
+ s = S; q = Q; % p.353 Execrcies 7.17 + alphaS = alpha(iUse)-S(iUse); + s(iUse) = alpha(iUse).*S(iUse)./alphaS; % 7.104 + q(iUse) = alpha(iUse).*Q(iUse)./alphaS; % 7.105 + + Phi = X(iUse,:); + beta = (n-numel(dim)+dot(alpha(dim),diag(Sigma)))/sum((t-mu'*Phi).^2); +end +llh = llh(2:iter); +w0 = tbar-dot(mu,xbar(dim)); + +model.w0 = w0; +model.w = mu; +model.alpha = alpha; +model.beta = beta; From cb48875334862a67480b5dbd377c7c0c24e5b669 Mon Sep 17 00:00:00 2001 From: sth4nth Date: Thu, 31 Dec 2015 19:18:00 +0800 Subject: [PATCH 080/149] rvmCd is not done --- chapter07/rvmRegEbCd.m | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/chapter07/rvmRegEbCd.m b/chapter07/rvmRegEbCd.m index 2c74783..fa9c488 100644 --- a/chapter07/rvmRegEbCd.m +++ b/chapter07/rvmRegEbCd.m @@ -48,15 +48,15 @@ tllh(iAdd) = (Q(iAdd).^2-S(iAdd))./S(iAdd)+log(S(iAdd)./(Q(iAdd).^2)); end if any(iDel) - tllh(iDel) = Q(iDel).^2./(S(iDel)-alpha(iDel))-log1p1(-S(iDel)./alpha(iDel)); + tllh(iDel) = Q(iDel).^2./(S(iDel)-alpha(iDel))-log1p(-S(iDel)./alpha(iDel)); end if any(iUpd) % bug newAlpha = s(iUpd).^2./theta(iUpd); oldAlpha = alpha(iUpd); - delta = 1./oldAlpha-1./newAlpha; - tllh(iUpd) = Q(iUpd).^2.*delta./(S(iUpd).*delta+1)-log1p(S(iUpd).*delta); + delta = 1./newAlpha-1./oldAlpha; + tllh(iUpd) = Q(iUpd).^2./(S(iUpd)+1./delta)-log1p(S(iUpd).*delta); end - if ~isreal(tllh) + if ~isreal(tllh) % debug dd = []; for i=1:d if ~isreal(tllh(i)) From 812c0315e2bf768b31eefe3a8517f9ee6b15b73e Mon Sep 17 00:00:00 2001 From: sth4nth Date: Fri, 8 Jan 2016 20:47:36 +0800 Subject: [PATCH 081/149] rvmfast is not finished --- chapter07/rvmFast.m | 146 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) create mode 100644 chapter07/rvmFast.m diff --git a/chapter07/rvmFast.m b/chapter07/rvmFast.m new file mode 100644 index 0000000..f348a7b --- /dev/null +++ b/chapter07/rvmFast.m @@ -0,0 +1,146 @@ +function [model, llh] = rvmFast(X, t) +% Relevance Vector Machine (ARD sparse prior) for regression +% training by empirical bayesian (type II ML) using Coordinate Descent +% reference: (Fast RVM) +% Tipping and Faul. Fast marginal likelihood maximisation for sparse Bayesian models. AISTATS 2003. 
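The decision logic this stub never finished is the core of the sequential algorithm: for each candidate basis i, the marginal likelihood as a function of alpha_i alone has a unique maximum at alpha_i = s_i^2/(q_i^2-s_i) when theta_i = q_i^2-s_i > 0, and at alpha_i = inf otherwise (PRML 7.101). Schematically, for a chosen index j and an active-set mask iUse as in rvmRegEbCd.m above:

theta = q.^2-s;
if theta(j) > 0
    alpha(j) = s(j)^2/theta(j);    % add j, or re-estimate it if already in the model
elseif iUse(j)
    alpha(j) = inf;                % theta <= 0: delete j from the model
end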
+ +xbar = mean(X,2); +tbar = mean(t,2); +X = bsxfun(@minus,X,xbar); +t = bsxfun(@minus,t,tbar); + +maxiter = 1000; +tol = 1e-4; +llh = -inf(1,maxiter); + + +[globalParam, localParam] = initParam(X, t); +for iter = 2:maxiter + llh(iter) = calcLlh(globalParam, localParam); + if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end + switch act + case 1 + localParam = addLocalParam(globalParam, localParam); + case 2 + localParam = delLocalParam(globalParam, localParam); + case 3 + localParam = updLocalParam(globalParam, localParam); + otherwise + error('error'); + end + globalParam = updGlobalParam(globalParam, localParam); + +end + + + + + + +function [globalParam, localParam] = initParam(X, t) + +X2 = dot(X,X,2); +Xt = X*t'; +[v,j] = max(Xt.^2./X2); + +beta = 1/mean(t.^2); % Beta = 1/sigma^2 +phi = X(j,:); +alpha = X2(j)/(v-1/beta); +sigma = 1/(alpha + beta*(phi'*phi)); +mu = beta*sigma*phi*t'; + + + + +globalParam = packGlobalParam(Beta, Q, S); +localParam = packLocalParam(j, alpha, mu, sigma); + + + + +function llh = calcLlh(globalParam, localParam) +[Beta, Q, S] = unpackGlobalParam(globalParam); +[index, Alpha, Mu, Sigma] = unpackLocalParam(localParam); + + + +function [globalParam, localParam] = addLocalParam(j, globalParam, localParam) +[Beta, Q, S, X] = unpackGlobalParam(globalParam); +[index, Alpha, Mu, Sigma, Phi] = unpackLocalParam(localParam); + +phi = X(j,:); +alpha = s(j)^2/theta(j); +sigma = 1/(alpha+S(j)); +mu = sigma*Q(j); + +% local +v = Beta*Sigma*(Phi*phi'); +off = -Beta*sigma*v; +Sigma = [Sigma+sigma*(v*v'), off; off', sigma]; +Mu = [Mu-mu*v; mu]; +index = [index,j]; +Alpha = [Alpha,alpha]; + +% global +e = phi-v'*Phi; +v = Beta*X*e'; +S = S-sigma*v.^2; +Q = Q-mu*v; + + + +localParam = packLocalParam(index, Alpha, Mu, Sigma); + + +function localParam = delLocalParam(j, globalParam, localParam) +[Beta, Q, S] = unpackGlobalParam(globalParam); +[index, Alpha, Mu, Sigma] = unpackLocalParam(localParam); + + +localParam = packLocalParam(index, Alpha, Mu, Sigma); + + + +function localParam = updLocalParam(j, globalParam, localParam) +[Beta, Q, S] = unpackGlobalParam(globalParam); +[index, Alpha, Mu, Sigma] = unpackLocalParam(localParam); + + +localParam = packLocalParam(index, Alpha, Mu, Sigma); + + +function globalParam = updGlobalParam(globalParam, localParam) +[Beta, Q, S] = unpackGlobalParam(globalParam); + + + + + +globalParam = packGlobalParam(Beta, Q, S); + +function localParam = packLocalParam(Index, Alpha, Mu, Sigma, Phi) +localParam.Index = Index; +localParam.Alpha = Alpha; +localParam.Mu = Mu; +localParam.Sigma = Sigma; +localParam.Phi = Phi; + +function [Index, Alpha, Mu, Sigma] = unpackLocalParam(localParam) +Index = localParam.Index; +Alpha = localParam.Alpha; +Mu = localParam.Mu; +Sigma = localParam.Sigma; + + +function globalParam = packGlobalParam(Beta, Q, S, X) +globalParam.Beta = Beta; +globalParam.Q = Q; +globalParam.S = S; +globalParam.X = X; + + +function [Beta, Q, S, X] = unpackGlobalParam(globalParam) +Beta = globalParam.Beta; +Q = globalParam.Q; +S = globalParam.S; +X = globalParam.X; \ No newline at end of file From 2b49900c4e5d5bb8b4b1f68b584cb3446514d52b Mon Sep 17 00:00:00 2001 From: sth4nth Date: Sat, 9 Jan 2016 15:54:01 +0800 Subject: [PATCH 082/149] cd is damn hard to fix --- chapter07/demo.m | 3 +- chapter07/rvmFast.m | 13 +-- chapter07/rvmFast.m~ | 153 ++++++++++++++++++++++++++++++ chapter07/rvmFast2.m | 114 ++++++++++++++++++++++ chapter07/rvmFast2.m~ | 91 ++++++++++++++++++ chapter07/rvmRegEbCd.m | 13 ++- chapter07/rvmRegEbCd.m.bak | 188 
+++++++++++++++++++++++++++++++++++++ chapter07/rvmRegEbCd.m~ | 47 +++++++--- 8 files changed, 598 insertions(+), 24 deletions(-) create mode 100644 chapter07/rvmFast.m~ create mode 100644 chapter07/rvmFast2.m create mode 100644 chapter07/rvmFast2.m~ create mode 100644 chapter07/rvmRegEbCd.m.bak diff --git a/chapter07/demo.m b/chapter07/demo.m index 5a8afeb..b5882f7 100755 --- a/chapter07/demo.m +++ b/chapter07/demo.m @@ -23,7 +23,8 @@ sigma = 0.005; e = sigma*randn(1,n); y = x'*A + e; -[model,llh] = rvmRegEbCd(A,y); +% [model,llh] = rvmRegEbCd(A,y); +[model,llh] = rvmFast2(A,y); plot(llh); % % diff --git a/chapter07/rvmFast.m b/chapter07/rvmFast.m index f348a7b..a7359cf 100644 --- a/chapter07/rvmFast.m +++ b/chapter07/rvmFast.m @@ -39,20 +39,17 @@ function [globalParam, localParam] = initParam(X, t) -X2 = dot(X,X,2); -Xt = X*t'; -[v,j] = max(Xt.^2./X2); - beta = 1/mean(t.^2); % Beta = 1/sigma^2 phi = X(j,:); alpha = X2(j)/(v-1/beta); -sigma = 1/(alpha + beta*(phi'*phi)); +sigma = 1/(alpha + beta*(phi*phi')); mu = beta*sigma*phi*t'; +V = beta*X*phi'; +S = beta*X2-sigma*V.^2; +Q = beta*Xt-beta*sigma*Xt(j)*V; - - -globalParam = packGlobalParam(Beta, Q, S); +globalParam = packGlobalParam(beta, Q, S); localParam = packLocalParam(j, alpha, mu, sigma); diff --git a/chapter07/rvmFast.m~ b/chapter07/rvmFast.m~ new file mode 100644 index 0000000..8b4497c --- /dev/null +++ b/chapter07/rvmFast.m~ @@ -0,0 +1,153 @@ +function [model, llh] = rvmFast(X, t) +% Relevance Vector Machine (ARD sparse prior) for regression +% training by empirical bayesian (type II ML) using Coordinate Descent +% reference: (Fast RVM) +% Tipping and Faul. Fast marginal likelihood maximisation for sparse Bayesian models. AISTATS 2003. + +xbar = mean(X,2); +tbar = mean(t,2); +X = bsxfun(@minus,X,xbar); +t = bsxfun(@minus,t,tbar); + +maxiter = 1000; +tol = 1e-4; +llh = -inf(1,maxiter); + + +[globalParam, localParam] = initParam(X, t); +for iter = 2:maxiter + llh(iter) = calcLlh(globalParam, localParam); + if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end + switch act + case 1 + localParam = addLocalParam(globalParam, localParam); + case 2 + localParam = delLocalParam(globalParam, localParam); + case 3 + localParam = updLocalParam(globalParam, localParam); + otherwise + error('error'); + end + globalParam = updGlobalParam(globalParam, localParam); + +end + + + + + + +function [globalParam, localParam] = initParam(X, t) + +X2 = dot(X,X,2); +Xt = X*t'; +[v,j] = max(Xt.^2./X2); + +beta = 1/mean(t.^2); % Beta = 1/sigma^2 +phi = X(j,:); +alpha = X2(j)/(v-1/beta); +sigma = 1/(alpha + beta*(phi'*phi)); +mu = beta*sigma*phi*t'; + +V = beta*X*phi'; +S = beta*X2-Sigma*V.^2; +Q = beta*Xt-Sigma*Xt( + + +e = phi-v'*Phi; +v = beta*X*e_j'; +S = S-Sigma_jj*v.^2; +Q = Q-mu_j*v; + +globalParam = packGlobalParam(Beta, Q, S); +localParam = packLocalParam(j, alpha, mu, sigma); + + + + +function llh = calcLlh(globalParam, localParam) +[Beta, Q, S] = unpackGlobalParam(globalParam); +[index, Alpha, Mu, Sigma] = unpackLocalParam(localParam); + + + +function [globalParam, localParam] = addLocalParam(j, globalParam, localParam) +[Beta, Q, S, X] = unpackGlobalParam(globalParam); +[index, Alpha, Mu, Sigma, Phi] = unpackLocalParam(localParam); + +phi = X(j,:); +alpha = s(j)^2/theta(j); +sigma = 1/(alpha+S(j)); +mu = sigma*Q(j); + +% local +v = Beta*Sigma*(Phi*phi'); +off = -Beta*sigma*v; +Sigma = [Sigma+sigma*(v*v'), off; off', sigma]; +Mu = [Mu-mu*v; mu]; +index = [index,j]; +Alpha = [Alpha,alpha]; + +% global +e = phi-v'*Phi; +v = Beta*X*e'; +S = 
S-sigma*v.^2; +Q = Q-mu*v; + + + +localParam = packLocalParam(index, Alpha, Mu, Sigma); + + +function localParam = delLocalParam(j, globalParam, localParam) +[Beta, Q, S] = unpackGlobalParam(globalParam); +[index, Alpha, Mu, Sigma] = unpackLocalParam(localParam); + + +localParam = packLocalParam(index, Alpha, Mu, Sigma); + + + +function localParam = updLocalParam(j, globalParam, localParam) +[Beta, Q, S] = unpackGlobalParam(globalParam); +[index, Alpha, Mu, Sigma] = unpackLocalParam(localParam); + + +localParam = packLocalParam(index, Alpha, Mu, Sigma); + + +function globalParam = updGlobalParam(globalParam, localParam) +[Beta, Q, S] = unpackGlobalParam(globalParam); + + + + + +globalParam = packGlobalParam(Beta, Q, S); + +function localParam = packLocalParam(Index, Alpha, Mu, Sigma, Phi) +localParam.Index = Index; +localParam.Alpha = Alpha; +localParam.Mu = Mu; +localParam.Sigma = Sigma; +localParam.Phi = Phi; + +function [Index, Alpha, Mu, Sigma] = unpackLocalParam(localParam) +Index = localParam.Index; +Alpha = localParam.Alpha; +Mu = localParam.Mu; +Sigma = localParam.Sigma; + + +function globalParam = packGlobalParam(Beta, Q, S, X) +globalParam.Beta = Beta; +globalParam.Q = Q; +globalParam.S = S; +globalParam.X = X; + + +function [Beta, Q, S, X] = unpackGlobalParam(globalParam) +Beta = globalParam.Beta; +Q = globalParam.Q; +S = globalParam.S; +X = globalParam.X; \ No newline at end of file diff --git a/chapter07/rvmFast2.m b/chapter07/rvmFast2.m new file mode 100644 index 0000000..f3ada47 --- /dev/null +++ b/chapter07/rvmFast2.m @@ -0,0 +1,114 @@ +function [model,llh] = rvmFast2(X,t) + +xbar = mean(X,2); +tbar = mean(t,2); +X = bsxfun(@minus,X,xbar); +t = bsxfun(@minus,t,tbar); + +d = size(X,1); + +tol = 1e-4; +maxiter = 5; +LLH = -inf(1,maxiter); + +X2 = dot(X,X,2); +Xt = X*t'; +[v,j] = max(Xt.^2./X2); + +beta = 1/mean(t.^2); % Beta = 1/sigma^2 +phi = X(j,:); +alpha = X2(j)/(v-1/beta); +sigma = 1/(alpha + beta*(phi*phi')); +mu = beta*sigma*phi*t'; + +V = beta*X*phi'; +S = beta*X2-sigma*V.^2; +Q = beta*Xt-beta*sigma*Xt(j)*V; + + +iUse = j; +Phi = phi; +Alpha = alpha; +Sigma = sigma; +Mu = mu; +for iter = 2:maxiter + s = S; q = Q; + s(iUse) = alpha.*S(iUse)./(alpha-S(iUse)); + q(iUse) = alpha.*Q(iUse)./(alpha-S(iUse)); + theta = q.^2-s; + + iNew = find(theta>0); + llh = -inf(d,1); + [iUpd,~,which] = intersect(iNew, iUse); % update + if ~isempty(iUpd) + alpha = s(iUpd).^2./theta(iUpd); + delta = 1./alpha-1./Alpha(which); + llh(iUpd) = Q(iUpd).^2./(S(iUpd)+1./delta)-log1p(S(iUpd).*delta); + end + + iAdd = setdiff(iNew,iUpd); + if ~isempty(iAdd) + llh(iAdd) = (Q(iAdd).^2-S(iAdd))./S(iAdd)+log(S(iAdd)./(Q(iAdd).^2)); + end + + [LLH(iter),j] = max(llh); + if abs(LLH(iter)-LLH(iter-1)) < tol*abs(LLH(iter)-LLH(2)); break; end + + if any(iUpd==j) + act = 1; + elseif any(iAdd==j) + act = 2; + else + act = 0; + end + switch act + case 1 % update + idx = (iUse==j); + alpha_ = s(j)^2/theta(j); + + alpha = Alpha(idx); + Sigma_j = Sigma(:,idx); + Sigma_jj = Sigma(idx,idx); + mu_j = Mu(idx); + + delta = alpha_-alpha; + kappa = delta/(Sigma_jj*delta+1); + Sigma = Sigma-kappa*(Sigma_j*Sigma_j'); % eq (33) + Mu = Mu-kappa*mu_j*Sigma_j; % eq (34) + + v = beta*X*(Phi'*Sigma_j); + S = S+kappa*v.^2; % eq (35) + Q = Q+kappa*mu_j*v; + + Alpha(idx) = alpha_; + case 2 % Add + phi = X(j,:); + alpha = s(j)^2/theta(j); + sigma = 1/(alpha+S(j)); + mu = sigma*Q(j); + + v = beta*Sigma*(Phi*phi'); + off = -beta*sigma*v; % ?beta + % off = -sigma*v; % ?beta + Sigma = [Sigma+sigma*(v*v'), off; off', sigma]; + Mu = 
[Mu-mu*v; mu]; + + e = phi-v'*Phi; + v = beta*X*e'; + S = S-sigma*v.^2; + Q = Q-mu*v; + + iUse = [iUse;j]; + Phi = [Phi;phi]; + Alpha = [Alpha;alpha]; + case 0 + disp(''); + end + +end +llh = llh(2:iter); + +model.index = iUse; +model.alpha = Alpha; +model.beta = beta; + diff --git a/chapter07/rvmFast2.m~ b/chapter07/rvmFast2.m~ new file mode 100644 index 0000000..3e2337a --- /dev/null +++ b/chapter07/rvmFast2.m~ @@ -0,0 +1,91 @@ +function [model,llh] = rvmFast2(X,t) + +xbar = mean(X,2); +tbar = mean(t,2); +X = bsxfun(@minus,X,xbar); +t = bsxfun(@minus,t,tbar); + +d = size(X,1); + +tol = 1e-4; +maxiter = 500; +LLH = -inf(1,maxiter); + +X2 = dot(X,X,2); +Xt = X*t'; +[v,j] = max(Xt.^2./X2); + +beta = 1/mean(t.^2); % Beta = 1/sigma^2 +phi = X(j,:); +alpha = X2(j)/(v-1/beta); +sigma = 1/(alpha + beta*(phi*phi')); +mu = beta*sigma*phi*t'; + +V = beta*X*phi'; +S = beta*X2-sigma*V.^2; +Q = beta*Xt-beta*sigma*Xt(j)*V; + + +iUse = j; +Phi = phi; +Alpha = alpha; +Sigma = sigma; +Mu = mu; +for iter = 2:maxiter + s = S; q = Q; + s(iUse) = alpha.*S(iUse)./(alpha-S(iUse)); + q(iUse) = alpha.*Q(iUse)./(alpha-S(iUse)); + theta = q.^2-s; + + iNew = find(theta>0); + + llh = -inf(d,1); + if any(iAdd) + llh(iAdd) = (Q(iAdd).^2-S(iAdd))./S(iAdd)+log(S(iAdd)./(Q(iAdd).^2)); + end + [iUpd,~,] = intersect(iNew, iUse); % update + if any(iUpd) + alpha = s(iUpd).^2./theta(iUpd); + index = iUse==find(iUpd); + delta = 1./alpha-1./Alpha(index); + tllh(iUpd) = Q(iUpd).^2./(S(iUpd)+1./delta)-log1p(S(iUpd).*delta); + end + [LLH(iter),j] = max(llh); + if abs(LLH(iter)-LLH(iter-1)) < tol*abs(LLH(iter)-LLH(2)); break; end + + + + switch find(iAct(j,:)) + case 1 % Add + phi = X(j,:); + alpha = s(j)^2/theta(j); + sigma = 1/(alpha+S(j)); + mu = sigma*Q(j); + + v = beta*Sigma*(Phi*phi'); + off = -beta*sigma*v; % ?beta + % off = -sigma*v; % ?beta + Sigma = [Sigma+sigma*(v*v'), off; off', sigma]; + Mu = [Mu-mu*v; mu]; + + + + e = phi-v'*Phi; + v = beta*X*e'; + S = S-sigma*v.^2; + Q = Q-mu*v; + + iUse = [iUse;j]; + Phi = [Phi;phi]; + Alpha = [Alpha;alpha]; + case 2 % update + + end + +end +llh = llh(2:iter); + +model.index = iUse; +model.alpha = Alpha; +model.beta = beta; + diff --git a/chapter07/rvmRegEbCd.m b/chapter07/rvmRegEbCd.m index fa9c488..dce607a 100644 --- a/chapter07/rvmRegEbCd.m +++ b/chapter07/rvmRegEbCd.m @@ -20,7 +20,7 @@ Phi = zeros(0,n); dim = zeros(0,1); -maxiter = 100*d; +maxiter = d-1; tol = 1e-4; llh = -inf(1,maxiter); iAct = zeros(d,3); @@ -36,11 +36,16 @@ iAdd = (iNew ~= iUpd); % add iDel = (iUse ~= iUpd); % del + + iUpd = false(d,1); + iDel = false(d,1); + iAct(:,1) = iAdd; iAct(:,2) = iDel; iAct(:,3) = iUpd; + - assert(isempty(setdiff(find(any(iAct,2)),union(find(iNew),find(iUse))))); % debug +% assert(isempty(setdiff(find(any(iAct,2)),union(find(iNew),find(iUse))))); % debug % find the next dimension j that maximizes the marginal likilihood tllh = -inf(d,1); % trial likelihood @@ -65,7 +70,7 @@ end end [llh(iter),j] = max(tllh); -% if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end + if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end % update parameters switch find(iAct(j,:)) @@ -75,7 +80,7 @@ mu_j = Sigma_jj*Q(j); phi_j = X(j,:); - v = beta*Sigma*(Phi*phi_j'); % temporary vector for common part + v = beta*Sigma*(Phi*phi_j'); off = -beta*Sigma_jj*v; Sigma = [Sigma+Sigma_jj*(v*v'), off; off', Sigma_jj]; mu = [mu-mu_j*v; mu_j]; diff --git a/chapter07/rvmRegEbCd.m.bak b/chapter07/rvmRegEbCd.m.bak new file mode 100644 index 0000000..9d1aa53 --- /dev/null +++ b/chapter07/rvmRegEbCd.m.bak @@ 
-0,0 +1,188 @@ +function [model, llh] = rvmRegEbCd(X, t) +% TODO: llh not increasing. verify with sparse high dimensional data +% Relevance Vector Machine (ARD sparse prior) for regression +% training by empirical bayesian (type II ML) using Coordinate Descent +% reference: (Fast RVM) +% Tipping and Faul. Fast marginal likelihood maximisation for sparse Bayesian models. AISTATS 2003. +% Written by Mo Chen (sth4nth@gmail.com). +[d,n] = size(X); +xbar = mean(X,2); +tbar = mean(t,2); +X = bsxfun(@minus,X,xbar); +t = bsxfun(@minus,t,tbar); + + + +% find initial alpha +beta = 1/mean(t.^2); % beta = 1/sigma^2 + + +X2 = dot(X,X,2); +Xt = beta*(X*t'); +[r,j] = max(Xt.^2./X2); +alpha = X2(j)/(r-1/beta); + +phi_j = X(j,:); +Sigma = 1/(alpha+beta*(phi_j*phi_j')); +mu = Sigma; + + + alpha(j) = s(j)^2/theta(j); + Sigma_jj = 1/(alpha(j)+S(j)); + mu_j = Sigma_jj*Q(j); + phi_j = X(j,:); + + v = beta*Sigma*(Phi*phi_j'); + off = -beta*Sigma_jj*v; + Sigma = [Sigma+Sigma_jj*(v*v'), off; off', Sigma_jj]; + mu = [mu-mu_j*v; mu_j]; + + e_j = phi_j-v'*Phi; + v = beta*X*e_j'; + S = S-Sigma_jj*v.^2; + Q = Q-mu_j*v; + + dim = [dim;j]; %#ok + + + + + +Sigma = zeros(0,0); +mu = zeros(0,1); +Phi = zeros(0,n); +dim = zeros(0,1); + +maxiter = 100*d; +tol = 1e-4; +llh = -inf(1,maxiter); +iAct = zeros(d,3); +iUse = false(d,1); +s = S; q = Q; +for iter = 2:maxiter + theta = q.^2-s; + iNew = theta>0; + + assert(any(iNew)); % debug + + iUpd = (iNew & iUse); % update + iAdd = (iNew ~= iUpd); % add + iDel = (iUse ~= iUpd); % del + + iAct(:,1) = iAdd; + iAct(:,2) = iDel; + iAct(:,3) = iUpd; + + assert(isempty(setdiff(find(any(iAct,2)),union(find(iNew),find(iUse))))); % debug + + % find the next dimension j that maximizes the marginal likilihood + tllh = -inf(d,1); % trial likelihood + if any(iAdd) + tllh(iAdd) = (Q(iAdd).^2-S(iAdd))./S(iAdd)+log(S(iAdd)./(Q(iAdd).^2)); + end + if any(iDel) + tllh(iDel) = Q(iDel).^2./(S(iDel)-alpha(iDel))-log1p(-S(iDel)./alpha(iDel)); + end + if any(iUpd) % bug + newAlpha = s(iUpd).^2./theta(iUpd); + oldAlpha = alpha(iUpd); + delta = 1./newAlpha-1./oldAlpha; + tllh(iUpd) = Q(iUpd).^2./(S(iUpd)+1./delta)-log1p(S(iUpd).*delta); + end + if ~isreal(tllh) % debug + dd = []; + for i=1:d + if ~isreal(tllh(i)) + dd = [dd,i]; + end + end + end + [llh(iter),j] = max(tllh); +% if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end + + % update parameters + switch find(iAct(j,:)) + case 1 % Add + alpha(j) = s(j)^2/theta(j); + Sigma_jj = 1/(alpha(j)+S(j)); + mu_j = Sigma_jj*Q(j); + phi_j = X(j,:); + + v = beta*Sigma*(Phi*phi_j'); + off = -beta*Sigma_jj*v; + Sigma = [Sigma+Sigma_jj*(v*v'), off; off', Sigma_jj]; + mu = [mu-mu_j*v; mu_j]; + + e_j = phi_j-v'*Phi; + v = beta*X*e_j'; + S = S-Sigma_jj*v.^2; + Q = Q-mu_j*v; + + dim = [dim;j]; %#ok + case 2 % del + idx = (dim==j); + alpha(j) = inf; + Sigma_j = Sigma(:,idx); + Sigma_jj = Sigma(idx,idx); + mu_j = mu(idx); + + mu(idx) = []; + Sigma(:,idx) = []; + Sigma(idx,:) = []; + + kappa = 1/Sigma_jj; + Sigma = Sigma-kappa*(Sigma_j*Sigma_j'); % eq (33) + mu = mu-kappa*mu_j*Sigma_j; % eq (34) + + v = beta*X*(Phi'*Sigma_j); + S = S+kappa*v.^2; % eq (35) + Q = Q+kappa*mu_j*v; + + dim(idx) = []; + case 3 % update: + idx = (dim==j); + newAlpha = s(j)^2/theta(j); + oldAlpha = alpha(j); + alpha(j) = newAlpha; + + Sigma_j = Sigma(:,idx); + Sigma_jj = Sigma(idx,idx); + mu_j = mu(idx); + + kappa = 1/(Sigma_jj+1/(newAlpha-oldAlpha)); + Sigma = Sigma-kappa*(Sigma_j*Sigma_j'); % eq (33) + mu = mu-kappa*mu_j*Sigma_j; % eq (34) + + v = beta*X*(Phi'*Sigma_j); + S = 
S+kappa*v.^2; % eq (35) + Q = Q+kappa*mu_j*v; + end + + iUse = accumarray(dim,true,[d,1],@(x) x); % from Wei Li (pretty cool!) + s = S; q = Q; % p.353 Execrcies 7.17 + alphaS = alpha(iUse)-S(iUse); + s(iUse) = alpha(iUse).*S(iUse)./alphaS; % 7.104 + q(iUse) = alpha(iUse).*Q(iUse)./alphaS; % 7.105 + + Phi = X(iUse,:); + beta = (n-numel(dim)+dot(alpha(dim),diag(Sigma)))/sum((t-mu'*Phi).^2); +end +llh = llh(2:iter); +w0 = tbar-dot(mu,xbar(dim)); + +model.w0 = w0; +model.w = mu; +model.alpha = alpha; +model.beta = beta; + +function iAct = GetIndex(iUse,iNew) +d = numelem(iUse); +iAct = zeros(d,3); +iUpd = (iNew & iUse); % update +iAdd = (iNew ~= iUpd); % add +iDel = (iUse ~= iUpd); % del +iAct(:,1) = iAdd; +iAct(:,2) = iDel; +iAct(:,3) = iUpd; + + diff --git a/chapter07/rvmRegEbCd.m~ b/chapter07/rvmRegEbCd.m~ index 57dfa21..521ccc6 100644 --- a/chapter07/rvmRegEbCd.m~ +++ b/chapter07/rvmRegEbCd.m~ @@ -11,10 +11,18 @@ tbar = mean(t,2); X = bsxfun(@minus,X,xbar); t = bsxfun(@minus,t,tbar); + + +% find initial alpha beta = 1/mean(t.^2); % beta = 1/sigma^2 -alpha = inf(d,1); -S = beta*dot(X,X,2); -Q = beta*(X*t'); + + +X2 = dot(X,X,2); +Xt = beta*(X*t'); +max(Xt.^2./X2, + + + Sigma = zeros(0,0); mu = zeros(0,1); Phi = zeros(0,n); @@ -40,7 +48,7 @@ for iter = 2:maxiter iAct(:,2) = iDel; iAct(:,3) = iUpd; - assert(isempty(setdiff(find(any(iAct,2)),union(find(iNew),iUse)))); % debug + assert(isempty(setdiff(find(any(iAct,2)),union(find(iNew),find(iUse))))); % debug % find the next dimension j that maximizes the marginal likilihood tllh = -inf(d,1); % trial likelihood @@ -48,19 +56,26 @@ for iter = 2:maxiter tllh(iAdd) = (Q(iAdd).^2-S(iAdd))./S(iAdd)+log(S(iAdd)./(Q(iAdd).^2)); end if any(iDel) - tllh(iDel) = Q(iDel).^2./(S(iDel)-alpha(iDel))-log1p1(-S(iDel)./alpha(iDel)); + tllh(iDel) = Q(iDel).^2./(S(iDel)-alpha(iDel))-log1p(-S(iDel)./alpha(iDel)); end - if any(iUpd) + if any(iUpd) % bug newAlpha = s(iUpd).^2./theta(iUpd); oldAlpha = alpha(iUpd); delta = 1./newAlpha-1./oldAlpha; - tllh(iUpd) = Q(iUpd).^2.*delta./(S(iUpd).*delta+1)-log1p(S(iUpd).*delta); + tllh(iUpd) = Q(iUpd).^2./(S(iUpd)+1./delta)-log1p(S(iUpd).*delta); + end + if ~isreal(tllh) % debug + dd = []; + for i=1:d + if ~isreal(tllh(i)) + dd = [dd,i]; + end + end end [llh(iter),j] = max(tllh); % if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end % update parameters - try switch find(iAct(j,:)) case 1 % Add alpha(j) = s(j)^2/theta(j); @@ -117,9 +132,7 @@ for iter = 2:maxiter S = S+kappa*v.^2; % eq (35) Q = Q+kappa*mu_j*v; end - catch - disp(''); - end + iUse = accumarray(dim,true,[d,1],@(x) x); % from Wei Li (pretty cool!) 
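In the update and delete branches above, the same rank-one downdate runs with different kappa: a re-estimate uses kappa = 1/(Sigma_jj+1/(newAlpha-oldAlpha)), and a deletion is just its newAlpha -> inf limit, kappa = 1/Sigma_jj. The equation numbers (33)-(35) in the comments refer to Tipping and Faul (2003):

kappa = 1/(Sigma_jj+1/(newAlpha-oldAlpha));   % delete: kappa = 1/Sigma_jj
Sigma = Sigma-kappa*(Sigma_j*Sigma_j');       % eq (33)
mu = mu-kappa*mu_j*Sigma_j;                   % eq (34)
v = beta*X*(Phi'*Sigma_j);
S = S+kappa*v.^2;                             % eq (35)
Q = Q+kappa*mu_j*v;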
s = S; q = Q; % p.353 Execrcies 7.17 alphaS = alpha(iUse)-S(iUse); @@ -136,3 +149,15 @@ model.w0 = w0; model.w = mu; model.alpha = alpha; model.beta = beta; + +function iAct = GetIndex(iUse,iNew) +d = numelem(iUse); +iAct = zeros(d,3); +iUpd = (iNew & iUse); % update +iAdd = (iNew ~= iUpd); % add +iDel = (iUse ~= iUpd); % del +iAct(:,1) = iAdd; +iAct(:,2) = iDel; +iAct(:,3) = iUpd; + + From 8a87cf96fc6fb50e3b4db93769f0960d5bb5ac8c Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sun, 10 Jan 2016 01:31:11 +0800 Subject: [PATCH 083/149] rvmSeq is almost finished: need update beta --- chapter07/demo.m | 26 ++--- chapter07/rvmFast.m | 143 ---------------------------- chapter07/rvmFast.m~ | 153 ------------------------------ chapter07/rvmFast2.m | 114 ---------------------- chapter07/rvmFast2.m~ | 91 ------------------ chapter07/rvmRegEbCd.m | 148 ----------------------------- chapter07/rvmRegEbCd.m.bak | 188 ------------------------------------- chapter07/rvmRegEbCd.m~ | 163 -------------------------------- chapter07/rvmRegEbSeq.m | 126 +++++++++++++++++++++++++ 9 files changed, 139 insertions(+), 1013 deletions(-) delete mode 100644 chapter07/rvmFast.m delete mode 100644 chapter07/rvmFast.m~ delete mode 100644 chapter07/rvmFast2.m delete mode 100644 chapter07/rvmFast2.m~ delete mode 100644 chapter07/rvmRegEbCd.m delete mode 100644 chapter07/rvmRegEbCd.m.bak delete mode 100644 chapter07/rvmRegEbCd.m~ create mode 100644 chapter07/rvmRegEbSeq.m diff --git a/chapter07/demo.m b/chapter07/demo.m index b5882f7..4488d1b 100755 --- a/chapter07/demo.m +++ b/chapter07/demo.m @@ -23,22 +23,22 @@ sigma = 0.005; e = sigma*randn(1,n); y = x'*A + e; -% [model,llh] = rvmRegEbCd(A,y); -[model,llh] = rvmFast2(A,y); +[model,llh] = rvmRegEbCd(A,y); +% [model,llh] = rvmFast(A,y); plot(llh); -% -% + + % [model,llh] = rvmRegEbFp(A,y); % plot(llh); -% m = zeros(d,1); -% m(model.index) = model.w; -% -% h = max(abs(x))+0.2; -% x_range = [1,d]; -% y_range = [-h,+h]; -% figure; -% subplot(2,1,1);plot(x); axis([x_range,y_range]); title('Original Signal'); -% subplot(2,1,2);plot(m); axis([x_range,y_range]); title('Recovery Signal'); +m = zeros(d,1); +m(model.index) = model.w; + +h = max(abs(x))+0.2; +x_range = [1,d]; +y_range = [-h,+h]; +figure; +subplot(2,1,1);plot(x); axis([x_range,y_range]); title('Original Signal'); +subplot(2,1,2);plot(m); axis([x_range,y_range]); title('Recovery Signal'); % % [y, sigma] = rvmRegPred(model,A); %% regression diff --git a/chapter07/rvmFast.m b/chapter07/rvmFast.m deleted file mode 100644 index a7359cf..0000000 --- a/chapter07/rvmFast.m +++ /dev/null @@ -1,143 +0,0 @@ -function [model, llh] = rvmFast(X, t) -% Relevance Vector Machine (ARD sparse prior) for regression -% training by empirical bayesian (type II ML) using Coordinate Descent -% reference: (Fast RVM) -% Tipping and Faul. Fast marginal likelihood maximisation for sparse Bayesian models. AISTATS 2003. 
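The beta update that the commit message still owes rvmRegEbSeq.m is the one rvmRegEbCd.m above already carries: with the active basis functions indexed by dim,

M = numel(dim);
beta = (n-M+dot(alpha(dim),diag(Sigma)))/sum((t-mu'*Phi).^2);   % noise re-estimate, Tipping and Faul (2003)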
- -xbar = mean(X,2); -tbar = mean(t,2); -X = bsxfun(@minus,X,xbar); -t = bsxfun(@minus,t,tbar); - -maxiter = 1000; -tol = 1e-4; -llh = -inf(1,maxiter); - - -[globalParam, localParam] = initParam(X, t); -for iter = 2:maxiter - llh(iter) = calcLlh(globalParam, localParam); - if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end - switch act - case 1 - localParam = addLocalParam(globalParam, localParam); - case 2 - localParam = delLocalParam(globalParam, localParam); - case 3 - localParam = updLocalParam(globalParam, localParam); - otherwise - error('error'); - end - globalParam = updGlobalParam(globalParam, localParam); - -end - - - - - - -function [globalParam, localParam] = initParam(X, t) - -beta = 1/mean(t.^2); % Beta = 1/sigma^2 -phi = X(j,:); -alpha = X2(j)/(v-1/beta); -sigma = 1/(alpha + beta*(phi*phi')); -mu = beta*sigma*phi*t'; - -V = beta*X*phi'; -S = beta*X2-sigma*V.^2; -Q = beta*Xt-beta*sigma*Xt(j)*V; - -globalParam = packGlobalParam(beta, Q, S); -localParam = packLocalParam(j, alpha, mu, sigma); - - - - -function llh = calcLlh(globalParam, localParam) -[Beta, Q, S] = unpackGlobalParam(globalParam); -[index, Alpha, Mu, Sigma] = unpackLocalParam(localParam); - - - -function [globalParam, localParam] = addLocalParam(j, globalParam, localParam) -[Beta, Q, S, X] = unpackGlobalParam(globalParam); -[index, Alpha, Mu, Sigma, Phi] = unpackLocalParam(localParam); - -phi = X(j,:); -alpha = s(j)^2/theta(j); -sigma = 1/(alpha+S(j)); -mu = sigma*Q(j); - -% local -v = Beta*Sigma*(Phi*phi'); -off = -Beta*sigma*v; -Sigma = [Sigma+sigma*(v*v'), off; off', sigma]; -Mu = [Mu-mu*v; mu]; -index = [index,j]; -Alpha = [Alpha,alpha]; - -% global -e = phi-v'*Phi; -v = Beta*X*e'; -S = S-sigma*v.^2; -Q = Q-mu*v; - - - -localParam = packLocalParam(index, Alpha, Mu, Sigma); - - -function localParam = delLocalParam(j, globalParam, localParam) -[Beta, Q, S] = unpackGlobalParam(globalParam); -[index, Alpha, Mu, Sigma] = unpackLocalParam(localParam); - - -localParam = packLocalParam(index, Alpha, Mu, Sigma); - - - -function localParam = updLocalParam(j, globalParam, localParam) -[Beta, Q, S] = unpackGlobalParam(globalParam); -[index, Alpha, Mu, Sigma] = unpackLocalParam(localParam); - - -localParam = packLocalParam(index, Alpha, Mu, Sigma); - - -function globalParam = updGlobalParam(globalParam, localParam) -[Beta, Q, S] = unpackGlobalParam(globalParam); - - - - - -globalParam = packGlobalParam(Beta, Q, S); - -function localParam = packLocalParam(Index, Alpha, Mu, Sigma, Phi) -localParam.Index = Index; -localParam.Alpha = Alpha; -localParam.Mu = Mu; -localParam.Sigma = Sigma; -localParam.Phi = Phi; - -function [Index, Alpha, Mu, Sigma] = unpackLocalParam(localParam) -Index = localParam.Index; -Alpha = localParam.Alpha; -Mu = localParam.Mu; -Sigma = localParam.Sigma; - - -function globalParam = packGlobalParam(Beta, Q, S, X) -globalParam.Beta = Beta; -globalParam.Q = Q; -globalParam.S = S; -globalParam.X = X; - - -function [Beta, Q, S, X] = unpackGlobalParam(globalParam) -Beta = globalParam.Beta; -Q = globalParam.Q; -S = globalParam.S; -X = globalParam.X; \ No newline at end of file diff --git a/chapter07/rvmFast.m~ b/chapter07/rvmFast.m~ deleted file mode 100644 index 8b4497c..0000000 --- a/chapter07/rvmFast.m~ +++ /dev/null @@ -1,153 +0,0 @@ -function [model, llh] = rvmFast(X, t) -% Relevance Vector Machine (ARD sparse prior) for regression -% training by empirical bayesian (type II ML) using Coordinate Descent -% reference: (Fast RVM) -% Tipping and Faul. 
Fast marginal likelihood maximisation for sparse Bayesian models. AISTATS 2003. - -xbar = mean(X,2); -tbar = mean(t,2); -X = bsxfun(@minus,X,xbar); -t = bsxfun(@minus,t,tbar); - -maxiter = 1000; -tol = 1e-4; -llh = -inf(1,maxiter); - - -[globalParam, localParam] = initParam(X, t); -for iter = 2:maxiter - llh(iter) = calcLlh(globalParam, localParam); - if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end - switch act - case 1 - localParam = addLocalParam(globalParam, localParam); - case 2 - localParam = delLocalParam(globalParam, localParam); - case 3 - localParam = updLocalParam(globalParam, localParam); - otherwise - error('error'); - end - globalParam = updGlobalParam(globalParam, localParam); - -end - - - - - - -function [globalParam, localParam] = initParam(X, t) - -X2 = dot(X,X,2); -Xt = X*t'; -[v,j] = max(Xt.^2./X2); - -beta = 1/mean(t.^2); % Beta = 1/sigma^2 -phi = X(j,:); -alpha = X2(j)/(v-1/beta); -sigma = 1/(alpha + beta*(phi'*phi)); -mu = beta*sigma*phi*t'; - -V = beta*X*phi'; -S = beta*X2-Sigma*V.^2; -Q = beta*Xt-Sigma*Xt( - - -e = phi-v'*Phi; -v = beta*X*e_j'; -S = S-Sigma_jj*v.^2; -Q = Q-mu_j*v; - -globalParam = packGlobalParam(Beta, Q, S); -localParam = packLocalParam(j, alpha, mu, sigma); - - - - -function llh = calcLlh(globalParam, localParam) -[Beta, Q, S] = unpackGlobalParam(globalParam); -[index, Alpha, Mu, Sigma] = unpackLocalParam(localParam); - - - -function [globalParam, localParam] = addLocalParam(j, globalParam, localParam) -[Beta, Q, S, X] = unpackGlobalParam(globalParam); -[index, Alpha, Mu, Sigma, Phi] = unpackLocalParam(localParam); - -phi = X(j,:); -alpha = s(j)^2/theta(j); -sigma = 1/(alpha+S(j)); -mu = sigma*Q(j); - -% local -v = Beta*Sigma*(Phi*phi'); -off = -Beta*sigma*v; -Sigma = [Sigma+sigma*(v*v'), off; off', sigma]; -Mu = [Mu-mu*v; mu]; -index = [index,j]; -Alpha = [Alpha,alpha]; - -% global -e = phi-v'*Phi; -v = Beta*X*e'; -S = S-sigma*v.^2; -Q = Q-mu*v; - - - -localParam = packLocalParam(index, Alpha, Mu, Sigma); - - -function localParam = delLocalParam(j, globalParam, localParam) -[Beta, Q, S] = unpackGlobalParam(globalParam); -[index, Alpha, Mu, Sigma] = unpackLocalParam(localParam); - - -localParam = packLocalParam(index, Alpha, Mu, Sigma); - - - -function localParam = updLocalParam(j, globalParam, localParam) -[Beta, Q, S] = unpackGlobalParam(globalParam); -[index, Alpha, Mu, Sigma] = unpackLocalParam(localParam); - - -localParam = packLocalParam(index, Alpha, Mu, Sigma); - - -function globalParam = updGlobalParam(globalParam, localParam) -[Beta, Q, S] = unpackGlobalParam(globalParam); - - - - - -globalParam = packGlobalParam(Beta, Q, S); - -function localParam = packLocalParam(Index, Alpha, Mu, Sigma, Phi) -localParam.Index = Index; -localParam.Alpha = Alpha; -localParam.Mu = Mu; -localParam.Sigma = Sigma; -localParam.Phi = Phi; - -function [Index, Alpha, Mu, Sigma] = unpackLocalParam(localParam) -Index = localParam.Index; -Alpha = localParam.Alpha; -Mu = localParam.Mu; -Sigma = localParam.Sigma; - - -function globalParam = packGlobalParam(Beta, Q, S, X) -globalParam.Beta = Beta; -globalParam.Q = Q; -globalParam.S = S; -globalParam.X = X; - - -function [Beta, Q, S, X] = unpackGlobalParam(globalParam) -Beta = globalParam.Beta; -Q = globalParam.Q; -S = globalParam.S; -X = globalParam.X; \ No newline at end of file diff --git a/chapter07/rvmFast2.m b/chapter07/rvmFast2.m deleted file mode 100644 index f3ada47..0000000 --- a/chapter07/rvmFast2.m +++ /dev/null @@ -1,114 +0,0 @@ -function [model,llh] = rvmFast2(X,t) - -xbar = mean(X,2); -tbar 
= mean(t,2); -X = bsxfun(@minus,X,xbar); -t = bsxfun(@minus,t,tbar); - -d = size(X,1); - -tol = 1e-4; -maxiter = 5; -LLH = -inf(1,maxiter); - -X2 = dot(X,X,2); -Xt = X*t'; -[v,j] = max(Xt.^2./X2); - -beta = 1/mean(t.^2); % Beta = 1/sigma^2 -phi = X(j,:); -alpha = X2(j)/(v-1/beta); -sigma = 1/(alpha + beta*(phi*phi')); -mu = beta*sigma*phi*t'; - -V = beta*X*phi'; -S = beta*X2-sigma*V.^2; -Q = beta*Xt-beta*sigma*Xt(j)*V; - - -iUse = j; -Phi = phi; -Alpha = alpha; -Sigma = sigma; -Mu = mu; -for iter = 2:maxiter - s = S; q = Q; - s(iUse) = alpha.*S(iUse)./(alpha-S(iUse)); - q(iUse) = alpha.*Q(iUse)./(alpha-S(iUse)); - theta = q.^2-s; - - iNew = find(theta>0); - llh = -inf(d,1); - [iUpd,~,which] = intersect(iNew, iUse); % update - if ~isempty(iUpd) - alpha = s(iUpd).^2./theta(iUpd); - delta = 1./alpha-1./Alpha(which); - llh(iUpd) = Q(iUpd).^2./(S(iUpd)+1./delta)-log1p(S(iUpd).*delta); - end - - iAdd = setdiff(iNew,iUpd); - if ~isempty(iAdd) - llh(iAdd) = (Q(iAdd).^2-S(iAdd))./S(iAdd)+log(S(iAdd)./(Q(iAdd).^2)); - end - - [LLH(iter),j] = max(llh); - if abs(LLH(iter)-LLH(iter-1)) < tol*abs(LLH(iter)-LLH(2)); break; end - - if any(iUpd==j) - act = 1; - elseif any(iAdd==j) - act = 2; - else - act = 0; - end - switch act - case 1 % update - idx = (iUse==j); - alpha_ = s(j)^2/theta(j); - - alpha = Alpha(idx); - Sigma_j = Sigma(:,idx); - Sigma_jj = Sigma(idx,idx); - mu_j = Mu(idx); - - delta = alpha_-alpha; - kappa = delta/(Sigma_jj*delta+1); - Sigma = Sigma-kappa*(Sigma_j*Sigma_j'); % eq (33) - Mu = Mu-kappa*mu_j*Sigma_j; % eq (34) - - v = beta*X*(Phi'*Sigma_j); - S = S+kappa*v.^2; % eq (35) - Q = Q+kappa*mu_j*v; - - Alpha(idx) = alpha_; - case 2 % Add - phi = X(j,:); - alpha = s(j)^2/theta(j); - sigma = 1/(alpha+S(j)); - mu = sigma*Q(j); - - v = beta*Sigma*(Phi*phi'); - off = -beta*sigma*v; % ?beta - % off = -sigma*v; % ?beta - Sigma = [Sigma+sigma*(v*v'), off; off', sigma]; - Mu = [Mu-mu*v; mu]; - - e = phi-v'*Phi; - v = beta*X*e'; - S = S-sigma*v.^2; - Q = Q-mu*v; - - iUse = [iUse;j]; - Phi = [Phi;phi]; - Alpha = [Alpha;alpha]; - case 0 - disp(''); - end - -end -llh = llh(2:iter); - -model.index = iUse; -model.alpha = Alpha; -model.beta = beta; - diff --git a/chapter07/rvmFast2.m~ b/chapter07/rvmFast2.m~ deleted file mode 100644 index 3e2337a..0000000 --- a/chapter07/rvmFast2.m~ +++ /dev/null @@ -1,91 +0,0 @@ -function [model,llh] = rvmFast2(X,t) - -xbar = mean(X,2); -tbar = mean(t,2); -X = bsxfun(@minus,X,xbar); -t = bsxfun(@minus,t,tbar); - -d = size(X,1); - -tol = 1e-4; -maxiter = 500; -LLH = -inf(1,maxiter); - -X2 = dot(X,X,2); -Xt = X*t'; -[v,j] = max(Xt.^2./X2); - -beta = 1/mean(t.^2); % Beta = 1/sigma^2 -phi = X(j,:); -alpha = X2(j)/(v-1/beta); -sigma = 1/(alpha + beta*(phi*phi')); -mu = beta*sigma*phi*t'; - -V = beta*X*phi'; -S = beta*X2-sigma*V.^2; -Q = beta*Xt-beta*sigma*Xt(j)*V; - - -iUse = j; -Phi = phi; -Alpha = alpha; -Sigma = sigma; -Mu = mu; -for iter = 2:maxiter - s = S; q = Q; - s(iUse) = alpha.*S(iUse)./(alpha-S(iUse)); - q(iUse) = alpha.*Q(iUse)./(alpha-S(iUse)); - theta = q.^2-s; - - iNew = find(theta>0); - - llh = -inf(d,1); - if any(iAdd) - llh(iAdd) = (Q(iAdd).^2-S(iAdd))./S(iAdd)+log(S(iAdd)./(Q(iAdd).^2)); - end - [iUpd,~,] = intersect(iNew, iUse); % update - if any(iUpd) - alpha = s(iUpd).^2./theta(iUpd); - index = iUse==find(iUpd); - delta = 1./alpha-1./Alpha(index); - tllh(iUpd) = Q(iUpd).^2./(S(iUpd)+1./delta)-log1p(S(iUpd).*delta); - end - [LLH(iter),j] = max(llh); - if abs(LLH(iter)-LLH(iter-1)) < tol*abs(LLH(iter)-LLH(2)); break; end - - - - switch find(iAct(j,:)) 
- case 1 % Add - phi = X(j,:); - alpha = s(j)^2/theta(j); - sigma = 1/(alpha+S(j)); - mu = sigma*Q(j); - - v = beta*Sigma*(Phi*phi'); - off = -beta*sigma*v; % ?beta - % off = -sigma*v; % ?beta - Sigma = [Sigma+sigma*(v*v'), off; off', sigma]; - Mu = [Mu-mu*v; mu]; - - - - e = phi-v'*Phi; - v = beta*X*e'; - S = S-sigma*v.^2; - Q = Q-mu*v; - - iUse = [iUse;j]; - Phi = [Phi;phi]; - Alpha = [Alpha;alpha]; - case 2 % update - - end - -end -llh = llh(2:iter); - -model.index = iUse; -model.alpha = Alpha; -model.beta = beta; - diff --git a/chapter07/rvmRegEbCd.m b/chapter07/rvmRegEbCd.m deleted file mode 100644 index dce607a..0000000 --- a/chapter07/rvmRegEbCd.m +++ /dev/null @@ -1,148 +0,0 @@ -function [model, llh] = rvmRegEbCd(X, t) -% TODO: llh not increasing. verify with sparse high dimensional data -% Relevance Vector Machine (ARD sparse prior) for regression -% training by empirical bayesian (type II ML) using Coordinate Descent -% reference: (Fast RVM) -% Tipping and Faul. Fast marginal likelihood maximisation for sparse Bayesian models. AISTATS 2003. -% Written by Mo Chen (sth4nth@gmail.com). -[d,n] = size(X); -xbar = mean(X,2); -tbar = mean(t,2); -X = bsxfun(@minus,X,xbar); -t = bsxfun(@minus,t,tbar); - -beta = 1/mean(t.^2); % beta = 1/sigma^2 -alpha = inf(d,1); -S = beta*dot(X,X,2); -Q = beta*(X*t'); -Sigma = zeros(0,0); -mu = zeros(0,1); -Phi = zeros(0,n); -dim = zeros(0,1); - -maxiter = d-1; -tol = 1e-4; -llh = -inf(1,maxiter); -iAct = zeros(d,3); -iUse = false(d,1); -s = S; q = Q; -for iter = 2:maxiter - theta = q.^2-s; - iNew = theta>0; - - assert(any(iNew)); % debug - - iUpd = (iNew & iUse); % update - iAdd = (iNew ~= iUpd); % add - iDel = (iUse ~= iUpd); % del - - - iUpd = false(d,1); - iDel = false(d,1); - - iAct(:,1) = iAdd; - iAct(:,2) = iDel; - iAct(:,3) = iUpd; - - -% assert(isempty(setdiff(find(any(iAct,2)),union(find(iNew),find(iUse))))); % debug - - % find the next dimension j that maximizes the marginal likilihood - tllh = -inf(d,1); % trial likelihood - if any(iAdd) - tllh(iAdd) = (Q(iAdd).^2-S(iAdd))./S(iAdd)+log(S(iAdd)./(Q(iAdd).^2)); - end - if any(iDel) - tllh(iDel) = Q(iDel).^2./(S(iDel)-alpha(iDel))-log1p(-S(iDel)./alpha(iDel)); - end - if any(iUpd) % bug - newAlpha = s(iUpd).^2./theta(iUpd); - oldAlpha = alpha(iUpd); - delta = 1./newAlpha-1./oldAlpha; - tllh(iUpd) = Q(iUpd).^2./(S(iUpd)+1./delta)-log1p(S(iUpd).*delta); - end - if ~isreal(tllh) % debug - dd = []; - for i=1:d - if ~isreal(tllh(i)) - dd = [dd,i]; - end - end - end - [llh(iter),j] = max(tllh); - if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end - - % update parameters - switch find(iAct(j,:)) - case 1 % Add - alpha(j) = s(j)^2/theta(j); - Sigma_jj = 1/(alpha(j)+S(j)); - mu_j = Sigma_jj*Q(j); - phi_j = X(j,:); - - v = beta*Sigma*(Phi*phi_j'); - off = -beta*Sigma_jj*v; - Sigma = [Sigma+Sigma_jj*(v*v'), off; off', Sigma_jj]; - mu = [mu-mu_j*v; mu_j]; - - e_j = phi_j-v'*Phi; - v = beta*X*e_j'; - S = S-Sigma_jj*v.^2; - Q = Q-mu_j*v; - - dim = [dim;j]; %#ok - case 2 % del - idx = (dim==j); - alpha(j) = inf; - Sigma_j = Sigma(:,idx); - Sigma_jj = Sigma(idx,idx); - mu_j = mu(idx); - - mu(idx) = []; - Sigma(:,idx) = []; - Sigma(idx,:) = []; - - kappa = 1/Sigma_jj; - Sigma = Sigma-kappa*(Sigma_j*Sigma_j'); % eq (33) - mu = mu-kappa*mu_j*Sigma_j; % eq (34) - - v = beta*X*(Phi'*Sigma_j); - S = S+kappa*v.^2; % eq (35) - Q = Q+kappa*mu_j*v; - - dim(idx) = []; - case 3 % update: - idx = (dim==j); - newAlpha = s(j)^2/theta(j); - oldAlpha = alpha(j); - alpha(j) = newAlpha; - - Sigma_j = Sigma(:,idx); - 
Sigma_jj = Sigma(idx,idx); - mu_j = mu(idx); - - kappa = 1/(Sigma_jj+1/(newAlpha-oldAlpha)); - Sigma = Sigma-kappa*(Sigma_j*Sigma_j'); % eq (33) - mu = mu-kappa*mu_j*Sigma_j; % eq (34) - - v = beta*X*(Phi'*Sigma_j); - S = S+kappa*v.^2; % eq (35) - Q = Q+kappa*mu_j*v; - end - - iUse = accumarray(dim,true,[d,1],@(x) x); % from Wei Li (pretty cool!) - s = S; q = Q; % p.353 Execrcies 7.17 - alphaS = alpha(iUse)-S(iUse); - s(iUse) = alpha(iUse).*S(iUse)./alphaS; % 7.104 - q(iUse) = alpha(iUse).*Q(iUse)./alphaS; % 7.105 - - Phi = X(iUse,:); - beta = (n-numel(dim)+dot(alpha(dim),diag(Sigma)))/sum((t-mu'*Phi).^2); -end -llh = llh(2:iter); -w0 = tbar-dot(mu,xbar(dim)); - -model.w0 = w0; -model.w = mu; -model.alpha = alpha; -model.beta = beta; diff --git a/chapter07/rvmRegEbCd.m.bak b/chapter07/rvmRegEbCd.m.bak deleted file mode 100644 index 9d1aa53..0000000 --- a/chapter07/rvmRegEbCd.m.bak +++ /dev/null @@ -1,188 +0,0 @@ -function [model, llh] = rvmRegEbCd(X, t) -% TODO: llh not increasing. verify with sparse high dimensional data -% Relevance Vector Machine (ARD sparse prior) for regression -% training by empirical bayesian (type II ML) using Coordinate Descent -% reference: (Fast RVM) -% Tipping and Faul. Fast marginal likelihood maximisation for sparse Bayesian models. AISTATS 2003. -% Written by Mo Chen (sth4nth@gmail.com). -[d,n] = size(X); -xbar = mean(X,2); -tbar = mean(t,2); -X = bsxfun(@minus,X,xbar); -t = bsxfun(@minus,t,tbar); - - - -% find initial alpha -beta = 1/mean(t.^2); % beta = 1/sigma^2 - - -X2 = dot(X,X,2); -Xt = beta*(X*t'); -[r,j] = max(Xt.^2./X2); -alpha = X2(j)/(r-1/beta); - -phi_j = X(j,:); -Sigma = 1/(alpha+beta*(phi_j*phi_j')); -mu = Sigma; - - - alpha(j) = s(j)^2/theta(j); - Sigma_jj = 1/(alpha(j)+S(j)); - mu_j = Sigma_jj*Q(j); - phi_j = X(j,:); - - v = beta*Sigma*(Phi*phi_j'); - off = -beta*Sigma_jj*v; - Sigma = [Sigma+Sigma_jj*(v*v'), off; off', Sigma_jj]; - mu = [mu-mu_j*v; mu_j]; - - e_j = phi_j-v'*Phi; - v = beta*X*e_j'; - S = S-Sigma_jj*v.^2; - Q = Q-mu_j*v; - - dim = [dim;j]; %#ok - - - - - -Sigma = zeros(0,0); -mu = zeros(0,1); -Phi = zeros(0,n); -dim = zeros(0,1); - -maxiter = 100*d; -tol = 1e-4; -llh = -inf(1,maxiter); -iAct = zeros(d,3); -iUse = false(d,1); -s = S; q = Q; -for iter = 2:maxiter - theta = q.^2-s; - iNew = theta>0; - - assert(any(iNew)); % debug - - iUpd = (iNew & iUse); % update - iAdd = (iNew ~= iUpd); % add - iDel = (iUse ~= iUpd); % del - - iAct(:,1) = iAdd; - iAct(:,2) = iDel; - iAct(:,3) = iUpd; - - assert(isempty(setdiff(find(any(iAct,2)),union(find(iNew),find(iUse))))); % debug - - % find the next dimension j that maximizes the marginal likilihood - tllh = -inf(d,1); % trial likelihood - if any(iAdd) - tllh(iAdd) = (Q(iAdd).^2-S(iAdd))./S(iAdd)+log(S(iAdd)./(Q(iAdd).^2)); - end - if any(iDel) - tllh(iDel) = Q(iDel).^2./(S(iDel)-alpha(iDel))-log1p(-S(iDel)./alpha(iDel)); - end - if any(iUpd) % bug - newAlpha = s(iUpd).^2./theta(iUpd); - oldAlpha = alpha(iUpd); - delta = 1./newAlpha-1./oldAlpha; - tllh(iUpd) = Q(iUpd).^2./(S(iUpd)+1./delta)-log1p(S(iUpd).*delta); - end - if ~isreal(tllh) % debug - dd = []; - for i=1:d - if ~isreal(tllh(i)) - dd = [dd,i]; - end - end - end - [llh(iter),j] = max(tllh); -% if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end - - % update parameters - switch find(iAct(j,:)) - case 1 % Add - alpha(j) = s(j)^2/theta(j); - Sigma_jj = 1/(alpha(j)+S(j)); - mu_j = Sigma_jj*Q(j); - phi_j = X(j,:); - - v = beta*Sigma*(Phi*phi_j'); - off = -beta*Sigma_jj*v; - Sigma = [Sigma+Sigma_jj*(v*v'), off; off', Sigma_jj]; 
- mu = [mu-mu_j*v; mu_j]; - - e_j = phi_j-v'*Phi; - v = beta*X*e_j'; - S = S-Sigma_jj*v.^2; - Q = Q-mu_j*v; - - dim = [dim;j]; %#ok - case 2 % del - idx = (dim==j); - alpha(j) = inf; - Sigma_j = Sigma(:,idx); - Sigma_jj = Sigma(idx,idx); - mu_j = mu(idx); - - mu(idx) = []; - Sigma(:,idx) = []; - Sigma(idx,:) = []; - - kappa = 1/Sigma_jj; - Sigma = Sigma-kappa*(Sigma_j*Sigma_j'); % eq (33) - mu = mu-kappa*mu_j*Sigma_j; % eq (34) - - v = beta*X*(Phi'*Sigma_j); - S = S+kappa*v.^2; % eq (35) - Q = Q+kappa*mu_j*v; - - dim(idx) = []; - case 3 % update: - idx = (dim==j); - newAlpha = s(j)^2/theta(j); - oldAlpha = alpha(j); - alpha(j) = newAlpha; - - Sigma_j = Sigma(:,idx); - Sigma_jj = Sigma(idx,idx); - mu_j = mu(idx); - - kappa = 1/(Sigma_jj+1/(newAlpha-oldAlpha)); - Sigma = Sigma-kappa*(Sigma_j*Sigma_j'); % eq (33) - mu = mu-kappa*mu_j*Sigma_j; % eq (34) - - v = beta*X*(Phi'*Sigma_j); - S = S+kappa*v.^2; % eq (35) - Q = Q+kappa*mu_j*v; - end - - iUse = accumarray(dim,true,[d,1],@(x) x); % from Wei Li (pretty cool!) - s = S; q = Q; % p.353 Execrcies 7.17 - alphaS = alpha(iUse)-S(iUse); - s(iUse) = alpha(iUse).*S(iUse)./alphaS; % 7.104 - q(iUse) = alpha(iUse).*Q(iUse)./alphaS; % 7.105 - - Phi = X(iUse,:); - beta = (n-numel(dim)+dot(alpha(dim),diag(Sigma)))/sum((t-mu'*Phi).^2); -end -llh = llh(2:iter); -w0 = tbar-dot(mu,xbar(dim)); - -model.w0 = w0; -model.w = mu; -model.alpha = alpha; -model.beta = beta; - -function iAct = GetIndex(iUse,iNew) -d = numelem(iUse); -iAct = zeros(d,3); -iUpd = (iNew & iUse); % update -iAdd = (iNew ~= iUpd); % add -iDel = (iUse ~= iUpd); % del -iAct(:,1) = iAdd; -iAct(:,2) = iDel; -iAct(:,3) = iUpd; - - diff --git a/chapter07/rvmRegEbCd.m~ b/chapter07/rvmRegEbCd.m~ deleted file mode 100644 index 521ccc6..0000000 --- a/chapter07/rvmRegEbCd.m~ +++ /dev/null @@ -1,163 +0,0 @@ -function [model, llh] = rvmRegEbCd(X, t) -% TODO: llh not increasing. verify with sparse high dimensional data -% Relevance Vector Machine (ARD sparse prior) for regression -% training by empirical bayesian (type II ML) using Coordinate Descent -% reference: (Fast RVM) -% Tipping and Faul. Fast marginal likelihood maximisation for sparse Bayesian models. AISTATS 2003. -% Written by Mo Chen (sth4nth@gmail.com). 
-[d,n] = size(X); -xbar = mean(X,2); -tbar = mean(t,2); -X = bsxfun(@minus,X,xbar); -t = bsxfun(@minus,t,tbar); - - - -% find initial alpha -beta = 1/mean(t.^2); % beta = 1/sigma^2 - - -X2 = dot(X,X,2); -Xt = beta*(X*t'); -max(Xt.^2./X2, - - - -Sigma = zeros(0,0); -mu = zeros(0,1); -Phi = zeros(0,n); -dim = zeros(0,1); - -maxiter = 100*d; -tol = 1e-4; -llh = -inf(1,maxiter); -iAct = zeros(d,3); -iUse = false(d,1); -s = S; q = Q; -for iter = 2:maxiter - theta = q.^2-s; - iNew = theta>0; - - assert(any(iNew)); % debug - - iUpd = (iNew & iUse); % update - iAdd = (iNew ~= iUpd); % add - iDel = (iUse ~= iUpd); % del - - iAct(:,1) = iAdd; - iAct(:,2) = iDel; - iAct(:,3) = iUpd; - - assert(isempty(setdiff(find(any(iAct,2)),union(find(iNew),find(iUse))))); % debug - - % find the next dimension j that maximizes the marginal likilihood - tllh = -inf(d,1); % trial likelihood - if any(iAdd) - tllh(iAdd) = (Q(iAdd).^2-S(iAdd))./S(iAdd)+log(S(iAdd)./(Q(iAdd).^2)); - end - if any(iDel) - tllh(iDel) = Q(iDel).^2./(S(iDel)-alpha(iDel))-log1p(-S(iDel)./alpha(iDel)); - end - if any(iUpd) % bug - newAlpha = s(iUpd).^2./theta(iUpd); - oldAlpha = alpha(iUpd); - delta = 1./newAlpha-1./oldAlpha; - tllh(iUpd) = Q(iUpd).^2./(S(iUpd)+1./delta)-log1p(S(iUpd).*delta); - end - if ~isreal(tllh) % debug - dd = []; - for i=1:d - if ~isreal(tllh(i)) - dd = [dd,i]; - end - end - end - [llh(iter),j] = max(tllh); -% if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end - - % update parameters - switch find(iAct(j,:)) - case 1 % Add - alpha(j) = s(j)^2/theta(j); - Sigma_jj = 1/(alpha(j)+S(j)); - mu_j = Sigma_jj*Q(j); - phi_j = X(j,:); - - v = beta*Sigma*(Phi*phi_j'); % temporary vector for common part - off = -beta*Sigma_jj*v; - Sigma = [Sigma+Sigma_jj*(v*v'), off; off', Sigma_jj]; - mu = [mu-mu_j*v; mu_j]; - - e_j = phi_j-v'*Phi; - v = beta*X*e_j'; - S = S-Sigma_jj*v.^2; - Q = Q-mu_j*v; - - dim = [dim;j]; %#ok - case 2 % del - idx = (dim==j); - alpha(j) = inf; - Sigma_j = Sigma(:,idx); - Sigma_jj = Sigma(idx,idx); - mu_j = mu(idx); - - mu(idx) = []; - Sigma(:,idx) = []; - Sigma(idx,:) = []; - - kappa = 1/Sigma_jj; - Sigma = Sigma-kappa*(Sigma_j*Sigma_j'); % eq (33) - mu = mu-kappa*mu_j*Sigma_j; % eq (34) - - v = beta*X*(Phi'*Sigma_j); - S = S+kappa*v.^2; % eq (35) - Q = Q+kappa*mu_j*v; - - dim(idx) = []; - case 3 % update: - idx = (dim==j); - newAlpha = s(j)^2/theta(j); - oldAlpha = alpha(j); - alpha(j) = newAlpha; - - Sigma_j = Sigma(:,idx); - Sigma_jj = Sigma(idx,idx); - mu_j = mu(idx); - - kappa = 1/(Sigma_jj+1/(newAlpha-oldAlpha)); - Sigma = Sigma-kappa*(Sigma_j*Sigma_j'); % eq (33) - mu = mu-kappa*mu_j*Sigma_j; % eq (34) - - v = beta*X*(Phi'*Sigma_j); - S = S+kappa*v.^2; % eq (35) - Q = Q+kappa*mu_j*v; - end - - iUse = accumarray(dim,true,[d,1],@(x) x); % from Wei Li (pretty cool!) 
- s = S; q = Q; % p.353 Execrcies 7.17 - alphaS = alpha(iUse)-S(iUse); - s(iUse) = alpha(iUse).*S(iUse)./alphaS; % 7.104 - q(iUse) = alpha(iUse).*Q(iUse)./alphaS; % 7.105 - - Phi = X(iUse,:); - beta = (n-numel(dim)+dot(alpha(dim),diag(Sigma)))/sum((t-mu'*Phi).^2); -end -llh = llh(2:iter); -w0 = tbar-dot(mu,xbar(dim)); - -model.w0 = w0; -model.w = mu; -model.alpha = alpha; -model.beta = beta; - -function iAct = GetIndex(iUse,iNew) -d = numelem(iUse); -iAct = zeros(d,3); -iUpd = (iNew & iUse); % update -iAdd = (iNew ~= iUpd); % add -iDel = (iUse ~= iUpd); % del -iAct(:,1) = iAdd; -iAct(:,2) = iDel; -iAct(:,3) = iUpd; - - diff --git a/chapter07/rvmRegEbSeq.m b/chapter07/rvmRegEbSeq.m new file mode 100644 index 0000000..88fbe2b --- /dev/null +++ b/chapter07/rvmRegEbSeq.m @@ -0,0 +1,126 @@ +function [model, llh] = rvmRegEbSeq(X, t) +% TODO: beta is not updated. +% Sparse Bayesian Regression (RVM) using sequential algorithm +% reference: +% Tipping and Faul. Fast marginal likelihood maximisation for sparse Bayesian models. AISTATS 2003. +% Written by Mo Chen (sth4nth@gmail.com). +d = size(X,1); +xbar = mean(X,2); +tbar = mean(t,2); +X = bsxfun(@minus,X,xbar); +t = bsxfun(@minus,t,tbar); + +beta = 0.01/(0.1*var(t))^2; % beta = 1/sigma^2 +alpha = inf(d,1); +S = beta*dot(X,X,2); % ? +Q = beta*(X*t'); % ? +Sigma = zeros(0,0); +mu = zeros(0,1); +index = zeros(0,1); + +maxiter = 1000; +tol = 1e-6; +llh = -inf(1,maxiter); +iAct = zeros(d,3); +for iter = 2:maxiter + s = S; q = Q; % p.353 Execrcies 7.17 + s(index) = alpha(index).*S(index)./(alpha(index)-S(index)); % 7.104 + q(index) = alpha(index).*Q(index)./(alpha(index)-S(index)); % 7.105 + + theta = q.^2-s; + iNew = theta>0; + + iUse = false(d,1); + iUse(index) = true; + + iUpd = (iNew & iUse); % update + iAdd = (iNew ~= iUpd); % add + iDel = (iUse ~= iUpd); % del + + dllh = -inf(d,1); % delta likelihood + if any(iUpd) + alpha_ = s(iUpd).^2./theta(iUpd); + delta = 1./alpha_-1./alpha(iUpd); + dllh(iUpd) = Q(iUpd).^2.*delta./(S(iUpd).*delta+1)-log1p(S(iUpd).*delta); + end + if any(iAdd) + dllh(iAdd) = (Q(iAdd).^2-S(iAdd))./S(iAdd)+log(S(iAdd)./(Q(iAdd).^2)); + end + if any(iDel) + dllh(iDel) = Q(iDel).^2./(S(iDel)-alpha(iDel))-log1p(-S(iDel)./alpha(iDel)); + end + + [llh(iter),j] = max(dllh); + if llh(iter) < tol; break; end + + iAct(:,1) = iUpd; + iAct(:,2) = iAdd; + iAct(:,3) = iDel; + + % update parameters + Phi = X(index,:); +%? beta = (n-numel(index)+dot(alpha(index),diag(Sigma)))/sum((t-mu'*Phi).^2); + switch find(iAct(j,:)) + case 1 % update: + idx = (index==j); + alpha_ = s(j)^2/theta(j); + + Sigma_j = Sigma(:,idx); + Sigma_jj = Sigma(idx,idx); + mu_j = mu(idx); + + kappa = 1/(Sigma_jj+1/(alpha_-alpha(j))); + Sigma = Sigma-kappa*(Sigma_j*Sigma_j'); % eq (33) + mu = mu-kappa*mu_j*Sigma_j; % eq (34) + + v = beta*X*(Phi'*Sigma_j); + S = S+kappa*v.^2; % eq (35) + Q = Q+kappa*mu_j*v; + alpha(j) = alpha_; + case 2 % Add + alpha_ = s(j)^2/theta(j); + Sigma_jj = 1/(alpha_+S(j)); + mu_j = Sigma_jj*Q(j); + phi_j = X(j,:); + + v = beta*Sigma*(Phi*phi_j'); + off = -Sigma_jj*v; % ? 
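+        % Here v already carries one factor of beta (v = beta*Sigma*(Phi*phi_j')),
+        % so off = -beta*Sigma_jj*Sigma*(Phi*phi_j'); multiplying by beta once more,
+        % as the deleted rvmRegEbCd code did, appears to double-count that factor.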
+ Sigma = [Sigma+Sigma_jj*(v*v'), off; off', Sigma_jj]; + mu = [mu-mu_j*v; mu_j]; + + e_j = phi_j-v'*Phi; + v = beta*X*e_j'; + S = S-Sigma_jj*v.^2; + Q = Q-mu_j*v; + + index = [index;j]; %#ok + alpha(j) = alpha_; + case 3 % del + idx = (index==j); + Sigma_j = Sigma(:,idx); + Sigma_jj = Sigma(idx,idx); + mu_j = mu(idx); + + kappa = 1/Sigma_jj; + Sigma = Sigma-kappa*(Sigma_j*Sigma_j'); % eq (33) + mu = mu-kappa*mu_j*Sigma_j; % eq (34) + + v = beta*X*(Phi'*Sigma_j); + S = S+kappa*v.^2; % eq (35) + Q = Q+kappa*mu_j*v; + + mu(idx) = []; + Sigma(:,idx) = []; + Sigma(idx,:) = []; + index(idx) = []; + alpha(j) = inf; + end +end +llh = llh(2:iter); +w0 = tbar-dot(mu,xbar(index)); + +model.index = index; +model.w0 = w0; +model.w = mu; +model.alpha = alpha(index); +model.beta = beta; \ No newline at end of file From 9a3ad1d5f2bafb12e36289b4523f1c322f3d3c31 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sun, 10 Jan 2016 01:56:05 +0800 Subject: [PATCH 084/149] rvmSeq is done for now. beta can not be updated. --- chapter07/demo.m | 3 +-- chapter07/rvmRegEbSeq.m | 7 ++++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/chapter07/demo.m b/chapter07/demo.m index 4488d1b..36a85e3 100755 --- a/chapter07/demo.m +++ b/chapter07/demo.m @@ -23,8 +23,7 @@ sigma = 0.005; e = sigma*randn(1,n); y = x'*A + e; -[model,llh] = rvmRegEbCd(A,y); -% [model,llh] = rvmFast(A,y); +[model,llh] = rvmRegEbSeq(A,y); plot(llh); diff --git a/chapter07/rvmRegEbSeq.m b/chapter07/rvmRegEbSeq.m index 88fbe2b..f3e39ee 100644 --- a/chapter07/rvmRegEbSeq.m +++ b/chapter07/rvmRegEbSeq.m @@ -4,7 +4,7 @@ % reference: % Tipping and Faul. Fast marginal likelihood maximisation for sparse Bayesian models. AISTATS 2003. % Written by Mo Chen (sth4nth@gmail.com). -d = size(X,1); +[d,n] = size(X); xbar = mean(X,2); tbar = mean(t,2); X = bsxfun(@minus,X,xbar); @@ -17,6 +17,7 @@ Sigma = zeros(0,0); mu = zeros(0,1); index = zeros(0,1); +Phi = zeros(0,n); maxiter = 1000; tol = 1e-6; @@ -58,8 +59,6 @@ iAct(:,3) = iDel; % update parameters - Phi = X(index,:); -%? beta = (n-numel(index)+dot(alpha(index),diag(Sigma)))/sum((t-mu'*Phi).^2); switch find(iAct(j,:)) case 1 % update: idx = (index==j); @@ -115,6 +114,8 @@ index(idx) = []; alpha(j) = inf; end + Phi = X(index,:); +% beta = (n-d+dot(alpha(index),diag(Sigma)))/sum((t-mu'*Phi).^2); end llh = llh(2:iter); w0 = tbar-dot(mu,xbar(index)); From e90a1bdd2c8384e36dd86a9f012fa9c8473321d8 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sun, 10 Jan 2016 13:47:25 +0800 Subject: [PATCH 085/149] add comment --- chapter07/rvmRegEbSeq.m | 51 ++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/chapter07/rvmRegEbSeq.m b/chapter07/rvmRegEbSeq.m index f3e39ee..76793a1 100644 --- a/chapter07/rvmRegEbSeq.m +++ b/chapter07/rvmRegEbSeq.m @@ -4,24 +4,24 @@ % reference: % Tipping and Faul. Fast marginal likelihood maximisation for sparse Bayesian models. AISTATS 2003. % Written by Mo Chen (sth4nth@gmail.com). +maxiter = 1000; +llh = -inf(1,maxiter); +tol = 1e-6; + [d,n] = size(X); xbar = mean(X,2); tbar = mean(t,2); X = bsxfun(@minus,X,xbar); t = bsxfun(@minus,t,tbar); -beta = 0.01/(0.1*var(t))^2; % beta = 1/sigma^2 +beta = 1/mean(t.^2); % beta = 1/sigma^2 alpha = inf(d,1); -S = beta*dot(X,X,2); % ? -Q = beta*(X*t'); % ? 
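+% With no basis yet in the model, C = (1/beta)*I in eq.(22) of the paper,
+% i.e. S_i = phi_i'*inv(C)*phi_i and Q_i = phi_i'*inv(C)*t reduce to the two lines below: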
+S = beta*dot(X,X,2); % eq.(22) +Q = beta*(X*t'); % eq.(22) Sigma = zeros(0,0); mu = zeros(0,1); index = zeros(0,1); Phi = zeros(0,n); - -maxiter = 1000; -tol = 1e-6; -llh = -inf(1,maxiter); iAct = zeros(d,3); for iter = 2:maxiter s = S; q = Q; % p.353 Execrcies 7.17 @@ -40,15 +40,15 @@ dllh = -inf(d,1); % delta likelihood if any(iUpd) - alpha_ = s(iUpd).^2./theta(iUpd); + alpha_ = s(iUpd).^2./theta(iUpd); % eq.(20) delta = 1./alpha_-1./alpha(iUpd); - dllh(iUpd) = Q(iUpd).^2.*delta./(S(iUpd).*delta+1)-log1p(S(iUpd).*delta); + dllh(iUpd) = Q(iUpd).^2.*delta./(S(iUpd).*delta+1)-log1p(S(iUpd).*delta); % eq.(32) end if any(iAdd) - dllh(iAdd) = (Q(iAdd).^2-S(iAdd))./S(iAdd)+log(S(iAdd)./(Q(iAdd).^2)); + dllh(iAdd) = (Q(iAdd).^2-S(iAdd))./S(iAdd)+log(S(iAdd)./(Q(iAdd).^2)); % eq.(27) end if any(iDel) - dllh(iDel) = Q(iDel).^2./(S(iDel)-alpha(iDel))-log1p(-S(iDel)./alpha(iDel)); + dllh(iDel) = Q(iDel).^2./(S(iDel)-alpha(iDel))-log1p(-S(iDel)./alpha(iDel)); % eq.(37) end [llh(iter),j] = max(dllh); @@ -69,12 +69,12 @@ mu_j = mu(idx); kappa = 1/(Sigma_jj+1/(alpha_-alpha(j))); - Sigma = Sigma-kappa*(Sigma_j*Sigma_j'); % eq (33) - mu = mu-kappa*mu_j*Sigma_j; % eq (34) + Sigma = Sigma-kappa*(Sigma_j*Sigma_j'); % eq.(33) + mu = mu-kappa*mu_j*Sigma_j; % eq.(34) v = beta*X*(Phi'*Sigma_j); - S = S+kappa*v.^2; % eq (35) - Q = Q+kappa*mu_j*v; + S = S+kappa*v.^2; % eq.(35) + Q = Q+kappa*mu_j*v; % eq.(36) alpha(j) = alpha_; case 2 % Add alpha_ = s(j)^2/theta(j); @@ -83,14 +83,14 @@ phi_j = X(j,:); v = beta*Sigma*(Phi*phi_j'); - off = -Sigma_jj*v; % ? - Sigma = [Sigma+Sigma_jj*(v*v'), off; off', Sigma_jj]; - mu = [mu-mu_j*v; mu_j]; + off = -Sigma_jj*v; % eq.(28) has error? + Sigma = [Sigma+Sigma_jj*(v*v'), off; off', Sigma_jj]; % eq.(28) + mu = [mu-mu_j*v; mu_j]; % eq.(29) e_j = phi_j-v'*Phi; v = beta*X*e_j'; - S = S-Sigma_jj*v.^2; - Q = Q-mu_j*v; + S = S-Sigma_jj*v.^2; % eq.(30) + Q = Q-mu_j*v; % eq.(31) index = [index;j]; %#ok alpha(j) = alpha_; @@ -100,13 +100,12 @@ Sigma_jj = Sigma(idx,idx); mu_j = mu(idx); - kappa = 1/Sigma_jj; - Sigma = Sigma-kappa*(Sigma_j*Sigma_j'); % eq (33) - mu = mu-kappa*mu_j*Sigma_j; % eq (34) + Sigma = Sigma-(Sigma_j*Sigma_j')/Sigma_jj; % eq.(38) + mu = mu-mu_j*Sigma_j/Sigma_jj; % eq.(39) v = beta*X*(Phi'*Sigma_j); - S = S+kappa*v.^2; % eq (35) - Q = Q+kappa*mu_j*v; + S = S+v.^2/Sigma_jj; % eq.(40) + Q = Q+mu_j*v/Sigma_jj; % eq.(41) mu(idx) = []; Sigma(:,idx) = []; @@ -115,7 +114,7 @@ alpha(j) = inf; end Phi = X(index,:); -% beta = (n-d+dot(alpha(index),diag(Sigma)))/sum((t-mu'*Phi).^2); +% beta = ; end llh = llh(2:iter); w0 = tbar-dot(mu,xbar(index)); From 9d46aa1981c214c537aeabe5fc346265568efbde Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sun, 10 Jan 2016 22:56:12 +0800 Subject: [PATCH 086/149] simplify function names --- chapter03/{linRegEbFp.m => linRegFp.m} | 2 +- chapter07/{rvmBinEbFp.m => rvmBinFp.m} | 2 +- chapter07/{rvmRegEbFp.m => rvmRegFp.m} | 2 +- chapter07/{rvmRegEbSeq.m => rvmRegSeq.m} | 2 +- chapter09/{linRegEbEm.m => linRegEm.m} | 2 +- chapter09/{rvmBinEbEm.m => rvmBinEm.m} | 2 +- chapter09/{rvmRegEbEm.m => rvmRegEm.m} | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) rename chapter03/{linRegEbFp.m => linRegFp.m} (95%) rename chapter07/{rvmBinEbFp.m => rvmBinFp.m} (97%) rename chapter07/{rvmRegEbFp.m => rvmRegFp.m} (96%) rename chapter07/{rvmRegEbSeq.m => rvmRegSeq.m} (98%) rename chapter09/{linRegEbEm.m => linRegEm.m} (95%) rename chapter09/{rvmBinEbEm.m => rvmBinEm.m} (95%) rename chapter09/{rvmRegEbEm.m => rvmRegEm.m} (95%) diff --git 
a/chapter03/linRegEbFp.m b/chapter03/linRegFp.m
similarity index 95%
rename from chapter03/linRegEbFp.m
rename to chapter03/linRegFp.m
index 231111f..6c04c75 100644
--- a/chapter03/linRegEbFp.m
+++ b/chapter03/linRegFp.m
@@ -1,4 +1,4 @@
-function [model, llh] = linRegEbFp(X, t, alpha, beta)
+function [model, llh] = linRegFp(X, t, alpha, beta)
 % Fit empirical Bayesian linear model with MacKay fixed point method
 % (p.168)
 % X: d x n data
diff --git a/chapter07/rvmBinEbFp.m b/chapter07/rvmBinFp.m
similarity index 97%
rename from chapter07/rvmBinEbFp.m
rename to chapter07/rvmBinFp.m
index 921c696..5aaa852 100644
--- a/chapter07/rvmBinEbFp.m
+++ b/chapter07/rvmBinFp.m
@@ -1,4 +1,4 @@
-function [model, llh] = rvmBinEbFp(X, t, alpha)
+function [model, llh] = rvmBinFp(X, t, alpha)
 % Relevance Vector Machine (ARD sparse prior) for binary classification
 % training by empirical Bayesian (type II ML) using fixed point update (MacKay update)
 % Written by Mo Chen (sth4nth@gmail.com).
diff --git a/chapter07/rvmRegEbFp.m b/chapter07/rvmRegFp.m
similarity index 96%
rename from chapter07/rvmRegEbFp.m
rename to chapter07/rvmRegFp.m
index 4020777..e505e28 100644
--- a/chapter07/rvmRegEbFp.m
+++ b/chapter07/rvmRegFp.m
@@ -1,4 +1,4 @@
-function [model, llh] = rvmRegEbFp(X, t, alpha, beta)
+function [model, llh] = rvmRegFp(X, t, alpha, beta)
 % Relevance Vector Machine (ARD sparse prior) for regression
 % training by empirical Bayesian (type II ML) using fixed point update (MacKay update)
 % Written by Mo Chen (sth4nth@gmail.com).
diff --git a/chapter07/rvmRegEbSeq.m b/chapter07/rvmRegSeq.m
similarity index 98%
rename from chapter07/rvmRegEbSeq.m
rename to chapter07/rvmRegSeq.m
index 76793a1..75ef74b 100644
--- a/chapter07/rvmRegEbSeq.m
+++ b/chapter07/rvmRegSeq.m
@@ -1,4 +1,4 @@
-function [model, llh] = rvmRegEbSeq(X, t)
+function [model, llh] = rvmRegSeq(X, t)
 % TODO: beta is not updated.
 % Sparse Bayesian Regression (RVM) using the sequential algorithm
 % reference:
diff --git a/chapter09/linRegEbEm.m b/chapter09/linRegEm.m
similarity index 95%
rename from chapter09/linRegEbEm.m
rename to chapter09/linRegEm.m
index b0e06a5..cf97aff 100644
--- a/chapter09/linRegEbEm.m
+++ b/chapter09/linRegEm.m
@@ -1,4 +1,4 @@
-function [model, llh] = linRegEbEm(X, t, alpha, beta)
+function [model, llh] = linRegEm(X, t, alpha, beta)
 % Fit empirical Bayesian linear model with EM (p.448, chapter 9.3.4)
 % X: d x n data
 % t: 1 x n response
diff --git a/chapter09/rvmBinEbEm.m b/chapter09/rvmBinEm.m
similarity index 95%
rename from chapter09/rvmBinEbEm.m
rename to chapter09/rvmBinEm.m
index f8d4b26..f1990db 100644
--- a/chapter09/rvmBinEbEm.m
+++ b/chapter09/rvmBinEm.m
@@ -1,4 +1,4 @@
-function [model, llh] = rvmBinEbEm(X, t, alpha)
+function [model, llh] = rvmBinEm(X, t, alpha)
 % Relevance Vector Machine (ARD sparse prior) for binary classification
 % training by empirical Bayesian (type II ML) using standard EM update
 % Written by Mo Chen (sth4nth@gmail.com).
diff --git a/chapter09/rvmRegEbEm.m b/chapter09/rvmRegEm.m
similarity index 95%
rename from chapter09/rvmRegEbEm.m
rename to chapter09/rvmRegEm.m
index 70bf7e3..605218f 100644
--- a/chapter09/rvmRegEbEm.m
+++ b/chapter09/rvmRegEm.m
@@ -1,4 +1,4 @@
-function [model, llh] = rvmRegEbEm(X, t, alpha, beta)
+function [model, llh] = rvmRegEm(X, t, alpha, beta)
 % Relevance Vector Machine (ARD sparse prior) for regression
 % training by empirical Bayesian (type II ML) using standard EM update
 % Written by Mo Chen (sth4nth@gmail.com).
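A minimal sketch of the type-II ML hyperparameter updates behind the renamed fixed-point and EM routines, assuming centered data X (d x n), t (1 x n), and current values of alpha and beta (an illustration, not the repository code; variable names alphaFp, betaFp, alphaEm are hypothetical):

[d,n] = size(X);                   % data assumed centered beforehand
A = beta*(X*X')+alpha*eye(d);      % posterior precision of w
S = inv(A);                        % posterior covariance
m = beta*(S*(X*t'));               % posterior mean (PRML 3.53-3.54)
e2 = sum((t-m'*X).^2);             % sum of squared residuals
gamma = d-alpha*trace(S);          % effective number of parameters
alphaFp = gamma/(m'*m);            % fixed point update (PRML 3.92), as in linRegFp
betaFp = (n-gamma)/e2;             % fixed point update (PRML 3.95), as in linRegFp
alphaEm = d/(m'*m+trace(S));       % EM update (PRML 9.63), as in linRegEm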
From fff07ff352a781203b84cd6cbcf10f18cdb4d0a8 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sun, 10 Jan 2016 22:59:37 +0800 Subject: [PATCH 087/149] simplify code for mixture models a little bit --- chapter09/demo.m | 4 ++++ chapter09/mixBernEm.m | 3 +-- chapter09/mixGaussEm.m | 32 ++++++++------------------------ chapter09/mixMnEm.m | 6 ++---- 4 files changed, 15 insertions(+), 30 deletions(-) create mode 100644 chapter09/demo.m diff --git a/chapter09/demo.m b/chapter09/demo.m new file mode 100644 index 0000000..2a104f4 --- /dev/null +++ b/chapter09/demo.m @@ -0,0 +1,4 @@ +% TODO: +% 1) demos for mixture models +% 2) pred functions for mixture models +% 3) beta for em regress \ No newline at end of file diff --git a/chapter09/mixBernEm.m b/chapter09/mixBernEm.m index d318323..26e75aa 100644 --- a/chapter09/mixBernEm.m +++ b/chapter09/mixBernEm.m @@ -8,7 +8,7 @@ n = size(X,2); label = ceil(k*rand(1,n)); % random initialization R = sparse(1:n,label,1,n,k,n); -tol = 1e-10; +tol = 1e-4; maxiter = 500; llh = -inf(1,maxiter); for iter = 2:maxiter @@ -30,7 +30,6 @@ logR = bsxfun(@minus,logRho,T); R = exp(logR); - function model = maximization(X, R) n = size(X,2); nk = sum(R,1); diff --git a/chapter09/mixGaussEm.m b/chapter09/mixGaussEm.m index 88d611e..ca2d2dc 100644 --- a/chapter09/mixGaussEm.m +++ b/chapter09/mixGaussEm.m @@ -3,50 +3,35 @@ % X: d x n data matrix % init: k (1 x 1) or label (1 x n, 1<=label(i)<=k) or center (d x k) % Written by Mo Chen (sth4nth@gmail.com). -%% initialization +%% init fprintf('EM for Gaussian mixture: running ... \n'); R = initialization(X,init); [~,label(1,:)] = max(R,[],2); R = R(:,unique(label)); -tol = 1e-10; +tol = 1e-4; maxiter = 500; llh = -inf(1,maxiter); for iter = 2:maxiter model = maximization(X,R); [R, llh(iter)] = expectation(X,model); - if abs(llh(iter)-llh(iter-1)) < tol*abs(llh(iter)); break; end; end [~,label(:)] = max(R,[],2); llh = llh(2:iter); function R = initialization(X, init) -[d,n] = size(X); -if isstruct(init) % initialize with a model +n = size(X,2); +if isstruct(init) % init with a model R = expectation(X,init); -elseif length(init) == 1 % random initialization +elseif numel(init) == 1 % random init k k = init; - idx = randsample(n,k); - m = X(:,idx); - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); - [u,~,label] = unique(label); - while k ~= length(u) - idx = randsample(n,k); - m = X(:,idx); - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); - [u,~,label] = unique(label); - end + label = ceil(k*rand(1,n)); R = full(sparse(1:n,label,1,n,k,n)); -elseif size(init,1) == 1 && size(init,2) == n % initialize with labels +elseif all(size(init)==[1,n]) % init with labels label = init; k = max(label); R = full(sparse(1:n,label,1,n,k,n)); -elseif size(init,1) == d %initialize with only centers - k = size(init,2); - m = init; - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); - R = full(sparse(1:n,label,1,n,k,n)); else error('ERROR: init is not valid.'); end @@ -69,7 +54,6 @@ logR = bsxfun(@minus,logRho,T); R = exp(logR); - function model = maximization(X, R) [d,n] = size(X); k = size(R,2); @@ -101,4 +85,4 @@ Q = U'\X; q = dot(Q,Q,1); % quadratic term (M distance) c = d*log(2*pi)+2*sum(log(diag(U))); % normalization constant -y = -(c+q)/2; +y = -(c+q)/2; \ No newline at end of file diff --git a/chapter09/mixMnEm.m b/chapter09/mixMnEm.m index 63d1005..02cfd14 100644 --- a/chapter09/mixMnEm.m +++ b/chapter09/mixMnEm.m @@ -8,7 +8,7 @@ n = size(X,2); label = ceil(k*rand(1,n)); % random initialization R = 
sparse(1:n,label,1,n,k,n); -tol = 1e-10; +tol = 1e-4; maxiter = 500; llh = -inf(1,maxiter); for iter = 2:maxiter @@ -30,7 +30,6 @@ logR = bsxfun(@minus,logRho,T); R = exp(logR); - function model = maximization(X, R) n = size(X,2); nk = sum(R,1); @@ -41,5 +40,4 @@ prior = (1/d)*ones(d,1); mu = bsxfun(@plus,(1-lambda)*mu,lambda*prior); model.mu = mu; -model.weight = w; - +model.weight = w; \ No newline at end of file From 668929589c95d5babed3001ca60e5153a2656cf2 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sun, 10 Jan 2016 23:09:06 +0800 Subject: [PATCH 088/149] update TODO list --- TODO.txt | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/TODO.txt b/TODO.txt index 419dac6..85d71fb 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,5 +1,6 @@ TODO: -derive simpler bound for vb and improve vb functions chapter 10 -fix llh for rvm cd -viterbi normalize update -Add predict for mixture models +1) update beta for sequential rvm +2) viterbi normalize update +3) compute bound terms inside each factors + + From d72d0f6b68ea8cc6267b83b001aeb89292756f77 Mon Sep 17 00:00:00 2001 From: sth4nth Date: Mon, 11 Jan 2016 16:26:05 +0800 Subject: [PATCH 089/149] add gmm pred and rnd stub --- chapter09/demo.m | 3 ++- chapter09/mixGaussPred.m | 28 ++++++++++++++++++++++++++++ chapter09/mixGaussRnd.m | 7 +++++++ 3 files changed, 37 insertions(+), 1 deletion(-) create mode 100644 chapter09/mixGaussPred.m create mode 100644 chapter09/mixGaussRnd.m diff --git a/chapter09/demo.m b/chapter09/demo.m index 2a104f4..126d0b5 100644 --- a/chapter09/demo.m +++ b/chapter09/demo.m @@ -1,4 +1,5 @@ % TODO: % 1) demos for mixture models % 2) pred functions for mixture models -% 3) beta for em regress \ No newline at end of file +% 3) beta for em regress + diff --git a/chapter09/mixGaussPred.m b/chapter09/mixGaussPred.m new file mode 100644 index 0000000..5aeb8ee --- /dev/null +++ b/chapter09/mixGaussPred.m @@ -0,0 +1,28 @@ +function [label, R] = mixGaussPred(X, model) +mu = model.mu; +Sigma = model.Sigma; +w = model.weight; + +n = size(X,2); +k = size(mu,2); +logRho = zeros(n,k); + +for i = 1:k + logRho(:,i) = loggausspdf(X,mu(:,i),Sigma(:,:,i)); +end +logRho = bsxfun(@plus,logRho,log(w)); +R = exp(bsxfun(@minus,logRho,logsumexp(logRho,2))); +[~,label(:)] = max(R,[],2); + + +function y = loggausspdf(X, mu, Sigma) +d = size(X,1); +X = bsxfun(@minus,X,mu); +[U,p]= chol(Sigma); +if p ~= 0 + error('ERROR: Sigma is not PD.'); +end +Q = U'\X; +q = dot(Q,Q,1); % quadratic term (M distance) +c = d*log(2*pi)+2*sum(log(diag(U))); % normalization constant +y = -(c+q)/2; \ No newline at end of file diff --git a/chapter09/mixGaussRnd.m b/chapter09/mixGaussRnd.m new file mode 100644 index 0000000..fec9380 --- /dev/null +++ b/chapter09/mixGaussRnd.m @@ -0,0 +1,7 @@ +function [ output_args ] = mixGaussRnd( input_args ) +%MIXGAUSSRND Summary of this function goes here +% Detailed explanation goes here + + +end + From f0f48171de0ccbb0bed8c13ab88c2c61b90141b7 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Mon, 11 Jan 2016 17:13:24 +0800 Subject: [PATCH 090/149] gmm rnd and pred are done --- chapter09/demo.m | 51 ++++++++++++++++++++++++++++++++++++++++ chapter09/kmeansRnd.m | 12 ++++++++++ chapter09/mixGaussEm.m | 9 +++---- chapter09/mixGaussPred.m | 11 ++++++--- chapter09/mixGaussRnd.m | 26 ++++++++++++++++---- 5 files changed, 96 insertions(+), 13 deletions(-) create mode 100644 chapter09/kmeansRnd.m diff --git a/chapter09/demo.m b/chapter09/demo.m index 126d0b5..55953d7 100644 --- a/chapter09/demo.m +++ b/chapter09/demo.m @@ -2,4 
+2,55 @@ % 1) demos for mixture models % 2) pred functions for mixture models % 3) beta for em regress +% 4) refine kmeansRnd and mixGaussRnd +d = 2; +k = 3; +n = 500; +%% demo: kmeans +% close all; clear; +% d = 2; +% k = 3; +% n = 500; +% [X,label] = kmeansRnd(d,k,n); +% y = kmeans(X,k); +% plotClass(X,label); +% figure; +% plotClass(X,y); + +%% demo: Em for Gauss Mixture +close all; clear; +d = 2; +k = 3; +n = 1000; +[X,label] = mixGaussRnd(d,k,n); +plotClass(X,label); + +m = floor(n/2); +X1 = X(:,1:m); +X2 = X(:,(m+1):end); +% train +[z1,model,llh] = mixGaussEm(X1,k); +figure; +plot(llh); +figure; +plotClass(X1,z1); +% predict +z2 = mixGaussPred(X2,model); +figure; +plotClass(X2,z2); +%% demo: Em for Gauss mixture initialized with kmeans; +% close all; clear; +% d = 2; +% k = 3; +% n = 500; +% [X,label] = mixGaussRnd(d,k,n); +% init = kmeans(X,k); +% [z,model,llh] = mixGaussEm(X,init); +% plotClass(X,label); +% figure; +% plotClass(X,init); +% figure; +% plotClass(X,z); +% figure; +% plot(llh); diff --git a/chapter09/kmeansRnd.m b/chapter09/kmeansRnd.m new file mode 100644 index 0000000..f30ff9f --- /dev/null +++ b/chapter09/kmeansRnd.m @@ -0,0 +1,12 @@ +function [X, z, center] = kmeansRnd(d, k, n) +% Sampling from a Gaussian mixture distribution with common variances (kmeans model). +% Written by Michael Chen (sth4nth@gmail.com). +a = 1; +b = 6*nthroot(k,d); + +X = randn(d,n); +w = dirichletRnd(ones(k,a)); +z = discreteRnd(w,n); +E = full(sparse(z,1:n,1,k,n,n)); +center = rand(d,k)*b; +X = X+center*E; \ No newline at end of file diff --git a/chapter09/mixGaussEm.m b/chapter09/mixGaussEm.m index ca2d2dc..8113f71 100644 --- a/chapter09/mixGaussEm.m +++ b/chapter09/mixGaussEm.m @@ -5,19 +5,16 @@ % Written by Mo Chen (sth4nth@gmail.com). %% init fprintf('EM for Gaussian mixture: running ... \n'); -R = initialization(X,init); -[~,label(1,:)] = max(R,[],2); -R = R(:,unique(label)); - -tol = 1e-4; +tol = 1e-6; maxiter = 500; llh = -inf(1,maxiter); +R = initialization(X,init); for iter = 2:maxiter model = maximization(X,R); [R, llh(iter)] = expectation(X,model); if abs(llh(iter)-llh(iter-1)) < tol*abs(llh(iter)); break; end; end -[~,label(:)] = max(R,[],2); +[~,label(1,:)] = max(R,[],2); llh = llh(2:iter); function R = initialization(X, init) diff --git a/chapter09/mixGaussPred.m b/chapter09/mixGaussPred.m index 5aeb8ee..6abd6f9 100644 --- a/chapter09/mixGaussPred.m +++ b/chapter09/mixGaussPred.m @@ -1,4 +1,8 @@ function [label, R] = mixGaussPred(X, model) +% Predict label and responsibility for Gaussian mixture model. +% X: d x n data matrix +% model: trained model structure outputed by the EM algirthm +% Written by Mo Chen (sth4nth@gmail.com). mu = model.mu; Sigma = model.Sigma; w = model.weight; @@ -11,9 +15,10 @@ logRho(:,i) = loggausspdf(X,mu(:,i),Sigma(:,:,i)); end logRho = bsxfun(@plus,logRho,log(w)); -R = exp(bsxfun(@minus,logRho,logsumexp(logRho,2))); -[~,label(:)] = max(R,[],2); - +T = logsumexp(logRho,2); +logR = bsxfun(@minus,logRho,T); +R = exp(logR); +[~,label(1,:)] = max(R,[],2); function y = loggausspdf(X, mu, Sigma) d = size(X,1); diff --git a/chapter09/mixGaussRnd.m b/chapter09/mixGaussRnd.m index fec9380..5bf245e 100644 --- a/chapter09/mixGaussRnd.m +++ b/chapter09/mixGaussRnd.m @@ -1,7 +1,25 @@ -function [ output_args ] = mixGaussRnd( input_args ) -%MIXGAUSSRND Summary of this function goes here -% Detailed explanation goes here +function [X, label, model] = mixGaussRnd(d, k, n) +% Sampling form a Gaussian mixture distribution. 
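+% Generative process: w ~ Dir(alpha0), z_i ~ Discrete(w), Sigma_k ~ IW(W0,v0),
+% mu_k ~ N(mu0, Sigma_k/beta0), x_i ~ N(mu_{z_i}, Sigma_{z_i}).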
+% Written by Michael Chen (sth4nth@gmail.com). +alpha0 = 1; % hyperparameter of Dirichlet prior +W0 = eye(d); % hyperparameter of inverse Wishart prior of covariances +v0 = d+1; % hyperparameter of inverse Wishart prior of covariances +mu0 = zeros(d,1); % hyperparameter of Guassian prior of means +beta0 = 1/(nthroot(k,d))^2; % hyperparameter of Guassian prior of means +w = dirichletRnd(ones(alpha0,k)); +z = discreteRnd(w,n); +mu = zeros(d,k); +Sigma = zeros(d,d,k); +X = zeros(d,n); +for i = 1:k + idc = z==i; + Sigma(:,:,i) = iwishrnd(W0,v0); % invpd(wishrnd(W0,v0)); + mu(:,i) = gaussRnd(mu0,Sigma(:,:,i)/beta0); + X(:,idc) = gaussRnd(mu(:,i),Sigma(:,:,i),sum(idc)); end - +label = z; +model.mu = mu; +model.Sigma = Sigma; +model.weight = w; \ No newline at end of file From c81cb2653a4700864c4b6e1fdb5ff02db90900b5 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Mon, 11 Jan 2016 17:14:07 +0800 Subject: [PATCH 091/149] modify demo.m --- chapter09/demo.m | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/chapter09/demo.m b/chapter09/demo.m index 55953d7..ddfd45e 100644 --- a/chapter09/demo.m +++ b/chapter09/demo.m @@ -1,12 +1,6 @@ % TODO: -% 1) demos for mixture models -% 2) pred functions for mixture models -% 3) beta for em regress -% 4) refine kmeansRnd and mixGaussRnd -d = 2; -k = 3; -n = 500; - +% 1) beta for em regress +% 2) refine kmeansRnd and mixGaussRnd %% demo: kmeans % close all; clear; % d = 2; From 81fcf335f23601151dcf905666f06d06f0f872d0 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Mon, 11 Jan 2016 17:35:55 +0800 Subject: [PATCH 092/149] remove redundant files --- chapter11/kmeansRnd.m | 12 ------------ chapter13/logGauss.m | 35 ----------------------------------- chapter13/normalize.m | 11 ----------- 3 files changed, 58 deletions(-) delete mode 100644 chapter11/kmeansRnd.m delete mode 100644 chapter13/logGauss.m delete mode 100644 chapter13/normalize.m diff --git a/chapter11/kmeansRnd.m b/chapter11/kmeansRnd.m deleted file mode 100644 index f30ff9f..0000000 --- a/chapter11/kmeansRnd.m +++ /dev/null @@ -1,12 +0,0 @@ -function [X, z, center] = kmeansRnd(d, k, n) -% Sampling from a Gaussian mixture distribution with common variances (kmeans model). -% Written by Michael Chen (sth4nth@gmail.com). -a = 1; -b = 6*nthroot(k,d); - -X = randn(d,n); -w = dirichletRnd(ones(k,a)); -z = discreteRnd(w,n); -E = full(sparse(z,1:n,1,k,n,n)); -center = rand(d,k)*b; -X = X+center*E; \ No newline at end of file diff --git a/chapter13/logGauss.m b/chapter13/logGauss.m deleted file mode 100644 index 9b287f2..0000000 --- a/chapter13/logGauss.m +++ /dev/null @@ -1,35 +0,0 @@ -function y = logGauss(X, mu, sigma) -% Compute log pdf of a Gaussian distribution. -% Written by Mo Chen (sth4nth@gmail.com). 
-[d,n] = size(X); -k = size(mu,2); -if n == k && size(sigma,1) == 1 - X = bsxfun(@times,X-mu,1./sigma); - q = dot(X,X,1); % M distance - c = d*log(2*pi)+2*log(sigma); % normalization constant - y = -0.5*(c+q); -elseif size(sigma,1)==d && size(sigma,2)==d && k==1 % one mu and one dxd sigma - X = bsxfun(@minus,X,mu); - [R,p]= chol(sigma); - if p ~= 0 - error('ERROR: sigma is not PD.'); - end - Q = R'\X; - q = dot(Q,Q,1); % quadratic term (M distance) - c = d*log(2*pi)+2*sum(log(diag(R))); % normalization constant - y = -0.5*(c+q); -elseif size(sigma,1)==d && size(sigma,2)==k % k mu and k diagonal sigma - lambda = 1./sigma; - ml = mu.*lambda; - q = bsxfun(@plus,X'.^2*lambda-2*X'*ml,dot(mu,ml,1)); % M distance - c = d*log(2*pi)+2*sum(log(sigma),1); % normalization constant - y = -0.5*bsxfun(@plus,q,c); -elseif size(sigma,1)==1 && (size(sigma,2)==k || size(sigma,2)==1) % k mu and (k or one) scalar sigma - X2 = repmat(dot(X,X,1)',1,k); - D = bsxfun(@plus,X2-2*X'*mu,dot(mu,mu,1)); - q = bsxfun(@times,D,1./sigma); % M distance - c = d*(log(2*pi)+2*log(sigma)); % normalization constant - y = -0.5*bsxfun(@plus,q,c); -else - error('Parameters mismatched.'); -end diff --git a/chapter13/normalize.m b/chapter13/normalize.m deleted file mode 100644 index fdcb1b8..0000000 --- a/chapter13/normalize.m +++ /dev/null @@ -1,11 +0,0 @@ -function [Y, s] = normalize(X, dim) -% Normalize the vectors to be summing to one -% By default dim = 1 (columns). -% Written by Mo Chen (sth4nth@gmail.com). -if nargin == 1, - % Determine which dimension sum will use - dim = find(size(X)~=1,1); - if isempty(dim), dim = 1; end -end -s = sum(X,dim); -Y = bsxfun(@times,X,1./s); \ No newline at end of file From 33b5960502fd124c12718c06a6f73928879cd1d6 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Mon, 11 Jan 2016 17:38:16 +0800 Subject: [PATCH 093/149] remove redundant files again --- chapter11/discreteRnd.m | 1 - chapter13/discreternd.m | 9 --------- 2 files changed, 10 deletions(-) delete mode 100644 chapter13/discreternd.m diff --git a/chapter11/discreteRnd.m b/chapter11/discreteRnd.m index 4148f35..5fb4e0d 100644 --- a/chapter11/discreteRnd.m +++ b/chapter11/discreteRnd.m @@ -6,5 +6,4 @@ end r = rand(1,n); p = cumsum(p(:)); -% x = sum(repmat(r,length(p),1) > repmat(p/p(end),1,n),1)+1; [~,x] = histc(r,[0;p/p(end)]); diff --git a/chapter13/discreternd.m b/chapter13/discreternd.m deleted file mode 100644 index 74ad868..0000000 --- a/chapter13/discreternd.m +++ /dev/null @@ -1,9 +0,0 @@ -function x = discreternd(p, n) -% Sampling from a discrete distribution (multinomial). -% Written by Mo Chen (sth4nth@gmail.com). 
-if nargin == 1 - n = 1; -end -r = rand(1,n); -p = cumsum(p(:)); -[~,x] = histc(r,[0;p/p(end)]); From 0b4a90575ba026dae38c1ec887fee534b1c7a89a Mon Sep 17 00:00:00 2001 From: sth4nth Date: Mon, 11 Jan 2016 18:54:24 +0800 Subject: [PATCH 094/149] modify demos --- chapter07/demo.m | 8 +++--- chapter10/demo.m | 59 ++++++++++++++++++++-------------------- chapter10/linRegVbPred.m | 23 ++++++++++++++++ 3 files changed, 56 insertions(+), 34 deletions(-) create mode 100644 chapter10/linRegVbPred.m diff --git a/chapter07/demo.m b/chapter07/demo.m index 36a85e3..6bd871f 100755 --- a/chapter07/demo.m +++ b/chapter07/demo.m @@ -50,7 +50,7 @@ % % x = linspace(min(X)-1,max(X)+1,d); % test data %% -% [model,llh] = rvmRegEbFp(X,t); +% [model,llh] = rvmRegFp(X,t); % figure % plot(llh); % [y, sigma] = linInfer(x,model,t); @@ -61,7 +61,7 @@ % plot(x,y,'r-'); % hold off %% -% [model,llh] = rvmRegEbEm(X,t); +% [model,llh] = rvmRegEm(X,t); % figure % plot(llh); % [y, sigma] = linInfer(x,model,t); @@ -72,7 +72,7 @@ % plot(x,y,'r-'); % hold off %% -% [model,llh] = rvmRegEbCd(X,t); +% [model,llh] = rvmRegSeq(X,t); % figure % plot(llh); % [y, sigma] = linPred(x,model,t); @@ -91,7 +91,7 @@ % [X,t] = kmeansRnd(d,k,n); % [x1,x2] = meshgrid(linspace(min(X(1,:)),max(X(1,:)),n), linspace(min(X(2,:)),max(X(2,:)),n)); % -% [model, llh] = rvmBinEbFp(X,t-1); +% [model, llh] = rvmBinFp(X,t-1); % plot(llh); % y = rvmBinPred(model,X)+1; % figure; diff --git a/chapter10/demo.m b/chapter10/demo.m index de836bf..90bd313 100755 --- a/chapter10/demo.m +++ b/chapter10/demo.m @@ -1,40 +1,39 @@ -clear; close all; +% TODO: +% 1) prediction functions for vb reg and mix +% 2) modify mixGaussMix to compute bound inside each factor -%% regression -% n = 100; +% %% regression +% clear; close all; +% +% d = 100; % beta = 1e-1; -% X = rand(1,n); +% X = rand(1,d); % w = randn; % b = randn; -% t = w'*X+b+beta*randn(1,n); +% t = w'*X+b+beta*randn(1,d); +% x = linspace(min(X)-1,max(X)+1,d); % test data % -% x = linspace(min(X)-1,max(X)+1,n); % test data -% X = rand(3,100); -% t = rand(1,100); -%% -% [model,energy] = regressVb(X,t); -% % figure -% plot(energy); -% y = linInfer(x,model); +% [model,llh] = linRegVb(X,t); +% % [model,llh] = rvmRegVb(X,t); +% figure +% plot(llh); +% [y, sigma] = linPred(model,x); % figure; % hold on; -% % plotBand(x,y,2*sigma); +% plotBand(x,y,2*sigma); % plot(X,t,'o'); % plot(x,y,'r-'); % hold off -%% -% [model,energy] = regressRvmVb(X,t); -% % figure -% plot(energy); -% y = linInfer(x,model); -% figure; -% hold on; -% % plotBand(x,y,2*sigma); -% plot(X,t,'o'); -% plot(x,y,'r-'); -% hold off -%% -[X,y] = rndKmeans(2,3,1000); -spread(X,y) -[label, model, energy] = mixGaussVb(X,10); -spread(X,label) \ No newline at end of file + +%% Variational Bayesian for Gaussian Mixture Model +close all; clear; +d = 2; +k = 3; +n = 500; +[X,label] = mixGaussRnd(d,k,n); +plotClass(X,label); +[y, model, bound] = mixGaussVb(X,10); +figure; +plotClass(X,y); +figure; +plot(bound) diff --git a/chapter10/linRegVbPred.m b/chapter10/linRegVbPred.m new file mode 100644 index 0000000..84281b7 --- /dev/null +++ b/chapter10/linRegVbPred.m @@ -0,0 +1,23 @@ +function [y, sigma, p] = linRegVbPred(model, X, t) +% Compute linear model reponse y = w'*X+w0 and likelihood +% model: trained model structure +% X: d x n testing data +% t (optional): 1 x n testing response +% Written by Mo Chen (sth4nth@gmail.com). 
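+% Predictive density (PRML 3.58-3.59): p(t|x,D) = N(t | w'*x+w0, sigma^2),
+% with sigma^2 = 1/beta + x'*S*x where S is the posterior covariance of w,
+% evaluated below through the stored Cholesky factor U.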
+w = model.w; +w0 = model.w0; +y = w'*X+w0; +%% probability prediction +if nargout > 1 + beta = model.beta; + U = model.U; % 3.54 + Xo = bsxfun(@minus,X,model.xbar); + XU = U'\Xo; + sigma = sqrt((1+dot(XU,XU,1))/beta); %3.59 +end + +if nargin == 3 && nargout == 3 + p = exp(logGauss(t,y,sigma)); +% p = exp(-0.5*(((t-y)./sigma).^2+log(2*pi))-log(sigma)); +end + From 89e0465cdbf74a0753e8ed8b7b8ca5bae04fb83a Mon Sep 17 00:00:00 2001 From: sth4nth Date: Mon, 11 Jan 2016 19:13:38 +0800 Subject: [PATCH 095/149] add hmmRnd --- chapter13/demo.m | 17 +---------------- chapter13/hmmFilter.m | 6 ++---- chapter13/hmmRnd.m | 16 ++++++++++++++++ 3 files changed, 19 insertions(+), 20 deletions(-) create mode 100644 chapter13/hmmRnd.m diff --git a/chapter13/demo.m b/chapter13/demo.m index c5119de..77df62a 100644 --- a/chapter13/demo.m +++ b/chapter13/demo.m @@ -4,21 +4,7 @@ k = 2; n = 10000; -A = normalize(rand(k,k),2); -E = normalize(rand(k,d),2); -s = normalize(rand(k,1),1); - -z = zeros(1,n); -x = zeros(1,n); -z(1) = discreternd(s); -x(1) = discreternd(E(z(1),:)); -for i = 2:n - z(i) = discreternd(A(z(i-1),:)); - x(i) = discreternd(E(z(i),:)); -end -X = sparse(x,1:n,1,d,n); -M = E*X; - +[ X, M, A, E, s ] = hmmRnd(d, k, n); [z, v] = hmmViterbi(M, A, s); % [model, energy] = hmmEm(x,k); @@ -26,4 +12,3 @@ % beta = hmmBwd(M,A); % gamma = normalize(alpha.*beta,1); -% [gamma2,alpha2.beta2,loglik] = hmmFwdBack(s, A, M); \ No newline at end of file diff --git a/chapter13/hmmFilter.m b/chapter13/hmmFilter.m index 44a0f5a..9529f58 100644 --- a/chapter13/hmmFilter.m +++ b/chapter13/hmmFilter.m @@ -1,9 +1,7 @@ function [alpha, energy] = hmmFilter(M, A, s) % HMM forward filtering algorithm -% unlike the method described in the book of PRML -% the alpha returned is the normalized version: alpha(t)=p(z_t|x_{1:t}) -% the unnormalized version alpha(t)=p(z_t,x_{1:t}) grows exponential fast -% to infinity. +% Unlike the method described in the book of PRML, the alpha returned is the normalized version: alpha(t)=p(z_t|x_{1:t}) +% The unnormalized version is numerical unstable. alpha(t)=p(z_t,x_{1:t}) grows exponential fast to infinity. % Written by Mo Chen (sth4nth@gmail.com). 
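+% Recursion: alpha(:,1) = normalize(s.*M(:,1)); alpha(:,t) = normalize((At*alpha(:,t-1)).*M(:,t));
+% accumulating the log normalizing constants gives the sequence log-likelihood (the energy output).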
[K,T] = size(M); At = A'; diff --git a/chapter13/hmmRnd.m b/chapter13/hmmRnd.m new file mode 100644 index 0000000..6d87b2d --- /dev/null +++ b/chapter13/hmmRnd.m @@ -0,0 +1,16 @@ +function [ X, M, A, E, s ] = hmmRnd(d, k, n) +% Generate a data sequence from a hidden Markov model +A = normalize(rand(k,k),2); +E = normalize(rand(k,d),2); +s = normalize(rand(k,1),1); + +z = zeros(1,n); +x = zeros(1,n); +z(1) = discreteRnd(s); +x(1) = discreteRnd(E(z(1),:)); +for i = 2:n + z(i) = discreteRnd(A(z(i-1),:)); + x(i) = discreteRnd(E(z(i),:)); +end +X = sparse(x,1:n,1,d,n); +M = E*X; From 0773509d32d09499a8f3f959b99c0d55cd407e28 Mon Sep 17 00:00:00 2001 From: sth4nth Date: Mon, 11 Jan 2016 19:13:52 +0800 Subject: [PATCH 096/149] update TODO --- TODO.txt | 8 ++++++-- chapter03/demo.m | 4 ++-- chapter07/demo.m | 4 ---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/TODO.txt b/TODO.txt index 85d71fb..30230e8 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,6 +1,10 @@ TODO: -1) update beta for sequential rvm +1) update beta for sequential rvm, beta for EM regress 2) viterbi normalize update 3) compute bound terms inside each factors - +4) plot multiclass data boundary +5) refine kmeansrnd and mixGaussRnd +6) chapter05: MLP +7) chapter08: BP, EP +8) chapter11: collapse Gibss for gmm and dpgm diff --git a/chapter03/demo.m b/chapter03/demo.m index 4894733..43d1268 100644 --- a/chapter03/demo.m +++ b/chapter03/demo.m @@ -8,11 +8,11 @@ % model = linReg(x,t); % linPlot(model,x,t); %% -% [model1,llh1] = linRegEbEm(x,t); +% [model1,llh1] = linRegEm(x,t); % plot(llh); % linPlot(model,x,t); %% -[model,llh] = linRegEbFp(x,t); +[model,llh] = linRegFp(x,t); [y, sigma] = linPred(model,x,t); plot(llh); linPlot(model,x,t); diff --git a/chapter07/demo.m b/chapter07/demo.m index 6bd871f..72818a5 100755 --- a/chapter07/demo.m +++ b/chapter07/demo.m @@ -1,7 +1,3 @@ -% TODO: -% 3) fix coordinate descent rvm (llh not increase) -% 4) need test rvm classification for high dim data - % clear; close all; % From cb6fb1ada74707fcebf50a812007c5b0dfae6648 Mon Sep 17 00:00:00 2001 From: sth4nth Date: Mon, 11 Jan 2016 19:17:54 +0800 Subject: [PATCH 097/149] update TODO --- TODO.txt | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/TODO.txt b/TODO.txt index 30230e8..cef1818 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,10 +1,14 @@ TODO: -1) update beta for sequential rvm, beta for EM regress -2) viterbi normalize update -3) compute bound terms inside each factors -4) plot multiclass data boundary -5) refine kmeansrnd and mixGaussRnd -6) chapter05: MLP -7) chapter08: BP, EP -8) chapter11: collapse Gibss for gmm and dpgm +create prediction functions for VB +beta for EM regress +refine kmeansrnd and mixGaussRnd +viterbi normalize update +chapter11: collapse Gibss for gmm and dpgm +chapter05: MLP +chapter08: BP, EP +update beta for sequential rvm, +compute bound terms inside each factors, +plot multiclass data boundary + + From 1721d71606902e224ad38023e73a44609fcdeb4d Mon Sep 17 00:00:00 2001 From: sth4nth Date: Mon, 11 Jan 2016 19:36:20 +0800 Subject: [PATCH 098/149] update TODO again --- TODO.txt | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/TODO.txt b/TODO.txt index cef1818..00a40e8 100644 --- a/TODO.txt +++ b/TODO.txt @@ -8,7 +8,4 @@ chapter05: MLP chapter08: BP, EP update beta for sequential rvm, compute bound terms inside each factors, -plot multiclass data boundary - - - +plot multiclass data boundary \ No newline at end of file From 
1fe67336a03017db55729008e8127a9f7fdf352e Mon Sep 17 00:00:00 2001 From: sth4nth Date: Tue, 12 Jan 2016 15:52:13 +0800 Subject: [PATCH 099/149] fix linRegEm: update beta --- chapter09/demo.m | 50 ++++++++++++++++++++++++++------------------ chapter09/linRegEm.m | 22 +++++++++---------- 2 files changed, 41 insertions(+), 31 deletions(-) diff --git a/chapter09/demo.m b/chapter09/demo.m index ddfd45e..00b3e3d 100644 --- a/chapter09/demo.m +++ b/chapter09/demo.m @@ -1,6 +1,16 @@ % TODO: % 1) beta for em regress % 2) refine kmeansRnd and mixGaussRnd + +%% demo: EM linear regression +close all; clear; +d = 5; +n = 200; +[x,t] = linRnd(d,n); +[model,llh] = linRegEm(x,t); +plot(llh); + + %% demo: kmeans % close all; clear; % d = 2; @@ -13,26 +23,26 @@ % plotClass(X,y); %% demo: Em for Gauss Mixture -close all; clear; -d = 2; -k = 3; -n = 1000; -[X,label] = mixGaussRnd(d,k,n); -plotClass(X,label); - -m = floor(n/2); -X1 = X(:,1:m); -X2 = X(:,(m+1):end); -% train -[z1,model,llh] = mixGaussEm(X1,k); -figure; -plot(llh); -figure; -plotClass(X1,z1); -% predict -z2 = mixGaussPred(X2,model); -figure; -plotClass(X2,z2); +% close all; clear; +% d = 2; +% k = 3; +% n = 1000; +% [X,label] = mixGaussRnd(d,k,n); +% plotClass(X,label); +% +% m = floor(n/2); +% X1 = X(:,1:m); +% X2 = X(:,(m+1):end); +% % train +% [z1,model,llh] = mixGaussEm(X1,k); +% figure; +% plot(llh); +% figure; +% plotClass(X1,z1); +% % predict +% z2 = mixGaussPred(X2,model); +% figure; +% plotClass(X2,z2); %% demo: Em for Gauss mixture initialized with kmeans; % close all; clear; % d = 2; diff --git a/chapter09/linRegEm.m b/chapter09/linRegEm.m index cf97aff..fdda165 100644 --- a/chapter09/linRegEm.m +++ b/chapter09/linRegEm.m @@ -19,7 +19,6 @@ Xt = X*t'; idx = (1:d)'; dg = sub2ind([d,d],idx,idx); -I = eye(d); tol = 1e-4; maxiter = 100; llh = -inf(1,maxiter+1); @@ -27,27 +26,28 @@ A = beta*C; A(dg) = A(dg)+alpha; % 3.81 3.54 U = chol(A); - V = U\I; % A=inv(S) - - w = beta*(V*(V'*Xt)); % 3.84 - w2 = dot(w,w); - err = sum((t-w'*X).^2); + + m = beta*(U\(U'\Xt)); + w2 = dot(m,m); + e2 = sum((t-m'*X).^2); logdetA = 2*sum(log(diag(U))); - llh(iter) = 0.5*(d*log(alpha)+n*log(beta)-alpha*w2-beta*err-logdetA-n*log(2*pi)); % 3.86 + llh(iter) = 0.5*(d*log(alpha)+n*log(beta)-alpha*w2-beta*e2-logdetA-n*log(2*pi)); % 3.86 if abs(llh(iter)-llh(iter-1)) < tol*abs(llh(iter-1)); break; end + V = inv(U); trS = dot(V(:),V(:)); % A=inv(S) alpha = d/(w2+trS); % 9.63 - gamma = d-alpha*trS; % 9.64 - beta = n/(err+gamma/beta); % 9.68 + UX = U'\X; + trXSX = dot(UX(:),UX(:)); + beta = n/(e2+trXSX); % 9.68 is wrong end -w0 = tbar-dot(w,xbar); +w0 = tbar-dot(m,xbar); llh = llh(2:iter); model.w0 = w0; -model.w = w; +model.m = m; %% optional for bayesian probabilistic inference purpose model.alpha = alpha; model.beta = beta; From 769d65bf938f05e9193738c45adf0b5e73cec2b2 Mon Sep 17 00:00:00 2001 From: sth4nth Date: Tue, 12 Jan 2016 16:00:05 +0800 Subject: [PATCH 100/149] refine linRegEm a bit --- chapter09/linRegEm.m | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/chapter09/linRegEm.m b/chapter09/linRegEm.m index fdda165..6b3d4da 100644 --- a/chapter09/linRegEm.m +++ b/chapter09/linRegEm.m @@ -15,29 +15,27 @@ X = bsxfun(@minus,X,xbar); t = bsxfun(@minus,t,tbar); -C = X*X'; +XX = X*X'; Xt = X*t'; -idx = (1:d)'; -dg = sub2ind([d,d],idx,idx); + tol = 1e-4; maxiter = 100; llh = -inf(1,maxiter+1); for iter = 2:maxiter - A = beta*C; - A(dg) = A(dg)+alpha; % 3.81 3.54 + A = beta*XX+alpha*eye(d); U = chol(A); m = beta*(U\(U'\Xt)); - w2 = dot(m,m); 
+ m2 = dot(m,m); e2 = sum((t-m'*X).^2); logdetA = 2*sum(log(diag(U))); - llh(iter) = 0.5*(d*log(alpha)+n*log(beta)-alpha*w2-beta*e2-logdetA-n*log(2*pi)); % 3.86 + llh(iter) = 0.5*(d*log(alpha)+n*log(beta)-alpha*m2-beta*e2-logdetA-n*log(2*pi)); % 3.86 if abs(llh(iter)-llh(iter-1)) < tol*abs(llh(iter-1)); break; end V = inv(U); trS = dot(V(:),V(:)); % A=inv(S) - alpha = d/(w2+trS); % 9.63 + alpha = d/(m2+trS); % 9.63 UX = U'\X; trXSX = dot(UX(:),UX(:)); From 1ea056bea5b6f8cc7df091e8476e237e592c6080 Mon Sep 17 00:00:00 2001 From: sth4nth Date: Tue, 12 Jan 2016 16:00:37 +0800 Subject: [PATCH 101/149] rvmRegEm is not finished --- chapter09/rvmRegEm.m | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/chapter09/rvmRegEm.m b/chapter09/rvmRegEm.m index 605218f..1c08b46 100644 --- a/chapter09/rvmRegEm.m +++ b/chapter09/rvmRegEm.m @@ -6,16 +6,12 @@ alpha = 0.02; beta = 0.5; end -% xbar = mean(X,2); -% tbar = mean(t,2); -% X = bsxfun(@minus,X,xbar); -% t = bsxfun(@minus,t,tbar); -n = size(X,2); -X = [X;ones(1,n)]; -d = size(X,1); +xbar = mean(X,2); +tbar = mean(t,2); + +X = bsxfun(@minus,X,xbar); +t = bsxfun(@minus,t,tbar); -% XX = X*X'; -% Xt = X*t'; alpha = alpha*ones(d,1); tol = 1e-8; @@ -28,25 +24,28 @@ alpha = alpha(nz); X = X(nz,:); - S = inv(beta*(X*X')+diag(alpha)); + A = beta*(X*X')+diag(alpha); % E-step - w = beta*S*X*t'; % E[w] % 7.82 - w2 = m.^2+diag(S); % E[w^2] - e = sum((t-m'*X).^2); + m = beta*(A\(X*t')); % E[m] % 7.82 + m2 = m.^2; % E[m^2] + e2 = sum((t-m'*X).^2); -% logdetS = -2*sum(log(diag(V))); -% llh(iter) = 0.5*(sum(log(alpha))+n*log(beta)-beta*e-logdetS-dot(alpha,w2)-n*log(2*pi)); -% if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end + logdetS = -2*sum(log(diag(V))); + llh(iter) = 0.5*(sum(log(alpha))+n*log(beta)-beta*e2-logdetS-dot(alpha,m2)-n*log(2*pi)); + if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end % M-step - alpha = 1./w2; % 9.67 - beta = n/(e+sum(w2)); % 9.68 is wrong + S = inv(A); + alpha = 1./(m2+diag(S)); % 9.67 + + trXSX = trace(X'*S*X); + beta = n/(e2+trXSX); % 9.68 is wrong end llh = llh(2:iter); model.index = index; model.w0 = w0; -model.w = w; +model.m = m; model.alpha = alpha; model.beta = beta; From 8151e05aa06a0c33398b35acfd5de65277f04741 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 12 Jan 2016 19:50:02 +0800 Subject: [PATCH 102/149] rvmRegEm is done --- chapter07/rvmRegFp.m | 9 +++------ chapter09/demo.m | 41 ++++++++++++++++++++++++++++++++++++----- chapter09/linRegEm.m | 2 +- chapter09/rvmRegEm.m | 42 +++++++++++++++++++++++------------------- 4 files changed, 63 insertions(+), 31 deletions(-) diff --git a/chapter07/rvmRegFp.m b/chapter07/rvmRegFp.m index e505e28..f71a00d 100644 --- a/chapter07/rvmRegFp.m +++ b/chapter07/rvmRegFp.m @@ -7,21 +7,18 @@ beta = 0.5; end [d,n] = size(X); -alpha = alpha*ones(d,1); - xbar = mean(X,2); tbar = mean(t,2); - X = bsxfun(@minus,X,xbar); t = bsxfun(@minus,t,tbar); - XX = X*X'; Xt = X*t'; tol = 1e-3; -maxiter = 200; +maxiter = 500; llh = -inf(1,maxiter); index = 1:d; +alpha = alpha*ones(d,1); for iter = 2:maxiter % remove zeros nz = 1./alpha > tol; % nonzeros @@ -38,7 +35,7 @@ logdetS = 2*sum(log(diag(U))); llh(iter) = 0.5*(sum(log(alpha))+n*log(beta)-beta*e-logdetS-dot(alpha,m2)-n*log(2*pi)); % 3.86 - if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end + if abs(llh(iter)-llh(iter-1)) < tol*abs(llh(iter-1)); break; end V = inv(U); dgSigma = dot(V,V,2); diff --git a/chapter09/demo.m b/chapter09/demo.m index 00b3e3d..ce84943 100644 
--- a/chapter09/demo.m +++ b/chapter09/demo.m @@ -3,14 +3,45 @@ % 2) refine kmeansRnd and mixGaussRnd %% demo: EM linear regression -close all; clear; -d = 5; -n = 200; -[x,t] = linRnd(d,n); -[model,llh] = linRegEm(x,t); +% close all; clear; +% d = 5; +% n = 200; +% [x,t] = linRnd(d,n); +% [model,llh] = linRegEm(x,t); +% plot(llh); +%% +d = 512; % signal length +k = 20; % number of spikes +n = 100; % number of measurements +% +% random +/- 1 signal +x = zeros(d,1); +q = randperm(d); +x(q(1:k)) = sign(randn(k,1)); + +% projection matrix +A = unitize(randn(d,n),1); +% noisy observations +sigma = 0.005; +e = sigma*randn(1,n); +y = x'*A + e; +[model,llh] = rvmRegEm(A,y); plot(llh); +% [model,llh] = rvmRegEbFp(A,y); +% plot(llh); +m = zeros(d,1); +m(model.index) = model.w; + +h = max(abs(x))+0.2; +x_range = [1,d]; +y_range = [-h,+h]; +figure; +subplot(2,1,1);plot(x); axis([x_range,y_range]); title('Original Signal'); +subplot(2,1,2);plot(m); axis([x_range,y_range]); title('Recovery Signal'); + + %% demo: kmeans % close all; clear; % d = 2; diff --git a/chapter09/linRegEm.m b/chapter09/linRegEm.m index 6b3d4da..0c0cedd 100644 --- a/chapter09/linRegEm.m +++ b/chapter09/linRegEm.m @@ -45,7 +45,7 @@ llh = llh(2:iter); model.w0 = w0; -model.m = m; +model.w = m; %% optional for bayesian probabilistic inference purpose model.alpha = alpha; model.beta = beta; diff --git a/chapter09/rvmRegEm.m b/chapter09/rvmRegEm.m index 1c08b46..11c1897 100644 --- a/chapter09/rvmRegEm.m +++ b/chapter09/rvmRegEm.m @@ -6,46 +6,50 @@ alpha = 0.02; beta = 0.5; end +[d,n] = size(X); xbar = mean(X,2); tbar = mean(t,2); - X = bsxfun(@minus,X,xbar); t = bsxfun(@minus,t,tbar); +XX = X*X'; +Xt = X*t'; - -alpha = alpha*ones(d,1); -tol = 1e-8; +tol = 1e-3; maxiter = 500; llh = -inf(1,maxiter+1); index = 1:d; +alpha = alpha*ones(d,1); for iter = 2 : maxiter nz = 1./alpha > tol ; % nonzeros index = index(nz); alpha = alpha(nz); + XX = XX(nz,nz); + Xt = Xt(nz); X = X(nz,:); - - A = beta*(X*X')+diag(alpha); % E-step - m = beta*(A\(X*t')); % E[m] % 7.82 - m2 = m.^2; % E[m^2] + U = chol(beta*(XX)+diag(alpha)); % 7.83 + m = beta*(U\(U'\(X*t'))); % E[m] % 7.82 + m2 = m.^2; e2 = sum((t-m'*X).^2); - logdetS = -2*sum(log(diag(V))); - llh(iter) = 0.5*(sum(log(alpha))+n*log(beta)-beta*e2-logdetS-dot(alpha,m2)-n*log(2*pi)); - if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end - + logdetS = 2*sum(log(diag(U))); + llh(iter) = 0.5*(sum(log(alpha))+n*log(beta)-beta*e2-logdetS-dot(alpha,m2)-n*log(2*pi)); % 3.86 + if abs(llh(iter)-llh(iter-1)) < tol*abs(llh(iter-1)); break; end % M-step - S = inv(A); - alpha = 1./(m2+diag(S)); % 9.67 - - trXSX = trace(X'*S*X); + V = inv(U); + dgS = dot(V,V,2); + alpha = 1./(m2+dgS); % 9.67 + UX = U'\X; + trXSX = dot(UX(:),UX(:)); beta = n/(e2+trXSX); % 9.68 is wrong end llh = llh(2:iter); - model.index = index; -model.w0 = w0; -model.m = m; +model.w0 = tbar-dot(m,xbar(nz)); +model.w = m; model.alpha = alpha; model.beta = beta; +%% optional for bayesian probabilistic prediction purpose +model.xbar = xbar; +model.U = U; \ No newline at end of file From d404911c7854c5b7d66dc0cbe8e5307cd9c765f4 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 12 Jan 2016 20:08:31 +0800 Subject: [PATCH 103/149] rvmBinEm is done --- chapter09/demo.m | 71 +++++++++++++++++++++++++------------------- chapter09/rvmBinEm.m | 71 ++++++++++++++++++++++++++++++++++---------- common/log1pexp.m | 7 +++++ 3 files changed, 103 insertions(+), 46 deletions(-) create mode 100644 common/log1pexp.m diff --git a/chapter09/demo.m 
b/chapter09/demo.m index ce84943..e6d4bba 100644 --- a/chapter09/demo.m +++ b/chapter09/demo.m @@ -9,39 +9,50 @@ % [x,t] = linRnd(d,n); % [model,llh] = linRegEm(x,t); % plot(llh); -%% -d = 512; % signal length -k = 20; % number of spikes -n = 100; % number of measurements -% -% random +/- 1 signal -x = zeros(d,1); -q = randperm(d); -x(q(1:k)) = sign(randn(k,1)); - -% projection matrix -A = unitize(randn(d,n),1); -% noisy observations -sigma = 0.005; -e = sigma*randn(1,n); -y = x'*A + e; -[model,llh] = rvmRegEm(A,y); -plot(llh); - - -% [model,llh] = rvmRegEbFp(A,y); +%% demo: sparse signal recovery +% d = 512; % signal length +% k = 20; % number of spikes +% n = 100; % number of measurements +% % +% % random +/- 1 signal +% x = zeros(d,1); +% q = randperm(d); +% x(q(1:k)) = sign(randn(k,1)); +% +% % projection matrix +% A = unitize(randn(d,n),1); +% % noisy observations +% sigma = 0.005; +% e = sigma*randn(1,n); +% y = x'*A + e; +% [model,llh] = rvmRegEm(A,y); % plot(llh); -m = zeros(d,1); -m(model.index) = model.w; +% +% +% % [model,llh] = rvmRegEbFp(A,y); +% % plot(llh); +% m = zeros(d,1); +% m(model.index) = model.w; +% +% h = max(abs(x))+0.2; +% x_range = [1,d]; +% y_range = [-h,+h]; +% figure; +% subplot(2,1,1);plot(x); axis([x_range,y_range]); title('Original Signal'); +% subplot(2,1,2);plot(m); axis([x_range,y_range]); title('Recovery Signal'); +%% classification +clear; close all +k = 2; +d = 2; +n = 1000; +[X,t] = kmeansRnd(d,k,n); +[x1,x2] = meshgrid(linspace(min(X(1,:)),max(X(1,:)),n), linspace(min(X(2,:)),max(X(2,:)),n)); -h = max(abs(x))+0.2; -x_range = [1,d]; -y_range = [-h,+h]; +[model, llh] = rvmBinEm(X,t-1); +plot(llh); +y = rvmBinPred(model,X)+1; figure; -subplot(2,1,1);plot(x); axis([x_range,y_range]); title('Original Signal'); -subplot(2,1,2);plot(m); axis([x_range,y_range]); title('Recovery Signal'); - - +binPlot(model,X,y); %% demo: kmeans % close all; clear; % d = 2; diff --git a/chapter09/rvmBinEm.m index f1990db..7f9a335 100644 --- a/chapter09/rvmBinEm.m +++ b/chapter09/rvmBinEm.m @@ -1,6 +1,6 @@ function [model, llh] = rvmBinEm(X, t, alpha) % Relevance Vector Machine (ARD sparse prior) for binary classification -% training by empirical bayesian (type II ML) using standard EM update +% training by empirical Bayes (type II ML) using fixed-point update (MacKay update) % Written by Mo Chen (sth4nth@gmail.com).
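% For reference, the two alpha re-estimates used in this toolbox, with m and S
% the mean and covariance of the Laplace-approximate posterior computed below:
%   EM update (PRML 9.67):           alpha_i = 1/(m_i^2 + S_ii)
%   MacKay fixed point (PRML 7.89):  alpha_i = (1 - alpha_i*S_ii)/m_i^2
% Both drive the alpha of irrelevant weights to infinity, pruning those basis functions.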
if nargin < 3 alpha = 1; @@ -9,29 +9,68 @@ X = [X;ones(1,n)]; d = size(X,1); alpha = alpha*ones(d,1); -weight = zeros(d,1); +m = zeros(d,1); tol = 1e-4; maxiter = 100; llh = -inf(1,maxiter); -infinity = 1e+10; +index = 1:d; for iter = 2:maxiter - used = alpha < infinity; - a = alpha(used); - w = weight(used); - [w,energy,U] = optLogitNewton(X(used,:),t,a,w); - w2 = w.^2; - llh(iter) = energy(end)+0.5*(sum(log(a))-2*sum(log(diag(U)))-dot(a,w2)-n*log(2*pi)); % 7.114 - if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end + % remove zeros + nz = 1./alpha > tol; % nonzeros + index = index(nz); + alpha = alpha(nz); + X = X(nz,:); + m = m(nz); + + [m,e,U] = logitBin(X,t,alpha,m); % 7.110 ~ 7.113 + + m2 = m.^2; + llh(iter) = e(end)+0.5*(sum(log(alpha))-2*sum(log(diag(U)))-dot(alpha,m2)-n*log(2*pi)); % 7.114 & 7.118 + if abs(llh(iter)-llh(iter-1)) < tol*abs(llh(iter-1)); break; end + V = inv(U); dgS = dot(V,V,2); - alpha(used) = 1./(w2+dgS); % 9.67 - weight(used) = w; + alpha = 1./(m2+dgS); % 9.67 end llh = llh(2:iter); -model.used = used; -model.w = w; % nonzero elements of weight -model.a = a; % nonzero elements of alpha -model.weght = weight; +model.index = index; +model.w = m; model.alpha = alpha; + +function [w, llh, U] = logitBin(X, t, lambda, w) +% Logistic regression +[d,n] = size(X); +tol = 1e-4; +maxiter = 100; +llh = -inf(1,maxiter); +idx = (1:d)'; +dg = sub2ind([d,d],idx,idx); +h = ones(1,n); +h(t==0) = -1; +a = w'*X; +for iter = 2:maxiter + y = sigmoid(a); % 4.87 + r = y.*(1-y); % 4.98 + Xw = bsxfun(@times, X, sqrt(r)); + H = Xw*Xw'; % 4.97 + H(dg) = H(dg)+lambda; + U = chol(H); + g = X*(y-t)'+lambda.*w; % 4.96 + p = -U\(U'\g); + wo = w; % 4.92 + w = wo+p; + a = w'*X; + llh(iter) = -sum(log1pexp(-h.*a))-0.5*sum(lambda.*w.^2); % 4.89 + incr = llh(iter)-llh(iter-1); + while incr < 0 % line search + p = p/2; + w = wo+p; + a = w'*X; + llh(iter) = -sum(log1pexp(-h.*a))-0.5*sum(lambda.*w.^2); + incr = llh(iter)-llh(iter-1); + end + if incr < tol; break; end +end +llh = llh(2:iter); \ No newline at end of file diff --git a/common/log1pexp.m b/common/log1pexp.m new file mode 100644 index 0000000..3b5beb2 --- /dev/null +++ b/common/log1pexp.m @@ -0,0 +1,7 @@ +function y = log1pexp(x) +% accurately compute y = log(1+exp(x)) +% reference: Accurately Computing log(1-exp(|a|)) Martin Machler +seed = 33.3; +y = x; +idx = x Date: Tue, 12 Jan 2016 20:09:46 +0800 Subject: [PATCH 104/149] update TODO --- TODO.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/TODO.txt b/TODO.txt index 00a40e8..d1e370b 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,8 +1,7 @@ TODO: -create prediction functions for VB -beta for EM regress refine kmeansrnd and mixGaussRnd viterbi normalize update +create prediction functions for VB chapter11: collapse Gibss for gmm and dpgm chapter05: MLP chapter08: BP, EP From 4afc694b8967fd74240e634e0a889c61c9594e04 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 12 Jan 2016 22:51:31 +0800 Subject: [PATCH 105/149] refine mix rnd: tbc --- chapter09/demo.m | 1 - chapter09/kmeansRnd.m | 8 ++++---- chapter09/mixGaussRnd.m | 9 ++++----- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/chapter09/demo.m b/chapter09/demo.m index e6d4bba..25c87c7 100644 --- a/chapter09/demo.m +++ b/chapter09/demo.m @@ -1,5 +1,4 @@ % TODO: -% 1) beta for em regress % 2) refine kmeansRnd and mixGaussRnd %% demo: EM linear regression diff --git a/chapter09/kmeansRnd.m b/chapter09/kmeansRnd.m index f30ff9f..7c61fa1 100644 --- a/chapter09/kmeansRnd.m +++ b/chapter09/kmeansRnd.m 
@@ -1,12 +1,12 @@ function [X, z, center] = kmeansRnd(d, k, n) % Sampling from a Gaussian mixture distribution with common variances (kmeans model). % Written by Michael Chen (sth4nth@gmail.com). -a = 1; -b = 6*nthroot(k,d); +alpha = 1; +beta = 6*nthroot(k,d); X = randn(d,n); -w = dirichletRnd(ones(k,a)); +w = dirichletRnd(alpha,ones(1,k)/k); z = discreteRnd(w,n); E = full(sparse(z,1:n,1,k,n,n)); -center = rand(d,k)*b; +center = rand(d,k)*beta; X = X+center*E; \ No newline at end of file diff --git a/chapter09/mixGaussRnd.m index 5bf245e..8783938 100644 --- a/chapter09/mixGaussRnd.m +++ b/chapter09/mixGaussRnd.m @@ -1,4 +1,4 @@ -function [X, label, model] = mixGaussRnd(d, k, n) +function [X, z, model] = mixGaussRnd(d, k, n) % Sampling from a Gaussian mixture distribution. % Written by Michael Chen (sth4nth@gmail.com). alpha0 = 1; % hyperparameter of Dirichlet prior @@ -7,19 +7,18 @@ mu0 = zeros(d,1); % hyperparameter of Gaussian prior of means beta0 = 1/(nthroot(k,d))^2; % hyperparameter of Gaussian prior of means -w = dirichletRnd(ones(alpha0,k)); +w = dirichletRnd(alpha0,ones(1,k)/k); z = discreteRnd(w,n); mu = zeros(d,k); Sigma = zeros(d,d,k); X = zeros(d,n); for i = 1:k - idc = z==i; +for... idx... -label = z; model.mu = mu; ... From e56db3418a7dee813ed29c17ca3e1ce99707cf51 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Wed, 13 Jan 2016 09:51:21 +0800 Subject: [PATCH 106/149] refine mix rnd: done --- chapter09/demo.m | 26 +++++++++++++------------- chapter09/kmeansRnd.m | 4 ++-- chapter09/mixGaussRnd.m | 4 ++-- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/chapter09/demo.m b/chapter09/demo.m index 25c87c7..3d83b8e 100644 --- a/chapter09/demo.m +++ b/chapter09/demo.m @@ -40,18 +40,18 @@ % subplot(2,1,1);plot(x); axis([x_range,y_range]); title('Original Signal'); % subplot(2,1,2);plot(m); axis([x_range,y_range]); title('Recovery Signal'); %% classification -clear; close all -k = 2; -d = 2; -n = 1000; -[X,t] = kmeansRnd(d,k,n); -[x1,x2] = meshgrid(linspace(min(X(1,:)),max(X(1,:)),n), linspace(min(X(2,:)),max(X(2,:)),n)); - -[model, llh] = rvmBinEm(X,t-1); -plot(llh); -y = rvmBinPred(model,X)+1; -figure; -binPlot(model,X,y); +% clear; close all +% k = 2; +% d = 2; +% n = 1000; +% [X,t] = kmeansRnd(d,k,n); +% [x1,x2] = meshgrid(linspace(min(X(1,:)),max(X(1,:)),n), linspace(min(X(2,:)),max(X(2,:)),n)); +% +% [model, llh] = rvmBinEm(X,t-1); +% plot(llh); +% y = rvmBinPred(model,X)+1; +% figure; +% binPlot(model,X,y); %% demo: kmeans % close all; clear; % d = 2; @@ -98,4 +98,4 @@ % figure; % plotClass(X,z); % figure; -% plot(llh); +% plot(llh); \ No newline at end of file diff --git a/chapter09/kmeansRnd.m b/chapter09/kmeansRnd.m index 7c61fa1..2f304cc 100644 --- a/chapter09/kmeansRnd.m +++ b/chapter09/kmeansRnd.m @@ -2,11 +2,11 @@ % Sampling from a Gaussian mixture distribution with common variances (kmeans model). % Written by Michael Chen (sth4nth@gmail.com).
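% Usage sketch (sizes are illustrative): draw 500 points in 2-D from 3
% unit-variance clusters and visualize them with the plotting helper used in the demos:
%   [X,z] = kmeansRnd(2,3,500);
%   plotClass(X,z);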
alpha = 1; -beta = 6*nthroot(k,d); +beta = k; X = randn(d,n); w = dirichletRnd(alpha,ones(1,k)/k); z = discreteRnd(w,n); E = full(sparse(z,1:n,1,k,n,n)); -center = rand(d,k)*beta; +center = randn(d,k)*beta; X = X+center*E; \ No newline at end of file diff --git a/chapter09/mixGaussRnd.m b/chapter09/mixGaussRnd.m index 8783938..62461b3 100644 --- a/chapter09/mixGaussRnd.m +++ b/chapter09/mixGaussRnd.m @@ -5,7 +5,7 @@ W0 = eye(d); % hyperparameter of inverse Wishart prior of covariances v0 = d+1; % hyperparameter of inverse Wishart prior of covariances mu0 = zeros(d,1); % hyperparameter of Gaussian prior of means -beta0 = 1/(nthroot(k,d))^2; % hyperparameter of Gaussian prior of means +beta0 = k; % hyperparameter of Gaussian prior of means w = dirichletRnd(alpha0,ones(1,k)/k); z = discreteRnd(w,n); @@ -16,7 +16,7 @@ for i = 1:k idx = z==i; Sigma(:,:,i) = iwishrnd(W0,v0); % invpd(wishrnd(W0,v0)); - mu(:,i) = gaussRnd(mu0,Sigma(:,:,i)/beta0); + mu(:,i) = gaussRnd(mu0,beta0*Sigma(:,:,i)); X(:,idx) = gaussRnd(mu(:,i),Sigma(:,:,i),sum(idx)); end model.mu = mu; From d3a3871aa0b0fbf9e5d67036f6b78b650a645185 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Wed, 13 Jan 2016 10:06:37 +0800 Subject: [PATCH 107/149] update TODO --- TODO.txt | 1 - chapter09/demo.m | 2 -- 2 files changed, 3 deletions(-) diff --git a/TODO.txt b/TODO.txt index d1e370b..e4874f4 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,5 +1,4 @@ TODO: -refine kmeansrnd and mixGaussRnd viterbi normalize update create prediction functions for VB chapter11: collapsed Gibbs for gmm and dpgm chapter05: MLP diff --git a/chapter09/demo.m b/chapter09/demo.m index 3d83b8e..b56a437 100644 --- a/chapter09/demo.m +++ b/chapter09/demo.m @@ -1,5 +1,3 @@ -% TODO: -% 2) refine kmeansRnd and mixGaussRnd %% demo: EM linear regression % close all; clear; From f0645f7f0df777b0f69f5d0b2c4c29253102fd6f Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Wed, 13 Jan 2016 10:06:55 +0800 Subject: [PATCH 108/149] refine mixReg --- chapter14/demo.m | 44 +++++++++++++++++++++---------------------- chapter14/mixLinReg.m | 12 ++++-------- 2 files changed, 26 insertions(+), 30 deletions(-) diff --git a/chapter14/demo.m b/chapter14/demo.m index 8d2262b..dd72216 100644 --- a/chapter14/demo.m +++ b/chapter14/demo.m @@ -1,25 +1,25 @@ -clear all; close all +close all; clear %% Test for mixLinReg -% d = 1; -% k = 3; -% n = 500; -% W = randn(d+1,k); -% -% [x, label] = rndKmeans(d, k, n); -% X = [x; ones(1,n)]; -% y = zeros(1,n); -% for j = 1:k -% idx = (label == j); -% y(idx) = W(:,j)'*X(:,idx); -% end -% -% plot(x,y,'.'); -% [model, label,llh] = mixLinReg(X, y, 3); -% spread([x;y],label); -% figure -% plot(llh); +d = 1; +k = 2; +n = 500; +W = randn(d+1,k); + +[x, label] = kmeansRnd(d, k, n); +X = [x; ones(1,n)]; +y = zeros(1,n); +for j = 1:k + idx = (label == j); + y(idx) = W(:,j)'*X(:,idx); +end + +plot(x,y,'.'); +[model, label,llh] = mixLinReg(X, y, k); +plotClass([x;y],label); +figure +plot(llh); %% -[X, y] = rndKmeans(2,3,1000); -[label,L] = mixGaussVb(X, 10); -plot(L); \ No newline at end of file diff --git a/chapter14/mixLinReg.m b/chapter14/mixLinReg.m index b92a8e9..6f5b1e2 100644 --- a/chapter14/mixLinReg.m +++ b/chapter14/mixLinReg.m @@ -7,14 +7,12 @@ n = size(X,2); X = [X;ones(1,n)]; % adding the bias term d = size(X,1); -idx = (1:d)'; -dg = sub2ind([d,d],idx,idx); label = ceil(k*rand(1,n)); % random initialization R = full(sparse(label,1:n,1,k,n,n)); -tol = 1e-4; -maxiter = 200; +tol = 1e-6; +maxiter = 500; llh = -inf(1,maxiter); -lambda = lambda*ones(d,1); +Lambda = lambda*eye(d); W = zeros(d,k); Xy = bsxfun(@times,X,y); beta = 1; @@ -24,9 +22,7 @@ alpha = nk/n; for j = 1:k Xw = bsxfun(@times,X,sqrt(R(j,:))); - C = Xw*Xw'; - C(dg) = C(dg)+lambda; - U = chol(C); + U = chol(Xw*Xw'+Lambda); W(:,j) = U\(U'\(Xy*R(j,:)')); % 3.15 & 3.28 end D = bsxfun(@minus,W'*X,y).^2; From 8755df746b62c82fd0fcc6b7b20238b68e398c98 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Wed, 13 Jan 2016 10:10:25 +0800 Subject: [PATCH 109/149] minor fix comment --- chapter14/demo.m | 2 +- chapter14/mixLinReg.m | 2 +- chapter14/mixLogitReg.m | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/chapter14/demo.m b/chapter14/demo.m index dd72216..1d129cf 100644 --- a/chapter14/demo.m +++ b/chapter14/demo.m @@ -1,5 +1,5 @@ close all; clear -%% Test for mixLinReg +%% Demo for mixture of linear regression d = 1; k = 2; n = 500; diff --git a/chapter14/mixLinReg.m b/chapter14/mixLinReg.m index 6f5b1e2..dd470dd 100644 --- a/chapter14/mixLinReg.m +++ b/chapter14/mixLinReg.m @@ -1,5 +1,5 @@ function [model, label, llh] = mixLinReg(X, y, k, lambda) -% mixture of linear regression +% Mixture of linear regression % Written by Mo Chen (sth4nth@gmail.com). if nargin < 4 diff --git a/chapter14/mixLogitReg.m b/chapter14/mixLogitReg.m index 3f1b6f4..40122ba 100644 --- a/chapter14/mixLogitReg.m +++ b/chapter14/mixLogitReg.m @@ -1,5 +1,5 @@ function [model, llh] = mixLogitReg(X, t, k) -% mixture of logistic regression model +% Mixture of logistic regression model % Written by Mo Chen (sth4nth@gmail.com). n = size(X,2); X = [X; ones(1,n)]; From 5a595670191605cec32999f0d28afd804f675be4 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Wed, 13 Jan 2016 10:34:11 +0800 Subject: [PATCH 110/149] add mixLinRnd --- chapter14/demo.m | 17 ++++------------- chapter14/mixLinRnd.m | 12 ++++++++++++ 2 files changed, 16 insertions(+), 13 deletions(-) create mode 100644 chapter14/mixLinRnd.m diff --git a/chapter14/demo.m b/chapter14/demo.m index 1d129cf..e705730 100644 --- a/chapter14/demo.m +++ b/chapter14/demo.m @@ -3,19 +3,10 @@ d = 1; k = 2; n = 500; -W = randn(d+1,k); - -[x, label] = kmeansRnd(d, k, n); -X = [x; ones(1,n)]; -y = zeros(1,n); -for j = 1:k - idx = (label == j); - y(idx) = W(:,j)'*X(:,idx); -end - -plot(x,y,'.'); -[model, label,llh] = mixLinReg(X, y, k); -plotClass([x;y],label); -figure -plot(llh); +[X,y] = mixLinRnd(d,k,n); +plot(X,y,'.'); +[model,label,llh] = mixLinReg(X, y, k); +plotClass([X;y],label); +figure +plot(llh); diff --git a/chapter14/mixLinRnd.m new file mode 100644 index 0000000..23e2278 --- /dev/null +++ b/chapter14/mixLinRnd.m @@ -0,0 +1,12 @@ +function [X, y, W ] = mixLinRnd(d, k, n) +% Generate data from a mixture of linear models +% Written by Mo Chen (sth4nth@gmail.com).
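+% A sketch of the generative process implemented below: draw a cluster of
+% inputs from kmeansRnd, then apply that cluster's own linear model,
+%   z ~ Cat(w),   y = W(1:d,z)'*x + W(d+1,z)
+% so each cluster lies on its own noiseless line/hyperplane.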
+W = randn(d+1,k); +[X, z] = kmeansRnd(d, k, n); +y = zeros(1,n); +for j = 1:k + idx = (z == j); + y(idx) = W(1:(end-1),j)'*X(:,idx)+W(end,j); +end + + From f4a90007c34a9d0e131867b36a506a90dcc9418b Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Wed, 13 Jan 2016 10:35:26 +0800 Subject: [PATCH 111/149] change parameter order --- chapter14/demo.m | 2 +- chapter14/mixLinReg.m | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/chapter14/demo.m b/chapter14/demo.m index e705730..e740bac 100644 --- a/chapter14/demo.m +++ b/chapter14/demo.m @@ -5,7 +5,7 @@ n = 500; [X,y] = mixLinRnd(d,k,n); plot(X,y,'.'); -[model,label,llh] = mixLinReg(X, y, k); +[label,model,llh] = mixLinReg(X, y, k); plotClass([X;y],label); figure plot(llh); diff --git a/chapter14/mixLinReg.m b/chapter14/mixLinReg.m index dd470dd..eb56557 100644 --- a/chapter14/mixLinReg.m +++ b/chapter14/mixLinReg.m @@ -1,4 +1,4 @@ -function [model, label, llh] = mixLinReg(X, y, k, lambda) +function [label, model, llh] = mixLinReg(X, y, k, lambda) % Mixture of linear regression % Written by Mo Chen (sth4nth@gmail.com). if nargin < 4 From ed0e32fe6eee26d9121e249cd9c213fd85a4890e Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Wed, 13 Jan 2016 10:37:39 +0800 Subject: [PATCH 112/149] update TODO --- TODO.txt | 4 +++- chapter14/demo.m | 4 ---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/TODO.txt b/TODO.txt index e4874f4..3677ca6 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,6 +1,8 @@ TODO: viterbi normalize update -create prediction functions for VB +demo for time series model +prediction functions for VB +prediction functions for ppca chapter11: collapse Gibss for gmm and dpgm chapter05: MLP chapter08: BP, EP diff --git a/chapter14/demo.m b/chapter14/demo.m index e740bac..7790685 100644 --- a/chapter14/demo.m +++ b/chapter14/demo.m @@ -10,7 +10,3 @@ figure plot(llh); -%% -% [X, y] = rndKmeans(2,3,1000); -% [label,L] = mixGaussVb(X, 10); -% plot(L); \ No newline at end of file From e7042a29d365c564034d3276dc4c825b9570a3ce Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Wed, 13 Jan 2016 10:47:31 +0800 Subject: [PATCH 113/149] minor fix --- chapter07/demo.m | 4 ++-- chapter10/linRegVbPred.m | 23 ----------------------- chapter14/demo.m | 3 ++- 3 files changed, 4 insertions(+), 26 deletions(-) delete mode 100644 chapter10/linRegVbPred.m diff --git a/chapter07/demo.m b/chapter07/demo.m index 72818a5..7c8b492 100755 --- a/chapter07/demo.m +++ b/chapter07/demo.m @@ -49,7 +49,7 @@ % [model,llh] = rvmRegFp(X,t); % figure % plot(llh); -% [y, sigma] = linInfer(x,model,t); +% [y, sigma] = linPred(x,model,t); % figure; % hold on; % plotBand(x,y,2*sigma); @@ -60,7 +60,7 @@ % [model,llh] = rvmRegEm(X,t); % figure % plot(llh); -% [y, sigma] = linInfer(x,model,t); +% [y, sigma] = linPred(x,model,t); % figure; % hold on; % plotBand(x,y,2*sigma); diff --git a/chapter10/linRegVbPred.m b/chapter10/linRegVbPred.m deleted file mode 100644 index 84281b7..0000000 --- a/chapter10/linRegVbPred.m +++ /dev/null @@ -1,23 +0,0 @@ -function [y, sigma, p] = linRegVbPred(model, X, t) -% Compute linear model reponse y = w'*X+w0 and likelihood -% model: trained model structure -% X: d x n testing data -% t (optional): 1 x n testing response -% Written by Mo Chen (sth4nth@gmail.com). 
-w = model.w; -w0 = model.w0; -y = w'*X+w0; -%% probability prediction -if nargout > 1 - beta = model.beta; - U = model.U; % 3.54 - Xo = bsxfun(@minus,X,model.xbar); - XU = U'\Xo; - sigma = sqrt((1+dot(XU,XU,1))/beta); % 3.59 -end - -if nargin == 3 && nargout == 3 - p = exp(logGauss(t,y,sigma)); -% p = exp(-0.5*(((t-y)./sigma).^2+log(2*pi))-log(sigma)); -end - diff --git a/chapter14/demo.m b/chapter14/demo.m index 7790685..da25770 100644 --- a/chapter14/demo.m +++ b/chapter14/demo.m @@ -1,5 +1,6 @@ -close all; clear +% TODO: demo for mixLogitReg %% Demo for mixture of linear regression +close all; clear d = 1; k = 2; n = 500; From 177832a3d629e1206c0ae83e3be30401c0b141f1 Mon Sep 17 00:00:00 2001 From: sth4nth Date: Thu, 14 Jan 2016 14:51:36 +0800 Subject: [PATCH 114/149] refine mix rand --- chapter09/kmeansRnd.m | 2 +- chapter09/mixGaussRnd.m | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/chapter09/kmeansRnd.m b/chapter09/kmeansRnd.m index 2f304cc..08df22d 100644 --- a/chapter09/kmeansRnd.m +++ b/chapter09/kmeansRnd.m @@ -2,7 +2,7 @@ % Sampling from a Gaussian mixture distribution with common variances (kmeans model). % Written by Michael Chen (sth4nth@gmail.com). alpha = 1; -beta = k; +beta = nthroot(k,d); % in a volume of x^d there are k points: x^d=k X = randn(d,n); w = dirichletRnd(alpha,ones(1,k)/k); diff --git a/chapter09/mixGaussRnd.m b/chapter09/mixGaussRnd.m index 62461b3..c22bccc 100644 --- a/chapter09/mixGaussRnd.m +++ b/chapter09/mixGaussRnd.m @@ -5,7 +5,8 @@ W0 = eye(d); % hyperparameter of inverse Wishart prior of covariances v0 = d+1; % hyperparameter of inverse Wishart prior of covariances mu0 = zeros(d,1); % hyperparameter of Gaussian prior of means -beta0 = k; % hyperparameter of Gaussian prior of means +beta0 = nthroot(k,d); % hyperparameter of Gaussian prior of means % in a volume of x^d there are k points: x^d=k + w = dirichletRnd(alpha0,ones(1,k)/k); z = discreteRnd(w,n); From bba71470c3a982e8c287097e18be718e60726b05 Mon Sep 17 00:00:00 2001 From: sth4nth Date: Thu, 14 Jan 2016 16:01:37 +0800 Subject: [PATCH 115/149] update TODO --- TODO.txt | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/TODO.txt b/TODO.txt index 3677ca6..127673b 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,11 +1,11 @@ TODO: -viterbi normalize update -demo for time series model -prediction functions for VB -prediction functions for ppca -chapter11: collapsed Gibbs for gmm and dpgm +chapter14: adaboost +chapter11: collapsed Gibbs sampling for gmm and dpgm +chapter13: demo for time series models +chapter10: prediction functions for VB +chapter12: prediction functions for ppca chapter05: MLP chapter08: BP, EP -update beta for sequential rvm, -compute bound terms inside each factors, -plot multiclass data boundary +chapter07: update beta for sequential rvm, +chapter10: compute bound terms inside each factors, +chapter04: plot multiclass data boundary \ No newline at end of file From 2954f1156bfa7fabde98d4a8b9088930d95d8d12 Mon Sep 17 00:00:00 2001 From: sth4nth Date: Thu, 14 Jan 2016 16:06:18 +0800 Subject: [PATCH 116/149] rename functions in chapter12 --- chapter12/pcaEm.m | 81 ++++++++++++--------------------- chapter12/pcaLsi.m | 34 -------------- chapter12/pcaPred.m | 6 +-- chapter12/ppcaEm.m | 59 ++++++++++++++++++++++++ chapter12/{pcaVb.m => ppcaVb.m} | 5 +- 5 files changed, 92 insertions(+), 93 deletions(-) delete mode 100644 chapter12/pcaLsi.m create mode 100644 chapter12/ppcaEm.m rename chapter12/{pcaVb.m => ppcaVb.m} (87%)
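For orientation, the EM-like PCA of Roweis that the rewritten pcaEm below is built around alternates two least-squares steps (a minimal sketch; Z denotes the p-dimensional latent coordinates of the centered data):

    Z = (W'*W)\(W'*X);    % E-step: project the data onto the current basis
    W = (X*Z')/(Z*Z');    % M-step: refit the basis by least squares

Iterated until the reconstruction error stabilizes, W converges to a basis spanning the top p principal components.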
diff --git a/chapter12/pcaEm.m b/chapter12/pcaEm.m index c9a36a6..7e1ccf0 100644 --- a/chapter12/pcaEm.m +++ b/chapter12/pcaEm.m @@ -1,59 +1,34 @@ -function [model, llh] = pcaEm(X, q) -% Perform EM algorithm to maiximize likelihood of probabilistic PCA model. -% X: m x n data matrix -% q: dimension of target space +function [V, A] = pcaEm(X, p) +% Perform EM-like algorithm for PCA (by Sam Roweis). +% X: d x n data matrix +% p: dimension of target space % Reference: % Pattern Recognition and Machine Learning by Christopher M. Bishop -% Probabilistic Principal Component Analysis by Michael E. Tipping & Christopher M. Bishop +% EM algorithms for PCA and SPCA by Sam Roweis % Written by Mo Chen (sth4nth@gmail.com). -[m,n] = size(X); -mu = mean(X,2); -X = bsxfun(@minus,X,mu); +[d,n] = size(X); +X = bsxfun(@minus,X,mean(X,2)); +W = rand(d,p); -tol = 1e-4; -maxiter = 500; -llh = -inf(1,maxiter); -idx = (1:q)'; -dg = sub2ind([q,q],idx,idx); -I = eye(q); -r = dot(X(:),X(:)); % total norm of X +tol = 1e-8; +error = inf; +last = inf; +t = 0; +while ~(abs(last-error) Date: Fri, 15 Jan 2016 15:51:29 +0800 Subject: [PATCH 117/149] update todo --- TODO.txt | 5 +++-- chapter14/demo.m | 24 +++++++++++++++++------- 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/TODO.txt b/TODO.txt index 127673b..fcbaced 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,10 +1,11 @@ TODO: chapter14: adaboost +all: modify all binary label to use 0/1 chapter11: collapsed Gibss sampling for gmm and dpgm chapter13: demo for time series models -chapter10: prediction functions for VB -chapter12: prediction functions for ppca chapter05: MLP +chapter12: prediction functions for ppca +chapter10: prediction functions for VB chapter08: BP, EP chapter07: update beta for sequential rvm, chapter10: compute bound terms inside each factors, diff --git a/chapter14/demo.m b/chapter14/demo.m index da25770..a16e007 100644 --- a/chapter14/demo.m +++ b/chapter14/demo.m @@ -1,13 +1,23 @@ % TODO: demo for mixLogitReg %% Demo for mixture of linear regression +% close all; clear +% d = 1; +% k = 2; +% n = 500; +% [X,y] = mixLinRnd(d,k,n); +% plot(X,y,'.'); +% [label,model,llh] = mixLinReg(X, y, k); +% plotClass([X;y],label); +% figure +% plot(llh); + +%% Demo for adaboost close all; clear -d = 1; +d = 2; k = 2; n = 500; -[X,y] = mixLinRnd(d,k,n); -plot(X,y,'.'); -[label,model,llh] = mixLinReg(X, y, k); -plotClass([X;y],label); -figure -plot(llh); +[X,t] = kmeansRnd(d,k,n); +plotClass(X,t); +t = t-1; +model = adaboost(X,t); From f3036a17bfb4b3157bcb62f291a5a7ce1961c53a Mon Sep 17 00:00:00 2001 From: sth4nth Date: Fri, 15 Jan 2016 15:52:48 +0800 Subject: [PATCH 118/149] adaboost not done yet --- chapter14/adaboost.m | 39 +++++++++++++++++++++++++++++++++++++++ chapter14/adaboostPred.m | 1 + 2 files changed, 40 insertions(+) create mode 100644 chapter14/adaboost.m create mode 100644 chapter14/adaboostPred.m diff --git a/chapter14/adaboost.m b/chapter14/adaboost.m new file mode 100644 index 0000000..6a1b914 --- /dev/null +++ b/chapter14/adaboost.m @@ -0,0 +1,39 @@ +function model = adaboost(X, t) +% Adaboost with decision stump for binary classification +% Written by Mo Chen (sth4nth@gmail.com). 
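+% For reference, the standard AdaBoost reweighting this function aims to
+% implement: with I the indicator of points the current stump misclassifies,
+%   e = sum(w.*I);  alpha = log((1-e)/e);  w = w.*exp(alpha*I);  w = w/sum(w);
+% so each round up-weights the examples the previous weak learner got wrong.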
+n = size(X,2); +w = ones(1,n)/n; +T = 1000; +Alpha = zeros(1,T); +Theta = zeros(1,T); +E = sparse(1:n,t+1,1,n,2,n); +for it = 1:T + % weak learner: decision stump + m = bsxfun(@times,X,w)*E; + theta = mean(m,2); + + y = bsxfun(@gt,X,theta); + I = bsxfun(@eq,y,t); + j = max(I*w'); + I = I(j,:); + + % boosting + e = sum(w.*I); + alpha = log((1-e)./e); + + w = w.*exp(alpha*I); + w = w/sum(w); + + Alpha(it) = alpha; + Theta(it) = theta; +end +model.alpha = Alpha; +model.theta = Theta; + + + + + + + + diff --git a/chapter14/adaboostPred.m b/chapter14/adaboostPred.m new file mode 100644 index 0000000..0be4183 --- /dev/null +++ b/chapter14/adaboostPred.m @@ -0,0 +1 @@ +function [ t ] = adaboostPred( model, X ) From 71b83c0e4be7fa9fd31d2e9c6fa702410aca67cf Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Sun, 17 Jan 2016 23:27:14 +0800 Subject: [PATCH 119/149] adaboost and others --- chapter04/logitBinPred.m | 2 +- chapter07/rvmBinPred.m | 2 +- chapter14/adaboostBin.m | 27 +++++++++++++++++ chapter14/adaboostBinPred.m | 14 +++++++++ chapter14/demo.m | 34 ++++++++++++++++------ chapter14/mixLinPred.m | 2 ++ chapter14/mixLinReg.m | 5 ++-- chapter14/{mixLogitReg.m => mixLogitBin.m} | 2 +- chapter14/mixLogitBinPred.m | 7 +++++ 9 files changed, 81 insertions(+), 14 deletions(-) create mode 100644 chapter14/adaboostBin.m create mode 100644 chapter14/adaboostBinPred.m create mode 100644 chapter14/mixLinPred.m rename chapter14/{mixLogitReg.m => mixLogitBin.m} (96%) create mode 100644 chapter14/mixLogitBinPred.m diff --git a/chapter04/logitBinPred.m b/chapter04/logitBinPred.m index 7ac8c2e..0d3cedd 100644 --- a/chapter04/logitBinPred.m +++ b/chapter04/logitBinPred.m @@ -6,5 +6,5 @@ X = [X;ones(1,size(X,2))]; w = model.w; p = exp(-log1pexp(w'*X)); -y = (p>0.5)+0; +y = round(p); diff --git a/chapter07/rvmBinPred.m b/chapter07/rvmBinPred.m index e2781e2..a5259dc 100644 --- a/chapter07/rvmBinPred.m +++ b/chapter07/rvmBinPred.m @@ -8,4 +8,4 @@ X = X(index,:); w = model.w; p = exp(-log1pexp(w'*X)); -y = (p>0.5)+0; +y = round(p); diff --git a/chapter14/adaboostBin.m b/chapter14/adaboostBin.m new file mode 100644 index 0000000..4cdf230 --- /dev/null +++ b/chapter14/adaboostBin.m @@ -0,0 +1,27 @@ +function model = adaboostBin(X, t) +% Adaboost for binary classification (weak learner: kmeans) +t = t+1; +k = 2; +[d,n] = size(X); +w = ones(1,n)/n; +M = 100; +Alpha = zeros(1,M); +Theta = zeros(d,k,M); +T = sparse(1:n,t,1,n,k,n); % transform label into indicator matrix +for m = 1:M + % weak learner + E = spdiags(w',0,n,n)*T; + E = E*spdiags(1./sum(E,1)',0,k,k); + c = X*E; + [~,y] = min(pdist2(c,X),[],1); + Theta(:,:,m) = c; + % adaboost + I = y~=t; + e = dot(w,I); + alpha = log((1-e)/e); + w = w.*exp(alpha*I); + w = w/sum(w); + Alpha(m) = alpha; +end +model.alpha = Alpha; +model.theta = Theta; \ No newline at end of file diff --git a/chapter14/adaboostBinPred.m b/chapter14/adaboostBinPred.m new file mode 100644 index 0000000..4b9d60d --- /dev/null +++ b/chapter14/adaboostBinPred.m @@ -0,0 +1,14 @@ +function t = adaboostBinPred(model,X) +Alpha = model.alpha; +Theta = model.theta; +M = size(Alpha,2); +t = zeros(1,size(X,2)); +for m = 1:M + c = Theta(:,:,m); + [~,y] = min(pdist2(c,X),[],1); + y(y==1) = -1; + y(y==2) = 1; + t = t+Alpha(m)*y; +end +t = sign(t); +t(t==-1) = 0; \ No newline at end of file diff --git a/chapter14/demo.m b/chapter14/demo.m index da25770..0de61e7 100644 --- a/chapter14/demo.m +++ b/chapter14/demo.m @@ -1,13 +1,29 @@ -% TODO: demo for mixLogitReg %% Demo for mixture of linear regression -close all; 
clear -d = 1; +% close all; clear +% d = 1; +% k = 2; +% n = 500; +% [X,y] = mixLinRnd(d,k,n); +% plot(X,y,'.'); +% [model,llh] = mixLinReg(X, y, k); +y_ = mixLin +% plotClass([X;y],label); +% figure +% plot(llh); +%% Demo for mixture of logistic regression +d = 2; k = 2; n = 500; -[X,y] = mixLinRnd(d,k,n); -plot(X,y,'.'); -[label,model,llh] = mixLinReg(X, y, k); -plotClass([X;y],label); -figure -plot(llh); +[X,t] = kmeansRnd(d,k,n); +model = mixnLogitBin(X,t-1); +y = adaboostBinPred(model,X); +plotClass(X,y+1) +%% Demo for adaboost +d = 2; +k = 2; +n = 500; +[X,t] = kmeansRnd(d,k,n); +model = adaboostBin(X,t-1); +y = adaboostBinPred(model,X); +plotClass(X,y+1) \ No newline at end of file diff --git a/chapter14/mixLinPred.m b/chapter14/mixLinPred.m new file mode 100644 index 0000000..cc1e62d --- /dev/null +++ b/chapter14/mixLinPred.m @@ -0,0 +1,2 @@ +function y = mixLinPred(model, X) + diff --git a/chapter14/mixLinReg.m b/chapter14/mixLinReg.m index eb56557..adabd1d 100644 --- a/chapter14/mixLinReg.m +++ b/chapter14/mixLinReg.m @@ -1,4 +1,4 @@ -function [label, model, llh] = mixLinReg(X, y, k, lambda) +function [model, llh] = mixLinReg(X, y, k, lambda) % Mixture of linear regression % Written by Mo Chen (sth4nth@gmail.com). if nargin < 4 @@ -35,9 +35,10 @@ llh(iter) = sum(T)/n; if abs(llh(iter)-llh(iter-1)) < tol*abs(llh(iter)); break; end end -[~,label] = max(R,[],1); llh = llh(2:iter); model.alpha = alpha; % mixing coefficient model.beta = beta; % mixture component precision model.W = W; % linear model coefficent +[~,label] = max(R,[],1); +model.label = label; diff --git a/chapter14/mixLogitReg.m b/chapter14/mixLogitBin.m similarity index 96% rename from chapter14/mixLogitReg.m rename to chapter14/mixLogitBin.m index 40122ba..e13ac0b 100644 --- a/chapter14/mixLogitReg.m +++ b/chapter14/mixLogitBin.m @@ -1,4 +1,4 @@ -function [model, llh] = mixLogitReg(X, t, k) +function [model, llh] = mixLogitBin(X, t, k) % Mixture of logistic regression model % Written by Mo Chen (sth4nth@gmail.com). 
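% EM sketch: the E-step below forms responsibilities from the component Bernoulli
% likelihoods, logRho(i,j) = log(sigmoid(h_i*w_j'*x_i)) + log(alpha_j), and the
% M-step takes one responsibility-weighted Newton (IRLS) step per component.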
n = size(X,2); X = [X; ones(1,n)]; diff --git a/chapter14/mixLogitBinPred.m b/chapter14/mixLogitBinPred.m new file mode 100644 index 0000000..708f55c --- /dev/null +++ b/chapter14/mixLogitBinPred.m @@ -0,0 +1,7 @@ +function t = mixLogitBinPred(model, X) +%MIXLOGITBINPRED Summary of this function goes here +% Detailed explanation goes here + + +end + From 6674da930b013620afdcae937d4c95bea8136bfe Mon Sep 17 00:00:00 2001 From: sth4nth Date: Mon, 18 Jan 2016 09:48:13 +0800 Subject: [PATCH 120/149] add mixLinPred --- chapter14/demo.m | 53 ++++++++++++++++++++++-------------------- chapter14/mixLinPred.m | 24 ++++++++++++++++++- chapter14/mixLinReg.m | 4 ++-- 3 files changed, 53 insertions(+), 28 deletions(-) diff --git a/chapter14/demo.m b/chapter14/demo.m index db120d7..cac208e 100644 --- a/chapter14/demo.m +++ b/chapter14/demo.m @@ -1,30 +1,33 @@ %% Demo for mixture of linear regression -% close all; clear -% d = 1; -% k = 2; -% n = 500; -% [X,y] = mixLinRnd(d,k,n); -% plot(X,y,'.'); -% [model,llh] = mixLinReg(X, y, k); y_ = mixLin -% plotClass([X;y],label); -% figure -% plot(llh); +close all; clear +d = 1; +k = 2; +n = 500; +[X,y] = mixLinRnd(d,k,n); +plot(X,y,'.'); +[label,model,llh] = mixLinReg(X, y, k); +plotClass([X;y],label); +figure +plot(llh); +[y_,z,p] = mixLinPred(model,X,y); +figure; +plotClass([X;y],label); %% Demo for mixture of logistic regression -d = 2; +% d = 2; % k = 2; % n = 500; -[X,t] = kmeansRnd(d,k,n); -model = mixnLogitBin(X,t-1); -y = adaboostBinPred(model,X); -plotClass(X,y+1) +% [X,t] = kmeansRnd(d,k,n); +% +% model = mixnLogitBin(X,t-1); +% y = adaboostBinPred(model,X); +% plotClass(X,y+1) +% %% Demo for adaboost -d = 2; -k = 2; -n = 500; -[X,t] = kmeansRnd(d,k,n); -model = adaboostBin(X,t-1); -y = adaboostBinPred(model,X); -plotClass(X,y+1) +% d = 2; +% k = 2; +% n = 500; +% [X,t] = kmeansRnd(d,k,n); +% model = adaboostBin(X,t-1); +% y = adaboostBinPred(model,X); +% plotClass(X,y+1) diff --git a/chapter14/mixLinPred.m b/chapter14/mixLinPred.m index cc1e62d..b20c148 100644 --- a/chapter14/mixLinPred.m +++ b/chapter14/mixLinPred.m @@ -1,2 +1,24 @@ -function y = mixLinPred(model, X) +function [y, z, p] = mixLinPred(model, X, t) +% Prediction function for mixture of linear regression +% input: +% model: trained model structure +% X: dxn data matrix +% t: (optional) 1xn response vector +% output: +% y: prediction +% z: cluster label +% p: probability for t +W = model.W; +alpha = model.alpha; +beta = model.beta; +X = [X;ones(1,size(X,2))]; % adding the bias term +y = W'*X; +D = bsxfun(@minus,y,t).^2; +logRho = (-0.5)*beta*D; +logRho = bsxfun(@plus,logRho,log(alpha)); +T = logsumexp(logRho,1); +p = exp(T); +logR = bsxfun(@minus,logRho,T); +R = exp(logR); +[~,z] = max(R,[],1); diff --git a/chapter14/mixLinReg.m b/chapter14/mixLinReg.m index eb56557..adabd1d 100644 --- a/chapter14/mixLinReg.m +++ b/chapter14/mixLinReg.m @@ -1,4 +1,4 @@ -function [model, llh] = mixLinReg(X, y, k, lambda) +function [label, model, llh] = mixLinReg(X, y, k, lambda) % Mixture of linear regression % Written by Mo Chen (sth4nth@gmail.com).
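% M-step sketch: each component solves a responsibility-weighted ridge problem,
%   W(:,j) = (X*diag(R(j,:))*X' + lambda*I) \ (X*(R(j,:)'.*y'))
% which is what the Cholesky solve in the training loop computes.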
if nargin < 4 @@ -36,7 +36,7 @@ if abs(llh(iter)-llh(iter-1)) < tol*abs(llh(iter)); break; end end llh = llh(2:iter); - +[~,label] = max(R,[],1); model.alpha = alpha; % mixing coefficient model.beta = beta; % mixture component precision model.W = W; % linear model coefficient From 8b680ad85bf9d8c18e28f65db4942613f03c42d4 Mon Sep 17 00:00:00 2001 From: sth4nth Date: Mon, 18 Jan 2016 09:49:58 +0800 Subject: [PATCH 121/149] remove redundant file --- chapter14/adaboost.m | 39 ------------------------------------- chapter14/mixLogitBinPred.m | 5 ++--- 2 files changed, 2 insertions(+), 42 deletions(-) delete mode 100644 chapter14/adaboost.m diff --git a/chapter14/adaboost.m b/chapter14/adaboost.m deleted file mode 100644 index 6a1b914..0000000 --- a/chapter14/adaboost.m +++ /dev/null @@ -1,39 +0,0 @@ -function model = adaboost(X, t) -% Adaboost with decision stump for binary classification -% Written by Mo Chen (sth4nth@gmail.com). -n = size(X,2); -w = ones(1,n)/n; -T = 1000; -Alpha = zeros(1,T); -Theta = zeros(1,T); -E = sparse(1:n,t+1,1,n,2,n); -for it = 1:T - % weak learner: decision stump - m = bsxfun(@times,X,w)*E; - theta = mean(m,2); - - y = bsxfun(@gt,X,theta); - I = bsxfun(@eq,y,t); - j = max(I*w'); - I = I(j,:); - - % boosting - e = sum(w.*I); - alpha = log((1-e)./e); - - w = w.*exp(alpha*I); - w = w/sum(w); - - Alpha(it) = alpha; - Theta(it) = theta; -end -model.alpha = Alpha; -model.theta = Theta; - - - - - - - - diff --git a/chapter14/mixLogitBinPred.m b/chapter14/mixLogitBinPred.m index 708f55c..ac2ab07 100644 --- a/chapter14/mixLogitBinPred.m +++ b/chapter14/mixLogitBinPred.m @@ -1,7 +1,6 @@ function t = mixLogitBinPred(model, X) -%MIXLOGITBINPRED Summary of this function goes here -% Detailed explanation goes here -end + + + From 75ac8b7b760ab960e1e10d62f6a48e06528770a7 Mon Sep 17 00:00:00 2001 From: sth4nth Date: Mon, 18 Jan 2016 10:37:16 +0800 Subject: [PATCH 122/149] update TODO --- TODO.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/TODO.txt b/TODO.txt index fcbaced..275a4f1 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,10 +1,8 @@ TODO: -chapter14: adaboost -all: modify all binary label to use 0/1 chapter11: collapsed Gibbs sampling for gmm and dpgm chapter13: demo for time series models chapter12: prediction functions for ppca chapter05: MLP chapter10: prediction functions for VB chapter08: BP, EP From 4cedd87292f81875cce08fa8a102d7436543593f Mon Sep 17 00:00:00 2001 From: sth4nth Date: Mon, 18 Jan 2016 21:02:17 +0800 Subject: [PATCH 123/149] add mixLogitBinPred --- chapter14/demo.m | 45 +++++++++++++++++++++------------------ chapter14/mixLogitBin.m | 9 ++++---- chapter14/mixLogitBinPred.m | 10 +++++---- 3 files changed, 33 insertions(+), 31 deletions(-) diff --git a/chapter14/demo.m b/chapter14/demo.m index cac208e..32432ac 100644 --- a/chapter14/demo.m +++ b/chapter14/demo.m @@ -1,28 +1,29 @@ %% Demo for mixture of linear regression -close all; clear -d = 1; +% close all; clear +% d = 1; % k = 2; % n = 500; -[X,y] = mixLinRnd(d,k,n); -plot(X,y,'.'); -[label,model,llh] = mixLinReg(X, y, k); +% [X,y] = mixLinRnd(d,k,n); +% plot(X,y,'.'); +% [label,model,llh] = mixLinReg(X, y, k); +% plotClass([X;y],label); +% figure +% plot(llh); +% [y_,z,p] = mixLinPred(model,X,y); +% figure; +% plotClass([X;y],label); %% Demo for mixture of logistic regression d = 2; c = 2; k = 4; n = 500; [X,t] = kmeansRnd(d,c,n); [model, llh] = mixLogitBin(X,t-1,k); plot(llh); y = mixLogitBinPred(model,X); plotClass(X,y+1) %% Demo for adaboost % d = 2; % k = 2; % n = 500; % [X,t] = kmeansRnd(d,k,n); % model = adaboostBin(X,t-1); % y = adaboostBinPred(model,X); % plotClass(X,y+1) \ No newline at end of file diff --git a/chapter14/mixLogitBin.m b/chapter14/mixLogitBin.m index e13ac0b..9519371 100644 --- a/chapter14/mixLogitBin.m +++ b/chapter14/mixLogitBin.m @@ -8,7 +8,6 @@ R = full(sparse(1:n,z,1,n,k,n)); % n x k W = zeros(d,k); -w0 = zeros(1,k); tol = 1e-4; maxiter = 100; llh = -inf(1,maxiter); @@ ... t = t(:); h = ones(n,1); h(t==0) = -1; -A = bsxfun(@plus,X'*W,w0); +A = X'*W; for iter = 2:maxiter % maximization nk = sum(R,1); @@ ... W(:,j) = newtonStep(X, t, Y(:,j), W(:,j), R(:,j)); end % expectation - A = bsxfun(@plus,X'*W,w0); + A = X'*W; logRho = -log1pexp(-bsxfun(@times,A,h)); logRho = bsxfun(@plus,logRho,log(alpha)); T = logsumexp(logRho,2); @@ ... function w = newtonStep(X, t, y, w, r) -% lambda = 1e-6; +lambda = 1e-6; v = y.*(1-y).*r; -H = bsxfun(@times,X,v')*X';%+lambda*eye(size(X,1)); +H = bsxfun(@times,X,v')*X'+lambda*eye(size(X,1)); s = (y-t).*r; g = X*s; w = w-H\g; diff --git a/chapter14/mixLogitBinPred.m b/chapter14/mixLogitBinPred.m index ac2ab07..e989fe0 100644 --- a/chapter14/mixLogitBinPred.m +++ b/chapter14/mixLogitBinPred.m @@ -1,6 +1,8 @@ function t = mixLogitBinPred(model, X) +% Prediction function for mixture of logistic regression +alpha = model.alpha; % mixing coefficient +W = model.W; % logistic model coefficient +n = size(X,2); +X = [X; ones(1,n)]; +t = round(alpha*sigmoid(W'*X)); From 7c1ca07e334b8fc6109fe080e8ea5bd9c2a7af93 Mon Sep 17 00:00:00 2001 From: sth4nth Date: Mon, 18 Jan 2016 21:18:27 +0800 Subject: [PATCH 124/149] add function stubs --- chapter11/mixGaussGb.m | 1 + chapter12/ppcaRnd.m | 1 + 2 files changed, 2 insertions(+) create mode 100644 chapter11/mixGaussGb.m create mode 100644 chapter12/ppcaRnd.m diff --git a/chapter11/mixGaussGb.m b/chapter11/mixGaussGb.m new file mode 100644 index 0000000..09d640d --- /dev/null +++ b/chapter11/mixGaussGb.m @@ -0,0 +1 @@ +function [label, model, llh] = mixGaussGb(X, init) diff --git a/chapter12/ppcaRnd.m b/chapter12/ppcaRnd.m new file mode 100644 index 0000000..a64cc94 --- /dev/null +++ b/chapter12/ppcaRnd.m @@ -0,0 +1 @@ +function [X, model] = ppcaRnd(d, n) From a2754d5e938d8a85ea31ab9d207cf0f9636ec220 Mon Sep 17 00:00:00 2001 From: sth4nth Date: Mon, 18 Jan 2016 21:51:54 +0800 Subject: [PATCH 125/149] nothing --- chapter12/ppcaRnd.m | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/chapter12/ppcaRnd.m b/chapter12/ppcaRnd.m index a64cc94..62a8fd7 100644 --- a/chapter12/ppcaRnd.m +++ b/chapter12/ppcaRnd.m @@ -1 +1,2 @@ -function [X, model] = ppcaRnd(d, n) +function [X, model] = ppcaRnd(q, d, n) +% Generate data from probabilistic PCA model \ No newline at end of file From 48548dad876b855237be048baf0608b7fcad0a94 Mon Sep 17 00:00:00 2001 From: sth4nth Date: Tue, 19 Jan 2016 10:12:25 +0800 Subject: [PATCH 126/149] added ppcaRnd.m --- chapter03/linRnd.m | 5 ++--- chapter12/demo.m | 13 +++---------- chapter12/ppcaEm.m | 24 ++++++++++++------------ chapter12/ppcaRnd.m | 13 +++++++++++-- 4 files changed, 28 insertions(+), 27 deletions(-) diff --git a/chapter03/linRnd.m b/chapter03/linRnd.m index 9005c94..ed0f953 100644 --- a/chapter03/linRnd.m +++ b/chapter03/linRnd.m @@ -5,9 +5,8 @@ % X is generated from [0,1] % d: dimension of data % n: number of data -beta = gamrnd(10,10); % need statistics toolbox +beta = randg; % need statistics toolbox X = rand(d,n); w = randn(d,1); w0 = randn(1,1); -err = randn(1,n)/sqrt(beta); -t = w'*X+w0+err; \ No newline at end of file +t = w'*X+w0+randn(1,n)/sqrt(beta); \ No newline at end of file diff --git a/chapter12/demo.m b/chapter12/demo.m index db27059..3e19cf0 100644 --- a/chapter12/demo.m +++ b/chapter12/demo.m @@ -1,16 +1,9 @@ % demo -m = 10; +d = 10; +m = 2; n = 1000; -X = randn(m,n); -mu = mean(X,2); -Xo = bsxfun(@minus,X,mu); -[U,S,V] = svd(Xo,'econ'); -r = rand(m,1).^8; - -S = S.*diag(r); -Xo = U*S*V'; -X = bsxfun(@plus,Xo,mu); +[X] = ppcaRnd(m,d,n); %% diff --git a/chapter12/ppcaEm.m b/chapter12/ppcaEm.m index 61f4d72..483b5aa 100644 --- a/chapter12/ppcaEm.m +++ b/chapter12/ppcaEm.m @@ -1,24 +1,24 @@ -function [model, llh] = ppcaEm(X, q) +function [model, llh] = ppcaEm(X, m) % Perform EM algorithm to maximize likelihood of probabilistic PCA model. -% X: m x n data matrix -% q: dimension of target space +% X: d x n data matrix +% m: dimension of target space % Reference: % Pattern Recognition and Machine Learning by Christopher M. Bishop % Probabilistic Principal Component Analysis by Michael E. Tipping & Christopher M. Bishop % Written by Mo Chen (sth4nth@gmail.com). -[m,n] = size(X); +[d,n] = size(X); mu = mean(X,2); X = bsxfun(@minus,X,mu); tol = 1e-4; maxiter = 500; llh = -inf(1,maxiter); -idx = (1:q)'; -dg = sub2ind([q,q],idx,idx); -I = eye(q); +idx = (1:m)'; +dg = sub2ind([m,m],idx,idx); +I = eye(m); r = dot(X(:),X(:)); % total norm of X -W = rand(m,q); +W = randn(d,m); s = rand; for iter = 2:maxiter M = W'*W; @@ -28,10 +28,10 @@ WX = W'*X; % likelihood - logdetC = 2*sum(log(diag(U)))+(m-q)*log(s); + logdetC = 2*sum(log(diag(U)))+(d-m)*log(s); T = U'\WX; trInvCS = (r-dot(T(:),T(:)))/(s*n); - llh(iter) = -n*(m*log(2*pi)+logdetC+trInvCS)/2; + llh(iter) = -n*(d*log(2*pi)+logdetC+trInvCS)/2; if abs(llh(iter)-llh(iter-1)) < tol*abs(llh(iter-1)); break; end % check likelihood for convergence % E step @@ -42,7 +42,7 @@ U = chol(Ezz); W = ((X*Ez')/U)/U'; WR = W*U'; - s = (r-2*dot(Ez(:),WX(:))+dot(WR(:),WR(:)))/(n*m); + s = (r-2*dot(Ez(:),WX(:))+dot(WR(:),WR(:)))/(n*d); end llh = llh(2:iter); % W = normalize(orth(W)); @@ -56,4 +56,4 @@ % model.V = V; model.W = W; model.mu = mu; -model.sigma = s; \ No newline at end of file +model.beta = 1/s; \ No newline at end of file diff --git a/chapter12/ppcaRnd.m b/chapter12/ppcaRnd.m index 62a8fd7..2745647 100644 --- a/chapter12/ppcaRnd.m +++ b/chapter12/ppcaRnd.m @@ -1,2 +1,11 @@ -function [X, model] = ppcaRnd(q, d, n) -% Generate data from probabilistic PCA model \ No newline at end of file +function [X, model] = ppcaRnd(m, d, n) +% Generate data from probabilistic PCA model +beta = randg; +Z = randn(m,n); +W = randn(d,m); +mu = randn(d,1); +X = bsxfun(@plus,W*Z,mu)+randn(d,n)/sqrt(beta); + +model.W = W; +model.mu = mu; +model.beta = beta; \ No newline at end of file From c60c700cfd2766ef7a8bd225985d4a319f3bcff0 Mon Sep 17 00:00:00 2001 From: sth4nth Date: Tue, 19 Jan 2016 10:14:51 +0800 Subject: [PATCH 127/149] updated TODO --- chapter12/demo.m | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/chapter12/demo.m b/chapter12/demo.m index 3e19cf0..82b9963 100644 --- a/chapter12/demo.m +++ b/chapter12/demo.m @@ -1,3 +1,7 @@ +% TODO: +% 1) demo +% 2) pred +% 3) unify model parameter % demo d = 10; m = 2; n = 1000;
[X] = ppcaRnd(m,d,n); %% -[model,energy] = pcaVb(X); -[model, llh] = pcaEm(X,3); -[model, llh] = fa(X,3); +[model,llh] = pcaVb(X); +[model, llh] = pcaEm(X,m); +[model, llh] = fa(X,m); plot(energy) \ No newline at end of file From 9f1482da93764923cb2d5ee70bd5c888b72aee66 Mon Sep 17 00:00:00 2001 From: sth4nth Date: Tue, 19 Jan 2016 10:28:42 +0800 Subject: [PATCH 128/149] nothing --- chapter11/mixGaussGb.m | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/chapter11/mixGaussGb.m b/chapter11/mixGaussGb.m index 09d640d..8ff4ebd 100644 --- a/chapter11/mixGaussGb.m +++ b/chapter11/mixGaussGb.m @@ -1 +1,6 @@ function [label, model, llh] = mixGaussGb(X, init) +% Collapsed Gibbs sampling for (infinite) Gaussian mixture model (a.k.a. +% DPGM) + + + From 3d61f9febbc15339c182437ff9c70bba055c19e3 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 19 Jan 2016 15:04:38 +0800 Subject: [PATCH 129/149] add a lot of unorganized stuff --- other/KCluster/farseeds.m | 17 +++ other/KCluster/kmeanspp.m | 33 +++++ other/KCluster/kmedoids.m | 15 ++ other/KCluster/kspheres.m | 30 ++++ other/KCluster/rndseeds.m | 8 + other/KCluster/softseeds.m | 16 ++ other/KCluster/spkmeans.m | 35 +++++ other/KCluster/wkmeans.m | 37 +++++ other/Mixture/emidgm.m | 81 +++++++++++ other/Mixture/emkmeans.m | 82 +++++++++++ other/Mixture/emsgm.m | 80 ++++++++++ other/Mixture/vbdgm.m | 164 +++++++++++++++++++++ other/Mixture/vbgm.m | 199 +++++++++++++++++++++++++ other/Mixture/vbigm.m | 179 +++++++++++++++++++++++ other/Mixture/vbkmeans.m | 178 +++++++++++++++++++++++ other/Mixture/vbsgm.m | 181 +++++++++++++++++++++++ other/Mixture/vbtsbgm.m | 225 +++++++++++++++++++++++++++++ other/SpectralCluster/affinity.m | 31 ++++ other/SpectralCluster/bncut.m | 19 +++ other/SpectralCluster/discretize.m | 118 +++++++++++++++ other/SpectralCluster/knn.m | 7 + other/SpectralCluster/laplacian.m | 36 +++++ other/SpectralCluster/mncut.m | 11 ++ other/SpectralCluster/sc.m | 15 ++ other/SpectralCluster/transition.m | 10 ++ other/cempca.m | 26 ++++ other/geig.m | 13 ++ other/gso.m | 6 + other/invpd.m | 10 ++ other/kn2sd.m | 5 + other/logdet.m | 9 ++ other/loggmpdf.m | 27 ++++ other/logkdepdf.m | 4 + other/plotgm.m | 26 ++++ other/plotkde.m | 33 +++++ other/sd2kn.m | 7 + other/symeig.m | 16 ++ 37 files changed, 1989 insertions(+) create mode 100644 other/KCluster/farseeds.m create mode 100644 other/KCluster/kmeanspp.m create mode 100644 other/KCluster/kmedoids.m create mode 100644 other/KCluster/kspheres.m create mode 100644 other/KCluster/rndseeds.m create mode 100644 other/KCluster/softseeds.m create mode 100644 other/KCluster/spkmeans.m create mode 100644 other/KCluster/wkmeans.m create mode 100644 other/Mixture/emidgm.m create mode 100644 other/Mixture/emkmeans.m create mode 100644 other/Mixture/emsgm.m create mode 100644 other/Mixture/vbdgm.m create mode 100644 other/Mixture/vbgm.m create mode 100644 other/Mixture/vbigm.m create mode 100644 other/Mixture/vbkmeans.m create mode 100644 other/Mixture/vbsgm.m create mode 100644 other/Mixture/vbtsbgm.m create mode 100644 other/SpectralCluster/affinity.m create mode 100644 other/SpectralCluster/bncut.m create mode 100644 other/SpectralCluster/discretize.m create mode 100644 other/SpectralCluster/knn.m create mode 100644 other/SpectralCluster/laplacian.m create mode 100644 other/SpectralCluster/mncut.m create mode 100644 other/SpectralCluster/sc.m create mode 100644 other/SpectralCluster/transition.m create mode 100644 other/cempca.m create mode 100644 other/geig.m create mode 100644 other/gso.m create mode 
100644 other/invpd.m create mode 100644 other/kn2sd.m create mode 100644 other/logdet.m create mode 100644 other/loggmpdf.m create mode 100644 other/logkdepdf.m create mode 100644 other/plotgm.m create mode 100644 other/plotkde.m create mode 100644 other/sd2kn.m create mode 100644 other/symeig.m diff --git a/other/KCluster/farseeds.m b/other/KCluster/farseeds.m new file mode 100644 index 0000000..8b42169 --- /dev/null +++ b/other/KCluster/farseeds.m @@ -0,0 +1,17 @@ +function m = farseeds(X, k) +% Find k farest samples as seeds for initializing clustering. +% X: d x n data matrix +% k: number of seeds +% Written by Michael Chen (sth4nth@gmail.com). +d = size(X,1); +m = zeros(d,k); +% idx = ceil(n.*rand); +[~,idx] = max(dot(X,X,1)); +m(:,1) = X(:,idx); +D = 0; +for i = 2:k + Y = bsxfun(@minus,X,m(:,i-1)); + D = D+sqrt(dot(Y,Y,1)); + [~,idx] = max(D); + m(:,i) = X(:,idx); +end diff --git a/other/KCluster/kmeanspp.m b/other/KCluster/kmeanspp.m new file mode 100644 index 0000000..4a87d29 --- /dev/null +++ b/other/KCluster/kmeanspp.m @@ -0,0 +1,33 @@ +function [label, energy] = kmeanspp(X, k) +% X: d x n data matrix +% k: number of seeds +% reference: k-means++: the advantages of careful seeding. David Arthur and Sergei Vassilvitskii +% Written by Michael Chen (sth4nth@gmail.com). +m = seeds(X,k); +[label, energy] = kmeans(X, m); + +function m = seeds(X, k) +[d,n] = size(X); +m = zeros(d,k); +v = inf(1,n); +m(:,1) = X(:,ceil(n*rand)); +for i = 2:k + Y = bsxfun(@minus,X,m(:,i-1)); + v = cumsum(min(v,dot(Y,Y,1))); + m(:,i) = X(:,find(rand < v/v(end),1)); +end + +function [label, energy] = kmeans(X, m) +n = size(X,2); +last = 0; +[~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); +while any(label ~= last) + [u,~,label] = unique(label); % remove empty clusters + k = length(u); + E = sparse(1:n,label,1,n,k,n); % transform label into indicator matrix + m = X*(E*spdiags(1./sum(E,1)',0,k,k)); % compute m of each cluster + last = label; + [value,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); % assign samples to the nearest centers +end +[~,~,label] = unique(label); % remove empty clusters +energy = -2*sum(value)+dot(X(:),X(:)); \ No newline at end of file diff --git a/other/KCluster/kmedoids.m b/other/KCluster/kmedoids.m new file mode 100644 index 0000000..15971ea --- /dev/null +++ b/other/KCluster/kmedoids.m @@ -0,0 +1,15 @@ +function [label, energy, index] = kmedoids(X,k) +% X: d x n data matrix +% k: number of cluster +% Written by Mo Chen (sth4nth@gamil.com) +v = dot(X,X,1); +D = bsxfun(@plus,v,v')-2*(X'*X); +n = size(X,2); +[~, label] = min(D(randsample(n,k),:)); +last = 0; +while any(label ~= last) + [~, index] = min(D*sparse(1:n,label,1,n,k,n)); + last = label; + [val, label] = min(D(index,:),[],1); +end +energy = sum(val); diff --git a/other/KCluster/kspheres.m b/other/KCluster/kspheres.m new file mode 100644 index 0000000..348de2c --- /dev/null +++ b/other/KCluster/kspheres.m @@ -0,0 +1,30 @@ +function [label, model] = kspheres(X, k) +% Clustering samples into k isotropic Gaussian with different variances. +% X: d x n data matrix +% k: number of seeds +% Written by Michael Chen (sth4nth@gmail.com). 
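% Usage sketch for the seeding helpers above (farseeds/kmeanspp/kmedoids;
% illustrative only, assumes a d x n data matrix with samples as columns
% and that these files are on the MATLAB path):
X = [randn(2,100), bsxfun(@plus,randn(2,100),[4;4])]; % two Gaussian blobs
m0 = farseeds(X, 2);                     % farthest-point seeds only
[label, energy] = kmeanspp(X, 2);        % k-means with k-means++ seeding
[label2, energy2, idx] = kmedoids(X, 2); % exemplar (medoid) based variant
% (kmedoids calls randsample, which needs the statistics toolbox)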
+[d,n] = size(X); +last = 0; +label = ceil(k*rand(1,n)); % random initialization +while any(label ~= last) + [u,~,label] = unique(label); % remove empty clusters + k = length(u); + R = sparse(label,1:n,1,k,n,n); + nk = sum(R,2); + w = nk/n; + mu = bsxfun(@times, X*R', 1./nk'); + + D = sqdistance(mu,X); + s = dot(D,R,2)./(d*nk); + + R = bsxfun(@times,D,1./s); + R = bsxfun(@plus,R,d*log(2*pi*s))/(-2); + R = bsxfun(@plus,R,log(w)); + + last = label; + [~,label] = max(R,[],1); +end +[~,~,label] = unique(label); % remove empty clusters +model.mu = mu; +model.sigma = s'; +model.weight = w; \ No newline at end of file diff --git a/other/KCluster/rndseeds.m b/other/KCluster/rndseeds.m new file mode 100644 index 0000000..bce4c97 --- /dev/null +++ b/other/KCluster/rndseeds.m @@ -0,0 +1,8 @@ +function [S, idx] = rndseeds(X, k) +% Random pick k samples from X. +% X: d x n data matrix +% k: number of seeds +% Written by Michael Chen (sth4nth@gmail.com). +n = size(X,2); +idx = randsample(n,k); +S = X(:,idx); \ No newline at end of file diff --git a/other/KCluster/softseeds.m b/other/KCluster/softseeds.m new file mode 100644 index 0000000..8a34da6 --- /dev/null +++ b/other/KCluster/softseeds.m @@ -0,0 +1,16 @@ +function m = softseeds(X, k) +% Find samples as seeds for initializing clustering using kmeans++ algorithm. +% X: d x n data matrix +% k: number of seeds +% Reference: k-means++: the advantages of careful seeding. +% by David Arthur and Sergei Vassilvitskii +% Written by Michael Chen (sth4nth@gmail.com). +[d,n] = size(X); +m = zeros(d,k); +v = inf(1,n); +m(:,1) = X(:,ceil(n*rand)); +for i = 2:k + Y = bsxfun(@minus,X,m(:,i-1)); + v = cumsum(min(v,dot(Y,Y,1))); + m(:,i) = X(:,find(rand < v/v(end),1)); +end \ No newline at end of file diff --git a/other/KCluster/spkmeans.m b/other/KCluster/spkmeans.m new file mode 100644 index 0000000..7e0b88d --- /dev/null +++ b/other/KCluster/spkmeans.m @@ -0,0 +1,35 @@ +function [label, m, energy] = spkmeans(X, init) +% Perform spherical k-means clustering. +% X: d x n data matrix +% init: k (1 x 1) or label (1 x n, 1<=label(i)<=k) or center (d x k) +% Reference: Clustering on the Unit Hypersphere using Von Mises-Fisher Distributions. +% by A. Banerjee, I. Dhillon, J. Ghosh and S. Sra. +% Written by Michael Chen (sth4nth@gmail.com). +%% initialization +[d,n] = size(X); +X = normalize(X); + +if length(init) == 1 + idx = randsample(n,init); + m = X(:,idx); + [~,label] = max(m'*X,[],1); +elseif size(init,1) == 1 && size(init,2) == n + label = init; +elseif size(init,1) == d + m = normalize(init); + [~,label] = max(m'*X,[],1); +else + error('ERROR: init is not valid.'); +end +%% main algorithm: final version +last = 0; +while any(label ~= last) + [u,~,label] = unique(label); % remove empty clusters + k = length(u); + E = sparse(1:n,label,1,n,k,n); + m = normalize(X*E); + last = label; + [val,label] = max(m'*X,[],1); +end +[~,~,label] = unique(label); % remove empty clusters +energy = sum(val); \ No newline at end of file diff --git a/other/KCluster/wkmeans.m b/other/KCluster/wkmeans.m new file mode 100644 index 0000000..c7174bb --- /dev/null +++ b/other/KCluster/wkmeans.m @@ -0,0 +1,37 @@ +function [label, energy, m] = wkmeans(X, init, w) +% Perform weighted k-means clustering. +% X: d x n data matrix +% init: k (1 x 1) or label (1 x n, 1<=label(i)<=k) or center (d x k) +% w: 1 x n weight vector (default w=1, equivalent to kmeans. +% Written by Michael Chen (sth4nth@gmail.com). 
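% Call sketch (illustrative): wkmeans(X, 3) behaves like plain k-means with
% three clusters, while wkmeans(X, 3, w) with a nonnegative 1 x n vector w
% weights each sample's contribution to the cluster means; w = ones(1,n)
% recovers the unweighted case.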
+%% initialization +if nargin == 2 + w = 1; +end +[d,n] = size(X); +if length(init) == 1 + idx = randsample(n,init); + m = X(:,idx); + [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); +elseif size(init,1) == 1 && size(init,2) == n + label = init; +elseif size(init,1) == d + m = init; + [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); +else + error('ERROR: init is not valid.'); +end +%% main algorithm +last = 0; +while any(label ~= last) + [u,~,label] = unique(label); % remove empty clusters + k = length(u); + E = sparse(1:n,label,w,n,k,n); + m = bsxfun(@times,X*E,1./full(sum(E,1))); + last = label; + [val,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); +end +[~,~,label] = unique(label); % remove empty clusters +energy = -2*sum(val)+dot(X(:),X(:)); % sum of distances of clusters + +% s = energy/(n-k); % variance \ No newline at end of file diff --git a/other/Mixture/emidgm.m b/other/Mixture/emidgm.m new file mode 100644 index 0000000..7d4b615 --- /dev/null +++ b/other/Mixture/emidgm.m @@ -0,0 +1,81 @@ +function [label, model, llh] = emidgm(X, init) +% EM algorithm for independent Gaussian mixture model +% Written by Michael Chen (sth4nth@gmail.com). +fprintf('EM for independent Gaussian mixture: running ... \n'); +R = initialization(X, init); +[~,label(1,:)] = max(R,[],2); +R = R(:,unique(label)); + +[d,n] = size(X); +tol = 1e-6; +maxiter = 500; +llh = -inf(1,maxiter); +converged = false; +t = 1; +X2 = X.^2; +while ~converged && t < maxiter + t = t+1; + % maximizition step + nk = sum(R,1); + w = nk/n; + R = bsxfun(@times,R,1./nk); + mu = X*R; + mu2 = mu.*mu; + sigma = X2*R-mu2; + + % expectation step + lambda = 1./sigma; + M = bsxfun(@plus,X2'*lambda-2*X'*(mu.*lambda),dot(mu2,lambda,1)); % M distance + c = (d*log(2*pi)+sum(log(sigma),1))/(-2); % normalization constant + + logRho = bsxfun(@plus,M/(-2),c+log(w)); + T = logsumexp(logRho,2); + logR = bsxfun(@minus,logRho,T); + R = exp(logR); + llh(t) = sum(T)/n; % loglikelihood + + [~,label(:)] = max(R,[],2); + u = unique(label); % non-empty components + if size(R,2) ~= size(u,2) + R = R(:,u); % remove empty components + else + converged = llh(t)-llh(t-1) < tol*abs(llh(t)); + end +end +model.w = w; +model.mu = mu; +model.sigma = sigma; +llh = llh(2:t); +if converged + fprintf('Converged in %d steps.\n',t-1); +else + fprintf('Not converged in %d steps.\n',maxiter); +end + +function R = initialization(X, init) +[d,n] = size(X); +if length(init) == 1 % random initialization + k = init; + idx = randsample(n,k); + m = X(:,idx); + [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); + [u,~,label] = unique(label); + while k ~= length(u) + idx = randsample(n,k); + m = X(:,idx); + [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); + [u,~,label] = unique(label); + end + R = full(sparse(1:n,label,1,n,k,n)); +elseif size(init,1) == 1 && size(init,2) == n % initialize with labels + label = init; + k = max(label); + R = full(sparse(1:n,label,1,n,k,n)); +elseif size(init,1) == d %initialize with only centers + k = size(init,2); + m = init; + [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); + R = full(sparse(1:n,label,1,n,k,n)); +else + error('ERROR: init is not valid.'); +end diff --git a/other/Mixture/emkmeans.m b/other/Mixture/emkmeans.m new file mode 100644 index 0000000..3a5dd40 --- /dev/null +++ b/other/Mixture/emkmeans.m @@ -0,0 +1,82 @@ +function [label, model, llh] = emkmeans(X, init) +% EM algorithm for spherical (isotropic) Gaussian mixture model with common +% variance (aka kmeans model) +% Written by Michael 
Chen (sth4nth@gmail.com). +fprintf('EM for Kmeans: running ... \n'); +R = initialization(X, init); +[~,label(1,:)] = max(R,[],2); +R = R(:,unique(label)); + +[d,n] = size(X); +tol = 1e-6; +maxiter = 500; +llh = -inf(1,maxiter); +converged = false; +t = 1; +while ~converged && t < maxiter + t = t+1; + % maximizition step + nk = sum(R,1); + w = nk/n; + mu = bsxfun(@times,X*R,1./nk); + D = sqdistance(X,mu); + sigma = dot(D(:),R(:))/(d*n); + + % expectation step + c = d*log(2*pi*sigma)/(-2); + logRho = bsxfun(@plus,D/(-2*sigma),c+log(w)); + T = logsumexp(logRho,2); + logR = bsxfun(@minus,logRho,T); + R = exp(logR); + + llh(t) = sum(T)/n; % loglikelihood + + [~,label(:)] = max(R,[],2); + u = unique(label); % non-empty components + if size(R,2) ~= size(u,2) + R = R(:,u); % remove empty components + else + converged = llh(t)-llh(t-1) < tol*abs(llh(t)); + end +end +model.w = w; +model.mu = mu; +model.sigma = sigma; +llh = llh(2:t); +if converged + fprintf('Converged in %d steps.\n',t-1); +else + fprintf('Not converged in %d steps.\n',maxiter); +end + + +function R = initialization(X, init) +[d,n] = size(X); +if length(init) == 1 % random initialization + k = init; + idx = randsample(n,k); + m = X(:,idx); + [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); + [u,~,label] = unique(label); + while k ~= length(u) + idx = randsample(n,k); + m = X(:,idx); + [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); + [u,~,label] = unique(label); + end + R = full(sparse(1:n,label,1,n,k,n)); +elseif size(init,1) == 1 && size(init,2) == n % initialize with labels + label = init; + k = max(label); + R = full(sparse(1:n,label,1,n,k,n)); +elseif size(init,1) == d %initialize with only centers + k = size(init,2); + m = init; + [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); + R = full(sparse(1:n,label,1,n,k,n)); +else + error('ERROR: init is not valid.'); +end + +function D = sqdistance(A, B) +D = (-2)*(A'*B)+bsxfun(@plus,dot(B,B,1),dot(A,A,1)'); diff --git a/other/Mixture/emsgm.m b/other/Mixture/emsgm.m new file mode 100644 index 0000000..a595376 --- /dev/null +++ b/other/Mixture/emsgm.m @@ -0,0 +1,80 @@ +function [label, model, llh] = emsgm(X, init) +% EM algorithm for spherical (isotropic) Gaussian mixture model +% Written by Michael Chen (sth4nth@gmail.com). +fprintf('EM for spherical (isotropic) Gaussian mixture: running ... 
\n'); +R = initialization(X, init); +[~,label(1,:)] = max(R,[],2); +R = R(:,unique(label)); + +[d,n] = size(X); +tol = 1e-6; +maxiter = 500; +llh = -inf(1,maxiter); +converged = false; +t = 1; +X2 = repmat(dot(X,X,1)',1,size(R,2)); +while ~converged && t < maxiter + t = t+1; + % maximizition step + nk = sum(R,1); + w = nk/n; + mu = bsxfun(@times,X*R,1./nk); + D = bsxfun(@plus,X2-2*X'*mu,dot(mu,mu,1)); + sigma = dot(D,R,1)./(d*nk); + + % expectation step + M = bsxfun(@times,D,1./sigma); % M distance + c = d*log(2*pi*sigma)/(-2); % normalization constant + logRho = bsxfun(@plus,M/(-2),c+log(w)); + T = logsumexp(logRho,2); + logR = bsxfun(@minus,logRho,T); + R = exp(logR); + + llh(t) = sum(T)/n; % loglikelihood + + [~,label(:)] = max(R,[],2); + u = unique(label); % non-empty components + if size(R,2) ~= size(u,2) + R = R(:,u); % remove empty components + else + converged = llh(t)-llh(t-1) < tol*abs(llh(t)); + end +end +model.w = w; +model.mu = mu; +model.sigma = sigma; +llh = llh(2:t); +if converged + fprintf('Converged in %d steps.\n',t-1); +else + fprintf('Not converged in %d steps.\n',maxiter); +end + + +function R = initialization(X, init) +[d,n] = size(X); +if length(init) == 1 % random initialization + k = init; + idx = randsample(n,k); + m = X(:,idx); + [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); + [u,~,label] = unique(label); + while k ~= length(u) + idx = randsample(n,k); + m = X(:,idx); + [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); + [u,~,label] = unique(label); + end + R = full(sparse(1:n,label,1,n,k,n)); +elseif size(init,1) == 1 && size(init,2) == n % initialize with labels + label = init; + k = max(label); + R = full(sparse(1:n,label,1,n,k,n)); +elseif size(init,1) == d %initialize with only centers + k = size(init,2); + m = init; + [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); + R = full(sparse(1:n,label,1,n,k,n)); +else + error('ERROR: init is not valid.'); +end \ No newline at end of file diff --git a/other/Mixture/vbdgm.m b/other/Mixture/vbdgm.m new file mode 100644 index 0000000..9b1e3e2 --- /dev/null +++ b/other/Mixture/vbdgm.m @@ -0,0 +1,164 @@ +function [label, model, L] = vbdgm(X, init, prior) +% Perform variational Bayesian inference for Gaussian mixture. +% X: d x n data matrix +% init: k (1 x 1) or label (1 x n, 1<=label(i)<=k) or center (d x k) +% Reference: Pattern Recognition and Machine Learning by Christopher M. Bishop (P.474) +% Written by Michael Chen (sth4nth@gmail.com). + +fprintf('Variational Bayesian Gaussian mixture: running ... 
\n'); +[d,n] = size(X); +if nargin < 3 + prior.alpha = 1; + prior.kappa = 1; + prior.m = mean(X,2); + prior.nu = d+1; + prior.M = eye(d); % M = inv(W) +end +tol = 1e-10; +maxiter = 1000; +L = -inf(1,maxiter); +converged = false; +t = 1; + +model.R = initialization(X,init); + +while ~converged && t < maxiter + t = t+1; + model = qDirichlet(model, prior); + model = qGaussianWishart(X, model, prior); + model = qMultinomial(X, model); + L(t) = bound(model,prior)/n; + converged = abs(L(t)-L(t-1)) < tol*abs(L(t)); +end +L = L(2:t); +label = zeros(1,n); +[~,label(:)] = max(model.R,[],2); +[~,~,label] = unique(label); +if converged + fprintf('Converged in %d steps.\n',t-1); +else + fprintf('Not converged in %d steps.\n',maxiter); +end + +function R = initialization(X, init) +[d,n] = size(X); +if length(init) == 1 % random initialization + k = init; + idx = randsample(n,k); + m = X(:,idx); + [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); + [u,~,label] = unique(label); + while k ~= length(u) + idx = randsample(n,k); + m = X(:,idx); + [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); + [u,~,label] = unique(label); + end + R = full(sparse(1:n,label,1,n,k,n)); +elseif size(init,1) == 1 && size(init,2) == n % initialize with labels + label = init; + k = max(label); + R = full(sparse(1:n,label,1,n,k,n)); +elseif size(init,1) == d %initialize with only centers + k = size(init,2); + m = init; + [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); + R = full(sparse(1:n,label,1,n,k,n)); +else + error('ERROR: init is not valid.'); +end + + +% Done +function model = qDirichlet(model, prior) +alpha0 = prior.alpha; +R = model.R; + +nk = sum(R,1); % 10.51 +alpha = alpha0+nk; % 10.58 + +model.alpha = alpha; + +% Done +function model = qGaussianWishart(X, model, prior) +kappa0 = prior.kappa; +m0 = prior.m; +nu0 = prior.nu; +M0 = prior.M; +R = model.R; + +nk = sum(R,1); % 10.51 +nxbar = X*R; + +kappa = kappa0+nk; % 10.60 +m = bsxfun(@times,bsxfun(@plus,kappa0*m0,nxbar),1./kappa); % 10.61 +nu = nu0+nk; % 10.63 + + +[d,k] = size(m); +M = zeros(d,d,k); +sqrtR = sqrt(R); + +xbar = bsxfun(@times,nxbar,1./nk); % 10.52 +xbarm0 = bsxfun(@minus,xbar,m0); +w = (kappa0*nk./(kappa0+nk)); +for i = 1:k + Xs = bsxfun(@times,bsxfun(@minus,X,xbar(:,i)),sqrtR(:,i)'); + xbarm0i = xbarm0(:,i); + M(:,:,i) = M0+Xs*Xs'+w(i)*(xbarm0i*xbarm0i'); % 10.62 +end + +model.kappa = kappa; +model.m = m; +model.nu = nu; +model.M = M; % Whishart: M = inv(W) + + +% Done +function model = qMultinomial(X, model) +alpha = model.alpha; % Dirichlet +kappa = model.kappa; % Gaussian +m = model.m; % Gasusian +nu = model.nu; % Whishart +M = model.M; % Whishart: inv(W) = V'*V + +n = size(X,2); +[d,k] = size(m); + +logW = zeros(1,k); +EQ = zeros(n,k); +for i = 1:k + U = chol(M(:,:,i)); + logW(i) = -2*sum(log(diag(U))); + Q = (U'\bsxfun(@minus,X,m(:,i))); + EQ(:,i) = d/kappa(i)+nu(i)*dot(Q,Q,1); % 10.64 +end + +ElogLambda = sum(psi(0,bsxfun(@minus,nu+1,(1:d)')/2),1)+d*log(2)+logW; % 10.65 +Elogpi = psi(0,alpha)-psi(0,sum(alpha)); % 10.66 + +logRho = (bsxfun(@minus,EQ,2*Elogpi+ElogLambda-d*log(2*pi)))/(-2); % 10.46 +logR = bsxfun(@minus,logRho,logsumexp(logRho,2)); % 10.49 +R = exp(logR); + +model.logR = logR; +model.R = R; + +function L = bound(model, prior) +alpha0 = prior.alpha; +alpha = model.alpha; +R = model.R; +logR = model.logR; + +nk = sum(R,1); % 10.51 +k = size(R,2); +Elogpi = psi(0,alpha)-psi(0,sum(alpha)); + +Epz = dot(nk,Elogpi); +Eqz = dot(R(:),logR(:)); +logCalpha0 = gammaln(k*alpha0)-k*gammaln(alpha0); +Eppi = 
logCalpha0+(alpha0-1)*sum(Elogpi); +logCalpha = gammaln(sum(alpha))-sum(gammaln(alpha)); +Eqpi = logCalpha+dot(alpha-1,Elogpi); + +L = Epz-Eqz+Eppi-Eqpi; \ No newline at end of file diff --git a/other/Mixture/vbgm.m b/other/Mixture/vbgm.m new file mode 100644 index 0000000..eec0717 --- /dev/null +++ b/other/Mixture/vbgm.m @@ -0,0 +1,199 @@ +function [label, model, L] = vbgm(X, init, prior) +% Perform variational Bayesian inference for Gaussian mixture. +% X: d x n data matrix +% init: k (1 x 1) or label (1 x n, 1<=label(i)<=k) or center (d x k) +% Reference: Pattern Recognition and Machine Learning by Christopher M. Bishop (P.474) +% Written by Michael Chen (sth4nth@gmail.com). + +fprintf('Variational Bayesian Gaussian mixture: running ... \n'); +[d,n] = size(X); +if nargin < 3 + prior.alpha = 1; + prior.kappa = 1; + prior.m = mean(X,2); + prior.v = d+1; + prior.M = eye(d); % M = inv(W) +end +tol = 1e-4; +maxiter = 500; +L = -inf(1,maxiter); +converged = false; +t = 1; + +model.R = initialization(X,init); +while ~converged && t < maxiter + t = t+1; + model = vmax(X, model, prior); + model = vexp(X, model); + L(t) = vbound(X,model,prior)/n; + converged = abs(L(t)-L(t-1)) < tol*abs(L(t)); +end +L = L(2:t); +label = zeros(1,n); +[~,label(:)] = max(model.R,[],2); +[~,~,label] = unique(label); +if converged + fprintf('Converged in %d steps.\n',t-1); +else + fprintf('Not converged in %d steps.\n',maxiter); +end + +function R = initialization(X, init) +[d,n] = size(X); +if length(init) == 1 % random initialization + k = init; + idx = randsample(n,k); + m = X(:,idx); + [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); + [u,~,label] = unique(label); + while k ~= length(u) + idx = randsample(n,k); + m = X(:,idx); + [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); + [u,~,label] = unique(label); + end + R = full(sparse(1:n,label,1,n,k,n)); +elseif size(init,1) == 1 && size(init,2) == n % initialize with labels + label = init; + k = max(label); + R = full(sparse(1:n,label,1,n,k,n)); +elseif size(init,1) == d %initialize with only centers + k = size(init,2); + m = init; + [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); + R = full(sparse(1:n,label,1,n,k,n)); +else + error('ERROR: init is not valid.'); +end +% Done +function model = vmax(X, model, prior) +alpha0 = prior.alpha; +kappa0 = prior.kappa; +m0 = prior.m; +v0 = prior.v; +M0 = prior.M; +R = model.R; + +nk = sum(R,1); % 10.51 +alpha = alpha0+nk; % 10.58 +nxbar = X*R; +kappa = kappa0+nk; % 10.60 +m = bsxfun(@times,bsxfun(@plus,kappa0*m0,nxbar),1./kappa); % 10.61 +v = v0+nk; % 10.63 + +[d,k] = size(m); +M = zeros(d,d,k); +sqrtR = sqrt(R); + +xbar = bsxfun(@times,nxbar,1./nk); % 10.52 +xbarm0 = bsxfun(@minus,xbar,m0); +w = (kappa0*nk./(kappa0+nk)); +for i = 1:k + Xs = bsxfun(@times,bsxfun(@minus,X,xbar(:,i)),sqrtR(:,i)'); + xbarm0i = xbarm0(:,i); + M(:,:,i) = M0+Xs*Xs'+w(i)*(xbarm0i*xbarm0i'); % 10.62 +end + +model.alpha = alpha; +model.kappa = kappa; +model.m = m; +model.v = v; +model.M = M; % Whishart: M = inv(W) +% Done +function model = vexp(X, model) +alpha = model.alpha; % Dirichlet +kappa = model.kappa; % Gaussian +m = model.m; % Gasusian +v = model.v; % Whishart +M = model.M; % Whishart: inv(W) = V'*V + +n = size(X,2); +[d,k] = size(m); + +logW = zeros(1,k); +EQ = zeros(n,k); +for i = 1:k + U = chol(M(:,:,i)); + logW(i) = -2*sum(log(diag(U))); + Q = (U'\bsxfun(@minus,X,m(:,i))); + EQ(:,i) = d/kappa(i)+v(i)*dot(Q,Q,1); % 10.64 +end + +ElogLambda = sum(psi(0,bsxfun(@minus,v+1,(1:d)')/2),1)+d*log(2)+logW; % 10.65 +Elogpi = 
psi(0,alpha)-psi(0,sum(alpha)); % 10.66 + +logRho = (bsxfun(@minus,EQ,2*Elogpi+ElogLambda-d*log(2*pi)))/(-2); % 10.46 +logR = bsxfun(@minus,logRho,logsumexp(logRho,2)); % 10.49 +R = exp(logR); + +model.logR = logR; +model.R = R; +% Done +function L = vbound(X, model, prior) +alpha0 = prior.alpha; +kappa0 = prior.kappa; +m0 = prior.m; +v0 = prior.v; +M0 = prior.M; + +alpha = model.alpha; % Dirichlet +kappa = model.kappa; % Gaussian +m = model.m; % Gasusian +v = model.v; % Whishart +M = model.M; % Whishart: inv(W) = V'*V +R = model.R; +logR = model.logR; + + +[d,k] = size(m); +nk = sum(R,1); % 10.51 + +Elogpi = psi(0,alpha)-psi(0,sum(alpha)); + +Epz = dot(nk,Elogpi); +Eqz = dot(R(:),logR(:)); +logCalpha0 = gammaln(k*alpha0)-k*gammaln(alpha0); +Eppi = logCalpha0+(alpha0-1)*sum(Elogpi); +logCalpha = gammaln(sum(alpha))-sum(gammaln(alpha)); +Eqpi = dot(alpha-1,Elogpi)+logCalpha; +L = Epz-Eqz+Eppi-Eqpi; + + +U0 = chol(M0); +sqrtR = sqrt(R); +xbar = bsxfun(@times,X*R,1./nk); % 10.52 + +logW = zeros(1,k); +trSW = zeros(1,k); +trM0W = zeros(1,k); +xbarmWxbarm = zeros(1,k); +mm0Wmm0 = zeros(1,k); +for i = 1:k + U = chol(M(:,:,i)); + logW(i) = -2*sum(log(diag(U))); + + Xs = bsxfun(@times,bsxfun(@minus,X,xbar(:,i)),sqrtR(:,i)'); + V = chol(Xs*Xs'/nk(i)); + Q = V/U; + trSW(i) = dot(Q(:),Q(:)); % equivalent to tr(SW)=trace(S/M) + Q = U0/U; + trM0W(i) = dot(Q(:),Q(:)); + + q = U'\(xbar(:,i)-m(:,i)); + xbarmWxbarm(i) = dot(q,q); + q = U'\(m(:,i)-m0); + mm0Wmm0(i) = dot(q,q); +end + +ElogLambda = sum(psi(0,bsxfun(@minus,v+1,(1:d)')/2),1)+d*log(2)+logW; % 10.65 +Epmu = sum(d*log(kappa0/(2*pi))+ElogLambda-d*kappa0./kappa-kappa0*(v.*mm0Wmm0))/2; +logB0 = v0*sum(log(diag(U0)))-0.5*v0*d*log(2)-logmvgamma(0.5*v0,d); +EpLambda = k*logB0+0.5*(v0-d-1)*sum(ElogLambda)-0.5*dot(v,trM0W); + +Eqmu = 0.5*sum(ElogLambda+d*log(kappa/(2*pi)))-0.5*d*k; +logB = -v.*(logW+d*log(2))/2-logmvgamma(0.5*v,d); +EqLambda = 0.5*sum((v-d-1).*ElogLambda-v*d)+sum(logB); + +EpX = 0.5*dot(nk,ElogLambda-d./kappa-v.*trSW-v.*xbarmWxbarm-d*log(2*pi)); + +L = L+Epmu-Eqmu+EpLambda-EqLambda+EpX; \ No newline at end of file diff --git a/other/Mixture/vbigm.m b/other/Mixture/vbigm.m new file mode 100644 index 0000000..6ec744f --- /dev/null +++ b/other/Mixture/vbigm.m @@ -0,0 +1,179 @@ +function [label, model, bound] = vbigm(X, init, prior) +% Perform variational Bayesian inference for independent Gaussian mixture. +% X: d x n data matrix +% init: k (1 x 1) or label (1 x n, 1<=label(i)<=k) or center (d x k) +% Written by Michael Chen (sth4nth@gmail.com). + +fprintf('Variational Bayesian independent Gaussian mixture: running ... \n'); +n = size(X,2); +if nargin < 3 + prior.alpha = 1; % noninformative setting of Dirichet prior + prior.kappa = 1; % noninformative setting of Gassian prior of Gaussian mean ? 
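% (Hedged gloss: kappa0 acts as a pseudo-count on the component means; the
%  update in vbmaximization below is m = (kappa0*m0 + nk.*xbar)./(kappa0+nk),
%  so kappa0 -> 0 removes the pull toward m0 and kappa0 = 1 keeps a weak one.)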
+ prior.m = mean(X,2); % when prior.kappa = 0 it doesnt matter how to set this + prior.nu = 1; % noninformative setting of 1d Wishart + prior.tau = 1; % noninformative setting of 1d Wishart +end +R = initialization(X,init); + +tol = 1e-8; +maxiter = 1000; +bound = -inf(1,maxiter); +converged = false; +t = 1; + +model.R = R; +model = vbmaximization(X,model,prior); +while ~converged && t < maxiter + t = t+1; + model = vbexpection(X,model); + model = vbmaximization(X,model,prior); + bound(t) = vbound(X,model,prior)/n; + converged = abs(bound(t)-bound(t-1)) < tol*abs(bound(t)); +end +bound = bound(2:t); +label = zeros(1,n); +[~,label(:)] = max(model.R,[],2); +[~,~,label] = unique(label); + +if converged + fprintf('Converged in %d steps.\n',t-1); +else + fprintf('Not converged in %d steps.\n',maxiter); +end + +% Done. + +function R = initialization(X, init) +[d,n] = size(X); +if length(init) == 1 % random initialization + k = init; + idx = randsample(n,k); + m = X(:,idx); + [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); + [u,~,label] = unique(label); + while k ~= length(u) + idx = randsample(n,k); + m = X(:,idx); + [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); + [u,~,label] = unique(label); + end + R = full(sparse(1:n,label,1,n,k,n)); +elseif size(init,1) == 1 && size(init,2) == n % initialize with labels + label = init; + k = max(label); + R = full(sparse(1:n,label,1,n,k,n)); +elseif size(init,1) == d %initialize with only centers + k = size(init,2); + m = init; + [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); + R = full(sparse(1:n,label,1,n,k,n)); +else + error('ERROR: init is not valid.'); +end + +% update latent variables +function model = vbexpection(X, model) +alpha = model.alpha; +kappa = model.kappa; +m = model.m; +nu = model.nu; +tau = model.tau; + +d = size(X,1); + +logw = psi(0,alpha)-psi(0,sum(alpha)); +% loglambda = psi(0,nu/2)-log(tau/2); +loglambda = bsxfun(@plus,-log(tau/2),psi(0,nu/2)); + +% M = bsxfun(@times,sqdistance(X,m),nu./tau); +aib = bsxfun(@times,1./tau,nu); +r = m.*aib; +M = bsxfun(@plus,X.^2'*aib-2*X'*r,dot(m,r,1)); +M = bsxfun(@plus,M,d./kappa); +% c = d*(loglambda-log(2*pi))/2; +c = (sum(loglambda,1)-d*log(2*pi))/2; % normalization constant +logRho = bsxfun(@plus,M/(-2),logw+c); + +% [~,idx] = max(logR,[],2); +% logR = logR(:,unique(idx)); % remove empty components!!! + +logR = bsxfun(@minus,logRho,logsumexp(logRho,2)); +R = exp(logR); + +model.logR = logR; +model.R = R; + +% Done. +% update the parameters. 
+function model = vbmaximization(X, model, prior) +alpha0 = prior.alpha; % Dirichet prior +kappa0 = prior.kappa; % piror of Gaussian mean +m0 = prior.m; % piror of Gaussian mean +nu0 = prior.nu; % 1d Wishart +tau0 = prior.tau; % 1d Wishart + +R = model.R; + +% Dirichlet +nk = sum(R,1); +alpha = alpha0+nk; +% Gaussian +kappa = kappa0+nk; +R = bsxfun(@times,R,1./nk); +xbar = X*R; +s = X.^2*R-xbar.^2; + +m = bsxfun(@times,bsxfun(@plus,kappa0*m0,bsxfun(@times,xbar,nk)),1./kappa); +% 1d Wishart +nu = nu0+nk; +tau = tau0+bsxfun(@times,s,nk)+bsxfun(@times,bsxfun(@minus,xbar,m0).^2,kappa0*nk./(kappa0+nk)); + +model.alpha = alpha; +model.kappa = kappa; +model.m = m; +model.nu = nu; +model.tau = tau; + +function bound = vbound(X, model, prior) +alpha0 = prior.alpha; % Dirichet prior +kappa0 = prior.kappa; % piror of Gaussian mean +m0 = prior.m; % piror of Gaussian mean +nu0 = prior.nu; % 1d Wishart +tau0 = prior.tau; % 1d Wishart + +alpha = model.alpha; +kappa = model.kappa; +m = model.m; +nu = model.nu; +tau = model.tau; + +logR = model.logR; +R = model.R; + +[d,k] = size(m); + +nk = sum(R,1); +logw = psi(0,alpha)-psi(0,sum(alpha)); + +Epz = nk*logw'; +Eqz = R(:)'*logR(:); +logCalpha0 = gammaln(k*alpha0)-k*gammaln(alpha0); +Epw = logCalpha0+(alpha0-1)*sum(logw); +logCalpha = gammaln(sum(alpha))-sum(gammaln(alpha)); +Eqw = sum((alpha-1).*logw)+logCalpha; + +loglambda = bsxfun(@plus,-log(tau/2),psi(0,nu/2)); +aib = bsxfun(@times,1./tau,nu); +mm0 = bsxfun(@minus,m,m0).^2; +Epmu = 0.5*(sum(loglambda(:))+d*(k*log(kappa0/(2*pi))-sum(kappa0./kappa))-kappa0*(aib(:)'*mm0(:))); +Eqmu = 0.5*(d*(sum(log(kappa))-k*log(2*pi)-k)+sum(loglambda(:))); + +Eplambda = k*d*(nu0*log(tau0/2)/2-gammaln(nu0/2))+(nu0/2-1)*sum(loglambda(:))-tau0*sum(aib(:))/2; +Eqlambda = -d*sum(gammaln(nu/2))+d*sum((nu/2-1).*psi(0,nu/2))+sum(log(tau(:)/2))-d*sum(nu/2); + +R = bsxfun(@times,R,1./nk); +xbar = X*R; +s = X.^2*R-xbar.^2; +EpX = 0.5*sum(loglambda-1./repmat(kappa,d,1)-log(2*pi)-aib.*s-aib.*(xbar-m).^2,1)*nk'; + +bound = Epz-Eqz+Epw-Eqw+Epmu-Eqmu+Eplambda-Eqlambda+EpX; diff --git a/other/Mixture/vbkmeans.m b/other/Mixture/vbkmeans.m new file mode 100644 index 0000000..414ca94 --- /dev/null +++ b/other/Mixture/vbkmeans.m @@ -0,0 +1,178 @@ +function [label, model, bound] = vbkmeans(X, init, prior) +% Perform variational Bayesian inference for isotropic Gaussian mixture. +% X: d x n data matrix +% init: k (1 x 1) or label (1 x n, 1<=label(i)<=k) or center (d x k) +% Written by Michael Chen (sth4nth@gmail.com). + + +fprintf('Variantional Bayeisn Kmeans: running ... \n'); +[d,n] = size(X); +if nargin < 3 + prior.alpha = 1; % noninformative setting of Dirichet prior + prior.kappa = 1; % noninformative setting of Gassian prior of Gaussian mean ? 
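% (Hedged gloss: unlike vbsgm later in this patch, this model shares one
%  precision lambda across all components, so the Gamma shape is fixed once
%  as model.nu = prior.nu + d*n a few lines below; all n*d scalar residuals
%  inform the single lambda, and only tau is updated per iteration.)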
+ prior.m = mean(X,2); % when prior.kappa = 0 it doesnt matter how to set this + prior.nu = 1; % noninformative setting of 1d Wishart + prior.tau = 1; % noninformative setting of 1d Wishart +end +model.nu = prior.nu+d*n; + +R = initialization(X,init); + +tol = 1e-8; +maxiter = 5000; +bound = -inf(1,maxiter); +converged = false; +t = 1; + +model.R = R; +model = vbmaximization(X,model,prior); +while ~converged && t < maxiter + t = t+1; + model = vbexpection(X,model); + model = vbmaximization(X,model,prior); + bound(t) = vbound(X,model,prior)/n; + converged = abs(bound(t)-bound(t-1)) < tol*abs(bound(t)); +end +bound = bound(2:t); +label = zeros(1,n); +[~,label(:)] = max(model.R,[],2); +[~,~,label] = unique(label); + +if converged + fprintf('Converged in %d steps.\n',t-1); +else + fprintf('Not converged in %d steps.\n',maxiter); +end + +% Done. + +function R = initialization(X, init) +[d,n] = size(X); +if length(init) == 1 % random initialization + k = init; + idx = randsample(n,k); + m = X(:,idx); + [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); + [u,~,label] = unique(label); + while k ~= length(u) + idx = randsample(n,k); + m = X(:,idx); + [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); + [u,~,label] = unique(label); + end + R = full(sparse(1:n,label,1,n,k,n)); +elseif size(init,1) == 1 && size(init,2) == n % initialize with labels + label = init; + k = max(label); + R = full(sparse(1:n,label,1,n,k,n)); +elseif size(init,1) == d %initialize with only centers + k = size(init,2); + m = init; + [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); + R = full(sparse(1:n,label,1,n,k,n)); +else + error('ERROR: init is not valid.'); +end + +% Done. +% update latent variables +function model = vbexpection(X, model) +alpha = model.alpha; +kappa = model.kappa; +m = model.m; +nu = model.nu; +tau = model.tau; + +d = size(X,1); + +logw = psi(0,alpha)-psi(0,sum(alpha)); +loglambda = psi(0,nu/2)-log(tau/2); + +M = bsxfun(@plus,sqdistance(X,m)*nu/tau,d./kappa); +c = d*(loglambda-log(2*pi))/2; +logRho = bsxfun(@plus,M/(-2),logw+c); + +% [~,idx] = max(logR,[],2); +% logR = logR(:,unique(idx)); % remove empty components!!! + +logR = bsxfun(@minus,logRho,logsumexp(logRho,2)); +R = exp(logR); + +model.logR = logR; +model.R = R; + +% Done. 
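% The E-step above normalizes responsibilities in the log domain via
% logsumexp. A minimal, self-contained sketch of that normalization
% (illustrative; mirrors what logsumexp(logRho,2) computes):
logRho = randn(5,3);               % fake unnormalized log responsibilities
mx = max(logRho,[],2);
T = mx+log(sum(exp(bsxfun(@minus,logRho,mx)),2)); % log sum_k exp(logRho)
R = exp(bsxfun(@minus,logRho,T));  % each row of R now sums to 1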
+% update the parameters +function model = vbmaximization(X, model, prior) +alpha0 = prior.alpha; % Dirichet prior +kappa0 = prior.kappa; % piror of Gaussian mean +m0 = prior.m; % piror of Gaussian mean +tau0 = prior.tau; % 1d Wishart + +R = model.R; + +% Dirichlet +nk = sum(R,1); +alpha = alpha0+nk; +% Gaussian +kappa = kappa0+nk; +xbar = bsxfun(@times,X*R,1./nk); +m = bsxfun(@times,bsxfun(@plus,kappa0*m0,bsxfun(@times,xbar,nk)),1./kappa); +% 1d Wishart +Q = sqdistance(X,xbar); +tau = tau0+dot(Q(:),R(:))+dot((kappa0*nk./(kappa0+nk)),sqdistance(m0,xbar)); + +model.alpha = alpha; +model.kappa = kappa; +model.m = m; +model.tau = tau; + + +function bound = vbound(X, model, prior) +alpha0 = prior.alpha; % Dirichet prior +kappa0 = prior.kappa; % piror of Gaussian mean +m0 = prior.m; % piror of Gaussian mean +nu0 = prior.nu; % 1d Wishart +tau0 = prior.tau; % 1d Wishart + +alpha = model.alpha; +kappa = model.kappa; +m = model.m; +nu = model.nu; +tau = model.tau; + +logR = model.logR; +R = model.R; + +[d,k] = size(m); + +nk = sum(R,1); +logw = psi(0,alpha)-psi(0,sum(alpha)); + +Epz = nk*logw'; +Eqz = R(:)'*logR(:); +logCalpha0 = gammaln(k*alpha0)-k*gammaln(alpha0); +Epw = logCalpha0+(alpha0-1)*sum(logw); +logCalpha = gammaln(sum(alpha))-sum(gammaln(alpha)); +Eqw = sum((alpha-1).*logw)+logCalpha; + +loglambda = psi(0,nu/2)-log(tau/2); +aib = nu./tau; +Q = bsxfun(@minus,m,m0); +Epmu = 0.5*(d*(k*log(kappa0/(2*pi))+k*loglambda-sum(kappa0./kappa))-kappa0*aib*dot(Q(:),Q(:))); +Eqmu = 0.5*d*(k*loglambda+sum(log(kappa))-k*log(2*pi)-k); + +Eplambda = k*(nu0/2*log(tau0/2)-gammaln(nu0/2))+(nu0/2-1)*sum(loglambda)-tau0*aib/2; +Eqlambda = -gammaln(nu/2)+(nu/2-1)*psi(0,nu/2)+log(tau/2)-nu/2; + +xbar = bsxfun(@times,X*R,1./nk); +s = dot(sqdistance(X,xbar),R,1)./(d*nk); + +r = xbar-m; +EpX = 0.5*(d*(loglambda-1./kappa-log(2*pi)-aib.*s)-aib.*dot(r,r,1))*nk'; + +bound = Epz-Eqz+Epw-Eqw+Epmu-Eqmu+Eplambda-Eqlambda+EpX; + + +function D = sqdistance(A, B) +D = (-2)*(A'*B)+bsxfun(@plus,dot(B,B,1),dot(A,A,1)'); \ No newline at end of file diff --git a/other/Mixture/vbsgm.m b/other/Mixture/vbsgm.m new file mode 100644 index 0000000..b28cf45 --- /dev/null +++ b/other/Mixture/vbsgm.m @@ -0,0 +1,181 @@ +function [label, model, bound] = vbsgm(X, init, prior) +% Perform variational Bayesian inference for spherical (isotropic) Gaussian mixture. +% X: d x n data matrix +% init: k (1 x 1) or label (1 x n, 1<=label(i)<=k) or center (d x k) +% Written by Michael Chen (sth4nth@gmail.com). + +fprintf('Variational Bayesian spherical Gaussian mixture: running ... \n'); +n = size(X,2); +if nargin < 3 + prior.alpha = 1; % noninformative setting of Dirichet prior + prior.kappa = 1; % noninformative setting of Gassian prior of Gaussian mean ? 
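% (Hedged gloss: the pair (nu, tau) parameterizes a 1-d Wishart, i.e. a
%  Gamma(nu/2, tau/2) posterior on each component precision lambda, so
%  E[lambda] = nu./tau and E[log lambda] = psi(nu/2) - log(tau/2), which is
%  exactly what aib and loglambda compute below.)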
+ prior.m = mean(X,2); % when prior.kappa = 0 it doesnt matter how to set this + prior.nu = 1; % noninformative setting of 1d Wishart + prior.tau = 1; % noninformative setting of 1d Wishart +end +R = initialization(X,init); + +tol = 1e-8; +maxiter = 1000; +bound = -inf(1,maxiter); +converged = false; +t = 1; + +model.R = R; +model = vbmaximization(X,model,prior); +while ~converged && t < maxiter + t = t+1; + model = vbexpection(X,model); + model = vbmaximization(X,model,prior); + bound(t) = vbound(X,model,prior)/n; + converged = abs(bound(t)-bound(t-1)) < tol*abs(bound(t)); +end +bound = bound(2:t); +label = zeros(1,n); +[~,label(:)] = max(model.R,[],2); +[~,~,label] = unique(label); + +if converged + fprintf('Converged in %d steps.\n',t-1); +else + fprintf('Not converged in %d steps.\n',maxiter); +end + +% Done. + +function R = initialization(X, init) +[d,n] = size(X); +if length(init) == 1 % random initialization + k = init; + idx = randsample(n,k); + m = X(:,idx); + [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); + [u,~,label] = unique(label); + while k ~= length(u) + idx = randsample(n,k); + m = X(:,idx); + [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); + [u,~,label] = unique(label); + end + R = full(sparse(1:n,label,1,n,k,n)); +elseif size(init,1) == 1 && size(init,2) == n % initialize with labels + label = init; + k = max(label); + R = full(sparse(1:n,label,1,n,k,n)); +elseif size(init,1) == d %initialize with only centers + k = size(init,2); + m = init; + [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); + R = full(sparse(1:n,label,1,n,k,n)); +else + error('ERROR: init is not valid.'); +end + +% Done. +% update latent variables +function model = vbexpection(X, model) +alpha = model.alpha; +kappa = model.kappa; +m = model.m; +nu = model.nu; +tau = model.tau; + +d = size(X,1); + +logw = psi(0,alpha)-psi(0,sum(alpha)); +loglambda = psi(0,nu/2)-log(tau/2); + +M = bsxfun(@times,sqdistance(X,m),nu./tau); +M = bsxfun(@plus,M,d./kappa); +c = d*(loglambda-log(2*pi))/2; +logRho = bsxfun(@plus,M/(-2),logw+c); + +% [~,idx] = max(logR,[],2); +% logR = logR(:,unique(idx)); % remove empty components!!! + +logR = bsxfun(@minus,logRho,logsumexp(logRho,2)); +R = exp(logR); + +model.logR = logR; +model.R = R; + +% Done. +% update the parameters. 
+function model = vbmaximization(X, model, prior) +alpha0 = prior.alpha; % Dirichet prior +kappa0 = prior.kappa; % piror of Gaussian mean +m0 = prior.m; % piror of Gaussian mean +nu0 = prior.nu; % 1d Wishart +tau0 = prior.tau; % 1d Wishart + +R = model.R; +d = size(X,1); + +% Dirichlet +nk = sum(R,1); +alpha = alpha0+nk; +% Gaussian +kappa = kappa0+nk; +xbar = bsxfun(@times,X*R,1./nk); +m = bsxfun(@times,bsxfun(@plus,kappa0*m0,bsxfun(@times,xbar,nk)),1./kappa); +% 1d Wishart +nu = nu0+d*nk; +tau = tau0+dot(sqdistance(X,xbar),R,1)+(kappa0*nk./(kappa0+nk)).*sqdistance(m0,xbar); + +model.alpha = alpha; +model.kappa = kappa; +model.m = m; +model.nu = nu; +model.tau = tau; + +function bound = vbound(X, model, prior) +alpha0 = prior.alpha; % Dirichet prior +kappa0 = prior.kappa; % piror of Gaussian mean +m0 = prior.m; % piror of Gaussian mean +nu0 = prior.nu; % 1d Wishart +tau0 = prior.tau; % 1d Wishart + +alpha = model.alpha; +kappa = model.kappa; +m = model.m; +nu = model.nu; +tau = model.tau; + +logR = model.logR; +R = model.R; + +[d,k] = size(m); + +nk = sum(R,1); +logw = psi(0,alpha)-psi(0,sum(alpha)); + +Epz = nk*logw'; +Eqz = R(:)'*logR(:); +logCalpha0 = gammaln(k*alpha0)-k*gammaln(alpha0); +Epw = logCalpha0+(alpha0-1)*sum(logw); +logCalpha = gammaln(sum(alpha))-sum(gammaln(alpha)); +Eqw = dot((alpha-1),logw)+logCalpha; + +loglambda = psi(0,nu/2)-log(tau/2); +aib = nu./tau; +% Epmu = 0.5*sum(d*(log(kappa0/(2*pi))+loglambda-kappa0./kappa)-kappa0*aib.*sum(bsxfun(@minus,m,m0).^2,1)); +r = bsxfun(@minus,m,m0); +Epmu = 0.5*(d*(k*log(kappa0/(2*pi))+sum(loglambda)-sum(kappa0./kappa))-dot(kappa0*aib,dot(r,r,1))); +Eplambda = k*(nu0*log(tau0/2)/2-gammaln(nu0/2))+(nu0/2-1)*sum(loglambda)-tau0*sum(aib)/2; +Epthalpha = Epmu+Eplambda; + +Eqmu = 0.5*d*(sum(loglambda)+sum(log(kappa))-k*log(2*pi)-k); +Eqlambda = -sum(gammaln(nu/2))+sum((nu/2-1).*psi(0,nu/2))+sum(log(tau/2))-sum(nu/2); +Eqthalpha = Eqmu+Eqlambda; + +xbar = bsxfun(@times,X*R,1./nk); +s = dot(sqdistance(X,xbar),R,1)./(d*nk); + +r = xbar-m; +EpX = 0.5*(d*(loglambda-1./kappa-log(2*pi)-aib.*s)-aib.*dot(r,r,1))*nk'; + +bound = Epz-Eqz+Epw-Eqw+Epthalpha-Eqthalpha+EpX; + + +function D = sqdistance(A, B) +D = (-2)*(A'*B)+bsxfun(@plus,dot(B,B,1),dot(A,A,1)'); diff --git a/other/Mixture/vbtsbgm.m b/other/Mixture/vbtsbgm.m new file mode 100644 index 0000000..36a5b92 --- /dev/null +++ b/other/Mixture/vbtsbgm.m @@ -0,0 +1,225 @@ +function [label, model, L] = vbtsbgm(X, init, prior) +% Perform variational Bayesian inference for trancated stick breaking Gaussian mixture. +% X: d x n data matrix +% init: k (1 x 1) or label (1 x n, 1<=label(i)<=k) or center (d x k) +% Reference: Pattern Recognition and Machine Learning by Christopher M. Bishop (P.474) +% Written by Michael Chen (sth4nth@gmail.com). + +fprintf('Variational trancated stick breaking Gaussian mixture: running ... 
\n'); +[d,n] = size(X); +if nargin < 3 + prior.alpha = 1; + prior.beta = 1; + prior.kappa = 1; + prior.m = mean(X,2); + prior.nu = d+1; + prior.M = eye(d); % M = inv(W) +end +tol = 1e-10; +maxiter = 1000; +L = -inf(1,maxiter); +converged = false; +t = 1; + +model.R = initialization(X,init); +while ~converged && t < maxiter + t = t+1; + model = vmax(X, model, prior); + model = vexp(X, model); + L(t) = vbound(X,model,prior)/n; + converged = abs(L(t)-L(t-1)) < tol*abs(L(t)); +end +L = L(2:t); +label = zeros(1,n); +[~,label(:)] = max(model.R,[],2); +[~,~,label] = unique(label); +if converged + fprintf('Converged in %d steps.\n',t-1); +else + fprintf('Not converged in %d steps.\n',maxiter); +end + +function R = initialization(X, init) +[d,n] = size(X); +if length(init) == 1 % random initialization + k = init; + idx = randsample(n,k); + m = X(:,idx); + [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); + [u,~,label] = unique(label); + while k ~= length(u) + idx = randsample(n,k); + m = X(:,idx); + [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); + [u,~,label] = unique(label); + end + R = full(sparse(1:n,label,1,n,k,n)); +elseif size(init,1) == 1 && size(init,2) == n % initialize with labels + label = init; + k = max(label); + R = full(sparse(1:n,label,1,n,k,n)); +elseif size(init,1) == d %initialize with only centers + k = size(init,2); + m = init; + [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); + R = full(sparse(1:n,label,1,n,k,n)); +else + error('ERROR: init is not valid.'); +end +% Done +function model = vmax(X, model, prior) +alpha0 = prior.alpha; +beta0 = prior.beta; +kappa0 = prior.kappa; +m0 = prior.m; +nu0 = prior.nu; +M0 = prior.M; +R = model.R; + +nk = sum(R,1); % 10.51 +k = length(nk); + +alpha = alpha0+nk(1:k-1); +ngk = sum(tril(repmat(nk(:),1,k)))-nk; +beta = beta0+ngk(1:k-1); +nxbar = X*R; +kappa = kappa0+nk; % 10.60 +m = bsxfun(@times,bsxfun(@plus,kappa0*m0,nxbar),1./kappa); % 10.61 +nu = nu0+nk; % 10.63 + +[d,k] = size(m); +M = zeros(d,d,k); +sqrtR = sqrt(R); + +xbar = bsxfun(@times,nxbar,1./nk); % 10.52 +xbarm0 = bsxfun(@minus,xbar,m0); +w = (kappa0*nk./(kappa0+nk)); +for i = 1:k + Xs = bsxfun(@times,bsxfun(@minus,X,xbar(:,i)),sqrtR(:,i)'); + xbarm0i = xbarm0(:,i); + M(:,:,i) = M0+Xs*Xs'+w(i)*(xbarm0i*xbarm0i'); % 10.62 +end + +model.alpha = alpha; +model.beta = beta; +model.kappa = kappa; +model.m = m; +model.nu = nu; +model.M = M; % Whishart: M = inv(W) +% Done +function model = vexp(X, model) +alpha = model.alpha; +beta = model.beta; +kappa = model.kappa; % Gaussian +m = model.m; % Gasusian +nu = model.nu; % Whishart +M = model.M; % Whishart: inv(W) = V'*V + +n = size(X,2); +[d,k] = size(m); +Elogpi = zeros(1,k); + +Elogv = psi(0,alpha)-psi(0,alpha+beta); % E[ln(v)] +Elogu = psi(0,beta)-psi(0,alpha+beta); % E[ln(1-v)] +Elogpi(1:k-1) = Elogv+cumsum(Elogu)-Elogu; +Elogpi(k) = sum(Elogu); + +logW = zeros(1,k); +EQ = zeros(n,k); +for i = 1:k + U = chol(M(:,:,i)); + logW(i) = -2*sum(log(diag(U))); + Q = (U'\bsxfun(@minus,X,m(:,i))); + EQ(:,i) = d/kappa(i)+nu(i)*dot(Q,Q,1); % 10.64 +end + +ElogLambda = sum(psi(0,bsxfun(@minus,nu+1,(1:d)')/2),1)+d*log(2)+logW; % 10.65 + +logRho = (bsxfun(@minus,EQ,2*Elogpi+ElogLambda-d*log(2*pi)))/(-2); % 10.46 +logR = bsxfun(@minus,logRho,logsumexp(logRho,2)); % 10.49 +R = exp(logR); + +model.logR = logR; +model.R = R; + + + +% TBD +function L = vbound(X, model, prior) +alpha0 = prior.alpha; +beta0 = prior.beta; +kappa0 = prior.kappa; +m0 = prior.m; +nu0 = prior.nu; +M0 = prior.M; + +alpha = model.alpha; +beta = model.beta; 
+kappa = model.kappa; % Gaussian
+m = model.m; % Gaussian
+nu = model.nu; % Wishart
+M = model.M; % Wishart: inv(W) = V'*V
+R = model.R;
+logR = model.logR;
+
+[d,k] = size(m);
+nk = sum(R,1); % 10.51
+
+% terms for the stick-breaking weights v_i ~ Beta(alpha0,beta0), using the
+% standard Beta expectations (cf. Elogv/Elogu in vexp above)
+Elogv = psi(0,alpha)-psi(0,alpha+beta); % E[ln(v)]
+Elogu = psi(0,beta)-psi(0,alpha+beta); % E[ln(1-v)]
+Elogpi = zeros(1,k);
+Elogpi(1:k-1) = Elogv+cumsum(Elogu)-Elogu;
+Elogpi(k) = sum(Elogu);
+
+Epz = dot(nk,Elogpi);
+Eqz = dot(R(:),logR(:));
+logC0 = gammaln(alpha0+beta0)-(gammaln(alpha0)+gammaln(beta0));
+Epv = (k-1)*logC0+(alpha0-1)*sum(Elogv)+(beta0-1)*sum(Elogu);
+logC = gammaln(alpha+beta)-(gammaln(alpha)+gammaln(beta));
+Eqv = sum(logC)+dot(alpha-1,Elogv)+dot(beta-1,Elogu);
+L = Epz-Eqz+Epv-Eqv;
+
+U0 = chol(M0);
+sqrtR = sqrt(R);
+xbar = bsxfun(@times,X*R,1./nk); % 10.52
+
+logW = zeros(1,k);
+trSW = zeros(1,k);
+trM0W = zeros(1,k);
+xbarmWxbarm = zeros(1,k);
+mm0Wmm0 = zeros(1,k);
+for i = 1:k
+    U = chol(M(:,:,i));
+    logW(i) = -2*sum(log(diag(U)));
+
+    Xs = bsxfun(@times,bsxfun(@minus,X,xbar(:,i)),sqrtR(:,i)');
+    V = chol(Xs*Xs'/nk(i));
+    Q = V/U;
+    trSW(i) = dot(Q(:),Q(:)); % equivalent to tr(SW)=trace(S/M)
+    Q = U0/U;
+    trM0W(i) = dot(Q(:),Q(:));
+
+    q = U'\(xbar(:,i)-m(:,i));
+    xbarmWxbarm(i) = dot(q,q);
+    q = U'\(m(:,i)-m0);
+    mm0Wmm0(i) = dot(q,q);
+end
+
+ElogLambda = sum(psi(0,bsxfun(@minus,nu+1,(1:d)')/2),1)+d*log(2)+logW; % 10.65
+Epmu = sum(d*log(kappa0/(2*pi))+ElogLambda-d*kappa0./kappa-kappa0*(nu.*mm0Wmm0))/2;
+logB0 = nu0*sum(log(diag(U0)))-0.5*nu0*d*log(2)-logmvgamma(0.5*nu0,d);
+EpLambda = k*logB0+0.5*(nu0-d-1)*sum(ElogLambda)-0.5*dot(nu,trM0W);
+
+Eqmu = 0.5*sum(ElogLambda+d*log(kappa/(2*pi)))-0.5*d*k;
+logB = -nu.*(logW+d*log(2))/2-logmvgamma(0.5*nu,d);
+EqLambda = 0.5*sum((nu-d-1).*ElogLambda-nu*d)+sum(logB);
+
+EpX = 0.5*dot(nk,ElogLambda-d./kappa-nu.*trSW-nu.*xbarmWxbarm-d*log(2*pi));
+
+L = L+Epmu-Eqmu+EpLambda-EqLambda+EpX;
\ No newline at end of file
diff --git a/other/SpectralCluster/affinity.m b/other/SpectralCluster/affinity.m
new file mode 100644
index 0000000..94a14fd
--- /dev/null
+++ b/other/SpectralCluster/affinity.m
@@ -0,0 +1,31 @@
+function W = affinity(X, sigma, k)
+% Construct the affinity matrix of a connected undirected graph.
+% Wij=exp(-|xi-xj|^2/(2*Sigma))
+% Written by Michael Chen (sth4nth@gmail.com).
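% Usage sketch (illustrative; X is a d x n matrix with samples as columns):
%   W = affinity(X);         % dense Gaussian affinity; sigma defaults to the
%                            % mean squared norm of the centered data
%   W = affinity(X, 1, 10);  % sigma = 1, keep 10 nearest neighbors (sparse,
%                            % symmetrized)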
+X = bsxfun(@minus,X,mean(X,2)); +S = dot(X,X,1); +if nargin < 3 + k = 0; +end +if nargin < 2 + sigma = mean(S); +end + +n = size(X,2); +D = (-2)*(X'*X)+bsxfun(@plus,S,S'); + +if k == 0 + W = exp(D/((-2)*sigma)); + W(sub2ind([n,n],1:n,1:n)) = 0; % remove diagonal +else + [ND, NI] = sort(D); + ND = ND(2:k+1,:); + NI = NI(2:k+1,:); + XI = repmat(1:n,k,1); + W = sparse(XI(:),NI(:),exp(ND(:)/((-2)*sigma)),n,n); + W = max(W,W'); % force symmetry (not necessary for digraph) +end + + + + diff --git a/other/SpectralCluster/bncut.m b/other/SpectralCluster/bncut.m new file mode 100644 index 0000000..b365766 --- /dev/null +++ b/other/SpectralCluster/bncut.m @@ -0,0 +1,19 @@ +function [y, ratio] = bncut(W) +% Bipartitioning normalized cut +mincut = 1; % minimal number of nodes to be cut off + +n = size(W,2); +[L,d] = laplacian(W,'n'); +V = symeig(L,2)'; +%% +[~,idx] = sort(V(2,:)./sqrt(d)); +Vol_A = cumsum(d(idx)); +Vol_B = sum(d)-Vol_A; + +S = triu(W(idx,idx)); +W_AB = full(cumsum(sum(S'-S,1))); + +ratios = W_AB.*(1./Vol_A+1./Vol_B)/2; +[ratio,cut] = min(ratios(mincut:n-mincut)); +y = true(1,n); +y(idx(1:cut+mincut-1)) = false; diff --git a/other/SpectralCluster/discretize.m b/other/SpectralCluster/discretize.m new file mode 100644 index 0000000..3851bbd --- /dev/null +++ b/other/SpectralCluster/discretize.m @@ -0,0 +1,118 @@ +function label = discretize(V, d, m) +% Perform discretization on relaxed real value solution of spectral clustering +% V: k x n eigenvectors +% d: 1 x n degree vector +% Written by Michael Chen (sth4nth@gmail.com). +if nargin < 3 + m = 1; +end +switch m + case 1 + label = ys(V,d); + case 2 + label = njw(V); + case 3 + label = bj(V,d); + case 4 + label = zj(V(2:end,:),d); + otherwise + error('The parameter value of m is not supported.'); +end + +function label = ys(X, d) % Multiclass Spectral Clustering by S.Yu & J.Shi +[k,n] = size(X); +X = bsxfun(@times,X,1./sqrt(d+eps)); +X = normalize(X); +idx = initialize(X); +R = X(:,idx); +% s = inf; +% while true +% X = R'*X; +% [~,label] = max(X,[],1); +% [U,S,V] = svd(X*full(sparse(1:n,label,1,n,k,n))); +% +% l = s; +% s = trace(S); +% if abs(s-l) < eps; break; end; +% R = U*V'; +% end +X = R'*X; +[~,label] = max(X,[],1); +last = 0; +while any(label ~= last) + [U,~,V] = svd(X*full(sparse(1:n,label,1,n,k,n))); + R = U*V'; + X = R'*X; + last = label; + [~,label] = max(X,[],1); +end + + +function label = njw(X) % On Spectral Clustering by A.Y.Ng, M.I.Jordan & Y.Weiss +X = normalize(X); +idx = initialize(X); +label = wkmeans(X,idx,1); % standard kmeans. + +function label = bj(X, d) % Learning Spectral Clustering by F.R.Bach & M.I.Jordans +X = bsxfun(@times,X,1./sqrt(d+eps)); +idx = initialize(X); +label = wkmeans(X,idx,d); + +function label = zj(X, d) % Multiway Spectral Clustering by Z.Zhang & M.I.Jordan +k = size(X,1)+1; +n = size(X,2); +G = eye(k,k-1)-repmat(1./k,k,k-1); +w = 1./sqrt(d+eps); +idx = initialize(X); +R = normalize(X(:,idx)); +% s = inf; +% while true +% Y = bsxfun(@times,R'*X,w); +% [~,label] = max([Y;zeros(1,n)],[],1); +% [U,S,V]=svd(X*full(sparse(1:n,label,1,n,k,n))*G); +% +% l = s; +% s = trace(S); +% if abs(s-l) < eps; break; end; +% R = U*V'; +% end +Y = bsxfun(@times,R'*X,w); +[~,label] = max([Y;zeros(1,n)],[],1); +last = 0; +while any(label ~= last) + [U,~,V]=svd(X*full(sparse(1:n,label,1,n,k,n))*G); + R = U*V'; + Y = bsxfun(@times,R'*X,w); + last = label; + [~,label] = max([Y;zeros(1,n)],[],1); +end + +function idx = initialize(X) +% Choose k approximately orthogonal samples. 
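% (Hedged gloss: after normalizing the columns, the loop below accumulates
%  each sample's absolute cosine similarity to the seeds chosen so far and
%  takes the minimizer, greedily approximating a mutually orthogonal set.)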
+[k,n] = size(X); +X = normalize(X); +idx = zeros(1,k); +idx(1) = ceil(n*rand); +c = zeros(1,n); +for i = 2:k + c = c+abs(X(:,idx(i-1))'*X); + [~,idx(i)] = min(c); +end + +function X = normalize(X) +% Normalize column vectors. +X = bsxfun(@times,X,1./sqrt(dot(X,X,1))); + +function label = wkmeans(X, init, w) +% Perform weighted k-means initialized by centers. +[k,n] = size(X); +m = X(:,init); +[~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); +last = 0; +while any(label ~= last) + E = sparse(1:n,label,w,n,k,n); + m = bsxfun(@times,X*E,1./full(sum(E,1))); + last = label; + [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); +end + diff --git a/other/SpectralCluster/knn.m b/other/SpectralCluster/knn.m new file mode 100644 index 0000000..720bd0c --- /dev/null +++ b/other/SpectralCluster/knn.m @@ -0,0 +1,7 @@ +function [D, N] = knn(X, Y, k) +% Find k nearest neighbors in Y of each sample in X. +% Written by Michael Chen (sth4nth@gmail.com). +D = sqdistance(Y, X); +[D, N] = sort(D); +N = N(2:(1+k),:); +D = D(2:(1+k),:); diff --git a/other/SpectralCluster/laplacian.m b/other/SpectralCluster/laplacian.m new file mode 100644 index 0000000..a41b309 --- /dev/null +++ b/other/SpectralCluster/laplacian.m @@ -0,0 +1,36 @@ +function [L, d] = laplacian(W, m) +% Compute (normalized) Laplacian matrix from an affinity matrix of an undirected graph. +% input: +% W: a symmetric adjacent matrix of a undirected graph +% m: m == 'u' construct unnormalized Laplacian L=D-W +% m == 'n' construct nomalized Laplacian L=I-D^(-1/2)*W*D^(-1/2) +% Written by Michael Chen (sth4nth@gmail.com). +if nargin == 1 + m = 'u'; +end + +n = size(W,2); +d = sum(W,1); +if issparse(W) + switch m + case 'u' + L = spdiags(d(:),0,n,n)-W; + case 'n' + r = spdiags(sqrt(1./d(:)),0,n,n); + L = speye(n)-r*W*r; + L = (L+L')/2; + otherwise + error('The parameter is not supported.'); + end + d = full(d); +else + switch m + case 'u' + L = diag(d)-W; + case 'n' + r = sqrt(1./d); + L = eye(n)-(r'*r).*W; + otherwise + error('The parameter is not supported.'); + end +end \ No newline at end of file diff --git a/other/SpectralCluster/mncut.m b/other/SpectralCluster/mncut.m new file mode 100644 index 0000000..c7fdf4b --- /dev/null +++ b/other/SpectralCluster/mncut.m @@ -0,0 +1,11 @@ +function label = mncut(W, c, m) +% Multiway normailized cut +% W: symetric affinity matrix +% c: number of clusters +% m: {1,2,3,4} method for discretization +if nargin < 3 + m = 1; +end +[L,d] = laplacian(W,'n'); +V = symeig(L,c)'; +label = discretize(V,d,m); \ No newline at end of file diff --git a/other/SpectralCluster/sc.m b/other/SpectralCluster/sc.m new file mode 100644 index 0000000..89e67a2 --- /dev/null +++ b/other/SpectralCluster/sc.m @@ -0,0 +1,15 @@ +function label = sc(X, k, opt) +% Perform multiclass spectral clustering (normalized cut). +% Written by Michael Chen (sth4nth@gmail.com). +if nargin < 3 + sigma = 1; + nnn = 0; + m = 1; +else + sigma = fieldvalue(opt,'sigma',1); + nnn = fieldvalue(opt,'nnn',0); % number of nearest neighbors + m = fieldvalue(opt,'method',1); +end + +W = affinity(standardize(X),sigma,nnn); +label = mncut(W,k,m); \ No newline at end of file diff --git a/other/SpectralCluster/transition.m b/other/SpectralCluster/transition.m new file mode 100644 index 0000000..a306a61 --- /dev/null +++ b/other/SpectralCluster/transition.m @@ -0,0 +1,10 @@ +function P = transition(W) +% Compute a transition matrix from an affinity matrix. +% Written by Michael Chen (sth4nth@gmail.com). 
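% (Hedged gloss: this computes the random-walk transition matrix P = D\W,
%  i.e. each row of W divided by its degree; note the sparse branch below
%  assumes n = size(W,1) has been defined for the spdiags call.)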
+if issparse(W) + P = spdiags(1./sum(W,2),0,n,n)*W; +else + P = bsxfun(@times,W,1./sum(W,2)); +end + + diff --git a/other/cempca.m b/other/cempca.m new file mode 100644 index 0000000..95109e4 --- /dev/null +++ b/other/cempca.m @@ -0,0 +1,26 @@ +function V = cempca(X, p) +% Perform Constrained EM like algorithm for PCA. +% X: d x n data matrix +% p: dimension of target space +% Reference: A Constrained EM Algorithm for Principal Component Analysis by Jong-Hoon Ahn & Jong-Hoon Oh +% Written by Michael Chen (sth4nth@gmail.com). + +[d,n] = size(X); +X = bsxfun(@minus,X,mean(X,2)); +W = rand(d,p); + +tol = 1e-8; +error = inf; +last = inf; +t = 0; +while ~(abs(last-error) 0 + error('ERROR: the matrix is not positive definite.'); +end +V = R\(R'\I); \ No newline at end of file diff --git a/other/kn2sd.m b/other/kn2sd.m new file mode 100644 index 0000000..20650f0 --- /dev/null +++ b/other/kn2sd.m @@ -0,0 +1,5 @@ +function D = kn2sd(K) +% Transform a kernel matrix (or inner product matrix) to a square distance matrix +% Written by Michael Chen (sth4nth@gmail.com). +d = diag(K); +D = -2*K+bsxfun(@plus,d,d'); diff --git a/other/logdet.m b/other/logdet.m new file mode 100644 index 0000000..8cf1dc7 --- /dev/null +++ b/other/logdet.m @@ -0,0 +1,9 @@ +function y = logdet(A) +% Compute log(det(A)) where A is positive definite. +% Written by Michael Chen (sth4nth@gmail.com). +[R,p] = chol(A); +if p > 0 + y = -inf; +else + y = 2*sum(log(diag(R))); +end \ No newline at end of file diff --git a/other/loggmpdf.m b/other/loggmpdf.m new file mode 100644 index 0000000..a86af95 --- /dev/null +++ b/other/loggmpdf.m @@ -0,0 +1,27 @@ +function r = loggmpdf(X, model) + +mu = model.mu; +Sigma = model.Sigma; +w = model.weight; + +n = size(X,2); +k = size(mu,2); +logRho = zeros(k,n); + +for i = 1:k + logRho(i,:) = loggausspdf(X,mu(:,i),Sigma(:,:,i)); +end +r = logsumexp(bsxfun(@plus,logRho,log(w)'),1); + + +function y = loggausspdf(X, mu, Sigma) +d = size(X,1); +X = bsxfun(@minus,X,mu); +[U,p]= chol(Sigma); +if p ~= 0 + error('ERROR: Sigma is not PD.'); +end +Q = U'\X; +q = dot(Q,Q,1); % quadratic term (M distance) +c = d*log(2*pi)+2*sum(log(diag(U))); % normalization constant +y = -(c+q)/2; \ No newline at end of file diff --git a/other/logkdepdf.m b/other/logkdepdf.m new file mode 100644 index 0000000..9f3a27c --- /dev/null +++ b/other/logkdepdf.m @@ -0,0 +1,4 @@ +function r = logkdepdf(X, Y, sigma2) + +d = size(X,1); +r = logsumexp(sqdistance(Y,X)/(-2*sigma2)-(log(2*pi)+d*log(sigma2))/2,1); \ No newline at end of file diff --git a/other/plotgm.m b/other/plotgm.m new file mode 100644 index 0000000..02d7ac3 --- /dev/null +++ b/other/plotgm.m @@ -0,0 +1,26 @@ +function plotgm(X, model) +% Written by Michael Chen (sth4nth@gmail.com). +level = 64; +n = 256; + +spread(X); +x_range = xlim; +y_range = ylim; + +x = linspace(x_range(1),x_range(2), n); +y = linspace(y_range(2),y_range(1), n); + +[a,b] = meshgrid(x,y); +z = exp(loggmpdf([a(:)';b(:)'],model)); + +z = z-min(z); +z = floor(z/max(z)*(level-1)); + +figure; +image(reshape(z,n,n)); +colormap(jet(level)); +set(gca, 'XTick', [1 256]); +set(gca, 'XTickLabel', [min(x) max(x)]); +set(gca, 'YTick', [1 256]); +set(gca, 'YTickLabel', [min(y) max(y)]); +axis off diff --git a/other/plotkde.m b/other/plotkde.m new file mode 100644 index 0000000..eaee983 --- /dev/null +++ b/other/plotkde.m @@ -0,0 +1,33 @@ +function plotkde(X, sigma2) +% Written by Michael Chen (sth4nth@gmail.com). 
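% (Hedged gloss: plotkde standardizes X, then images
%  exp(logkdepdf(grid, X, sigma2)), a Gaussian kernel density with bandwidth
%  sigma2, rescaled to 64 colormap levels over a 256 x 256 grid, so only the
%  shape of the density is meaningful, not its absolute values.)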
+ +if nargin < 2 + sigma2 = 1e-1; +end +level = 64; +n = 256; + +X = standardize(X); + +spread(X); +x_range = xlim; +y_range = ylim; + +x = linspace(x_range(1),x_range(2), n); +y = linspace(y_range(2),y_range(1), n); + +[a,b] = meshgrid(x,y); + +z = exp(logkdepdf([a(:)';b(:)'],X,sigma2)); + +z = z-min(z); +z = floor(z/max(z)*(level-1)); + +figure; +image(reshape(z,n,n)); +colormap(jet(level)); +set(gca, 'XTick', [1 256]); +set(gca, 'XTickLabel', [min(x) max(x)]); +set(gca, 'YTick', [1 256]); +set(gca, 'YTickLabel', [min(y) max(y)]); +axis off diff --git a/other/sd2kn.m b/other/sd2kn.m new file mode 100644 index 0000000..8ddf59c --- /dev/null +++ b/other/sd2kn.m @@ -0,0 +1,7 @@ +function K = sd2kn(D) +% Transform a square distance matrix to a kernel matrix. +% The data are assumed to be centered, i.e., H=eye(n)-ones(n)/n; K=-(H*D*H)/2; +% Written by Michael Chen (sth4nth@gmail.com). +D = bsxfun(@minus,D,mean(D,1)); +D = bsxfun(@minus,D,mean(D,2)); +K = (D+D')/(-4); \ No newline at end of file diff --git a/other/symeig.m b/other/symeig.m new file mode 100644 index 0000000..c029259 --- /dev/null +++ b/other/symeig.m @@ -0,0 +1,16 @@ +function [V,A,flag] = symeig(S,d,m) +% Compute eigenvalues and eigenvectors of symmetric matrix +% m == 's' smallest (default) +% m == 'l' largest +% Written by Michael Chen (sth4nth@gmail.com). +if nargin == 2 + m = 's'; +end +opt.disp = 0; +opt.issym = 1; +opt.isreal = 1; +if any(m == 'ls') + [V,A,flag] = eigs(S,d,[m,'a'],opt); +else + error('The third parameter must be l or s.'); +end From 4d8d904f8a69b4defd8d11cfda6a8620ecc9333a Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 19 Jan 2016 15:37:33 +0800 Subject: [PATCH 130/149] remove some unrelated files --- chapter11/mixGaussGb.m | 8 +- other/KCluster/farseeds.m | 17 --- other/KCluster/kmeanspp.m | 33 ----- other/KCluster/kmedoids.m | 15 -- other/KCluster/kspheres.m | 30 ---- other/KCluster/rndseeds.m | 8 - other/KCluster/softseeds.m | 16 -- other/KCluster/spkmeans.m | 35 ----- other/KCluster/wkmeans.m | 37 ----- other/Mixture/emidgm.m | 81 ----------- other/Mixture/emkmeans.m | 82 ----------- other/Mixture/emsgm.m | 80 ---------- other/Mixture/vbdgm.m | 164 --------------------- other/Mixture/vbgm.m | 199 ------------------------- other/Mixture/vbigm.m | 179 ----------------------- other/Mixture/vbkmeans.m | 178 ----------------------- other/Mixture/vbsgm.m | 181 ----------------------- other/Mixture/vbtsbgm.m | 225 ----------------------------- other/SpectralCluster/affinity.m | 31 ---- other/SpectralCluster/bncut.m | 19 --- other/SpectralCluster/discretize.m | 118 --------------- other/SpectralCluster/knn.m | 7 - other/SpectralCluster/laplacian.m | 36 ----- other/SpectralCluster/mncut.m | 11 -- other/SpectralCluster/sc.m | 15 -- other/SpectralCluster/transition.m | 10 -- 26 files changed, 7 insertions(+), 1808 deletions(-) delete mode 100644 other/KCluster/farseeds.m delete mode 100644 other/KCluster/kmeanspp.m delete mode 100644 other/KCluster/kmedoids.m delete mode 100644 other/KCluster/kspheres.m delete mode 100644 other/KCluster/rndseeds.m delete mode 100644 other/KCluster/softseeds.m delete mode 100644 other/KCluster/spkmeans.m delete mode 100644 other/KCluster/wkmeans.m delete mode 100644 other/Mixture/emidgm.m delete mode 100644 other/Mixture/emkmeans.m delete mode 100644 other/Mixture/emsgm.m delete mode 100644 other/Mixture/vbdgm.m delete mode 100644 other/Mixture/vbgm.m delete mode 100644 other/Mixture/vbigm.m delete mode 100644 other/Mixture/vbkmeans.m delete mode 100644 other/Mixture/vbsgm.m 
delete mode 100644 other/Mixture/vbtsbgm.m delete mode 100644 other/SpectralCluster/affinity.m delete mode 100644 other/SpectralCluster/bncut.m delete mode 100644 other/SpectralCluster/discretize.m delete mode 100644 other/SpectralCluster/knn.m delete mode 100644 other/SpectralCluster/laplacian.m delete mode 100644 other/SpectralCluster/mncut.m delete mode 100644 other/SpectralCluster/sc.m delete mode 100644 other/SpectralCluster/transition.m diff --git a/chapter11/mixGaussGb.m b/chapter11/mixGaussGb.m index 8ff4ebd..acdc86a 100644 --- a/chapter11/mixGaussGb.m +++ b/chapter11/mixGaussGb.m @@ -3,4 +3,10 @@ % DPGM) - +[d,n] = size(X); +maxIter = 1000; +for iter = 1:maxIter + for i = randperm(n) + + end +end \ No newline at end of file diff --git a/other/KCluster/farseeds.m b/other/KCluster/farseeds.m deleted file mode 100644 index 8b42169..0000000 --- a/other/KCluster/farseeds.m +++ /dev/null @@ -1,17 +0,0 @@ -function m = farseeds(X, k) -% Find k farest samples as seeds for initializing clustering. -% X: d x n data matrix -% k: number of seeds -% Written by Michael Chen (sth4nth@gmail.com). -d = size(X,1); -m = zeros(d,k); -% idx = ceil(n.*rand); -[~,idx] = max(dot(X,X,1)); -m(:,1) = X(:,idx); -D = 0; -for i = 2:k - Y = bsxfun(@minus,X,m(:,i-1)); - D = D+sqrt(dot(Y,Y,1)); - [~,idx] = max(D); - m(:,i) = X(:,idx); -end diff --git a/other/KCluster/kmeanspp.m b/other/KCluster/kmeanspp.m deleted file mode 100644 index 4a87d29..0000000 --- a/other/KCluster/kmeanspp.m +++ /dev/null @@ -1,33 +0,0 @@ -function [label, energy] = kmeanspp(X, k) -% X: d x n data matrix -% k: number of seeds -% reference: k-means++: the advantages of careful seeding. David Arthur and Sergei Vassilvitskii -% Written by Michael Chen (sth4nth@gmail.com). -m = seeds(X,k); -[label, energy] = kmeans(X, m); - -function m = seeds(X, k) -[d,n] = size(X); -m = zeros(d,k); -v = inf(1,n); -m(:,1) = X(:,ceil(n*rand)); -for i = 2:k - Y = bsxfun(@minus,X,m(:,i-1)); - v = cumsum(min(v,dot(Y,Y,1))); - m(:,i) = X(:,find(rand < v/v(end),1)); -end - -function [label, energy] = kmeans(X, m) -n = size(X,2); -last = 0; -[~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); -while any(label ~= last) - [u,~,label] = unique(label); % remove empty clusters - k = length(u); - E = sparse(1:n,label,1,n,k,n); % transform label into indicator matrix - m = X*(E*spdiags(1./sum(E,1)',0,k,k)); % compute m of each cluster - last = label; - [value,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); % assign samples to the nearest centers -end -[~,~,label] = unique(label); % remove empty clusters -energy = -2*sum(value)+dot(X(:),X(:)); \ No newline at end of file diff --git a/other/KCluster/kmedoids.m b/other/KCluster/kmedoids.m deleted file mode 100644 index 15971ea..0000000 --- a/other/KCluster/kmedoids.m +++ /dev/null @@ -1,15 +0,0 @@ -function [label, energy, index] = kmedoids(X,k) -% X: d x n data matrix -% k: number of cluster -% Written by Mo Chen (sth4nth@gamil.com) -v = dot(X,X,1); -D = bsxfun(@plus,v,v')-2*(X'*X); -n = size(X,2); -[~, label] = min(D(randsample(n,k),:)); -last = 0; -while any(label ~= last) - [~, index] = min(D*sparse(1:n,label,1,n,k,n)); - last = label; - [val, label] = min(D(index,:),[],1); -end -energy = sum(val); diff --git a/other/KCluster/kspheres.m b/other/KCluster/kspheres.m deleted file mode 100644 index 348de2c..0000000 --- a/other/KCluster/kspheres.m +++ /dev/null @@ -1,30 +0,0 @@ -function [label, model] = kspheres(X, k) -% Clustering samples into k isotropic Gaussian with different variances. 
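% Model: x ~ N(mu_k, sigma_k^2*I), i.e. each cluster has its own scalar
% variance, estimated together with the means by the hard-EM loop below.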
-% X: d x n data matrix -% k: number of seeds -% Written by Michael Chen (sth4nth@gmail.com). -[d,n] = size(X); -last = 0; -label = ceil(k*rand(1,n)); % random initialization -while any(label ~= last) - [u,~,label] = unique(label); % remove empty clusters - k = length(u); - R = sparse(label,1:n,1,k,n,n); - nk = sum(R,2); - w = nk/n; - mu = bsxfun(@times, X*R', 1./nk'); - - D = sqdistance(mu,X); - s = dot(D,R,2)./(d*nk); - - R = bsxfun(@times,D,1./s); - R = bsxfun(@plus,R,d*log(2*pi*s))/(-2); - R = bsxfun(@plus,R,log(w)); - - last = label; - [~,label] = max(R,[],1); -end -[~,~,label] = unique(label); % remove empty clusters -model.mu = mu; -model.sigma = s'; -model.weight = w; \ No newline at end of file diff --git a/other/KCluster/rndseeds.m b/other/KCluster/rndseeds.m deleted file mode 100644 index bce4c97..0000000 --- a/other/KCluster/rndseeds.m +++ /dev/null @@ -1,8 +0,0 @@ -function [S, idx] = rndseeds(X, k) -% Random pick k samples from X. -% X: d x n data matrix -% k: number of seeds -% Written by Michael Chen (sth4nth@gmail.com). -n = size(X,2); -idx = randsample(n,k); -S = X(:,idx); \ No newline at end of file diff --git a/other/KCluster/softseeds.m b/other/KCluster/softseeds.m deleted file mode 100644 index 8a34da6..0000000 --- a/other/KCluster/softseeds.m +++ /dev/null @@ -1,16 +0,0 @@ -function m = softseeds(X, k) -% Find samples as seeds for initializing clustering using kmeans++ algorithm. -% X: d x n data matrix -% k: number of seeds -% Reference: k-means++: the advantages of careful seeding. -% by David Arthur and Sergei Vassilvitskii -% Written by Michael Chen (sth4nth@gmail.com). -[d,n] = size(X); -m = zeros(d,k); -v = inf(1,n); -m(:,1) = X(:,ceil(n*rand)); -for i = 2:k - Y = bsxfun(@minus,X,m(:,i-1)); - v = cumsum(min(v,dot(Y,Y,1))); - m(:,i) = X(:,find(rand < v/v(end),1)); -end \ No newline at end of file diff --git a/other/KCluster/spkmeans.m b/other/KCluster/spkmeans.m deleted file mode 100644 index 7e0b88d..0000000 --- a/other/KCluster/spkmeans.m +++ /dev/null @@ -1,35 +0,0 @@ -function [label, m, energy] = spkmeans(X, init) -% Perform spherical k-means clustering. -% X: d x n data matrix -% init: k (1 x 1) or label (1 x n, 1<=label(i)<=k) or center (d x k) -% Reference: Clustering on the Unit Hypersphere using Von Mises-Fisher Distributions. -% by A. Banerjee, I. Dhillon, J. Ghosh and S. Sra. -% Written by Michael Chen (sth4nth@gmail.com). -%% initialization -[d,n] = size(X); -X = normalize(X); - -if length(init) == 1 - idx = randsample(n,init); - m = X(:,idx); - [~,label] = max(m'*X,[],1); -elseif size(init,1) == 1 && size(init,2) == n - label = init; -elseif size(init,1) == d - m = normalize(init); - [~,label] = max(m'*X,[],1); -else - error('ERROR: init is not valid.'); -end -%% main algorithm: final version -last = 0; -while any(label ~= last) - [u,~,label] = unique(label); % remove empty clusters - k = length(u); - E = sparse(1:n,label,1,n,k,n); - m = normalize(X*E); - last = label; - [val,label] = max(m'*X,[],1); -end -[~,~,label] = unique(label); % remove empty clusters -energy = sum(val); \ No newline at end of file diff --git a/other/KCluster/wkmeans.m b/other/KCluster/wkmeans.m deleted file mode 100644 index c7174bb..0000000 --- a/other/KCluster/wkmeans.m +++ /dev/null @@ -1,37 +0,0 @@ -function [label, energy, m] = wkmeans(X, init, w) -% Perform weighted k-means clustering. -% X: d x n data matrix -% init: k (1 x 1) or label (1 x n, 1<=label(i)<=k) or center (d x k) -% w: 1 x n weight vector (default w=1, equivalent to kmeans. 
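% Example (hypothetical call; omitting w reduces to plain k-means):
%   [label, energy, m] = wkmeans(X, 3);        % 3 random seeds
%   [label, energy, m] = wkmeans(X, m0, w);    % d x k centers m0, weights w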
-% Written by Michael Chen (sth4nth@gmail.com). -%% initialization -if nargin == 2 - w = 1; -end -[d,n] = size(X); -if length(init) == 1 - idx = randsample(n,init); - m = X(:,idx); - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); -elseif size(init,1) == 1 && size(init,2) == n - label = init; -elseif size(init,1) == d - m = init; - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); -else - error('ERROR: init is not valid.'); -end -%% main algorithm -last = 0; -while any(label ~= last) - [u,~,label] = unique(label); % remove empty clusters - k = length(u); - E = sparse(1:n,label,w,n,k,n); - m = bsxfun(@times,X*E,1./full(sum(E,1))); - last = label; - [val,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); -end -[~,~,label] = unique(label); % remove empty clusters -energy = -2*sum(val)+dot(X(:),X(:)); % sum of distances of clusters - -% s = energy/(n-k); % variance \ No newline at end of file diff --git a/other/Mixture/emidgm.m b/other/Mixture/emidgm.m deleted file mode 100644 index 7d4b615..0000000 --- a/other/Mixture/emidgm.m +++ /dev/null @@ -1,81 +0,0 @@ -function [label, model, llh] = emidgm(X, init) -% EM algorithm for independent Gaussian mixture model -% Written by Michael Chen (sth4nth@gmail.com). -fprintf('EM for independent Gaussian mixture: running ... \n'); -R = initialization(X, init); -[~,label(1,:)] = max(R,[],2); -R = R(:,unique(label)); - -[d,n] = size(X); -tol = 1e-6; -maxiter = 500; -llh = -inf(1,maxiter); -converged = false; -t = 1; -X2 = X.^2; -while ~converged && t < maxiter - t = t+1; - % maximizition step - nk = sum(R,1); - w = nk/n; - R = bsxfun(@times,R,1./nk); - mu = X*R; - mu2 = mu.*mu; - sigma = X2*R-mu2; - - % expectation step - lambda = 1./sigma; - M = bsxfun(@plus,X2'*lambda-2*X'*(mu.*lambda),dot(mu2,lambda,1)); % M distance - c = (d*log(2*pi)+sum(log(sigma),1))/(-2); % normalization constant - - logRho = bsxfun(@plus,M/(-2),c+log(w)); - T = logsumexp(logRho,2); - logR = bsxfun(@minus,logRho,T); - R = exp(logR); - llh(t) = sum(T)/n; % loglikelihood - - [~,label(:)] = max(R,[],2); - u = unique(label); % non-empty components - if size(R,2) ~= size(u,2) - R = R(:,u); % remove empty components - else - converged = llh(t)-llh(t-1) < tol*abs(llh(t)); - end -end -model.w = w; -model.mu = mu; -model.sigma = sigma; -llh = llh(2:t); -if converged - fprintf('Converged in %d steps.\n',t-1); -else - fprintf('Not converged in %d steps.\n',maxiter); -end - -function R = initialization(X, init) -[d,n] = size(X); -if length(init) == 1 % random initialization - k = init; - idx = randsample(n,k); - m = X(:,idx); - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); - [u,~,label] = unique(label); - while k ~= length(u) - idx = randsample(n,k); - m = X(:,idx); - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); - [u,~,label] = unique(label); - end - R = full(sparse(1:n,label,1,n,k,n)); -elseif size(init,1) == 1 && size(init,2) == n % initialize with labels - label = init; - k = max(label); - R = full(sparse(1:n,label,1,n,k,n)); -elseif size(init,1) == d %initialize with only centers - k = size(init,2); - m = init; - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); - R = full(sparse(1:n,label,1,n,k,n)); -else - error('ERROR: init is not valid.'); -end diff --git a/other/Mixture/emkmeans.m b/other/Mixture/emkmeans.m deleted file mode 100644 index 3a5dd40..0000000 --- a/other/Mixture/emkmeans.m +++ /dev/null @@ -1,82 +0,0 @@ -function [label, model, llh] = emkmeans(X, init) -% EM algorithm for spherical (isotropic) Gaussian mixture model with 
common -% variance (aka kmeans model) -% Written by Michael Chen (sth4nth@gmail.com). -fprintf('EM for Kmeans: running ... \n'); -R = initialization(X, init); -[~,label(1,:)] = max(R,[],2); -R = R(:,unique(label)); - -[d,n] = size(X); -tol = 1e-6; -maxiter = 500; -llh = -inf(1,maxiter); -converged = false; -t = 1; -while ~converged && t < maxiter - t = t+1; - % maximizition step - nk = sum(R,1); - w = nk/n; - mu = bsxfun(@times,X*R,1./nk); - D = sqdistance(X,mu); - sigma = dot(D(:),R(:))/(d*n); - - % expectation step - c = d*log(2*pi*sigma)/(-2); - logRho = bsxfun(@plus,D/(-2*sigma),c+log(w)); - T = logsumexp(logRho,2); - logR = bsxfun(@minus,logRho,T); - R = exp(logR); - - llh(t) = sum(T)/n; % loglikelihood - - [~,label(:)] = max(R,[],2); - u = unique(label); % non-empty components - if size(R,2) ~= size(u,2) - R = R(:,u); % remove empty components - else - converged = llh(t)-llh(t-1) < tol*abs(llh(t)); - end -end -model.w = w; -model.mu = mu; -model.sigma = sigma; -llh = llh(2:t); -if converged - fprintf('Converged in %d steps.\n',t-1); -else - fprintf('Not converged in %d steps.\n',maxiter); -end - - -function R = initialization(X, init) -[d,n] = size(X); -if length(init) == 1 % random initialization - k = init; - idx = randsample(n,k); - m = X(:,idx); - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); - [u,~,label] = unique(label); - while k ~= length(u) - idx = randsample(n,k); - m = X(:,idx); - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); - [u,~,label] = unique(label); - end - R = full(sparse(1:n,label,1,n,k,n)); -elseif size(init,1) == 1 && size(init,2) == n % initialize with labels - label = init; - k = max(label); - R = full(sparse(1:n,label,1,n,k,n)); -elseif size(init,1) == d %initialize with only centers - k = size(init,2); - m = init; - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); - R = full(sparse(1:n,label,1,n,k,n)); -else - error('ERROR: init is not valid.'); -end - -function D = sqdistance(A, B) -D = (-2)*(A'*B)+bsxfun(@plus,dot(B,B,1),dot(A,A,1)'); diff --git a/other/Mixture/emsgm.m b/other/Mixture/emsgm.m deleted file mode 100644 index a595376..0000000 --- a/other/Mixture/emsgm.m +++ /dev/null @@ -1,80 +0,0 @@ -function [label, model, llh] = emsgm(X, init) -% EM algorithm for spherical (isotropic) Gaussian mixture model -% Written by Michael Chen (sth4nth@gmail.com). -fprintf('EM for spherical (isotropic) Gaussian mixture: running ... 
\n'); -R = initialization(X, init); -[~,label(1,:)] = max(R,[],2); -R = R(:,unique(label)); - -[d,n] = size(X); -tol = 1e-6; -maxiter = 500; -llh = -inf(1,maxiter); -converged = false; -t = 1; -X2 = repmat(dot(X,X,1)',1,size(R,2)); -while ~converged && t < maxiter - t = t+1; - % maximizition step - nk = sum(R,1); - w = nk/n; - mu = bsxfun(@times,X*R,1./nk); - D = bsxfun(@plus,X2-2*X'*mu,dot(mu,mu,1)); - sigma = dot(D,R,1)./(d*nk); - - % expectation step - M = bsxfun(@times,D,1./sigma); % M distance - c = d*log(2*pi*sigma)/(-2); % normalization constant - logRho = bsxfun(@plus,M/(-2),c+log(w)); - T = logsumexp(logRho,2); - logR = bsxfun(@minus,logRho,T); - R = exp(logR); - - llh(t) = sum(T)/n; % loglikelihood - - [~,label(:)] = max(R,[],2); - u = unique(label); % non-empty components - if size(R,2) ~= size(u,2) - R = R(:,u); % remove empty components - else - converged = llh(t)-llh(t-1) < tol*abs(llh(t)); - end -end -model.w = w; -model.mu = mu; -model.sigma = sigma; -llh = llh(2:t); -if converged - fprintf('Converged in %d steps.\n',t-1); -else - fprintf('Not converged in %d steps.\n',maxiter); -end - - -function R = initialization(X, init) -[d,n] = size(X); -if length(init) == 1 % random initialization - k = init; - idx = randsample(n,k); - m = X(:,idx); - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); - [u,~,label] = unique(label); - while k ~= length(u) - idx = randsample(n,k); - m = X(:,idx); - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); - [u,~,label] = unique(label); - end - R = full(sparse(1:n,label,1,n,k,n)); -elseif size(init,1) == 1 && size(init,2) == n % initialize with labels - label = init; - k = max(label); - R = full(sparse(1:n,label,1,n,k,n)); -elseif size(init,1) == d %initialize with only centers - k = size(init,2); - m = init; - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); - R = full(sparse(1:n,label,1,n,k,n)); -else - error('ERROR: init is not valid.'); -end \ No newline at end of file diff --git a/other/Mixture/vbdgm.m b/other/Mixture/vbdgm.m deleted file mode 100644 index 9b1e3e2..0000000 --- a/other/Mixture/vbdgm.m +++ /dev/null @@ -1,164 +0,0 @@ -function [label, model, L] = vbdgm(X, init, prior) -% Perform variational Bayesian inference for Gaussian mixture. -% X: d x n data matrix -% init: k (1 x 1) or label (1 x n, 1<=label(i)<=k) or center (d x k) -% Reference: Pattern Recognition and Machine Learning by Christopher M. Bishop (P.474) -% Written by Michael Chen (sth4nth@gmail.com). - -fprintf('Variational Bayesian Gaussian mixture: running ... 
\n'); -[d,n] = size(X); -if nargin < 3 - prior.alpha = 1; - prior.kappa = 1; - prior.m = mean(X,2); - prior.nu = d+1; - prior.M = eye(d); % M = inv(W) -end -tol = 1e-10; -maxiter = 1000; -L = -inf(1,maxiter); -converged = false; -t = 1; - -model.R = initialization(X,init); - -while ~converged && t < maxiter - t = t+1; - model = qDirichlet(model, prior); - model = qGaussianWishart(X, model, prior); - model = qMultinomial(X, model); - L(t) = bound(model,prior)/n; - converged = abs(L(t)-L(t-1)) < tol*abs(L(t)); -end -L = L(2:t); -label = zeros(1,n); -[~,label(:)] = max(model.R,[],2); -[~,~,label] = unique(label); -if converged - fprintf('Converged in %d steps.\n',t-1); -else - fprintf('Not converged in %d steps.\n',maxiter); -end - -function R = initialization(X, init) -[d,n] = size(X); -if length(init) == 1 % random initialization - k = init; - idx = randsample(n,k); - m = X(:,idx); - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); - [u,~,label] = unique(label); - while k ~= length(u) - idx = randsample(n,k); - m = X(:,idx); - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); - [u,~,label] = unique(label); - end - R = full(sparse(1:n,label,1,n,k,n)); -elseif size(init,1) == 1 && size(init,2) == n % initialize with labels - label = init; - k = max(label); - R = full(sparse(1:n,label,1,n,k,n)); -elseif size(init,1) == d %initialize with only centers - k = size(init,2); - m = init; - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); - R = full(sparse(1:n,label,1,n,k,n)); -else - error('ERROR: init is not valid.'); -end - - -% Done -function model = qDirichlet(model, prior) -alpha0 = prior.alpha; -R = model.R; - -nk = sum(R,1); % 10.51 -alpha = alpha0+nk; % 10.58 - -model.alpha = alpha; - -% Done -function model = qGaussianWishart(X, model, prior) -kappa0 = prior.kappa; -m0 = prior.m; -nu0 = prior.nu; -M0 = prior.M; -R = model.R; - -nk = sum(R,1); % 10.51 -nxbar = X*R; - -kappa = kappa0+nk; % 10.60 -m = bsxfun(@times,bsxfun(@plus,kappa0*m0,nxbar),1./kappa); % 10.61 -nu = nu0+nk; % 10.63 - - -[d,k] = size(m); -M = zeros(d,d,k); -sqrtR = sqrt(R); - -xbar = bsxfun(@times,nxbar,1./nk); % 10.52 -xbarm0 = bsxfun(@minus,xbar,m0); -w = (kappa0*nk./(kappa0+nk)); -for i = 1:k - Xs = bsxfun(@times,bsxfun(@minus,X,xbar(:,i)),sqrtR(:,i)'); - xbarm0i = xbarm0(:,i); - M(:,:,i) = M0+Xs*Xs'+w(i)*(xbarm0i*xbarm0i'); % 10.62 -end - -model.kappa = kappa; -model.m = m; -model.nu = nu; -model.M = M; % Whishart: M = inv(W) - - -% Done -function model = qMultinomial(X, model) -alpha = model.alpha; % Dirichlet -kappa = model.kappa; % Gaussian -m = model.m; % Gasusian -nu = model.nu; % Whishart -M = model.M; % Whishart: inv(W) = V'*V - -n = size(X,2); -[d,k] = size(m); - -logW = zeros(1,k); -EQ = zeros(n,k); -for i = 1:k - U = chol(M(:,:,i)); - logW(i) = -2*sum(log(diag(U))); - Q = (U'\bsxfun(@minus,X,m(:,i))); - EQ(:,i) = d/kappa(i)+nu(i)*dot(Q,Q,1); % 10.64 -end - -ElogLambda = sum(psi(0,bsxfun(@minus,nu+1,(1:d)')/2),1)+d*log(2)+logW; % 10.65 -Elogpi = psi(0,alpha)-psi(0,sum(alpha)); % 10.66 - -logRho = (bsxfun(@minus,EQ,2*Elogpi+ElogLambda-d*log(2*pi)))/(-2); % 10.46 -logR = bsxfun(@minus,logRho,logsumexp(logRho,2)); % 10.49 -R = exp(logR); - -model.logR = logR; -model.R = R; - -function L = bound(model, prior) -alpha0 = prior.alpha; -alpha = model.alpha; -R = model.R; -logR = model.logR; - -nk = sum(R,1); % 10.51 -k = size(R,2); -Elogpi = psi(0,alpha)-psi(0,sum(alpha)); - -Epz = dot(nk,Elogpi); -Eqz = dot(R(:),logR(:)); -logCalpha0 = gammaln(k*alpha0)-k*gammaln(alpha0); -Eppi = 
logCalpha0+(alpha0-1)*sum(Elogpi); -logCalpha = gammaln(sum(alpha))-sum(gammaln(alpha)); -Eqpi = logCalpha+dot(alpha-1,Elogpi); - -L = Epz-Eqz+Eppi-Eqpi; \ No newline at end of file diff --git a/other/Mixture/vbgm.m b/other/Mixture/vbgm.m deleted file mode 100644 index eec0717..0000000 --- a/other/Mixture/vbgm.m +++ /dev/null @@ -1,199 +0,0 @@ -function [label, model, L] = vbgm(X, init, prior) -% Perform variational Bayesian inference for Gaussian mixture. -% X: d x n data matrix -% init: k (1 x 1) or label (1 x n, 1<=label(i)<=k) or center (d x k) -% Reference: Pattern Recognition and Machine Learning by Christopher M. Bishop (P.474) -% Written by Michael Chen (sth4nth@gmail.com). - -fprintf('Variational Bayesian Gaussian mixture: running ... \n'); -[d,n] = size(X); -if nargin < 3 - prior.alpha = 1; - prior.kappa = 1; - prior.m = mean(X,2); - prior.v = d+1; - prior.M = eye(d); % M = inv(W) -end -tol = 1e-4; -maxiter = 500; -L = -inf(1,maxiter); -converged = false; -t = 1; - -model.R = initialization(X,init); -while ~converged && t < maxiter - t = t+1; - model = vmax(X, model, prior); - model = vexp(X, model); - L(t) = vbound(X,model,prior)/n; - converged = abs(L(t)-L(t-1)) < tol*abs(L(t)); -end -L = L(2:t); -label = zeros(1,n); -[~,label(:)] = max(model.R,[],2); -[~,~,label] = unique(label); -if converged - fprintf('Converged in %d steps.\n',t-1); -else - fprintf('Not converged in %d steps.\n',maxiter); -end - -function R = initialization(X, init) -[d,n] = size(X); -if length(init) == 1 % random initialization - k = init; - idx = randsample(n,k); - m = X(:,idx); - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); - [u,~,label] = unique(label); - while k ~= length(u) - idx = randsample(n,k); - m = X(:,idx); - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); - [u,~,label] = unique(label); - end - R = full(sparse(1:n,label,1,n,k,n)); -elseif size(init,1) == 1 && size(init,2) == n % initialize with labels - label = init; - k = max(label); - R = full(sparse(1:n,label,1,n,k,n)); -elseif size(init,1) == d %initialize with only centers - k = size(init,2); - m = init; - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); - R = full(sparse(1:n,label,1,n,k,n)); -else - error('ERROR: init is not valid.'); -end -% Done -function model = vmax(X, model, prior) -alpha0 = prior.alpha; -kappa0 = prior.kappa; -m0 = prior.m; -v0 = prior.v; -M0 = prior.M; -R = model.R; - -nk = sum(R,1); % 10.51 -alpha = alpha0+nk; % 10.58 -nxbar = X*R; -kappa = kappa0+nk; % 10.60 -m = bsxfun(@times,bsxfun(@plus,kappa0*m0,nxbar),1./kappa); % 10.61 -v = v0+nk; % 10.63 - -[d,k] = size(m); -M = zeros(d,d,k); -sqrtR = sqrt(R); - -xbar = bsxfun(@times,nxbar,1./nk); % 10.52 -xbarm0 = bsxfun(@minus,xbar,m0); -w = (kappa0*nk./(kappa0+nk)); -for i = 1:k - Xs = bsxfun(@times,bsxfun(@minus,X,xbar(:,i)),sqrtR(:,i)'); - xbarm0i = xbarm0(:,i); - M(:,:,i) = M0+Xs*Xs'+w(i)*(xbarm0i*xbarm0i'); % 10.62 -end - -model.alpha = alpha; -model.kappa = kappa; -model.m = m; -model.v = v; -model.M = M; % Whishart: M = inv(W) -% Done -function model = vexp(X, model) -alpha = model.alpha; % Dirichlet -kappa = model.kappa; % Gaussian -m = model.m; % Gasusian -v = model.v; % Whishart -M = model.M; % Whishart: inv(W) = V'*V - -n = size(X,2); -[d,k] = size(m); - -logW = zeros(1,k); -EQ = zeros(n,k); -for i = 1:k - U = chol(M(:,:,i)); - logW(i) = -2*sum(log(diag(U))); - Q = (U'\bsxfun(@minus,X,m(:,i))); - EQ(:,i) = d/kappa(i)+v(i)*dot(Q,Q,1); % 10.64 -end - -ElogLambda = sum(psi(0,bsxfun(@minus,v+1,(1:d)')/2),1)+d*log(2)+logW; % 10.65 
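% For reference, the two expectations used here (PRML 10.64 and 10.65),
% written with M_k = inv(W_k) as stored above:
%   E[(x-mu_k)'*Lambda_k*(x-mu_k)] = d/kappa_k + nu_k*(x-m_k)'*W_k*(x-m_k)
%   E[ln|Lambda_k|] = sum_{i=1..d} psi((nu_k+1-i)/2) + d*ln(2) + ln|W_k|
% which is why logW is computed as the negative log-determinant of M_k.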
-Elogpi = psi(0,alpha)-psi(0,sum(alpha)); % 10.66 - -logRho = (bsxfun(@minus,EQ,2*Elogpi+ElogLambda-d*log(2*pi)))/(-2); % 10.46 -logR = bsxfun(@minus,logRho,logsumexp(logRho,2)); % 10.49 -R = exp(logR); - -model.logR = logR; -model.R = R; -% Done -function L = vbound(X, model, prior) -alpha0 = prior.alpha; -kappa0 = prior.kappa; -m0 = prior.m; -v0 = prior.v; -M0 = prior.M; - -alpha = model.alpha; % Dirichlet -kappa = model.kappa; % Gaussian -m = model.m; % Gasusian -v = model.v; % Whishart -M = model.M; % Whishart: inv(W) = V'*V -R = model.R; -logR = model.logR; - - -[d,k] = size(m); -nk = sum(R,1); % 10.51 - -Elogpi = psi(0,alpha)-psi(0,sum(alpha)); - -Epz = dot(nk,Elogpi); -Eqz = dot(R(:),logR(:)); -logCalpha0 = gammaln(k*alpha0)-k*gammaln(alpha0); -Eppi = logCalpha0+(alpha0-1)*sum(Elogpi); -logCalpha = gammaln(sum(alpha))-sum(gammaln(alpha)); -Eqpi = dot(alpha-1,Elogpi)+logCalpha; -L = Epz-Eqz+Eppi-Eqpi; - - -U0 = chol(M0); -sqrtR = sqrt(R); -xbar = bsxfun(@times,X*R,1./nk); % 10.52 - -logW = zeros(1,k); -trSW = zeros(1,k); -trM0W = zeros(1,k); -xbarmWxbarm = zeros(1,k); -mm0Wmm0 = zeros(1,k); -for i = 1:k - U = chol(M(:,:,i)); - logW(i) = -2*sum(log(diag(U))); - - Xs = bsxfun(@times,bsxfun(@minus,X,xbar(:,i)),sqrtR(:,i)'); - V = chol(Xs*Xs'/nk(i)); - Q = V/U; - trSW(i) = dot(Q(:),Q(:)); % equivalent to tr(SW)=trace(S/M) - Q = U0/U; - trM0W(i) = dot(Q(:),Q(:)); - - q = U'\(xbar(:,i)-m(:,i)); - xbarmWxbarm(i) = dot(q,q); - q = U'\(m(:,i)-m0); - mm0Wmm0(i) = dot(q,q); -end - -ElogLambda = sum(psi(0,bsxfun(@minus,v+1,(1:d)')/2),1)+d*log(2)+logW; % 10.65 -Epmu = sum(d*log(kappa0/(2*pi))+ElogLambda-d*kappa0./kappa-kappa0*(v.*mm0Wmm0))/2; -logB0 = v0*sum(log(diag(U0)))-0.5*v0*d*log(2)-logmvgamma(0.5*v0,d); -EpLambda = k*logB0+0.5*(v0-d-1)*sum(ElogLambda)-0.5*dot(v,trM0W); - -Eqmu = 0.5*sum(ElogLambda+d*log(kappa/(2*pi)))-0.5*d*k; -logB = -v.*(logW+d*log(2))/2-logmvgamma(0.5*v,d); -EqLambda = 0.5*sum((v-d-1).*ElogLambda-v*d)+sum(logB); - -EpX = 0.5*dot(nk,ElogLambda-d./kappa-v.*trSW-v.*xbarmWxbarm-d*log(2*pi)); - -L = L+Epmu-Eqmu+EpLambda-EqLambda+EpX; \ No newline at end of file diff --git a/other/Mixture/vbigm.m b/other/Mixture/vbigm.m deleted file mode 100644 index 6ec744f..0000000 --- a/other/Mixture/vbigm.m +++ /dev/null @@ -1,179 +0,0 @@ -function [label, model, bound] = vbigm(X, init, prior) -% Perform variational Bayesian inference for independent Gaussian mixture. -% X: d x n data matrix -% init: k (1 x 1) or label (1 x n, 1<=label(i)<=k) or center (d x k) -% Written by Michael Chen (sth4nth@gmail.com). - -fprintf('Variational Bayesian independent Gaussian mixture: running ... \n'); -n = size(X,2); -if nargin < 3 - prior.alpha = 1; % noninformative setting of Dirichet prior - prior.kappa = 1; % noninformative setting of Gassian prior of Gaussian mean ? 
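% (kappa -> 0 makes the prior on each mean flat, so the posterior mean
% reduces to the responsibility-weighted sample mean; kappa = 1 is only a
% weakly informative default, not a truly noninformative one.)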
- prior.m = mean(X,2); % when prior.kappa = 0 it doesnt matter how to set this - prior.nu = 1; % noninformative setting of 1d Wishart - prior.tau = 1; % noninformative setting of 1d Wishart -end -R = initialization(X,init); - -tol = 1e-8; -maxiter = 1000; -bound = -inf(1,maxiter); -converged = false; -t = 1; - -model.R = R; -model = vbmaximization(X,model,prior); -while ~converged && t < maxiter - t = t+1; - model = vbexpection(X,model); - model = vbmaximization(X,model,prior); - bound(t) = vbound(X,model,prior)/n; - converged = abs(bound(t)-bound(t-1)) < tol*abs(bound(t)); -end -bound = bound(2:t); -label = zeros(1,n); -[~,label(:)] = max(model.R,[],2); -[~,~,label] = unique(label); - -if converged - fprintf('Converged in %d steps.\n',t-1); -else - fprintf('Not converged in %d steps.\n',maxiter); -end - -% Done. - -function R = initialization(X, init) -[d,n] = size(X); -if length(init) == 1 % random initialization - k = init; - idx = randsample(n,k); - m = X(:,idx); - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); - [u,~,label] = unique(label); - while k ~= length(u) - idx = randsample(n,k); - m = X(:,idx); - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); - [u,~,label] = unique(label); - end - R = full(sparse(1:n,label,1,n,k,n)); -elseif size(init,1) == 1 && size(init,2) == n % initialize with labels - label = init; - k = max(label); - R = full(sparse(1:n,label,1,n,k,n)); -elseif size(init,1) == d %initialize with only centers - k = size(init,2); - m = init; - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); - R = full(sparse(1:n,label,1,n,k,n)); -else - error('ERROR: init is not valid.'); -end - -% update latent variables -function model = vbexpection(X, model) -alpha = model.alpha; -kappa = model.kappa; -m = model.m; -nu = model.nu; -tau = model.tau; - -d = size(X,1); - -logw = psi(0,alpha)-psi(0,sum(alpha)); -% loglambda = psi(0,nu/2)-log(tau/2); -loglambda = bsxfun(@plus,-log(tau/2),psi(0,nu/2)); - -% M = bsxfun(@times,sqdistance(X,m),nu./tau); -aib = bsxfun(@times,1./tau,nu); -r = m.*aib; -M = bsxfun(@plus,X.^2'*aib-2*X'*r,dot(m,r,1)); -M = bsxfun(@plus,M,d./kappa); -% c = d*(loglambda-log(2*pi))/2; -c = (sum(loglambda,1)-d*log(2*pi))/2; % normalization constant -logRho = bsxfun(@plus,M/(-2),logw+c); - -% [~,idx] = max(logR,[],2); -% logR = logR(:,unique(idx)); % remove empty components!!! - -logR = bsxfun(@minus,logRho,logsumexp(logRho,2)); -R = exp(logR); - -model.logR = logR; -model.R = R; - -% Done. -% update the parameters. 
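% For each component k (j indexing dimensions), the updates below are the
% standard per-dimension Normal-Gamma posterior:
%   alpha_k = alpha0 + N_k,   kappa_k = kappa0 + N_k,
%   m_jk = (kappa0*m0_j + N_k*xbar_jk)/kappa_k,   nu_k = nu0 + N_k,
%   tau_jk = tau0 + N_k*s_jk + (kappa0*N_k/(kappa0+N_k))*(xbar_jk - m0_j)^2,
% where s_jk is the responsibility-weighted variance of dimension j.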
-function model = vbmaximization(X, model, prior) -alpha0 = prior.alpha; % Dirichet prior -kappa0 = prior.kappa; % piror of Gaussian mean -m0 = prior.m; % piror of Gaussian mean -nu0 = prior.nu; % 1d Wishart -tau0 = prior.tau; % 1d Wishart - -R = model.R; - -% Dirichlet -nk = sum(R,1); -alpha = alpha0+nk; -% Gaussian -kappa = kappa0+nk; -R = bsxfun(@times,R,1./nk); -xbar = X*R; -s = X.^2*R-xbar.^2; - -m = bsxfun(@times,bsxfun(@plus,kappa0*m0,bsxfun(@times,xbar,nk)),1./kappa); -% 1d Wishart -nu = nu0+nk; -tau = tau0+bsxfun(@times,s,nk)+bsxfun(@times,bsxfun(@minus,xbar,m0).^2,kappa0*nk./(kappa0+nk)); - -model.alpha = alpha; -model.kappa = kappa; -model.m = m; -model.nu = nu; -model.tau = tau; - -function bound = vbound(X, model, prior) -alpha0 = prior.alpha; % Dirichet prior -kappa0 = prior.kappa; % piror of Gaussian mean -m0 = prior.m; % piror of Gaussian mean -nu0 = prior.nu; % 1d Wishart -tau0 = prior.tau; % 1d Wishart - -alpha = model.alpha; -kappa = model.kappa; -m = model.m; -nu = model.nu; -tau = model.tau; - -logR = model.logR; -R = model.R; - -[d,k] = size(m); - -nk = sum(R,1); -logw = psi(0,alpha)-psi(0,sum(alpha)); - -Epz = nk*logw'; -Eqz = R(:)'*logR(:); -logCalpha0 = gammaln(k*alpha0)-k*gammaln(alpha0); -Epw = logCalpha0+(alpha0-1)*sum(logw); -logCalpha = gammaln(sum(alpha))-sum(gammaln(alpha)); -Eqw = sum((alpha-1).*logw)+logCalpha; - -loglambda = bsxfun(@plus,-log(tau/2),psi(0,nu/2)); -aib = bsxfun(@times,1./tau,nu); -mm0 = bsxfun(@minus,m,m0).^2; -Epmu = 0.5*(sum(loglambda(:))+d*(k*log(kappa0/(2*pi))-sum(kappa0./kappa))-kappa0*(aib(:)'*mm0(:))); -Eqmu = 0.5*(d*(sum(log(kappa))-k*log(2*pi)-k)+sum(loglambda(:))); - -Eplambda = k*d*(nu0*log(tau0/2)/2-gammaln(nu0/2))+(nu0/2-1)*sum(loglambda(:))-tau0*sum(aib(:))/2; -Eqlambda = -d*sum(gammaln(nu/2))+d*sum((nu/2-1).*psi(0,nu/2))+sum(log(tau(:)/2))-d*sum(nu/2); - -R = bsxfun(@times,R,1./nk); -xbar = X*R; -s = X.^2*R-xbar.^2; -EpX = 0.5*sum(loglambda-1./repmat(kappa,d,1)-log(2*pi)-aib.*s-aib.*(xbar-m).^2,1)*nk'; - -bound = Epz-Eqz+Epw-Eqw+Epmu-Eqmu+Eplambda-Eqlambda+EpX; diff --git a/other/Mixture/vbkmeans.m b/other/Mixture/vbkmeans.m deleted file mode 100644 index 414ca94..0000000 --- a/other/Mixture/vbkmeans.m +++ /dev/null @@ -1,178 +0,0 @@ -function [label, model, bound] = vbkmeans(X, init, prior) -% Perform variational Bayesian inference for isotropic Gaussian mixture. -% X: d x n data matrix -% init: k (1 x 1) or label (1 x n, 1<=label(i)<=k) or center (d x k) -% Written by Michael Chen (sth4nth@gmail.com). - - -fprintf('Variantional Bayeisn Kmeans: running ... \n'); -[d,n] = size(X); -if nargin < 3 - prior.alpha = 1; % noninformative setting of Dirichet prior - prior.kappa = 1; % noninformative setting of Gassian prior of Gaussian mean ? 
- prior.m = mean(X,2); % when prior.kappa = 0 it doesnt matter how to set this - prior.nu = 1; % noninformative setting of 1d Wishart - prior.tau = 1; % noninformative setting of 1d Wishart -end -model.nu = prior.nu+d*n; - -R = initialization(X,init); - -tol = 1e-8; -maxiter = 5000; -bound = -inf(1,maxiter); -converged = false; -t = 1; - -model.R = R; -model = vbmaximization(X,model,prior); -while ~converged && t < maxiter - t = t+1; - model = vbexpection(X,model); - model = vbmaximization(X,model,prior); - bound(t) = vbound(X,model,prior)/n; - converged = abs(bound(t)-bound(t-1)) < tol*abs(bound(t)); -end -bound = bound(2:t); -label = zeros(1,n); -[~,label(:)] = max(model.R,[],2); -[~,~,label] = unique(label); - -if converged - fprintf('Converged in %d steps.\n',t-1); -else - fprintf('Not converged in %d steps.\n',maxiter); -end - -% Done. - -function R = initialization(X, init) -[d,n] = size(X); -if length(init) == 1 % random initialization - k = init; - idx = randsample(n,k); - m = X(:,idx); - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); - [u,~,label] = unique(label); - while k ~= length(u) - idx = randsample(n,k); - m = X(:,idx); - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); - [u,~,label] = unique(label); - end - R = full(sparse(1:n,label,1,n,k,n)); -elseif size(init,1) == 1 && size(init,2) == n % initialize with labels - label = init; - k = max(label); - R = full(sparse(1:n,label,1,n,k,n)); -elseif size(init,1) == d %initialize with only centers - k = size(init,2); - m = init; - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); - R = full(sparse(1:n,label,1,n,k,n)); -else - error('ERROR: init is not valid.'); -end - -% Done. -% update latent variables -function model = vbexpection(X, model) -alpha = model.alpha; -kappa = model.kappa; -m = model.m; -nu = model.nu; -tau = model.tau; - -d = size(X,1); - -logw = psi(0,alpha)-psi(0,sum(alpha)); -loglambda = psi(0,nu/2)-log(tau/2); - -M = bsxfun(@plus,sqdistance(X,m)*nu/tau,d./kappa); -c = d*(loglambda-log(2*pi))/2; -logRho = bsxfun(@plus,M/(-2),logw+c); - -% [~,idx] = max(logR,[],2); -% logR = logR(:,unique(idx)); % remove empty components!!! - -logR = bsxfun(@minus,logRho,logsumexp(logRho,2)); -R = exp(logR); - -model.logR = logR; -model.R = R; - -% Done. 
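The vbexpection step above (and the EM/VB routines throughout this toolbox)
relies on a logsumexp helper that these patches never show; a standard
implementation consistent with how it is called here:

    function s = logsumexp(X, dim)
    % Compute log(sum(exp(X),dim)) while avoiding numerical overflow.
    if nargin == 1
        dim = find(size(X)~=1,1);
    end
    y = max(X,[],dim);                              % shift by the max
    s = y+log(sum(exp(bsxfun(@minus,X,y)),dim));
    i = ~isfinite(y);                               % all -inf (or inf) slices
    s(i) = y(i);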
-% update the parameters -function model = vbmaximization(X, model, prior) -alpha0 = prior.alpha; % Dirichet prior -kappa0 = prior.kappa; % piror of Gaussian mean -m0 = prior.m; % piror of Gaussian mean -tau0 = prior.tau; % 1d Wishart - -R = model.R; - -% Dirichlet -nk = sum(R,1); -alpha = alpha0+nk; -% Gaussian -kappa = kappa0+nk; -xbar = bsxfun(@times,X*R,1./nk); -m = bsxfun(@times,bsxfun(@plus,kappa0*m0,bsxfun(@times,xbar,nk)),1./kappa); -% 1d Wishart -Q = sqdistance(X,xbar); -tau = tau0+dot(Q(:),R(:))+dot((kappa0*nk./(kappa0+nk)),sqdistance(m0,xbar)); - -model.alpha = alpha; -model.kappa = kappa; -model.m = m; -model.tau = tau; - - -function bound = vbound(X, model, prior) -alpha0 = prior.alpha; % Dirichet prior -kappa0 = prior.kappa; % piror of Gaussian mean -m0 = prior.m; % piror of Gaussian mean -nu0 = prior.nu; % 1d Wishart -tau0 = prior.tau; % 1d Wishart - -alpha = model.alpha; -kappa = model.kappa; -m = model.m; -nu = model.nu; -tau = model.tau; - -logR = model.logR; -R = model.R; - -[d,k] = size(m); - -nk = sum(R,1); -logw = psi(0,alpha)-psi(0,sum(alpha)); - -Epz = nk*logw'; -Eqz = R(:)'*logR(:); -logCalpha0 = gammaln(k*alpha0)-k*gammaln(alpha0); -Epw = logCalpha0+(alpha0-1)*sum(logw); -logCalpha = gammaln(sum(alpha))-sum(gammaln(alpha)); -Eqw = sum((alpha-1).*logw)+logCalpha; - -loglambda = psi(0,nu/2)-log(tau/2); -aib = nu./tau; -Q = bsxfun(@minus,m,m0); -Epmu = 0.5*(d*(k*log(kappa0/(2*pi))+k*loglambda-sum(kappa0./kappa))-kappa0*aib*dot(Q(:),Q(:))); -Eqmu = 0.5*d*(k*loglambda+sum(log(kappa))-k*log(2*pi)-k); - -Eplambda = k*(nu0/2*log(tau0/2)-gammaln(nu0/2))+(nu0/2-1)*sum(loglambda)-tau0*aib/2; -Eqlambda = -gammaln(nu/2)+(nu/2-1)*psi(0,nu/2)+log(tau/2)-nu/2; - -xbar = bsxfun(@times,X*R,1./nk); -s = dot(sqdistance(X,xbar),R,1)./(d*nk); - -r = xbar-m; -EpX = 0.5*(d*(loglambda-1./kappa-log(2*pi)-aib.*s)-aib.*dot(r,r,1))*nk'; - -bound = Epz-Eqz+Epw-Eqw+Epmu-Eqmu+Eplambda-Eqlambda+EpX; - - -function D = sqdistance(A, B) -D = (-2)*(A'*B)+bsxfun(@plus,dot(B,B,1),dot(A,A,1)'); \ No newline at end of file diff --git a/other/Mixture/vbsgm.m b/other/Mixture/vbsgm.m deleted file mode 100644 index b28cf45..0000000 --- a/other/Mixture/vbsgm.m +++ /dev/null @@ -1,181 +0,0 @@ -function [label, model, bound] = vbsgm(X, init, prior) -% Perform variational Bayesian inference for spherical (isotropic) Gaussian mixture. -% X: d x n data matrix -% init: k (1 x 1) or label (1 x n, 1<=label(i)<=k) or center (d x k) -% Written by Michael Chen (sth4nth@gmail.com). - -fprintf('Variational Bayesian spherical Gaussian mixture: running ... \n'); -n = size(X,2); -if nargin < 3 - prior.alpha = 1; % noninformative setting of Dirichet prior - prior.kappa = 1; % noninformative setting of Gassian prior of Gaussian mean ? 
- prior.m = mean(X,2); % when prior.kappa = 0 it doesnt matter how to set this - prior.nu = 1; % noninformative setting of 1d Wishart - prior.tau = 1; % noninformative setting of 1d Wishart -end -R = initialization(X,init); - -tol = 1e-8; -maxiter = 1000; -bound = -inf(1,maxiter); -converged = false; -t = 1; - -model.R = R; -model = vbmaximization(X,model,prior); -while ~converged && t < maxiter - t = t+1; - model = vbexpection(X,model); - model = vbmaximization(X,model,prior); - bound(t) = vbound(X,model,prior)/n; - converged = abs(bound(t)-bound(t-1)) < tol*abs(bound(t)); -end -bound = bound(2:t); -label = zeros(1,n); -[~,label(:)] = max(model.R,[],2); -[~,~,label] = unique(label); - -if converged - fprintf('Converged in %d steps.\n',t-1); -else - fprintf('Not converged in %d steps.\n',maxiter); -end - -% Done. - -function R = initialization(X, init) -[d,n] = size(X); -if length(init) == 1 % random initialization - k = init; - idx = randsample(n,k); - m = X(:,idx); - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); - [u,~,label] = unique(label); - while k ~= length(u) - idx = randsample(n,k); - m = X(:,idx); - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); - [u,~,label] = unique(label); - end - R = full(sparse(1:n,label,1,n,k,n)); -elseif size(init,1) == 1 && size(init,2) == n % initialize with labels - label = init; - k = max(label); - R = full(sparse(1:n,label,1,n,k,n)); -elseif size(init,1) == d %initialize with only centers - k = size(init,2); - m = init; - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); - R = full(sparse(1:n,label,1,n,k,n)); -else - error('ERROR: init is not valid.'); -end - -% Done. -% update latent variables -function model = vbexpection(X, model) -alpha = model.alpha; -kappa = model.kappa; -m = model.m; -nu = model.nu; -tau = model.tau; - -d = size(X,1); - -logw = psi(0,alpha)-psi(0,sum(alpha)); -loglambda = psi(0,nu/2)-log(tau/2); - -M = bsxfun(@times,sqdistance(X,m),nu./tau); -M = bsxfun(@plus,M,d./kappa); -c = d*(loglambda-log(2*pi))/2; -logRho = bsxfun(@plus,M/(-2),logw+c); - -% [~,idx] = max(logR,[],2); -% logR = logR(:,unique(idx)); % remove empty components!!! - -logR = bsxfun(@minus,logRho,logsumexp(logRho,2)); -R = exp(logR); - -model.logR = logR; -model.R = R; - -% Done. -% update the parameters. 
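% The updates below are the Normal-Gamma posterior with a single shared
% precision per component:
%   alpha_k = alpha0 + N_k,   kappa_k = kappa0 + N_k,
%   m_k = (kappa0*m0 + N_k*xbar_k)/kappa_k,   nu_k = nu0 + d*N_k,
%   tau_k = tau0 + sum_i r_ik*|x_i - xbar_k|^2
%               + (kappa0*N_k/(kappa0+N_k))*|xbar_k - m0|^2,
% so lambda_k ~ Gamma(nu_k/2, tau_k/2) is common to all d dimensions.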
-function model = vbmaximization(X, model, prior)
-alpha0 = prior.alpha; % Dirichlet prior
-kappa0 = prior.kappa; % prior of Gaussian mean
-m0 = prior.m; % prior of Gaussian mean
-nu0 = prior.nu; % 1d Wishart
-tau0 = prior.tau; % 1d Wishart
-
-R = model.R;
-d = size(X,1);
-
-% Dirichlet
-nk = sum(R,1);
-alpha = alpha0+nk;
-% Gaussian
-kappa = kappa0+nk;
-xbar = bsxfun(@times,X*R,1./nk);
-m = bsxfun(@times,bsxfun(@plus,kappa0*m0,bsxfun(@times,xbar,nk)),1./kappa);
-% 1d Wishart
-nu = nu0+d*nk;
-tau = tau0+dot(sqdistance(X,xbar),R,1)+(kappa0*nk./(kappa0+nk)).*sqdistance(m0,xbar);
-
-model.alpha = alpha;
-model.kappa = kappa;
-model.m = m;
-model.nu = nu;
-model.tau = tau;
-
-function bound = vbound(X, model, prior)
-alpha0 = prior.alpha; % Dirichlet prior
-kappa0 = prior.kappa; % prior of Gaussian mean
-m0 = prior.m; % prior of Gaussian mean
-nu0 = prior.nu; % 1d Wishart
-tau0 = prior.tau; % 1d Wishart
-
-alpha = model.alpha;
-kappa = model.kappa;
-m = model.m;
-nu = model.nu;
-tau = model.tau;
-
-logR = model.logR;
-R = model.R;
-
-[d,k] = size(m);
-
-nk = sum(R,1);
-logw = psi(0,alpha)-psi(0,sum(alpha));
-
-Epz = nk*logw';
-Eqz = R(:)'*logR(:);
-logCalpha0 = gammaln(k*alpha0)-k*gammaln(alpha0);
-Epw = logCalpha0+(alpha0-1)*sum(logw);
-logCalpha = gammaln(sum(alpha))-sum(gammaln(alpha));
-Eqw = dot((alpha-1),logw)+logCalpha;
-
-loglambda = psi(0,nu/2)-log(tau/2);
-aib = nu./tau;
-% Epmu = 0.5*sum(d*(log(kappa0/(2*pi))+loglambda-kappa0./kappa)-kappa0*aib.*sum(bsxfun(@minus,m,m0).^2,1));
-r = bsxfun(@minus,m,m0);
-Epmu = 0.5*(d*(k*log(kappa0/(2*pi))+sum(loglambda)-sum(kappa0./kappa))-dot(kappa0*aib,dot(r,r,1)));
-Eplambda = k*(nu0*log(tau0/2)/2-gammaln(nu0/2))+(nu0/2-1)*sum(loglambda)-tau0*sum(aib)/2;
-Epthalpha = Epmu+Eplambda;
-
-Eqmu = 0.5*d*(sum(loglambda)+sum(log(kappa))-k*log(2*pi)-k);
-Eqlambda = -sum(gammaln(nu/2))+sum((nu/2-1).*psi(0,nu/2))+sum(log(tau/2))-sum(nu/2);
-Eqthalpha = Eqmu+Eqlambda;
-
-xbar = bsxfun(@times,X*R,1./nk);
-s = dot(sqdistance(X,xbar),R,1)./(d*nk);
-
-r = xbar-m;
-EpX = 0.5*(d*(loglambda-1./kappa-log(2*pi)-aib.*s)-aib.*dot(r,r,1))*nk';
-
-bound = Epz-Eqz+Epw-Eqw+Epthalpha-Eqthalpha+EpX;
-
-
-function D = sqdistance(A, B)
-D = (-2)*(A'*B)+bsxfun(@plus,dot(B,B,1),dot(A,A,1)');
diff --git a/other/Mixture/vbtsbgm.m b/other/Mixture/vbtsbgm.m
deleted file mode 100644
index 36a5b92..0000000
--- a/other/Mixture/vbtsbgm.m
+++ /dev/null
@@ -1,225 +0,0 @@
-function [label, model, L] = vbtsbgm(X, init, prior)
-% Perform variational Bayesian inference for truncated stick-breaking Gaussian mixture.
-% X: d x n data matrix
-% init: k (1 x 1) or label (1 x n, 1<=label(i)<=k) or center (d x k)
-% Reference: Pattern Recognition and Machine Learning by Christopher M. Bishop (P.474)
-% Written by Michael Chen (sth4nth@gmail.com).
-
-fprintf('Variational truncated stick-breaking Gaussian mixture: running ... 
\n'); -[d,n] = size(X); -if nargin < 3 - prior.alpha = 1; - prior.beta = 1; - prior.kappa = 1; - prior.m = mean(X,2); - prior.nu = d+1; - prior.M = eye(d); % M = inv(W) -end -tol = 1e-10; -maxiter = 1000; -L = -inf(1,maxiter); -converged = false; -t = 1; - -model.R = initialization(X,init); -while ~converged && t < maxiter - t = t+1; - model = vmax(X, model, prior); - model = vexp(X, model); - L(t) = vbound(X,model,prior)/n; - converged = abs(L(t)-L(t-1)) < tol*abs(L(t)); -end -L = L(2:t); -label = zeros(1,n); -[~,label(:)] = max(model.R,[],2); -[~,~,label] = unique(label); -if converged - fprintf('Converged in %d steps.\n',t-1); -else - fprintf('Not converged in %d steps.\n',maxiter); -end - -function R = initialization(X, init) -[d,n] = size(X); -if length(init) == 1 % random initialization - k = init; - idx = randsample(n,k); - m = X(:,idx); - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); - [u,~,label] = unique(label); - while k ~= length(u) - idx = randsample(n,k); - m = X(:,idx); - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); - [u,~,label] = unique(label); - end - R = full(sparse(1:n,label,1,n,k,n)); -elseif size(init,1) == 1 && size(init,2) == n % initialize with labels - label = init; - k = max(label); - R = full(sparse(1:n,label,1,n,k,n)); -elseif size(init,1) == d %initialize with only centers - k = size(init,2); - m = init; - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); - R = full(sparse(1:n,label,1,n,k,n)); -else - error('ERROR: init is not valid.'); -end -% Done -function model = vmax(X, model, prior) -alpha0 = prior.alpha; -beta0 = prior.beta; -kappa0 = prior.kappa; -m0 = prior.m; -nu0 = prior.nu; -M0 = prior.M; -R = model.R; - -nk = sum(R,1); % 10.51 -k = length(nk); - -alpha = alpha0+nk(1:k-1); -ngk = sum(tril(repmat(nk(:),1,k)))-nk; -beta = beta0+ngk(1:k-1); -nxbar = X*R; -kappa = kappa0+nk; % 10.60 -m = bsxfun(@times,bsxfun(@plus,kappa0*m0,nxbar),1./kappa); % 10.61 -nu = nu0+nk; % 10.63 - -[d,k] = size(m); -M = zeros(d,d,k); -sqrtR = sqrt(R); - -xbar = bsxfun(@times,nxbar,1./nk); % 10.52 -xbarm0 = bsxfun(@minus,xbar,m0); -w = (kappa0*nk./(kappa0+nk)); -for i = 1:k - Xs = bsxfun(@times,bsxfun(@minus,X,xbar(:,i)),sqrtR(:,i)'); - xbarm0i = xbarm0(:,i); - M(:,:,i) = M0+Xs*Xs'+w(i)*(xbarm0i*xbarm0i'); % 10.62 -end - -model.alpha = alpha; -model.beta = beta; -model.kappa = kappa; -model.m = m; -model.nu = nu; -model.M = M; % Whishart: M = inv(W) -% Done -function model = vexp(X, model) -alpha = model.alpha; -beta = model.beta; -kappa = model.kappa; % Gaussian -m = model.m; % Gasusian -nu = model.nu; % Whishart -M = model.M; % Whishart: inv(W) = V'*V - -n = size(X,2); -[d,k] = size(m); -Elogpi = zeros(1,k); - -Elogv = psi(0,alpha)-psi(0,alpha+beta); % E[ln(v)] -Elogu = psi(0,beta)-psi(0,alpha+beta); % E[ln(1-v)] -Elogpi(1:k-1) = Elogv+cumsum(Elogu)-Elogu; -Elogpi(k) = sum(Elogu); - -logW = zeros(1,k); -EQ = zeros(n,k); -for i = 1:k - U = chol(M(:,:,i)); - logW(i) = -2*sum(log(diag(U))); - Q = (U'\bsxfun(@minus,X,m(:,i))); - EQ(:,i) = d/kappa(i)+nu(i)*dot(Q,Q,1); % 10.64 -end - -ElogLambda = sum(psi(0,bsxfun(@minus,nu+1,(1:d)')/2),1)+d*log(2)+logW; % 10.65 - -logRho = (bsxfun(@minus,EQ,2*Elogpi+ElogLambda-d*log(2*pi)))/(-2); % 10.46 -logR = bsxfun(@minus,logRho,logsumexp(logRho,2)); % 10.49 -R = exp(logR); - -model.logR = logR; -model.R = R; - - - -% TBD -function L = vbound(X, model, prior) -alpha0 = prior.alpha; -beta0 = prior.beta; -kappa0 = prior.kappa; -m0 = prior.m; -nu0 = prior.nu; -M0 = prior.M; - -alpha = model.alpha; -beta = model.beta; 
-kappa = model.kappa; % Gaussian
-m = model.m; % Gaussian
-nu = model.nu; % Wishart
-M = model.M; % Wishart: inv(W) = V'*V
-R = model.R;
-logR = model.logR;
-
-
-[d,k] = size(m);
-nk = sum(R,1); % 10.51
-
-Elogv = psi(0,alpha)-psi(0,alpha+beta); % E[ln(v)]
-Elogu = psi(0,beta)-psi(0,alpha+beta); % E[ln(1-v)]
-Elogpi = zeros(1,k);
-Elogpi(1:k-1) = Elogv+cumsum(Elogu)-Elogu;
-Elogpi(k) = sum(Elogu);
-
-Epz = dot(nk,Elogpi);
-Eqz = dot(R(:),logR(:));
-
-logBeta0 = gammaln(alpha0+beta0)-(gammaln(alpha0)+gammaln(beta0));
-logBeta = gammaln(alpha+beta)-(gammaln(alpha)+gammaln(beta));
-Epv = (k-1)*logBeta0+(alpha0-1)*sum(Elogv)+(beta0-1)*sum(Elogu);
-Eqv = sum(logBeta)+dot(alpha-1,Elogv)+dot(beta-1,Elogu);
-L = Epz-Eqz+Epv-Eqv;
-
-
-U0 = chol(M0);
-sqrtR = sqrt(R);
-xbar = bsxfun(@times,X*R,1./nk); % 10.52
-
-logW = zeros(1,k);
-trSW = zeros(1,k);
-trM0W = zeros(1,k);
-xbarmWxbarm = zeros(1,k);
-mm0Wmm0 = zeros(1,k);
-for i = 1:k
-    U = chol(M(:,:,i));
-    logW(i) = -2*sum(log(diag(U)));
-    
-    Xs = bsxfun(@times,bsxfun(@minus,X,xbar(:,i)),sqrtR(:,i)');
-    V = chol(Xs*Xs'/nk(i));
-    Q = V/U;
-    trSW(i) = dot(Q(:),Q(:)); % equivalent to tr(SW)=trace(S/M)
-    Q = U0/U;
-    trM0W(i) = dot(Q(:),Q(:));
-    
-    q = U'\(xbar(:,i)-m(:,i));
-    xbarmWxbarm(i) = dot(q,q);
-    q = U'\(m(:,i)-m0);
-    mm0Wmm0(i) = dot(q,q);
-end
-
-ElogLambda = sum(psi(0,bsxfun(@minus,nu+1,(1:d)')/2),1)+d*log(2)+logW; % 10.65
-Epmu = sum(d*log(kappa0/(2*pi))+ElogLambda-d*kappa0./kappa-kappa0*(nu.*mm0Wmm0))/2;
-logB0 = nu0*sum(log(diag(U0)))-0.5*nu0*d*log(2)-logmvgamma(0.5*nu0,d);
-EpLambda = k*logB0+0.5*(nu0-d-1)*sum(ElogLambda)-0.5*dot(nu,trM0W);
-
-Eqmu = 0.5*sum(ElogLambda+d*log(kappa/(2*pi)))-0.5*d*k;
-logB = -nu.*(logW+d*log(2))/2-logmvgamma(0.5*nu,d);
-EqLambda = 0.5*sum((nu-d-1).*ElogLambda-nu*d)+sum(logB);
-
-EpX = 0.5*dot(nk,ElogLambda-d./kappa-nu.*trSW-nu.*xbarmWxbarm-d*log(2*pi));
-
-L = L+Epmu-Eqmu+EpLambda-EqLambda+EpX;
\ No newline at end of file
diff --git a/other/SpectralCluster/affinity.m b/other/SpectralCluster/affinity.m
deleted file mode 100644
index 94a14fd..0000000
--- a/other/SpectralCluster/affinity.m
+++ /dev/null
@@ -1,31 +0,0 @@
-function W = affinity(X, sigma, k)
-% Construct the affinity matrix of a connected undirected graph.
-% Wij=exp(-|xi-xj|^2/(2*Sigma))
-% Written by Michael Chen (sth4nth@gmail.com).
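% Example (hypothetical usage): dense Gaussian kernel vs. sparse k-NN graph:
%   W = affinity(X, 1);      % dense, Wij = exp(-|xi-xj|^2/2), zero diagonal
%   W = affinity(X, 1, 10);  % sparse symmetric 10-nearest-neighbor graph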
-X = bsxfun(@minus,X,mean(X,2)); -S = dot(X,X,1); -if nargin < 3 - k = 0; -end -if nargin < 2 - sigma = mean(S); -end - -n = size(X,2); -D = (-2)*(X'*X)+bsxfun(@plus,S,S'); - -if k == 0 - W = exp(D/((-2)*sigma)); - W(sub2ind([n,n],1:n,1:n)) = 0; % remove diagonal -else - [ND, NI] = sort(D); - ND = ND(2:k+1,:); - NI = NI(2:k+1,:); - XI = repmat(1:n,k,1); - W = sparse(XI(:),NI(:),exp(ND(:)/((-2)*sigma)),n,n); - W = max(W,W'); % force symmetry (not necessary for digraph) -end - - - - diff --git a/other/SpectralCluster/bncut.m b/other/SpectralCluster/bncut.m deleted file mode 100644 index b365766..0000000 --- a/other/SpectralCluster/bncut.m +++ /dev/null @@ -1,19 +0,0 @@ -function [y, ratio] = bncut(W) -% Bipartitioning normalized cut -mincut = 1; % minimal number of nodes to be cut off - -n = size(W,2); -[L,d] = laplacian(W,'n'); -V = symeig(L,2)'; -%% -[~,idx] = sort(V(2,:)./sqrt(d)); -Vol_A = cumsum(d(idx)); -Vol_B = sum(d)-Vol_A; - -S = triu(W(idx,idx)); -W_AB = full(cumsum(sum(S'-S,1))); - -ratios = W_AB.*(1./Vol_A+1./Vol_B)/2; -[ratio,cut] = min(ratios(mincut:n-mincut)); -y = true(1,n); -y(idx(1:cut+mincut-1)) = false; diff --git a/other/SpectralCluster/discretize.m b/other/SpectralCluster/discretize.m deleted file mode 100644 index 3851bbd..0000000 --- a/other/SpectralCluster/discretize.m +++ /dev/null @@ -1,118 +0,0 @@ -function label = discretize(V, d, m) -% Perform discretization on relaxed real value solution of spectral clustering -% V: k x n eigenvectors -% d: 1 x n degree vector -% Written by Michael Chen (sth4nth@gmail.com). -if nargin < 3 - m = 1; -end -switch m - case 1 - label = ys(V,d); - case 2 - label = njw(V); - case 3 - label = bj(V,d); - case 4 - label = zj(V(2:end,:),d); - otherwise - error('The parameter value of m is not supported.'); -end - -function label = ys(X, d) % Multiclass Spectral Clustering by S.Yu & J.Shi -[k,n] = size(X); -X = bsxfun(@times,X,1./sqrt(d+eps)); -X = normalize(X); -idx = initialize(X); -R = X(:,idx); -% s = inf; -% while true -% X = R'*X; -% [~,label] = max(X,[],1); -% [U,S,V] = svd(X*full(sparse(1:n,label,1,n,k,n))); -% -% l = s; -% s = trace(S); -% if abs(s-l) < eps; break; end; -% R = U*V'; -% end -X = R'*X; -[~,label] = max(X,[],1); -last = 0; -while any(label ~= last) - [U,~,V] = svd(X*full(sparse(1:n,label,1,n,k,n))); - R = U*V'; - X = R'*X; - last = label; - [~,label] = max(X,[],1); -end - - -function label = njw(X) % On Spectral Clustering by A.Y.Ng, M.I.Jordan & Y.Weiss -X = normalize(X); -idx = initialize(X); -label = wkmeans(X,idx,1); % standard kmeans. - -function label = bj(X, d) % Learning Spectral Clustering by F.R.Bach & M.I.Jordans -X = bsxfun(@times,X,1./sqrt(d+eps)); -idx = initialize(X); -label = wkmeans(X,idx,d); - -function label = zj(X, d) % Multiway Spectral Clustering by Z.Zhang & M.I.Jordan -k = size(X,1)+1; -n = size(X,2); -G = eye(k,k-1)-repmat(1./k,k,k-1); -w = 1./sqrt(d+eps); -idx = initialize(X); -R = normalize(X(:,idx)); -% s = inf; -% while true -% Y = bsxfun(@times,R'*X,w); -% [~,label] = max([Y;zeros(1,n)],[],1); -% [U,S,V]=svd(X*full(sparse(1:n,label,1,n,k,n))*G); -% -% l = s; -% s = trace(S); -% if abs(s-l) < eps; break; end; -% R = U*V'; -% end -Y = bsxfun(@times,R'*X,w); -[~,label] = max([Y;zeros(1,n)],[],1); -last = 0; -while any(label ~= last) - [U,~,V]=svd(X*full(sparse(1:n,label,1,n,k,n))*G); - R = U*V'; - Y = bsxfun(@times,R'*X,w); - last = label; - [~,label] = max([Y;zeros(1,n)],[],1); -end - -function idx = initialize(X) -% Choose k approximately orthogonal samples. 
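% The first index is drawn uniformly; each later pick minimizes the
% accumulated |cosine| similarity to the columns already chosen, so the
% selected samples are nearly mutually orthogonal.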
-[k,n] = size(X); -X = normalize(X); -idx = zeros(1,k); -idx(1) = ceil(n*rand); -c = zeros(1,n); -for i = 2:k - c = c+abs(X(:,idx(i-1))'*X); - [~,idx(i)] = min(c); -end - -function X = normalize(X) -% Normalize column vectors. -X = bsxfun(@times,X,1./sqrt(dot(X,X,1))); - -function label = wkmeans(X, init, w) -% Perform weighted k-means initialized by centers. -[k,n] = size(X); -m = X(:,init); -[~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); -last = 0; -while any(label ~= last) - E = sparse(1:n,label,w,n,k,n); - m = bsxfun(@times,X*E,1./full(sum(E,1))); - last = label; - [~,label] = max(bsxfun(@minus,m'*X,dot(m,m,1)'/2),[],1); -end - diff --git a/other/SpectralCluster/knn.m b/other/SpectralCluster/knn.m deleted file mode 100644 index 720bd0c..0000000 --- a/other/SpectralCluster/knn.m +++ /dev/null @@ -1,7 +0,0 @@ -function [D, N] = knn(X, Y, k) -% Find k nearest neighbors in Y of each sample in X. -% Written by Michael Chen (sth4nth@gmail.com). -D = sqdistance(Y, X); -[D, N] = sort(D); -N = N(2:(1+k),:); -D = D(2:(1+k),:); diff --git a/other/SpectralCluster/laplacian.m b/other/SpectralCluster/laplacian.m deleted file mode 100644 index a41b309..0000000 --- a/other/SpectralCluster/laplacian.m +++ /dev/null @@ -1,36 +0,0 @@ -function [L, d] = laplacian(W, m) -% Compute (normalized) Laplacian matrix from an affinity matrix of an undirected graph. -% input: -% W: a symmetric adjacent matrix of a undirected graph -% m: m == 'u' construct unnormalized Laplacian L=D-W -% m == 'n' construct nomalized Laplacian L=I-D^(-1/2)*W*D^(-1/2) -% Written by Michael Chen (sth4nth@gmail.com). -if nargin == 1 - m = 'u'; -end - -n = size(W,2); -d = sum(W,1); -if issparse(W) - switch m - case 'u' - L = spdiags(d(:),0,n,n)-W; - case 'n' - r = spdiags(sqrt(1./d(:)),0,n,n); - L = speye(n)-r*W*r; - L = (L+L')/2; - otherwise - error('The parameter is not supported.'); - end - d = full(d); -else - switch m - case 'u' - L = diag(d)-W; - case 'n' - r = sqrt(1./d); - L = eye(n)-(r'*r).*W; - otherwise - error('The parameter is not supported.'); - end -end \ No newline at end of file diff --git a/other/SpectralCluster/mncut.m b/other/SpectralCluster/mncut.m deleted file mode 100644 index c7fdf4b..0000000 --- a/other/SpectralCluster/mncut.m +++ /dev/null @@ -1,11 +0,0 @@ -function label = mncut(W, c, m) -% Multiway normailized cut -% W: symetric affinity matrix -% c: number of clusters -% m: {1,2,3,4} method for discretization -if nargin < 3 - m = 1; -end -[L,d] = laplacian(W,'n'); -V = symeig(L,c)'; -label = discretize(V,d,m); \ No newline at end of file diff --git a/other/SpectralCluster/sc.m b/other/SpectralCluster/sc.m deleted file mode 100644 index 89e67a2..0000000 --- a/other/SpectralCluster/sc.m +++ /dev/null @@ -1,15 +0,0 @@ -function label = sc(X, k, opt) -% Perform multiclass spectral clustering (normalized cut). -% Written by Michael Chen (sth4nth@gmail.com). -if nargin < 3 - sigma = 1; - nnn = 0; - m = 1; -else - sigma = fieldvalue(opt,'sigma',1); - nnn = fieldvalue(opt,'nnn',0); % number of nearest neighbors - m = fieldvalue(opt,'method',1); -end - -W = affinity(standardize(X),sigma,nnn); -label = mncut(W,k,m); \ No newline at end of file diff --git a/other/SpectralCluster/transition.m b/other/SpectralCluster/transition.m deleted file mode 100644 index a306a61..0000000 --- a/other/SpectralCluster/transition.m +++ /dev/null @@ -1,10 +0,0 @@ -function P = transition(W) -% Compute a transition matrix from an affinity matrix. -% Written by Michael Chen (sth4nth@gmail.com). 
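A note on the body below: the sparse branch calls spdiags with n, but n is
never defined in this function. A corrected sketch, assuming W is a square
n x n affinity matrix:

    n = size(W,1);
    P = spdiags(1./sum(W,2),0,n,n)*W;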
-if issparse(W) - P = spdiags(1./sum(W,2),0,n,n)*W; -else - P = bsxfun(@times,W,1./sum(W,2)); -end - - From 7da5797b1d09579516a4e60331e6d4033a23d6a1 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Tue, 19 Jan 2016 15:47:52 +0800 Subject: [PATCH 131/149] reorganize unorganized files --- {other => chapter06}/kn2sd.m | 0 {other => chapter06}/sd2kn.m | 0 {other => chapter12}/gso.m | 0 other/cempca.m => chapter12/pcaEmC.m | 2 +- {other => common}/invpd.m | 9 ++++----- {other => common}/logdet.m | 4 ++-- {other => common}/symeig.m | 0 other/geig.m | 13 ------------- other/logkdepdf.m | 4 ---- 9 files changed, 7 insertions(+), 25 deletions(-) rename {other => chapter06}/kn2sd.m (100%) rename {other => chapter06}/sd2kn.m (100%) rename {other => chapter12}/gso.m (100%) rename other/cempca.m => chapter12/pcaEmC.m (95%) rename {other => common}/invpd.m (56%) rename {other => common}/logdet.m (76%) rename {other => common}/symeig.m (100%) delete mode 100644 other/geig.m delete mode 100644 other/logkdepdf.m diff --git a/other/kn2sd.m b/chapter06/kn2sd.m similarity index 100% rename from other/kn2sd.m rename to chapter06/kn2sd.m diff --git a/other/sd2kn.m b/chapter06/sd2kn.m similarity index 100% rename from other/sd2kn.m rename to chapter06/sd2kn.m diff --git a/other/gso.m b/chapter12/gso.m similarity index 100% rename from other/gso.m rename to chapter12/gso.m diff --git a/other/cempca.m b/chapter12/pcaEmC.m similarity index 95% rename from other/cempca.m rename to chapter12/pcaEmC.m index 95109e4..645d72d 100644 --- a/other/cempca.m +++ b/chapter12/pcaEmC.m @@ -1,4 +1,4 @@ -function V = cempca(X, p) +function V = pcaEmC(X, p) % Perform Constrained EM like algorithm for PCA. % X: d x n data matrix % p: dimension of target space diff --git a/other/invpd.m b/common/invpd.m similarity index 56% rename from other/invpd.m rename to common/invpd.m index da27166..198836d 100644 --- a/other/invpd.m +++ b/common/invpd.m @@ -1,10 +1,9 @@ -function V = invpd(A) -% Compute invert of a positive definite matrix +function V = solvpd(A,B) +% Compute A\B where A is a positive definite matrix % A: a positive difinie matrix % Written by Michael Chen (sth4nth@gmail.com). -I = eye(size(A)); -[R,p] = chol(A); +[U,p] = chol(A); if p > 0 error('ERROR: the matrix is not positive definite.'); end -V = R\(R'\I); \ No newline at end of file +V = U\(U'\B); \ No newline at end of file diff --git a/other/logdet.m b/common/logdet.m similarity index 76% rename from other/logdet.m rename to common/logdet.m index 8cf1dc7..0f280da 100644 --- a/other/logdet.m +++ b/common/logdet.m @@ -1,9 +1,9 @@ function y = logdet(A) % Compute log(det(A)) where A is positive definite. % Written by Michael Chen (sth4nth@gmail.com). -[R,p] = chol(A); +[U,p] = chol(A); if p > 0 y = -inf; else - y = 2*sum(log(diag(R))); + y = 2*sum(log(diag(U))); end \ No newline at end of file diff --git a/other/symeig.m b/common/symeig.m similarity index 100% rename from other/symeig.m rename to common/symeig.m diff --git a/other/geig.m b/other/geig.m deleted file mode 100644 index bac84fb..0000000 --- a/other/geig.m +++ /dev/null @@ -1,13 +0,0 @@ -function U = geig(C, S) -% Solve generalized eigen problem CU=aSU. U simultaneously diagonalize C and S. -% U'SU = I -% This is concept verify code, not mean to be used. 
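% Sketch of the intended construction: write S = Q*A^2*Q' (A holds the
% square roots of the eigenvalues), let R = A\Q' so that R*S*R' = I, take
% the eigenvectors V of R*C*R', and return U = R'*V, which satisfies
% U'*S*U = I while diagonalizing C. Note the body below uses A\Q and
% (Q'/A)*V; the transposes need care if this sketch is ever adapted.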
-[Q,A] = eig(S); - -A = sqrt(A); -R = A\Q; - -[V,~] = eig(R*C*R'); - -U = (Q'/A)*V; - diff --git a/other/logkdepdf.m b/other/logkdepdf.m deleted file mode 100644 index 9f3a27c..0000000 --- a/other/logkdepdf.m +++ /dev/null @@ -1,4 +0,0 @@ -function r = logkdepdf(X, Y, sigma2) - -d = size(X,1); -r = logsumexp(sqdistance(Y,X)/(-2*sigma2)-(log(2*pi)+d*log(sigma2))/2,1); \ No newline at end of file From 70d899b4f161d242da9a16175c0e95709e9fa442 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Wed, 20 Jan 2016 18:03:24 +0800 Subject: [PATCH 132/149] added unfinished Gibbs --- chapter11/GaussianWishart.m | 67 +++++++++++++++++++++++++++++++++++++ chapter11/mixGaussGb.m | 27 ++++++++++++--- 2 files changed, 90 insertions(+), 4 deletions(-) create mode 100644 chapter11/GaussianWishart.m diff --git a/chapter11/GaussianWishart.m b/chapter11/GaussianWishart.m new file mode 100644 index 0000000..5b603e2 --- /dev/null +++ b/chapter11/GaussianWishart.m @@ -0,0 +1,67 @@ + +classdef GaussianWishart + properties + nu_ + kappa_ + m_ + W_ + end + + methods + function obj = GaussianWishart(prior) + obj.kappa_ = prior.kappa; + obj.m_ = prior.m; + obj.nu_ = prior.nu; + obj.W_ = prior.W; + end + + function obj = addSample(obj, X) + kappa0 = obj.kappa_; + m0 = obj.m_; + nu0 = obj.nu_; + W0 = obj.W_; + + n = size(X,2); + xbar = mean(X,2); + kappa = kappa0+n; + m = (kappa0*m0+n*xbar)/kappa; + xm = xbar-m0; + W = W0+X*X'+xm*xm'*kappa*n/(kappa0+n); + nu = nu0+n; + + obj.kappa_ = kappa; + obj.m_ = m; + obj.nu_ = nu; + obj.W_ = W; + end + + function obj = delSample(obj, X) + kappa0 = obj.kappa_; + m0 = obj.m_; + nu0 = obj.nu_; + W0 = obj.W_; + + n = size(X,2); + xbar = mean(X,2); + kappa = kappa0+n; + m = (kappa0*m0+n*xbar)/kappa; + xm = xbar-m0; + W = W0+X*X'+xm*xm'*kappa*n/(kappa0+n); + nu = nu0+n; + + obj.kappa_ = kappa; + obj.m_ = m; + obj.nu_ = nu; + obj.W_ = W; + end + + function p = predict(obj, X) + kappa0 = obj.kappa_; + m0 = obj.m_; + nu0 = obj.nu_; + W0 = obj.W_; + + + end + end +end \ No newline at end of file diff --git a/chapter11/mixGaussGb.m b/chapter11/mixGaussGb.m index acdc86a..0ab335e 100644 --- a/chapter11/mixGaussGb.m +++ b/chapter11/mixGaussGb.m @@ -1,12 +1,31 @@ -function [label, model, llh] = mixGaussGb(X, init) +function [label, model, llh] = mixGaussGb(X, prior) % Collapsed Gibbs sampling for (infinite) Gaussian mixture model (a.k.a. 
% DPGM) +n = size(X,2); - -[d,n] = size(X); +Theta = prior.theta; +alpha = prior.alpha; +Z = ones(1,n); maxIter = 1000; for iter = 1:maxIter for i = randperm(n) + rest = true(1,n); + rest(i) = false; + x = X(:,i); + z = Z(:,i); + Theta{z} = delSample(Theta{z},x); + logFx = cellfun(x,Theta.Pred); + logNk = log(sum(Z(:,rest),2)); + logNk(1) = log(alpha); + logR = logFx+logNk; + p = exp(logR-logsumexp(logR)); + z = discreteRnd(p); + Z(:,i) = z; + Theta{z} = addSample(Theta{z},x); + index = any(Z,2); + Z = [zeros(1,n);Z(index)]; + Theta = {prior.theta,Theta{index}}; % remove empty end -end \ No newline at end of file +end + From d2163caae052045bc358b8e5bbc8125ed52a0628 Mon Sep 17 00:00:00 2001 From: sth4nth Date: Fri, 22 Jan 2016 14:38:02 +0800 Subject: [PATCH 133/149] added sequential update Gaussian class --- chapter11/Gaussian.m | 61 +++++++++++++++++++++++++++++++++ chapter11/GaussianWishart.m | 67 ------------------------------------- chapter11/demo.m | 23 +++++++++++++ 3 files changed, 84 insertions(+), 67 deletions(-) create mode 100644 chapter11/Gaussian.m delete mode 100644 chapter11/GaussianWishart.m create mode 100644 chapter11/demo.m diff --git a/chapter11/Gaussian.m b/chapter11/Gaussian.m new file mode 100644 index 0000000..54c30b7 --- /dev/null +++ b/chapter11/Gaussian.m @@ -0,0 +1,61 @@ + +classdef Gaussian + properties + n_ + mu_ + U_ + end + + methods + function obj = Gaussian(X) + n = size(X,2); + mu = mean(X,2); + U = chol(X*X'); + + obj.n_ = n; + obj.mu_ = mu; + obj.U_ = U; + end + + function obj = addSample(obj, x) + n = obj.n_; + mu = obj.mu_; + U = obj.U_; + + n = n+1; + mu = mu+(x-mu)/n; + U = cholupdate(U,x,'+'); + + obj.n_ = n; + obj.mu_ = mu; + obj.U_ = U; + end + + function obj = delSample(obj, x) + n = obj.n_; + mu = obj.mu_; + U = obj.U_; + + n = n-1; + mu = mu-(x-mu)/n; + U = cholupdate(U,x,'-'); + + obj.n_ = n; + obj.mu_ = mu; + obj.U_ = U; + end + + function y = logPdf(obj,X) + n = obj.n_; + mu = obj.mu_; + U = obj.U_; + d = size(X,1); + + U = cholupdate(U/sqrt(n),mu,'-'); % Sigma=X*X'/n-mu*mu + Q = U'\bsxfun(@minus,X,mu); + q = dot(Q,Q,1); % quadratic term (M distance) + c = d*log(2*pi)+2*sum(log(diag(U))); % normalization constant + y = -0.5*(c+q); + end + end +end \ No newline at end of file diff --git a/chapter11/GaussianWishart.m b/chapter11/GaussianWishart.m deleted file mode 100644 index 5b603e2..0000000 --- a/chapter11/GaussianWishart.m +++ /dev/null @@ -1,67 +0,0 @@ - -classdef GaussianWishart - properties - nu_ - kappa_ - m_ - W_ - end - - methods - function obj = GaussianWishart(prior) - obj.kappa_ = prior.kappa; - obj.m_ = prior.m; - obj.nu_ = prior.nu; - obj.W_ = prior.W; - end - - function obj = addSample(obj, X) - kappa0 = obj.kappa_; - m0 = obj.m_; - nu0 = obj.nu_; - W0 = obj.W_; - - n = size(X,2); - xbar = mean(X,2); - kappa = kappa0+n; - m = (kappa0*m0+n*xbar)/kappa; - xm = xbar-m0; - W = W0+X*X'+xm*xm'*kappa*n/(kappa0+n); - nu = nu0+n; - - obj.kappa_ = kappa; - obj.m_ = m; - obj.nu_ = nu; - obj.W_ = W; - end - - function obj = delSample(obj, X) - kappa0 = obj.kappa_; - m0 = obj.m_; - nu0 = obj.nu_; - W0 = obj.W_; - - n = size(X,2); - xbar = mean(X,2); - kappa = kappa0+n; - m = (kappa0*m0+n*xbar)/kappa; - xm = xbar-m0; - W = W0+X*X'+xm*xm'*kappa*n/(kappa0+n); - nu = nu0+n; - - obj.kappa_ = kappa; - obj.m_ = m; - obj.nu_ = nu; - obj.W_ = W; - end - - function p = predict(obj, X) - kappa0 = obj.kappa_; - m0 = obj.m_; - nu0 = obj.nu_; - W0 = obj.W_; - - - end - end -end \ No newline at end of file diff --git a/chapter11/demo.m b/chapter11/demo.m 
new file mode 100644 index 0000000..bd342d5 --- /dev/null +++ b/chapter11/demo.m @@ -0,0 +1,23 @@ +d = 2; +n = 100; +X = randn(d,n); +x = rand(d,1); +mu = mean(X,2); +Xo = bsxfun(@minus,X,mu); +Sigma = Xo*Xo'/n; + +XX = Xo*Xo'/n; +XXX = X*X'/n-mu*mu'; +U = chol(XX); + +U_ = chol(Xo(:,2:end)*Xo(:,2:end)'); +UU = cholupdate(U,(X(:,1)-mu)); + + + + +p = logGauss(x,mu,Sigma); +gauss = Gaussian(X(:,3:end)); +gauss = gauss.addSample(X(:,1)); +gauss = gauss.addSample(X(:,2)); +p2 = gauss.logPdf(x); \ No newline at end of file From 498dc35ad1f8d6bcfcfcf1b37972739a50b9898f Mon Sep 17 00:00:00 2001 From: sth4nth Date: Fri, 22 Jan 2016 14:40:45 +0800 Subject: [PATCH 134/149] added sequential update Gaussian demo --- chapter11/demo.m | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/chapter11/demo.m b/chapter11/demo.m index bd342d5..36a0745 100644 --- a/chapter11/demo.m +++ b/chapter11/demo.m @@ -1,23 +1,16 @@ + +% demo for sequential update Gaussian +close all; clear; d = 2; n = 100; X = randn(d,n); -x = rand(d,1); +x = randn(d,1); + mu = mean(X,2); Xo = bsxfun(@minus,X,mu); Sigma = Xo*Xo'/n; +p1 = logGauss(x,mu,Sigma); -XX = Xo*Xo'/n; -XXX = X*X'/n-mu*mu'; -U = chol(XX); - -U_ = chol(Xo(:,2:end)*Xo(:,2:end)'); -UU = cholupdate(U,(X(:,1)-mu)); - - - - -p = logGauss(x,mu,Sigma); -gauss = Gaussian(X(:,3:end)); -gauss = gauss.addSample(X(:,1)); -gauss = gauss.addSample(X(:,2)); -p2 = gauss.logPdf(x); \ No newline at end of file +gauss = Gaussian(X(:,3:end)).addSample(X(:,1)).addSample(X(:,2)); +p2 = gauss.logPdf(x); +abs(p1-p2) \ No newline at end of file From 2892de3ef0a4b723f7d9ec7547f1ec4045120611 Mon Sep 17 00:00:00 2001 From: sth4nth Date: Fri, 22 Jan 2016 14:59:28 +0800 Subject: [PATCH 135/149] tweak --- chapter02/logGauss.m | 2 +- chapter02/logSt.m | 6 +++--- chapter11/Gaussian.m | 14 ++++++++++++++ chapter11/demo.m | 7 ++++--- 4 files changed, 22 insertions(+), 7 deletions(-) diff --git a/chapter02/logGauss.m b/chapter02/logGauss.m index c1acd31..6ab2606 100644 --- a/chapter02/logGauss.m +++ b/chapter02/logGauss.m @@ -32,5 +32,5 @@ c = d*(log(2*pi)+2*log(sigma)); % normalization constant y = -0.5*bsxfun(@plus,q,c); else - error('Parameters mismatched.'); + error('Parameters are mismatched.'); end diff --git a/chapter02/logSt.m b/chapter02/logSt.m index dbd2405..93869bb 100644 --- a/chapter02/logSt.m +++ b/chapter02/logSt.m @@ -1,10 +1,10 @@ function y = logSt(X, mu, sigma, v) -% Compute log pdf of a student-t distribution. +% Compute log pdf of a Student's t distribution. % Written by mo Chen (sth4nth@gmail.com). 
[d,k] = size(mu); if size(sigma,1)==d && size(sigma,2)==d && k==1 - [R,p]= cholcov(sigma,0); + [R,p]= chol(sigma,0); if p ~= 0 error('ERROR: sigma is not SPD.'); end @@ -29,5 +29,5 @@ c = gammaln((v+d)/2)-gammaln(v/2)-d*log(pi*v.*sigma)/2; y = bsxfun(@plus,o,c); else - error('Parameters mismatched.'); + error('Parameters are mismatched.'); end diff --git a/chapter11/Gaussian.m b/chapter11/Gaussian.m index 54c30b7..830bc11 100644 --- a/chapter11/Gaussian.m +++ b/chapter11/Gaussian.m @@ -57,5 +57,19 @@ c = d*log(2*pi)+2*sum(log(diag(U))); % normalization constant y = -0.5*(c+q); end + + function y = logTPdf(obj, X) + [R,p]= cholcov(sigma,0); + if p ~= 0 + error('ERROR: sigma is not SPD.'); + end + X = bsxfun(@minus,X,mu); + Q = R'\X; + q = dot(Q,Q,1); % quadratic term (M distance) + o = -log(1+q/v)*((v+d)/2); + c = gammaln((v+d)/2)-gammaln(v/2)-(d*log(v*pi)+2*sum(log(diag(R))))/2; + y = c+o; + + end end end \ No newline at end of file diff --git a/chapter11/demo.m b/chapter11/demo.m index 36a0745..2911779 100644 --- a/chapter11/demo.m +++ b/chapter11/demo.m @@ -1,5 +1,5 @@ -% demo for sequential update Gaussian +%% demo for sequential update Gaussian close all; clear; d = 2; n = 100; @@ -11,6 +11,7 @@ Sigma = Xo*Xo'/n; p1 = logGauss(x,mu,Sigma); -gauss = Gaussian(X(:,3:end)).addSample(X(:,1)).addSample(X(:,2)); +gauss = Gaussian(X(:,3:end)).addSample(X(:,1)).addSample(X(:,2)).addSample(X(:,3)).delSample(X(:,3)); p2 = gauss.logPdf(x); -abs(p1-p2) \ No newline at end of file +abs(p1-p2) +%% \ No newline at end of file From 5d381487095640c9a05b620721fc8aeb66e8d240 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Fri, 22 Jan 2016 21:57:02 +0800 Subject: [PATCH 136/149] undo --- chapter11/Gaussian.m | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/chapter11/Gaussian.m b/chapter11/Gaussian.m index 830bc11..54c30b7 100644 --- a/chapter11/Gaussian.m +++ b/chapter11/Gaussian.m @@ -57,19 +57,5 @@ c = d*log(2*pi)+2*sum(log(diag(U))); % normalization constant y = -0.5*(c+q); end - - function y = logTPdf(obj, X) - [R,p]= cholcov(sigma,0); - if p ~= 0 - error('ERROR: sigma is not SPD.'); - end - X = bsxfun(@minus,X,mu); - Q = R'\X; - q = dot(Q,Q,1); % quadratic term (M distance) - o = -log(1+q/v)*((v+d)/2); - c = gammaln((v+d)/2)-gammaln(v/2)-(d*log(v*pi)+2*sum(log(diag(R))))/2; - y = c+o; - - end end end \ No newline at end of file From 2ecbed4980e61d99228d02d26b7c408d3f376e8c Mon Sep 17 00:00:00 2001 From: sth4nth Date: Mon, 25 Jan 2016 11:08:52 +0800 Subject: [PATCH 137/149] temp --- chapter11/Gaussian.m | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/chapter11/Gaussian.m b/chapter11/Gaussian.m index 830bc11..00e1457 100644 --- a/chapter11/Gaussian.m +++ b/chapter11/Gaussian.m @@ -51,25 +51,11 @@ U = obj.U_; d = size(X,1); - U = cholupdate(U/sqrt(n),mu,'-'); % Sigma=X*X'/n-mu*mu + U = cholupdate(U/sqrt(n),mu,'-'); % Sigma=X*X'/n-mu*mu' Q = U'\bsxfun(@minus,X,mu); q = dot(Q,Q,1); % quadratic term (M distance) c = d*log(2*pi)+2*sum(log(diag(U))); % normalization constant y = -0.5*(c+q); end - - function y = logTPdf(obj, X) - [R,p]= cholcov(sigma,0); - if p ~= 0 - error('ERROR: sigma is not SPD.'); - end - X = bsxfun(@minus,X,mu); - Q = R'\X; - q = dot(Q,Q,1); % quadratic term (M distance) - o = -log(1+q/v)*((v+d)/2); - c = gammaln((v+d)/2)-gammaln(v/2)-(d*log(v*pi)+2*sum(log(diag(R))))/2; - y = c+o; - - end end end \ No newline at end of file From 20ed39b23f5b79a9f2801e3002fe5c5c05fc7d48 Mon Sep 17 00:00:00 2001 From: sth4nth Date: Mon, 25 Jan 2016 11:17:49 
+0800 Subject: [PATCH 138/149] rename --- chapter11/{Gaussian.m => Gauss.m} | 3 +- chapter11/GaussWishart.m | 61 +++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 2 deletions(-) rename chapter11/{Gaussian.m => Gauss.m} (98%) create mode 100644 chapter11/GaussWishart.m diff --git a/chapter11/Gaussian.m b/chapter11/Gauss.m similarity index 98% rename from chapter11/Gaussian.m rename to chapter11/Gauss.m index 00e1457..b9cb56a 100644 --- a/chapter11/Gaussian.m +++ b/chapter11/Gauss.m @@ -1,5 +1,4 @@ - -classdef Gaussian +classdef Gauss properties n_ mu_ diff --git a/chapter11/GaussWishart.m b/chapter11/GaussWishart.m new file mode 100644 index 0000000..ad39690 --- /dev/null +++ b/chapter11/GaussWishart.m @@ -0,0 +1,61 @@ +classdef GaussWishart + properties + kappa_ + mu_ + nu_ + U_ + end + + methods + function obj = GaussianWishart(kappa,mu,nu,W) + U = chol(W); + + obj.kappa_ = kappa; + obj.mu_ = mu; + obj.nu_ = nu; + obj.U_ = U; + end + + function obj = addSample(obj, x) + kappa = obj.kappa_; + mu = obj.mu_; + nu = obj.nu_; + U = obj.U_; + + kappa = kappa+1; + mu = mu+(x-mu)/n; + nu = nu+1; + U = cholupdate(U,x,'+'); + + obj.kappa_ = kappa; + obj.mu_ = mu; + obj.nu_ = nu; + obj.U_ = U; + end + + function obj = delSample(obj, x) + kappa = obj.kappa_; + mu = obj.mu_; + nu = obj.nu_; + U = obj.U_; + + kappa = kappa-1; + mu = mu-(x-mu)/n; + nu = nu-1; + U = cholupdate(U,x,'-'); + + obj.kappa_ = kappa; + obj.mu_ = mu; + obj.nu_ = nu; + obj.U_ = U; + end + + function y = logPredPdf(obj,X) + kappa = obj.kappa_; + mu = obj.mu_; + nu = obj.nu_; + U = obj.U_; + + end + end +end From c678152c47d0f6aac9c70392477865377e5b6b8e Mon Sep 17 00:00:00 2001 From: sth4nth Date: Mon, 25 Jan 2016 17:00:38 +0800 Subject: [PATCH 139/149] fix constructor --- chapter11/Gauss.m | 2 +- chapter11/GaussWishart.m | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/chapter11/Gauss.m b/chapter11/Gauss.m index b9cb56a..f8e6507 100644 --- a/chapter11/Gauss.m +++ b/chapter11/Gauss.m @@ -6,7 +6,7 @@ end methods - function obj = Gaussian(X) + function obj = Gauss(X) n = size(X,2); mu = mean(X,2); U = chol(X*X'); diff --git a/chapter11/GaussWishart.m b/chapter11/GaussWishart.m index ad39690..84fa6ad 100644 --- a/chapter11/GaussWishart.m +++ b/chapter11/GaussWishart.m @@ -7,7 +7,7 @@ end methods - function obj = GaussianWishart(kappa,mu,nu,W) + function obj = GaussWishart(kappa,mu,nu,W) U = chol(W); obj.kappa_ = kappa; From 18ab242da2633c3df1fafed6cbbef777670984ae Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Mon, 25 Jan 2016 20:34:53 +0800 Subject: [PATCH 140/149] finished GaussWishart --- chapter02/logSt.m | 2 +- chapter11/GaussWishart.m | 36 +++++++++++++++++++------------ chapter11/demo.m | 46 +++++++++++++++++++++++++++++++++------- 3 files changed, 62 insertions(+), 22 deletions(-) diff --git a/chapter02/logSt.m b/chapter02/logSt.m index 93869bb..2321f03 100644 --- a/chapter02/logSt.m +++ b/chapter02/logSt.m @@ -4,7 +4,7 @@ [d,k] = size(mu); if size(sigma,1)==d && size(sigma,2)==d && k==1 - [R,p]= chol(sigma,0); + [R,p]= chol(sigma); if p ~= 0 error('ERROR: sigma is not SPD.'); end diff --git a/chapter11/GaussWishart.m b/chapter11/GaussWishart.m index 84fa6ad..6c98657 100644 --- a/chapter11/GaussWishart.m +++ b/chapter11/GaussWishart.m @@ -1,61 +1,71 @@ classdef GaussWishart properties kappa_ - mu_ + m_ nu_ U_ end methods - function obj = GaussWishart(kappa,mu,nu,W) - U = chol(W); - + function obj = GaussWishart(kappa,m,nu,S) + U = chol(S+kappa*m*m'); obj.kappa_ = kappa; - obj.mu_ = mu; + 
obj.m_ = m; obj.nu_ = nu; obj.U_ = U; end function obj = addSample(obj, x) kappa = obj.kappa_; - mu = obj.mu_; + m = obj.m_; nu = obj.nu_; U = obj.U_; kappa = kappa+1; - mu = mu+(x-mu)/n; + m = m+(x-m)/kappa; nu = nu+1; U = cholupdate(U,x,'+'); obj.kappa_ = kappa; - obj.mu_ = mu; + obj.m_ = m; obj.nu_ = nu; obj.U_ = U; end function obj = delSample(obj, x) kappa = obj.kappa_; - mu = obj.mu_; + m = obj.m_; nu = obj.nu_; U = obj.U_; kappa = kappa-1; - mu = mu-(x-mu)/n; + m = m-(x-m)/kappa; nu = nu-1; U = cholupdate(U,x,'-'); obj.kappa_ = kappa; - obj.mu_ = mu; + obj.m_ = m; obj.nu_ = nu; obj.U_ = U; end function y = logPredPdf(obj,X) kappa = obj.kappa_; - mu = obj.mu_; + m = obj.m_; nu = obj.nu_; U = obj.U_; - + + d = size(X,1); + v = (nu-d+1); + r = (1+1/kappa)/v; + U = cholupdate(U,sqrt(kappa)*m,'-')*sqrt(r); + + X = bsxfun(@minus,X,m); + Q = U'\X; + q = dot(Q,Q,1); % quadratic term (M distance) + o = -log(1+q/v)*((v+d)/2); + c = gammaln((v+d)/2)-gammaln(v/2)-(d*log(v*pi)+2*sum(log(diag(U))))/2; + y = c+o; end end end diff --git a/chapter11/demo.m b/chapter11/demo.m index 2911779..666cdf7 100644 --- a/chapter11/demo.m +++ b/chapter11/demo.m @@ -1,17 +1,47 @@ %% demo for sequential update Gaussian +% close all; clear; +% d = 2; +% n = 100; +% X = randn(d,n); +% x = randn(d,1); +% +% mu = mean(X,2); +% Xo = bsxfun(@minus,X,mu); +% Sigma = Xo*Xo'/n; +% p1 = logGauss(x,mu,Sigma); +% +% gauss = Gaussian(X(:,3:end)).addSample(X(:,1)).addSample(X(:,2)).addSample(X(:,3)).delSample(X(:,3)); +% p2 = gauss.logPdf(x); +% abs(p1-p2) +%% Gaussian Wishart close all; clear; d = 2; n = 100; X = randn(d,n); x = randn(d,1); -mu = mean(X,2); -Xo = bsxfun(@minus,X,mu); -Sigma = Xo*Xo'/n; -p1 = logGauss(x,mu,Sigma); +kappa0 = 1; +m0 = zeros(d,1); +nu0 = 1; +S0 = eye(d); -gauss = Gaussian(X(:,3:end)).addSample(X(:,1)).addSample(X(:,2)).addSample(X(:,3)).delSample(X(:,3)); -p2 = gauss.logPdf(x); -abs(p1-p2) -%% \ No newline at end of file +xbar = mean(X,2); +kappa = kappa0+n; +nu = nu0+n; +m = (n*xbar+kappa0*m0)/kappa; +Xo = bsxfun(@minus,X,m); +X0 = m0-m; +S = S0+Xo*Xo'+kappa0*(X0*X0'); +% S = S0+X*X'+kappa0*m0*m0'-kappa*m*m'; + +v = (nu-d+1); +r = (1+1/kappa)/v; +p1 = logSt(x,m,r*S,v); + +gw = GaussWishart(kappa0,m0,nu0,S0); +for i=1:n + gw = gw.addSample(X(:,i)); +end +p2 = gw.logPredPdf(x); +abs(p1-p2) \ No newline at end of file From 6ab1285be4e223f62f1de8d2e15a4e801e416a30 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Mon, 25 Jan 2016 21:13:18 +0800 Subject: [PATCH 141/149] added addData to GaussWishart --- chapter11/GaussWishart.m | 23 ++++++++++++++++++++--- chapter11/demo.m | 4 ++++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/chapter11/GaussWishart.m b/chapter11/GaussWishart.m index 6c98657..569f863 100644 --- a/chapter11/GaussWishart.m +++ b/chapter11/GaussWishart.m @@ -8,7 +8,25 @@ methods function obj = GaussWishart(kappa,m,nu,S) - U = chol(S+kappa*m*m'); + U = chol(S+kappa*(m*m')); + obj.kappa_ = kappa; + obj.m_ = m; + obj.nu_ = nu; + obj.U_ = U; + end + + function obj = addData(obj, X) + kappa0 = obj.kappa_; + m0 = obj.m_; + nu0 = obj.nu_; + U0 = obj.U_; + + n = size(X,2); + kappa = kappa0+n; + m = (kappa0*m0+sum(X,2))/kappa; + nu = nu0+n; + U = chol(U0'*U0+X*X'); + obj.kappa_ = kappa; obj.m_ = m; obj.nu_ = nu; @@ -57,8 +75,7 @@ d = size(X,1); v = (nu-d+1); - r = (1+1/kappa)/v; - U = cholupdate(U,sqrt(kappa)*m,'-')*sqrt(r); + U = sqrt((1+1/kappa)/v)*cholupdate(U,sqrt(kappa)*m,'-'); X = bsxfun(@minus,X,m); Q = U'\X; diff --git a/chapter11/demo.m b/chapter11/demo.m index 
666cdf7..2e53454 100644 --- a/chapter11/demo.m +++ b/chapter11/demo.m @@ -39,6 +39,10 @@ r = (1+1/kappa)/v; p1 = logSt(x,m,r*S,v); +gw0 = GaussWishart(kappa0,m0,nu0,S0); +gw0 = gw0.addData(X); +p0 = gw0.logPredPdf(x); + gw = GaussWishart(kappa0,m0,nu0,S0); for i=1:n gw = gw.addSample(X(:,i)); From c0db46abd3852c41b06a58c5ed08852df48a8428 Mon Sep 17 00:00:00 2001 From: Mo Chen Date: Mon, 25 Jan 2016 22:38:27 +0800 Subject: [PATCH 142/149] Gibbs not finished --- chapter11/GaussWishart.m | 34 ++++++++--------- chapter11/demo.m | 79 +++++++++++++++++++++++----------------- chapter11/mixGaussGb.m | 62 ++++++++++++++++++++++--------- 3 files changed, 106 insertions(+), 69 deletions(-) diff --git a/chapter11/GaussWishart.m b/chapter11/GaussWishart.m index 569f863..491bbfa 100644 --- a/chapter11/GaussWishart.m +++ b/chapter11/GaussWishart.m @@ -15,23 +15,23 @@ obj.U_ = U; end - function obj = addData(obj, X) - kappa0 = obj.kappa_; - m0 = obj.m_; - nu0 = obj.nu_; - U0 = obj.U_; - - n = size(X,2); - kappa = kappa0+n; - m = (kappa0*m0+sum(X,2))/kappa; - nu = nu0+n; - U = chol(U0'*U0+X*X'); - - obj.kappa_ = kappa; - obj.m_ = m; - obj.nu_ = nu; - obj.U_ = U; - end +% function obj = addData(obj, X) +% kappa0 = obj.kappa_; +% m0 = obj.m_; +% nu0 = obj.nu_; +% U0 = obj.U_; +% +% n = size(X,2); +% kappa = kappa0+n; +% m = (kappa0*m0+sum(X,2))/kappa; +% nu = nu0+n; +% U = chol(U0'*U0+X*X'); +% +% obj.kappa_ = kappa; +% obj.m_ = m; +% obj.nu_ = nu; +% obj.U_ = U; +% end function obj = addSample(obj, x) kappa = obj.kappa_; diff --git a/chapter11/demo.m b/chapter11/demo.m index 2e53454..0431f3b 100644 --- a/chapter11/demo.m +++ b/chapter11/demo.m @@ -15,37 +15,48 @@ % p2 = gauss.logPdf(x); % abs(p1-p2) %% Gaussian Wishart -close all; clear; -d = 2; -n = 100; -X = randn(d,n); -x = randn(d,1); - -kappa0 = 1; -m0 = zeros(d,1); -nu0 = 1; -S0 = eye(d); - -xbar = mean(X,2); -kappa = kappa0+n; -nu = nu0+n; -m = (n*xbar+kappa0*m0)/kappa; -Xo = bsxfun(@minus,X,m); -X0 = m0-m; -S = S0+Xo*Xo'+kappa0*(X0*X0'); -% S = S0+X*X'+kappa0*m0*m0'-kappa*m*m'; - -v = (nu-d+1); -r = (1+1/kappa)/v; -p1 = logSt(x,m,r*S,v); - -gw0 = GaussWishart(kappa0,m0,nu0,S0); -gw0 = gw0.addData(X); -p0 = gw0.logPredPdf(x); - -gw = GaussWishart(kappa0,m0,nu0,S0); -for i=1:n - gw = gw.addSample(X(:,i)); -end -p2 = gw.logPredPdf(x); -abs(p1-p2) \ No newline at end of file +% close all; clear; +% d = 2; +% n = 100; +% X = randn(d,n); +% x = randn(d,1); +% +% kappa0 = 1; +% m0 = zeros(d,1); +% nu0 = d; +% S0 = eye(d); +% +% xbar = mean(X,2); +% kappa = kappa0+n; +% nu = nu0+n; +% m = (n*xbar+kappa0*m0)/kappa; +% Xo = bsxfun(@minus,X,m); +% X0 = m0-m; +% S = S0+Xo*Xo'+kappa0*(X0*X0'); +% % S = S0+X*X'+kappa0*m0*m0'-kappa*m*m'; +% +% v = (nu-d+1); +% r = (1+1/kappa)/v; +% p1 = logSt(x,m,r*S,v); +% +% gw0 = GaussWishart(kappa0,m0,nu0,S0); +% gw0 = gw0.addData(X); +% p0 = gw0.logPredPdf(x); +% +% gw = GaussWishart(kappa0,m0,nu0,S0); +% for i=1:n +% gw = gw.addSample(X(:,i)); +% end +% p2 = gw.logPredPdf(x); +% abs(p1-p2) +%% Demo for DPGM +% close all; clear; +% d = 2; +% k = 3; +% n = 1000; +% [X,label] = mixGaussRnd(d,k,n); +% plotClass(X,label); +% +% [y,model] = mixGaussGb(X); +% figure +% plotClass(X,y); diff --git a/chapter11/mixGaussGb.m b/chapter11/mixGaussGb.m index 0ab335e..336cf27 100644 --- a/chapter11/mixGaussGb.m +++ b/chapter11/mixGaussGb.m @@ -1,31 +1,57 @@ function [label, model, llh] = mixGaussGb(X, prior) % Collapsed Gibbs sampling for (infinite) Gaussian mixture model (a.k.a. 
% DPGM) -n = size(X,2); +[d,n] = size(X); +% parmaters of Gaussian-Wishart prior +if nargin == 1 + kappa0 = 1; + m0 = zeros(d,1); + nu0 = d; + S0 = eye(d); + alpha0 = 1; +else + kappa0 = prior.kappa; + m0 = prior.m; + nu0 = prior.nu; + S0 = prior.S; + alpha0 = prior.alpha; +end -Theta = prior.theta; -alpha = prior.alpha; -Z = ones(1,n); -maxIter = 1000; +[Theta,Z] = init(X,GaussWishart(kappa0,m0,nu0,S0)); +maxIter = 50; for iter = 1:maxIter for i = randperm(n) - rest = true(1,n); - rest(i) = false; x = X(:,i); z = Z(:,i); - - Theta{z} = delSample(Theta{z},x); - logFx = cellfun(x,Theta.Pred); - logNk = log(sum(Z(:,rest),2)); - logNk(1) = log(alpha); - logR = logFx+logNk; + try + Theta{z} = Theta{z}.delSample(x); + catch + error('error!\n'); + end + logPk = cellfun(@(theta) theta.logPredPdf(x),Theta)'; + logNk = log(sum(Z(:,~id(i,n)),2)); + logNk(1) = log(alpha0); + logR = logPk+logNk; p = exp(logR-logsumexp(logR)); - z = discreteRnd(p); + z = id(discreteRnd(p),numel(p)); Z(:,i) = z; - Theta{z} = addSample(Theta{z},x); - index = any(Z,2); - Z = [zeros(1,n);Z(index)]; - Theta = {prior.theta,Theta{index}}; % remove empty + Theta{z} = Theta{z}.addSample(x); + ne = any(Z,2); % non-empty + Z = [false(1,n);Z(ne,:)]; + gw = GaussWishart(kappa0,m0,nu0,S0); + Theta = {gw,Theta{ne}}; % remove empty end end +model = Theta{2:end}; +label = max(Z(2:end,:),[],1); + +function [Theta,Z] = init(X, theta) +n = size(X,2); +for i = randperm(n) + x = X(:,i); +end + +function indicator = id(i, n) +indicator = false(n,1); +indicator(i) = true; From d3eb24d1bd5722c8f3761211cc02f32c051fd8a5 Mon Sep 17 00:00:00 2001 From: sth4nth Date: Tue, 26 Jan 2016 10:57:58 +0800 Subject: [PATCH 143/149] fix common functions --- common/logsumexp.m | 10 +++++----- common/normalize.m | 2 +- common/standardize.m | 2 +- common/unitize.m | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/common/logsumexp.m b/common/logsumexp.m index c7b3aa2..e622ef4 100644 --- a/common/logsumexp.m +++ b/common/logsumexp.m @@ -1,16 +1,16 @@ -function s = logsumexp(x, dim) -% Compute log(sum(exp(x),dim)) while avoiding numerical underflow. +function s = logsumexp(X, dim) +% Compute log(sum(exp(X),dim)) while avoiding numerical underflow. % By default dim = 1 (columns). % Written by Michael Chen (sth4nth@gmail.com). if nargin == 1, % Determine which dimension sum will use - dim = find(size(x)~=1,1); + dim = find(size(X)~=1,1); if isempty(dim), dim = 1; end end % subtract the largest in each dim -y = max(x,[],dim); -s = y+log(sum(exp(bsxfun(@minus,x,y)),dim)); % TODO: use log1p +y = max(X,[],dim); +s = y+log(sum(exp(bsxfun(@minus,X,y)),dim)); % TODO: use log1p i = isinf(y); if any(i(:)) s(i) = y(i); diff --git a/common/normalize.m b/common/normalize.m index a383dca..be81e2c 100644 --- a/common/normalize.m +++ b/common/normalize.m @@ -4,7 +4,7 @@ % Written by Michael Chen (sth4nth@gmail.com). if nargin == 1, % Determine which dimension sum will use - dim = find(size(x)~=1,1); + dim = find(size(X)~=1,1); if isempty(dim), dim = 1; end end s = sum(X,dim); diff --git a/common/standardize.m b/common/standardize.m index 5dcfd0e..11a5805 100644 --- a/common/standardize.m +++ b/common/standardize.m @@ -4,7 +4,7 @@ % Written by Michael Chen (sth4nth@gmail.com). 
if nargin == 1, % Determine which dimension sum will use - dim = find(size(x)~=1,1); + dim = find(size(X)~=1,1); if isempty(dim), dim = 1; end end X = bsxfun(@minux,X,mean(X,2)); diff --git a/common/unitize.m b/common/unitize.m index 582cd7e..b27513f 100644 --- a/common/unitize.m +++ b/common/unitize.m @@ -4,7 +4,7 @@ % Written by Michael Chen (sth4nth@gmail.com). if nargin == 1, % Determine which dimension sum will use - dim = find(size(x)~=1,1); + dim = find(size(X)~=1,1); if isempty(dim), dim = 1; end end s = sqrt(sum(X.^2,dim)); From 8228a8187b7105e88d2ba78cce81a288c088c504 Mon Sep 17 00:00:00 2001 From: sth4nth Date: Tue, 26 Jan 2016 11:10:40 +0800 Subject: [PATCH 144/149] added Gibbs online --- chapter11/mixGaussGbOl.m | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 chapter11/mixGaussGbOl.m diff --git a/chapter11/mixGaussGbOl.m b/chapter11/mixGaussGbOl.m new file mode 100644 index 0000000..7ef2554 --- /dev/null +++ b/chapter11/mixGaussGbOl.m @@ -0,0 +1,31 @@ +function [label,Theta,nk] = mixGaussGbOl(X,kappa0,m0,nu0,S0,alpha0) +[d,n] = size(X); +mu = mean(X,2); +Xo = bsxfun(@minus,X,mu); +s = sum(Xo(:).^2)/(d*n); +if nargin == 1 + kappa0 = 1; + m0 = mean(X,2); + nu0 = d; + S0 = s*eye(d); + alpha0 = 1; +end +Theta = {}; +nk = []; +prior = GaussWishart(kappa0,m0,nu0,S0); +label = zeros(1,n); +for i = randperm(n) + K = numel(Theta); + x = X(:,i); + Pk = nk.*exp(cellfun(@(theta) theta.logPredPdf(x),Theta)); + P0 = alpha0*exp(prior.logPredPdf(x)); + k = discreteRnd(normalize([Pk,P0])); + if k==K+1 + Theta{k} = GaussWishart(kappa0,m0,nu0,S0).addSample(x); + nk = [nk,1]; + else + Theta{k} = Theta{k}.addSample(x); + nk(k) = nk(k)+1; + end + label(i) = k; +end \ No newline at end of file From 3127359ab9814e11733c42e46e509e9239fcf03b Mon Sep 17 00:00:00 2001 From: sth4nth Date: Tue, 26 Jan 2016 14:16:13 +0800 Subject: [PATCH 145/149] DP mixture Gibbs --- chapter11/Gauss.m | 3 ++ chapter11/GaussWishart.m | 3 ++ chapter11/demo.m | 11 +++++++ chapter11/mixDpGb.m | 34 ++++++++++++++++++++++ chapter11/mixDpGbOl.m | 20 +++++++++++++ chapter11/mixGaussGb.m | 63 +++++++++------------------------------- chapter11/mixGaussGbOl.m | 31 -------------------- 7 files changed, 84 insertions(+), 81 deletions(-) create mode 100644 chapter11/mixDpGb.m create mode 100644 chapter11/mixDpGbOl.m delete mode 100644 chapter11/mixGaussGbOl.m diff --git a/chapter11/Gauss.m b/chapter11/Gauss.m index f8e6507..6aa6797 100644 --- a/chapter11/Gauss.m +++ b/chapter11/Gauss.m @@ -15,6 +15,9 @@ obj.mu_ = mu; obj.U_ = U; end + + function obj = clone(obj) + end function obj = addSample(obj, x) n = obj.n_; diff --git a/chapter11/GaussWishart.m b/chapter11/GaussWishart.m index 491bbfa..8d08e46 100644 --- a/chapter11/GaussWishart.m +++ b/chapter11/GaussWishart.m @@ -15,6 +15,9 @@ obj.U_ = U; end + function obj = clone(obj) + end + % function obj = addData(obj, X) % kappa0 = obj.kappa_; % m0 = obj.m_; diff --git a/chapter11/demo.m b/chapter11/demo.m index 0431f3b..9c64ca4 100644 --- a/chapter11/demo.m +++ b/chapter11/demo.m @@ -60,3 +60,14 @@ % [y,model] = mixGaussGb(X); % figure % plotClass(X,y); +%% Demo for online DPGM +close all; clear; +d = 2; +k = 3; +n = 500; +[X,label] = mixGaussRnd(d,k,n); +plotClass(X,label); + +[y,model] = mixGaussGb(X); +figure +plotClass(X,y); diff --git a/chapter11/mixDpGb.m b/chapter11/mixDpGb.m new file mode 100644 index 0000000..a5ba8f6 --- /dev/null +++ b/chapter11/mixDpGb.m @@ -0,0 +1,34 @@ +function [label, Theta, w] = mixDpGb(X, alpha, theta) +% Collapsed 
Gibbs sampling for (infinite) Gaussian mixture model (a.k.a. +% DPGM) +n = size(X,2); +[label,Theta,w] = mixDpGbOl(X,alpha,theta); +nk = n*w; +maxIter = 100; +for iter = 1:maxIter + for i = randperm(n) + x = X(:,i); + k = label(i); + Theta{k} = Theta{k}.delSample(x); + nk(k) = nk(k)-1; + if nk(k) == 0 % remove empty cluster + Theta(k) = []; + nk(k) = []; + which = label>k; + label(which) = label(which)-1; + end + Pk = nk.*exp(cellfun(@(theta) theta.logPredPdf(x),Theta)); + P0 = alpha*exp(theta.logPredPdf(x)); + k = discreteRnd(normalize([Pk,P0])); + if k == numel(Theta)+1 % add extra cluster + Theta{k} = theta.clone.addSample(x); + nk = [nk,1]; + else + Theta{k} = Theta{k}.addSample(x); + nk(k) = nk(k)+1; + end + label(i) = k; + end +end +w = nk/n; + diff --git a/chapter11/mixDpGbOl.m b/chapter11/mixDpGbOl.m new file mode 100644 index 0000000..d30a322 --- /dev/null +++ b/chapter11/mixDpGbOl.m @@ -0,0 +1,20 @@ +function [label, Theta, w, llh] = mixGaussGbOl(X, alpha, theta) +n = size(X,2); +Theta = {}; +nk = []; +label = zeros(1,n); +for i = randperm(n) + x = X(:,i); + Pk = nk.*exp(cellfun(@(t) t.logPredPdf(x), Theta)); + P0 = alpha*exp(theta.logPredPdf(x)); + k = discreteRnd(normalize([Pk,P0])); + if k == numel(Theta)+1 + Theta{k} = theta.clone().addSample(x); + nk = [nk,1]; + else + Theta{k} = Theta{k}.addSample(x); + nk(k) = nk(k)+1; + end + label(i) = k; +end +w = nk/n; \ No newline at end of file diff --git a/chapter11/mixGaussGb.m b/chapter11/mixGaussGb.m index 336cf27..0d4cc83 100644 --- a/chapter11/mixGaussGb.m +++ b/chapter11/mixGaussGb.m @@ -1,57 +1,20 @@ -function [label, model, llh] = mixGaussGb(X, prior) -% Collapsed Gibbs sampling for (infinite) Gaussian mixture model (a.k.a. -% DPGM) +function [label, Theta, w] = mixGaussGb( X, opt ) [d,n] = size(X); -% parmaters of Gaussian-Wishart prior +mu = mean(X,2); +Xo = bsxfun(@minus,X,mu); +s = sum(Xo(:).^2)/(d*n); if nargin == 1 kappa0 = 1; - m0 = zeros(d,1); + m0 = mean(X,2); nu0 = d; - S0 = eye(d); + S0 = s*eye(d); alpha0 = 1; else - kappa0 = prior.kappa; - m0 = prior.m; - nu0 = prior.nu; - S0 = prior.S; - alpha0 = prior.alpha; + kappa0 = opt.kappa; + m0 = opt.m; + nu0 = opt.nu; + S0 = opt.S; + alpha0 = opt.alpha; end - -[Theta,Z] = init(X,GaussWishart(kappa0,m0,nu0,S0)); -maxIter = 50; -for iter = 1:maxIter - for i = randperm(n) - x = X(:,i); - z = Z(:,i); - try - Theta{z} = Theta{z}.delSample(x); - catch - error('error!\n'); - end - logPk = cellfun(@(theta) theta.logPredPdf(x),Theta)'; - logNk = log(sum(Z(:,~id(i,n)),2)); - logNk(1) = log(alpha0); - logR = logPk+logNk; - p = exp(logR-logsumexp(logR)); - z = id(discreteRnd(p),numel(p)); - Z(:,i) = z; - Theta{z} = Theta{z}.addSample(x); - ne = any(Z,2); % non-empty - Z = [false(1,n);Z(ne,:)]; - gw = GaussWishart(kappa0,m0,nu0,S0); - Theta = {gw,Theta{ne}}; % remove empty - end -end -model = Theta{2:end}; -label = max(Z(2:end,:),[],1); - -function [Theta,Z] = init(X, theta) -n = size(X,2); -for i = randperm(n) - x = X(:,i); -end - - -function indicator = id(i, n) -indicator = false(n,1); -indicator(i) = true; +prior = GaussWishart(kappa0,m0,nu0,S0); +[label, Theta, w] = mixDpGb(X,alpha0,prior); \ No newline at end of file diff --git a/chapter11/mixGaussGbOl.m b/chapter11/mixGaussGbOl.m deleted file mode 100644 index 7ef2554..0000000 --- a/chapter11/mixGaussGbOl.m +++ /dev/null @@ -1,31 +0,0 @@ -function [label,Theta,nk] = mixGaussGbOl(X,kappa0,m0,nu0,S0,alpha0) -[d,n] = size(X); -mu = mean(X,2); -Xo = bsxfun(@minus,X,mu); -s = sum(Xo(:).^2)/(d*n); -if nargin == 1 - kappa0 = 1; - m0 
= mean(X,2); - nu0 = d; - S0 = s*eye(d); - alpha0 = 1; -end -Theta = {}; -nk = []; -prior = GaussWishart(kappa0,m0,nu0,S0); -label = zeros(1,n); -for i = randperm(n) - K = numel(Theta); - x = X(:,i); - Pk = nk.*exp(cellfun(@(theta) theta.logPredPdf(x),Theta)); - P0 = alpha0*exp(prior.logPredPdf(x)); - k = discreteRnd(normalize([Pk,P0])); - if k==K+1 - Theta{k} = GaussWishart(kappa0,m0,nu0,S0).addSample(x); - nk = [nk,1]; - else - Theta{k} = Theta{k}.addSample(x); - nk(k) = nk(k)+1; - end - label(i) = k; -end \ No newline at end of file From 7cb5bdf76cbd3b7895b306d74e11f154f2640418 Mon Sep 17 00:00:00 2001 From: sth4nth Date: Tue, 26 Jan 2016 14:18:56 +0800 Subject: [PATCH 146/149] added comment for Gibbs --- chapter11/mixDpGb.m | 4 ++-- chapter11/mixDpGbOl.m | 2 ++ chapter11/mixGaussGb.m | 2 ++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/chapter11/mixDpGb.m b/chapter11/mixDpGb.m index a5ba8f6..b0e7d8d 100644 --- a/chapter11/mixDpGb.m +++ b/chapter11/mixDpGb.m @@ -1,6 +1,6 @@ function [label, Theta, w] = mixDpGb(X, alpha, theta) -% Collapsed Gibbs sampling for (infinite) Gaussian mixture model (a.k.a. -% DPGM) +% Collapsed Gibbs sampling for Dirichlet process (infinite) mixture model (a.k.a. +% DPGM). Any component model can be used, such as Gaussian n = size(X,2); [label,Theta,w] = mixDpGbOl(X,alpha,theta); nk = n*w; diff --git a/chapter11/mixDpGbOl.m b/chapter11/mixDpGbOl.m index d30a322..1667a3a 100644 --- a/chapter11/mixDpGbOl.m +++ b/chapter11/mixDpGbOl.m @@ -1,4 +1,6 @@ function [label, Theta, w, llh] = mixGaussGbOl(X, alpha, theta) +% Online collapsed Gibbs sampling for Dirichlet process (infinite) mixture model (a.k.a. +% DPGM). Any component model can be used, such as Gaussian n = size(X,2); Theta = {}; nk = []; diff --git a/chapter11/mixGaussGb.m b/chapter11/mixGaussGb.m index 0d4cc83..ac2f54e 100644 --- a/chapter11/mixGaussGb.m +++ b/chapter11/mixGaussGb.m @@ -1,4 +1,6 @@ function [label, Theta, w] = mixGaussGb( X, opt ) +% Collapsed Gibbs sampling for Dirichlet process (infinite) Gaussian mixture model (a.k.a. +% DPGM). [d,n] = size(X); mu = mean(X,2); Xo = bsxfun(@minus,X,mu); From 3befd99d472a43e51da3b56440ac1015994fff5f Mon Sep 17 00:00:00 2001 From: sth4nth Date: Tue, 26 Jan 2016 14:26:33 +0800 Subject: [PATCH 147/149] modified to use log scale computation to improve numerical stability --- chapter11/mixDpGb.m | 7 ++++--- chapter11/mixDpGbOl.m | 9 +++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/chapter11/mixDpGb.m b/chapter11/mixDpGb.m index b0e7d8d..608825d 100644 --- a/chapter11/mixDpGb.m +++ b/chapter11/mixDpGb.m @@ -17,9 +17,10 @@ which = label>k; label(which) = label(which)-1; end - Pk = nk.*exp(cellfun(@(theta) theta.logPredPdf(x),Theta)); - P0 = alpha*exp(theta.logPredPdf(x)); - k = discreteRnd(normalize([Pk,P0])); + Pk = log(nk)+cellfun(@(t) t.logPredPdf(x), Theta); + P0 = log(alpha)+theta.logPredPdf(x); + p = [Pk,P0]; + k = discreteRnd(exp(p-logsumexp(p))); if k == numel(Theta)+1 % add extra cluster Theta{k} = theta.clone.addSample(x); nk = [nk,1]; diff --git a/chapter11/mixDpGbOl.m b/chapter11/mixDpGbOl.m index 1667a3a..dd36309 100644 --- a/chapter11/mixDpGbOl.m +++ b/chapter11/mixDpGbOl.m @@ -1,4 +1,4 @@ -function [label, Theta, w, llh] = mixGaussGbOl(X, alpha, theta) +function [label, Theta, w, llh] = mixDpGbOl(X, alpha, theta) % Online collapsed Gibbs sampling for Dirichlet process (infinite) mixture model (a.k.a. % DPGM). 
Any component model can be used, such as Gaussian n = size(X,2); @@ -7,9 +7,10 @@ label = zeros(1,n); for i = randperm(n) x = X(:,i); - Pk = nk.*exp(cellfun(@(t) t.logPredPdf(x), Theta)); - P0 = alpha*exp(theta.logPredPdf(x)); - k = discreteRnd(normalize([Pk,P0])); + Pk = log(nk)+cellfun(@(t) t.logPredPdf(x), Theta); + P0 = log(alpha)+theta.logPredPdf(x); + p = [Pk,P0]; + k = discreteRnd(exp(p-logsumexp(p))); if k == numel(Theta)+1 Theta{k} = theta.clone().addSample(x); nk = [nk,1]; From dc70181fef29a95525e36adadb94bb18af4700d4 Mon Sep 17 00:00:00 2001 From: sth4nth Date: Tue, 26 Jan 2016 14:36:18 +0800 Subject: [PATCH 148/149] added llh computation --- chapter11/demo.m | 6 +++--- chapter11/mixDpGb.m | 6 ++++-- chapter11/mixDpGbOl.m | 2 ++ chapter11/mixGaussGb.m | 4 ++-- 4 files changed, 11 insertions(+), 7 deletions(-) diff --git a/chapter11/demo.m b/chapter11/demo.m index 9c64ca4..7d9f770 100644 --- a/chapter11/demo.m +++ b/chapter11/demo.m @@ -60,14 +60,14 @@ % [y,model] = mixGaussGb(X); % figure % plotClass(X,y); -%% Demo for online DPGM +%% Demo for DPGM close all; clear; d = 2; k = 3; -n = 500; +n = 200; [X,label] = mixGaussRnd(d,k,n); plotClass(X,label); -[y,model] = mixGaussGb(X); +[y,theta,w,llh] = mixGaussGb(X); figure plotClass(X,y); diff --git a/chapter11/mixDpGb.m b/chapter11/mixDpGb.m index 608825d..52b95d0 100644 --- a/chapter11/mixDpGb.m +++ b/chapter11/mixDpGb.m @@ -1,10 +1,11 @@ -function [label, Theta, w] = mixDpGb(X, alpha, theta) +function [label, Theta, w, llh] = mixDpGb(X, alpha, theta) % Collapsed Gibbs sampling for Dirichlet process (infinite) mixture model (a.k.a. % DPGM). Any component model can be used, such as Gaussian n = size(X,2); [label,Theta,w] = mixDpGbOl(X,alpha,theta); nk = n*w; -maxIter = 100; +maxIter = 200; +llh = zeros(1,maxIter); for iter = 1:maxIter for i = randperm(n) x = X(:,i); @@ -20,6 +21,7 @@ Pk = log(nk)+cellfun(@(t) t.logPredPdf(x), Theta); P0 = log(alpha)+theta.logPredPdf(x); p = [Pk,P0]; + llh(iter) = llh(iter)+sum(p-log(n)); k = discreteRnd(exp(p-logsumexp(p))); if k == numel(Theta)+1 % add extra cluster Theta{k} = theta.clone.addSample(x); diff --git a/chapter11/mixDpGbOl.m b/chapter11/mixDpGbOl.m index dd36309..410d20b 100644 --- a/chapter11/mixDpGbOl.m +++ b/chapter11/mixDpGbOl.m @@ -5,11 +5,13 @@ Theta = {}; nk = []; label = zeros(1,n); +llh = 0; for i = randperm(n) x = X(:,i); Pk = log(nk)+cellfun(@(t) t.logPredPdf(x), Theta); P0 = log(alpha)+theta.logPredPdf(x); p = [Pk,P0]; + llh = llh+sum(p-log(n)); k = discreteRnd(exp(p-logsumexp(p))); if k == numel(Theta)+1 Theta{k} = theta.clone().addSample(x); diff --git a/chapter11/mixGaussGb.m b/chapter11/mixGaussGb.m index ac2f54e..b50239f 100644 --- a/chapter11/mixGaussGb.m +++ b/chapter11/mixGaussGb.m @@ -1,4 +1,4 @@ -function [label, Theta, w] = mixGaussGb( X, opt ) +function [label, Theta, w, llh] = mixGaussGb(X, opt) % Collapsed Gibbs sampling for Dirichlet process (infinite) Gaussian mixture model (a.k.a. % DPGM). 
 [d,n] = size(X);
@@ -19,4 +19,4 @@
     alpha0 = opt.alpha;
 end
 prior = GaussWishart(kappa0,m0,nu0,S0);
-[label, Theta, w] = mixDpGb(X,alpha0,prior);
\ No newline at end of file
+[label, Theta, w, llh] = mixDpGb(X,alpha0,prior);
\ No newline at end of file

From dd51ff81417edf875c789ab0b6d0bab82e34c530 Mon Sep 17 00:00:00 2001
From: sth4nth
Date: Tue, 26 Jan 2016 14:36:48 +0800
Subject: [PATCH 149/149] updated TODO

---
 TODO.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/TODO.txt b/TODO.txt
index 275a4f1..15b4fb6 100644
--- a/TODO.txt
+++ b/TODO.txt
@@ -1,5 +1,4 @@
 TODO:
-chapter11: collapsed Gibbs sampling for gmm and dpgm
 chapter13: demo for time series models
 chapter12: prediction functions for ppca
 chapter05: MLP
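
The sequential Gauss and GaussWishart classes introduced in the patches above keep an upper-triangular Cholesky factor up to date instead of refactorizing after every sample. Below is a minimal self-contained check of the rank-1 identity that addSample/delSample rely on; it is a sketch for illustration (variable names are made up here, only MATLAB's built-in chol and cholupdate are assumed):

    % cholupdate identity: if A = U'*U with U upper triangular, then
    % cholupdate(U,x,'+') returns the factor of A + x*x', and a later
    % cholupdate(.,x,'-') undoes it. Each call is O(d^2), versus O(d^3)
    % for a fresh chol, which is the point of the sequential classes.
    d = 3;
    B = randn(d,2*d);
    A = B*B';                            % a positive definite matrix
    x = randn(d,1);
    U  = chol(A);                        % A = U'*U
    U1 = cholupdate(U,x,'+');            % factor of A + x*x'
    disp(norm(U1'*U1-(A+x*x'),'fro'))    % ~1e-14
    U2 = cholupdate(U1,x,'-');           % downdate recovers U
    disp(norm(U2-U,'fro'))               % ~1e-14

The running mean in addSample uses the same incremental idea: with the count already bumped to n, mu + (x-mu)/n is exactly the mean of the enlarged sample.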
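
logPredPdf in GaussWishart evaluates the Gauss-Wishart posterior predictive that demo.m cross-checks against logSt. Spelled out with the same symbols as the demo (a sketch of the standard conjugate result, not text from the repo):

    \kappa_n = \kappa_0 + n, \qquad \nu_n = \nu_0 + n, \qquad
    m_n = (\kappa_0 m_0 + n\bar{x}) / \kappa_n

    S_n = S_0 + \sum_{i=1}^{n} (x_i - m_n)(x_i - m_n)^\top
              + \kappa_0 (m_0 - m_n)(m_0 - m_n)^\top

    p(x \mid X) = \mathrm{St}\!\left(x \,\middle|\, m_n,\;
        \frac{1 + 1/\kappa_n}{\nu_n - d + 1}\, S_n,\; \nu_n - d + 1\right)

Because the constructor folds \kappa\, m m^\top into U_, the cholupdate(U,sqrt(kappa)*m,'-') line in logPredPdf peels that term back off to recover the factor of S_n before forming the Student's t scale.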
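
The assignment step in mixDpGb is the usual collapsed Chinese-restaurant-process conditional; writing it down makes the log(nk)/log(alpha) bookkeeping in the code easier to follow (a standard DP mixture identity, stated here for reference):

    p(z_i = k \mid z_{-i}, x_i) \propto n_k^{(-i)}\, p(x_i \mid X_k \setminus x_i),
        \quad k = 1,\dots,K
    p(z_i = K+1 \mid z_{-i}, x_i) \propto \alpha\, p(x_i \mid \varnothing)

where n_k^{(-i)} is the size of cluster k after x_i has been removed, and both predictive densities are computed by logPredPdf (the empty-cluster one from the prior object theta).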
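
PATCH 147 moves these weights to log scale before normalizing. A short sketch of the underflow it guards against; the numbers are hypothetical, chosen only to show the failure mode:

    % In high dimensions log(nk)+logPredPdf is a large negative number;
    % exponentiating it directly underflows to 0, and 0/0 gives NaN.
    logp = [-800 -801 -805];
    bad  = exp(logp)/sum(exp(logp));     % [NaN NaN NaN]
    good = exp(logp-max(logp));          % shift by the max, as in
    good = good/sum(good);               % exp(p-logsumexp(p))
    disp(good)                           % approx [0.7275 0.2676 0.0049]

Subtracting logsumexp(p), whose inner shift is the same max trick, leaves well-defined probabilities in exactly the cases where the unshifted version would have returned NaN.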