
Fix mixture of linear regression
sth4nth committed Nov 5, 2013
1 parent 9956c39 commit 77b0e0c
Showing 5 changed files with 117 additions and 135 deletions.
12 changes: 6 additions & 6 deletions chapter04/demo.m
@@ -1,10 +1,10 @@

% clear; close all;
% k = 2;
% n = 1000;
% [X,t] = rndKCluster(2,k,n);
%
% [x1,x2] = meshgrid(linspace(min(X(1,:)),max(X(1,:)),n), linspace(min(X(2,:)),max(X(2,:)),n));
clear; close all;
k = 2;
n = 1000;
[X,t] = rndKCluster(2,k,n);

[x1,x2] = meshgrid(linspace(min(X(1,:)),max(X(1,:)),n), linspace(min(X(2,:)),max(X(2,:)),n));
[w, llh] = logitReg(X,t-1,0.0001);
plot(llh);
figure;
65 changes: 38 additions & 27 deletions chapter07/demo.m
@@ -1,15 +1,15 @@
% clear; close all;
%
% %% regression
% n = 100;
% beta = 1e-1;
% X = rand(1,n);
% w = randn;
% b = randn;
% t = w'*X+b+beta*randn(1,n);
%
% x = linspace(min(X)-1,max(X)+1,n); % test data
% %%
%% regression
n = 100;
beta = 1e-1;
X = rand(1,n);
w = randn;
b = randn;
t = w'*X+b+beta*randn(1,n);

x = linspace(min(X)-1,max(X)+1,n); % test data
%%
% [model,llh] = rvmRegEbFp(X,t);
% figure
% plot(llh);
@@ -20,7 +20,7 @@
% plot(X,t,'o');
% plot(x,y,'r-');
% hold off
% %%
%%
% [model,llh] = rvmRegEbEm(X,t);
% figure
% plot(llh);
@@ -31,27 +31,38 @@
% plot(X,t,'o');
% plot(x,y,'r-');
% hold off

%% classification
k = 2;
d = 2;
n = 1000;
[X,t] = rndKCluster(d,k,n);
[x1,x2] = meshgrid(linspace(min(X(1,:)),max(X(1,:)),n), linspace(min(X(2,:)),max(X(2,:)),n));

%%
[model, llh] = rvmEbFp(X,t-1);
[model,llh] = rvmRegEbCd(X,t);
figure
plot(llh);
[y, sigma] = linInfer(x,model,t);
figure;
spread(X,t);

w = zeros(3,1);
w(model.used) = model.w;
y = w(1)*x1+w(2)*x2+w(3);
hold on;
contour(x1,x2,y,1);
hold off;
plotBand(x,y,2*sigma);
plot(X,t,'o');
plot(x,y,'r-');
hold off

%% classification
% k = 2;
% d = 2;
% n = 1000;
% [X,t] = rndKCluster(d,k,n);
% [x1,x2] = meshgrid(linspace(min(X(1,:)),max(X(1,:)),n), linspace(min(X(2,:)),max(X(2,:)),n));

%%
% [model, llh] = rvmEbFp(X,t-1);
% figure
% plot(llh);
% figure;
% spread(X,t);
%
% w = zeros(3,1);
% w(model.used) = model.w;
% y = w(1)*x1+w(2)*x2+w(3);
% hold on;
% contour(x1,x2,y,1);
% hold off;
%%
% [model, llh] = rvmEbEm(X,t-1);
% figure
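For context on what the chapter07 regression demo above now plots: linInfer and plotBand presumably return and shade the standard sparse Bayesian (RVM) predictive distribution; their implementations are not part of this commit, so treat this as an assumed sketch.

    m(x_*) = \mu^{\top} \phi(x_*), \qquad
    \sigma^2(x_*) = \beta^{-1} + \phi(x_*)^{\top} \Sigma \, \phi(x_*)

Here \mu and \Sigma are the posterior mean and covariance of the retained weights and \beta is the estimated noise precision; plotBand(x, y, 2*sigma) would then fill roughly a two-standard-deviation band around the predictive mean.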
27 changes: 13 additions & 14 deletions chapter07/rvmRegEbCd.m
@@ -1,8 +1,8 @@
function [model, llh] = rvmRegEbCd(X, t)
% TODO: llh is not increasing; verify with sparse, high-dimensional data
% Sparse Bayesian Regression (RVM) using Coordinate Descent
% reference:
% Analysis of sparse Bayesian learning. NIPS(2002). By Faul and Tipping
% Fast marginal likelihood maximisation for sparse Bayesian models.
% AISTATS(2003). by Tipping and Faul
% Tipping and Faul. Fast marginal likelihood maximisation for sparse Bayesian models. AISTATS 2003.
% Written by Mo Chen ([email protected]).
[d,n] = size(X);
xbar = mean(X,2);
@@ -16,23 +16,22 @@
Q = beta*(X*t');
Sigma = zeros(0,0);
mu = zeros(0,1);
dim = zeros(0,1);
Phi = zeros(0,n);
dim = zeros(0,1);

iter = 1;
maxiter = 1000;
tol = 1e-4;
maxiter = 100;
tol = 1e-2;
llh = -inf(1,maxiter);
indAction = zeros(d,3);
iAct = zeros(d,3);
iUse = false(d,1);
s = S; q = Q;
for iter = 2:maxiter
theta = q.^2-s;
iNew = theta>0;

iUpd = (iNew & iUse); % update
iAdd = (iNew~=iUpd); % add
iDel = (iUse~=iUpd); % del
iAdd = (iNew ~= iUpd); % add
iDel = (iUse ~= iUpd); % del

% find the next alpha that maximizes the marginal likelihood
tllh = -inf(d,1); % trial (temporary) likelihood
@@ -51,12 +50,12 @@
[llh(iter),j] = max(tllh);
if abs(llh(iter)-llh(iter-1)) < tol*llh(iter-1); break; end

indAction(:,1) = iAdd;
indAction(:,2) = iDel;
indAction(:,3) = iUpd;
iAct(:,1) = iAdd;
iAct(:,2) = iDel;
iAct(:,3) = iUpd;

% update parameters
switch find(indAction(j,:))
switch find(iAct(j,:))
case 1 % Add
alpha(j) = s(j)^2/theta(j);
Sigma_jj = 1/(alpha(j)+S(j));
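For reference, the add/delete/update logic in rvmRegEbCd follows the sequential (fast marginal likelihood) scheme of Tipping and Faul cited in the file header. In the code's notation, with sparsity factor s_i and quality factor q_i, the relevance test and alpha re-estimate are

    \theta_i = q_i^2 - s_i, \qquad
    \alpha_i = \frac{s_i^2}{q_i^2 - s_i} \ \text{if } \theta_i > 0 \ \text{(add or re-estimate basis } i\text{)}, \qquad
    \alpha_i \to \infty \ \text{if } \theta_i \le 0 \ \text{(prune basis } i\text{)}

which is what theta = q.^2 - s and alpha(j) = s(j)^2/theta(j) compute; at each iteration the single action (add, delete, or update) with the largest trial marginal likelihood tllh is applied.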
56 changes: 27 additions & 29 deletions chapter14/demo.m
@@ -1,35 +1,33 @@
clear all;
%% Generate Data
W = randn(2,2);
w1 = W(1,1);
b1 = W(1,2);
w2 = W(2,1);
b2 = W(2,2);

x = linspace(-5,5,50);
x1 = x + randn(size(x)) * 0.001;
x1(x1 < -3 | x1 > 3) = [];
x2 = x + randn(size(x)) * 0.001;
x2(x2 < 3 & x2 > -3) = [];
y1 = w1 * x1 + b1 + 5;
y2 = w2 * x2 + b2 - 5;
d = 1;
k = 3;
n = 500;
W = randn(d+1,k);

X = [x1 x2];
y = [y1 y2];
[x, label] = kmeansrnd(d, k, n);
X = [x; ones(1,n)];
y = zeros(1,n);
for j = 1:k
idx = (label == j);
y(idx) = W(:,j)'*X(:,idx);
end

[model,llh,R] = mixRegress(X, y, 2);
plot(x,y,'.');

figure();
subplot(1,3,1);
plot(x1,y1,'r*'); hold on;
plot(x2,y2,'bx');
hold off;
subplot(1,3,2);
plot(X,y,'r*');
% subplot(1,3,3);
hold on;
plot(x1,x1*model.W(1,1) + model.W(2,1),'r-');hold on;
plot(x2,x2*model.W(1,2) + model.W(2,2),'b-');
hold off;
subplot(1,3,3);
plot(llh);
[model,llh] = mixLinReg(X, y, 2);
plot(llh);
%
% figure();
% subplot(1,3,1);
% plot(x1,y1,'r*'); hold on;
% plot(x2,y2,'bx');
% hold off;
% subplot(1,3,2);
% plot(X,y,'r*');
% % subplot(1,3,3);
% hold on;
% plot(x1,x1*model.W(1,1) + model.W(2,1),'r-');hold on;
% plot(x2,x2*model.W(1,2) + model.W(2,2),'b-');
% hold off;
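For reference, the new data-generation block in this demo can be summarized as a noiseless mixture-of-regressions model, where z_n is the cluster label returned by kmeansrnd and the columns of the random W are the ground-truth weights including the bias:

    \tilde{x}_n = [x_n; 1], \qquad y_n = w_{z_n}^{\top} \tilde{x}_n, \qquad z_n \in \{1, \dots, k\}

No observation noise is added in this version, so mixLinReg should recover the component weights up to the assignment of samples to components.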
92 changes: 33 additions & 59 deletions chapter14/mixLinReg.m
@@ -1,72 +1,46 @@
function [model, llh, Rnew] = mixLinReg(X, y, k)
% mixture of linear regression model
function [model, llh] = mixLinReg(X, y, k, lambda)
% mixture of linear regression
% Written by Mo Chen ([email protected]).
X = [X;ones(1,size(X,2))]; % adding the bias term

[d,n] = size(X);

ridx = randperm(n);
X = X(:,ridx);
y = y(ridx);

z = ceil(k*rand(1,n));
% R = full(sparse(1:n,z,1,n,k,n)); % k x n
% initialize with random weights
R = rand(n,k);
R = bsxfun(@times, R, 1./sum(R));

W = zeros(d,k);
tol = 1e-10;
maxiter = 50000;
if nargin < 4
lambda = 1;
end
n = size(X,2);
X = [X;ones(1,n)]; % adding the bias term
d = size(X,1);
idx = (1:d)';
dg = sub2ind([d,d],idx,idx);
label = ceil(k*rand(1,n)); % random initialization
R = sparse(label,1:n,1,k,n,n);
tol = 1e-4;
maxiter = 200;
llh = -inf(1,maxiter);
converged = false;
t = 1;


while ~converged && t < maxiter
t = t+1;
lambda = lambda*ones(d,1);
W = zeros(d,k);
beta = 1;
for iter = 2 : maxiter
% maximization
nk = sum(R,1);
nk = sum(R,2);
alpha = nk/n;

Xbar = bsxfun(@times, X*R,1./nk);
ybar = y*R./nk;
% beta = n/dot(R(:),D(:));
for j = 1:k
% Xo = bsxfun(@minus,X,Xbar(:,j));
Xo = X;
% yo = y-ybar(j);
yo = y;
% XR = bsxfun(@times,Xo,R(:,j)');
XR = Xo * diag(R(:,j));
% W(:,j) = (XR*Xo' + 1e-4*eye(d))\(XR*yo');
% W(:,j) = (XR*Xo' )\(XR*yo');
W(:,j) = (Xo * diag(R(:,j)) * y')' / (Xo * diag(R(:,j)) * Xo');
Xw = bsxfun(@times,X,sqrt(R(j,:)));
C = Xw*Xw';
C(dg) = C(dg)+lambda;
U = chol(C);
W(:,j) = U\(U'\(X*(R(j,:).*y)')); % 3.15 & 3.28
end
% w0 = ybar-dot(W,Xbar,1);
w0 = 0*(ybar-dot(W,Xbar,1));

% E = (bsxfun(@minus,y',w0)-X'*W).^2;
E = (bsxfun(@minus,y',X'*W)).^2;
beta = n/dot(R(:),E(:));

D = (bsxfun(@minus,W'*X,y)).^2;
% expectation
logRho = (-0.5)*beta*E;
% divide by the "beta"
logRho = bsxfun(@plus,logRho,log(alpha./sqrt(2 * pi * beta)));
T = logsumexp(logRho,2);
logRho = (-0.5)*beta*D;
logRho = bsxfun(@plus,logRho,log(alpha));
T = logsumexp(logRho,1);
logR = bsxfun(@minus,logRho,T);
R = exp(logR);

% llh(t) = sum(T)/n; % loglikelihood
% we do not need to normalize the T.
llh(t) = sum(T);
% add abs to avoid fluctuation when llh(t) < llh(t+1) to stop unexpectedly
% converged = abs(llh(t)-llh(t-1)) < tol*abs(llh(t));
llh(iter) = sum(T)/n;
if abs(llh(iter)-llh(iter-1)) < tol; break; end
end
llh = llh(2:t);
llh = llh(2:iter);

model.alpha = alpha; % mixing coefficient
model.beta = beta; % mixture component precision
model.W = W; % linear model coefficient
model.w0 = w0; % linear model intercept
Rnew = zeros(size(R));
Rnew(ridx,:) = R;
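For reference, the rewritten mixLinReg runs a standard EM loop for a mixture of linear regression models (cf. PRML §14.5.1), with a ridge term lambda (default 1) on each component's weights and beta initialized to 1. A sketch of what the updates above compute, writing r_{jn} for the responsibility of component j for sample n and R_j = diag(r_{j1}, ..., r_{jn}):

E-step (done in the log domain via logsumexp):

    r_{jn} \propto \alpha_j \exp\big(-\tfrac{\beta}{2} (y_n - w_j^{\top} \tilde{x}_n)^2\big)

M-step (the per-component Cholesky solve in the loop):

    \alpha_j = \frac{1}{n} \sum_n r_{jn}, \qquad
    w_j = \big(\tilde{X} R_j \tilde{X}^{\top} + \lambda I\big)^{-1} \tilde{X} R_j y^{\top}

where \tilde{X} is the input matrix with an appended bias row. The loop stops when the per-sample score llh(iter) = sum(T)/n changes by less than tol.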
