
Commit

binaryFA code now works in unsupervised setting
git-svn-id: https://pmtk3.googlecode.com/svn/trunk@2769 b6abd7f4-f95b-11de-aa3c-59de0406b4f5
[email protected] committed Apr 12, 2011
1 parent c98489c commit 5c68983
Showing 10 changed files with 212 additions and 241 deletions.
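In short, the binaryFA routines can now be fit and queried with no supervision: EM runs directly on a binary data matrix, and the fitted model is then used for low-dimensional embedding, denoising, and imputation. A minimal sketch of that workflow, assembled from the updated demo and call signatures in this commit (the data matrix Ybin and the output variable names are hypothetical):

% Ybin is an N x D matrix with entries in {0,1}; 2 is the latent dimensionality
[model, loglikHist] = binaryFAfit(Ybin, 2, 'maxIter', 10, 'verbose', true);
muPost = binaryFAinferLatent(model, Ybin);    % K x N posterior means of the latent factors
probOn = binaryFApredictMissing(model, Ybin); % posterior probability that each bit is on
Yhat   = probOn > 0.5;                        % reconstruction, thresholded as in the demo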
48 changes: 0 additions & 48 deletions demos/binaryPcaDemoImpute.m

This file was deleted.

54 changes: 36 additions & 18 deletions demos/binaryPcaDemoTipping.m
@@ -1,37 +1,55 @@
% Demo of factor analysis applied to some synthetic binary data
% Demo of factor analysis applied to some synthetic 2d binary data

% This file is from pmtk3.googlecode.com

%function [W, b, proto] = tippingDemo();

p = 16;
setSeed(0);
D = 16;
K = 3;
proto = rand(p,K) < 0.5;
data = [];
dataClean = [];
proto = rand(D,K) < 0.5;
M = 50;
source = [1*ones(1,M) 2*ones(1,M) 3*ones(1,M)];
for k=1:K
tmp = repmat(proto(:,k), 1, M);
dataClean = [dataClean tmp];
noise = rand(p, M) < 0.05;
tmp(noise) = 1-tmp(noise);
data = [data tmp];
N = numel(source);
dataClean = zeros(N, D);
for n=1:N
src = source(n);
dataClean(n, :) = proto(:, src)';
end
noiseLevel = 0.05;
flipMask = rand(N,D) < noiseLevel;
dataNoisy = dataClean;
dataNoisy(flipMask) = 1-dataClean(flipMask);
dataMissing = dataClean;
dataMissing(flipMask) = nan;

figure; imagesc(dataNoisy); colormap(gray);
title('noisy binary data')
printPmtkFigure('binaryPCAinput');

figure; imagesc(data'); colormap(gray);printPmtkFigure('binaryPCAinput');
figure; imagesc(dataClean'); colormap(gray)
figure; imagesc(dataClean); colormap(gray); title('hidden truth')

q = 2;
%[W, b, muPost] = binaryPcaFitVarEm(data, 2);
model = binaryFAfit(data, 2);
muPost = binaryFAinferLatent(model, data);
% Fit model
[model, loglikHist] = binaryFAfit(dataNoisy, 2, 'maxIter', 10, 'verbose', true);
figure; plot(loglikHist); title('(lower bound on) loglik vs iter for EM')

% Latent 2d embedding
muPost = binaryFAinferLatent(model, dataNoisy);
figure;
symbols = {'ro', 'gs', 'k*'};
for k=1:K
ndx = (source==k);
plot(muPost(1,ndx), muPost(2,ndx), symbols{k});
hold on
end
title('latent embedding')
printPmtkFigure('binaryPCAoutput')

% Denoising
[postPred] = binaryFApredictMissing(model, dataNoisy);
yhat = postPred > 0.5;
figure; imagesc(yhat); colormap(gray); title('prediction given noisy data')

% Imputation
[postPred] = binaryFApredictMissing(model, dataMissing);
yhat = postPred > 0.5;
figure; imagesc(yhat); colormap(gray); title('prediction given missing data')
File renamed without changes.
2 changes: 1 addition & 1 deletion toolbox/BasicModels/gauss/sub/gaussLogprobMissingData.m
@@ -33,7 +33,7 @@
logp = NaN(n,1);
logp(~missRows) = gaussLogprob(mu, Sigma, X(~missRows,:));

XmissCell = mat2Cell(X(missRows,:), ones(1,nMiss), d);
XmissCell = mat2cell(X(missRows,:), ones(1,nMiss), d);
% XmissCell{i} is a 1xd vector with some NaNs
logp(missRows) = cellfun(@(y)lognormpdfNaN(mu, Sigma, y), XmissCell);

193 changes: 45 additions & 148 deletions toolbox/LatentVariableModels/binaryFA/binaryFAfit.m
@@ -12,20 +12,20 @@


EMargs = varargin;
Y = canonizeLabels(Y) - 1; % {0,1}
[N,T] = size(Y);
model.type = 'binaryFA';
model.K = K;
model.T = T;

[model, loglikHist] = emAlgo(model, data, initFn, @estep, @mstep , ...
'verbose', true, EMargs{:});
[model, loglikHist] = emAlgo(model, Y, @initFn, @estep, @mstep , EMargs{:});
end

function model = initFn(model, Y, restartNum) %#ok
K = model.K;
T = model.T;
function model = initFn(model, Y, restartNum) %#ok ignores Y
K = model.K; % num latent
T = model.T; % num observed
model.W = 0.1*randn(K, T);
model.b = 0.01*randn(K, 1);
model.b = 0.01*randn(T, 1); % bias on observed nodes
model.muPrior = zeros(K,1);
model.SigmaPrior = eye(K,K);

@@ -34,52 +34,48 @@

function [ess, loglik] = estep(model, data)
q = model.K;
Y = data;
debug = true;
timCan = 0; timMom = 0; % timing for the 2 methods
Y = data';
[p,N] = size(Y);
debug = false;
q1 = q+1;
S2 = zeros(q1,q1,p);
S1 = zeros(q1,p);
logZ = 0;
for n=1:N
% The canonical function also returns the loglikelihood
[muPost(:,n), SigmaPost, lambda, LL] = ...
varInferLogisticGaussCanonical(Y(:,n), W, b, muPrior, SigmaPrior);
logZ = logZ + LL;

if debug
% Check Tipping's formulas agree with Murphy UAI'99
% Only equivalent if we use the same number of iterations inside varInfer
tic;
[muPost(:,n), SigmaPost, lambda, LL] = ...
varInferLogisticGaussCanonical(Y(:,n), W, b, muPrior, SigmaPrior, ...
'fixedNumIter', true, 'maxIter', 3);
timCan = timCan + toc;

tic;
[muPost2(:,n), SigmaPost2, lambda2] = ...
varInferLogisticGauss(Y(:,n), W, b, muPrior, SigmaPrior, 'maxIter', 3);
timMom = timMom + toc;
assert(approxeq(muPost(:,n), muPost2(:,n)))
assert(approxeq(SigmaPost, SigmaPost2))
assert(approxeq(lambda, lambda2))
end

mu = muPost(:,n);
for i=1:p % can we vectorize this?
Sn = SigmaPost + mu*mu';
Mn = zeros(q1, q1);
Mn(1:q,1:q) = Sn;
Mn(q+1,1:q) = mu';
Mn(1:q,q+1) = mu;
Mn(q+1,q+1) = 1;
S2(:,:,i) = S2(:,:,i) + 2*lambda(i)*Mn;
S1(:,i) = S1(:,i) + (Y(i,n) - 0.5) * [mu; 1];
end

S2 = zeros(q1,q1,p);
S1 = zeros(q1,p);
loglik = 0;
W = model.W; b = model.b;
muPrior = model.muPrior; SigmaPriorInv = inv(model.SigmaPrior);
for n=1:N

[muPost, SigmaPost, logZ, lambda] = ...
varInferLogisticGauss(Y(:,n), W, b, muPrior, SigmaPriorInv);
loglik = loglik + logZ;

if debug
% Check Tipping's formulas agree with Murphy UAI'99
% Only equivalent if we use the same number of iterations inside
% varInfer and if we initialize the var params in the same way
[muPost, SigmaPost, logZ] = ...
varInferLogisticGaussCanonical(Y(:,n), W, b, muPrior, SigmaPriorInv, 'maxIter', 3);
[muPost2, SigmaPost2, lambda2, logZ2] = ...
varInferLogisticGauss(Y(:,n), W, b, muPrior, SigmaPriorInv, 'maxIter', 3);
assert(approxeq(muPost, muPost2))
assert(approxeq(SigmaPost, SigmaPost2))
assert(approxeq(lambda, lambda2))
assert(approxeq(logZ, logZ2))
end

for i=1:p % can we vectorize this?
Sn = SigmaPost + muPost*muPost';
Mn = zeros(q1, q1);
Mn(1:q,1:q) = Sn;
Mn(q+1,1:q) = muPost';
Mn(1:q,q+1) = muPost;
Mn(q+1,q+1) = 1;
S2(:,:,i) = S2(:,:,i) + 2*lambda(i)*Mn;
S1(:,i) = S1(:,i) + (Y(i,n) - 0.5) * [muPost; 1];
end

end
ess.S1 = S1; ess.S2 = S2;
loglik = logZ;
end

function model = mstep(model, ess)
@@ -91,102 +91,3 @@
model.b(i) = what(q+1);
end
end

%




function [W, b, muPost, logZ] = tippingEM(Y, q)

% Y(i,n) is the n'th example - each COLUMN is an example
% q is the number of latent variables

% This file is from pmtk3.googlecode.com


[p N] = size(Y);

% initialization
W = 0.1*randn(q,p); % W(j,i) for Xj -> Yi
b = 0.01*randn(p,1);
muPrior = zeros(q,1);
SigmaPrior = eye(q);

iter = 1;
thresh = 1e-3;
maxIter = 30;
muPost = zeros(q,N);
logZold = -inf;

debug = false;
timCan = 0; timMom = 0; % timing for the 2 methods

while 1
% E step
q1 = q+1;
S2 = zeros(q1,q1,p);
S1 = zeros(q1,p);
logZ = 0;
for n=1:N
% The canonical function also returns the loglikelihood
[muPost(:,n), SigmaPost, lambda, LL] = ...
varInferLogisticGaussCanonical(Y(:,n), W, b, muPrior, SigmaPrior);
logZ = logZ + LL;

if debug
% Check Tipping's formulas agree with Murphy UAI'99
% Only equivalent if we use the same number of iterations inside varInfer
tic;
[muPost(:,n), SigmaPost, lambda, LL] = ...
varInferLogisticGaussCanonical(Y(:,n), W, b, muPrior, SigmaPrior, ...
'fixedNumIter', true, 'maxIter', 3);
timCan = timCan + toc;

tic;
[muPost2(:,n), SigmaPost2, lambda2] = ...
varInferLogisticGauss(Y(:,n), W, b, muPrior, SigmaPrior, 'maxIter', 3);
timMom = timMom + toc;
assert(approxeq(muPost(:,n), muPost2(:,n)))
assert(approxeq(SigmaPost, SigmaPost2))
assert(approxeq(lambda, lambda2))
end

mu = muPost(:,n);
for i=1:p % can we vectorize this?
Sn = SigmaPost + mu*mu';
Mn = zeros(q1, q1);
Mn(1:q,1:q) = Sn;
Mn(q+1,1:q) = mu';
Mn(1:q,q+1) = mu;
Mn(q+1,q+1) = 1;
S2(:,:,i) = S2(:,:,i) + 2*lambda(i)*Mn;
S1(:,i) = S1(:,i) + (Y(i,n) - 0.5) * [mu; 1];
end
end

% M step
%what = zeros(q1, 1);
for i=1:p
what = -S2(:,:,i) \ S1(:,i);
W(:,i) = what(1:q);
b(i) = what(q+1);
end

% Converged?
delta = logZ - logZold;
%fprintf('EM iter %d, logZ = %10.6f, relative delta = %10.6f\n', iter, logZ, delta/abs(logZold));
if delta < 0
error(sprintf('logZ decreased from %10.6f to %10.6f', logZold, logZ))
end
if (delta/abs(logZold) < thresh) | (iter > maxIter)
break;
end
iter = iter + 1;
logZold = logZ;
end

if debug
timCan
timMom
end
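For reference, the M-step in binaryFAfit above (what = -S2(:,:,i) \ S1(:,i)) solves, for each observed dimension i, the quadratic problem defined by the expected sufficient statistics accumulated in the E-step. A sketch of the update the code implements, assuming the usual variational (Jaakkola-Jordan style) bound computed by varInferLogisticGauss, with lambda_{in} the per-case variational parameters and \tilde z_n = [z_n; 1]:

\[
\hat w_i \;=\; -\Big(\underbrace{\sum_{n=1}^{N} 2\,\lambda_{in}\,\mathbb{E}\big[\tilde z_n \tilde z_n^{\top}\big]}_{S_2(:,:,i)}\Big)^{-1}
\underbrace{\sum_{n=1}^{N}\Big(y_{in}-\tfrac{1}{2}\Big)\,\mathbb{E}\big[\tilde z_n\big]}_{S_1(:,i)},
\qquad
\tilde z_n = \begin{bmatrix} z_n \\ 1 \end{bmatrix},
\]

after which W(:,i) = \hat w_i(1:q) and b(i) = \hat w_i(q+1).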
14 changes: 9 additions & 5 deletions toolbox/LatentVariableModels/binaryFA/binaryFAinferLatent.m
@@ -1,4 +1,4 @@
function [muPost, SigmaPost] = binaryFAinferLatent(model, data, varargin)
function [muPost, SigmaPost, loglik] = binaryFAinferLatent(model, data, varargin)
% Infer distribution over latent factors given observed data
%
% data(n,j) in {0,1} or {1,2}
@@ -15,17 +15,21 @@
W = model.W;
b = model.b;
[K, T2] = size(W);
muPrior = zeros(K,1);
SigmaPrior = eye(K,K);
assert(T==T2);
muPrior = model.muPrior;
SigmaPriorInv = inv(model.SigmaPrior);
muPost = zeros(K,N);
loglik = zeros(1,N);
if nargout >= 2
SigmaPost = zeros(K,K,N);
else
SigmaPost = [];
end
for n=1:N
[muPost(:,n), SigmaPost(:,:,n)] = ...
varInferLogisticGauss(y(n,:), W, b, muPrior, SigmaPrior);
[muPost(:,n), Sigma, logZ] = ...
varInferLogisticGauss(y(n,:)', W, b, muPrior, SigmaPriorInv);
if nargout >= 2, SigmaPost(:,:,n) = Sigma; end
loglik(n) = logZ;
end
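
A minimal sketch of the updated three-output interface (the data matrix is hypothetical; shapes follow the preallocations above):

% data is N x T with binary entries; model comes from binaryFAfit
[muPost, SigmaPost, loglik] = binaryFAinferLatent(model, data);
% muPost    : K x N      posterior means of the latent factors
% SigmaPost : K x K x N  posterior covariances (filled only when a second output is requested)
% loglik    : 1 x N      per-case variational lower bounds (logZ) on log p(y_n | model)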



