
Commit

binaryFA code now works in unsupervised setting
git-svn-id: https://pmtk3.googlecode.com/svn/trunk@2769 b6abd7f4-f95b-11de-aa3c-59de0406b4f5
[email protected] committed Apr 12, 2011
1 parent c98489c commit 5c68983
Showing 10 changed files with 212 additions and 241 deletions.
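In short, the binaryFA routines can now be fit and queried with no supervision: EM runs directly on a binary data matrix, and the fitted model is then used for low-dimensional embedding, denoising, and imputation. A minimal sketch of that workflow, assembled from the updated demo and call signatures in this commit (the data matrix Ybin and the output variable names are hypothetical):

% Ybin is an N x D matrix with entries in {0,1}; 2 is the latent dimensionality
[model, loglikHist] = binaryFAfit(Ybin, 2, 'maxIter', 10, 'verbose', true);
muPost = binaryFAinferLatent(model, Ybin);    % K x N posterior means of the latent factors
probOn = binaryFApredictMissing(model, Ybin); % posterior probability that each bit is on
Yhat   = probOn > 0.5;                        % reconstruction, thresholded as in the demo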
48 changes: 0 additions & 48 deletions demos/binaryPcaDemoImpute.m

This file was deleted.

54 changes: 36 additions & 18 deletions demos/binaryPcaDemoTipping.m
@@ -1,37 +1,55 @@
% Demo of factor analysis applied to some synthetic binary data
% Demo of factor analysis applied to some synthetic 2d binary data

% This file is from pmtk3.googlecode.com

%function [W, b, proto] = tippingDemo();

p = 16;
setSeed(0);
D = 16;
K = 3;
proto = rand(p,K) < 0.5;
data = [];
dataClean = [];
proto = rand(D,K) < 0.5;
M = 50;
source = [1*ones(1,M) 2*ones(1,M) 3*ones(1,M)];
for k=1:K
tmp = repmat(proto(:,k), 1, M);
dataClean = [dataClean tmp];
noise = rand(p, M) < 0.05;
tmp(noise) = 1-tmp(noise);
data = [data tmp];
N = numel(source);
dataClean = zeros(N, D);
for n=1:N
src = source(n);
dataClean(n, :) = proto(:, src)';
end
noiseLevel = 0.05;
flipMask = rand(N,D) < noiseLevel;
dataNoisy = dataClean;
dataNoisy(flipMask) = 1-dataClean(flipMask);
dataMissing = dataClean;
dataMissing(flipMask) = nan;

figure; imagesc(dataNoisy); colormap(gray);
title('noisy binary data')
printPmtkFigure('binaryPCAinput');

figure; imagesc(data'); colormap(gray);printPmtkFigure('binaryPCAinput');
figure; imagesc(dataClean'); colormap(gray)
figure; imagesc(dataClean); colormap(gray); title('hidden truth')

q = 2;
%[W, b, muPost] = binaryPcaFitVarEm(data, 2);
model = binaryFAfit(data, 2);
muPost = binaryFAinferLatent(model, data);
% Fit model
[model, loglikHist] = binaryFAfit(dataNoisy, 2, 'maxIter', 10, 'verbose', true);
figure; plot(loglikHist); title('(lower bound on) loglik vs iter for EM')

% Latent 2d embedding
muPost = binaryFAinferLatent(model, dataNoisy);
figure;
symbols = {'ro', 'gs', 'k*'};
for k=1:K
ndx = (source==k);
plot(muPost(1,ndx), muPost(2,ndx), symbols{k});
hold on
end
title('latent embedding')
printPmtkFigure('binaryPCAoutput')

% Denoising
[postPred] = binaryFApredictMissing(model, dataNoisy);
yhat = postPred > 0.5;
figure; imagesc(yhat); colormap(gray); title('prediction given noisy data')

% Imputation
[postPred] = binaryFApredictMissing(model, dataMissing);
yhat = postPred > 0.5;
figure; imagesc(yhat); colormap(gray); title('prediction given missing data')
File renamed without changes.
2 changes: 1 addition & 1 deletion toolbox/BasicModels/gauss/sub/gaussLogprobMissingData.m
@@ -33,7 +33,7 @@
logp = NaN(n,1);
logp(~missRows) = gaussLogprob(mu, Sigma, X(~missRows,:));

XmissCell = mat2Cell(X(missRows,:), ones(1,nMiss), d);
XmissCell = mat2cell(X(missRows,:), ones(1,nMiss), d);
% XmissCell{i} is a 1xd vector with some NaNs
logp(missRows) = cellfun(@(y)lognormpdfNaN(mu, Sigma, y), XmissCell);

193 changes: 45 additions & 148 deletions toolbox/LatentVariableModels/binaryFA/binaryFAfit.m
@@ -12,20 +12,20 @@


EMargs = varargin;
Y = canonizeLabels(Y) - 1; % {0,1}
[N,T] = size(Y);
model.type = 'binaryFA';
model.K = K;
model.T = T;

[model, loglikHist] = emAlgo(model, data, initFn, @estep, @mstep , ...
'verbose', true, EMargs{:});
[model, loglikHist] = emAlgo(model, Y, @initFn, @estep, @mstep , EMargs{:});
end

function model = initFn(model, Y, restartNum) %#ok
K = model.K;
T = model.T;
function model = initFn(model, Y, restartNum) %#ok ignores Y
K = model.K; % num latent
T = model.T; % num observed
model.W = 0.1*randn(K, T);
model.b = 0.01*randn(K, 1);
model.b = 0.01*randn(T, 1); % bias on observed nodes
model.muPrior = zeros(K,1);
model.SigmaPrior = eye(K,K);

@@ -34,52 +34,48 @@

function [ess, loglik] = estep(model, data)
q = model.K;
Y = data;
debug = true;
timCan = 0; timMom = 0; % timing for the 2 methods
Y = data';
[p,N] = size(Y);
debug = false;
q1 = q+1;
S2 = zeros(q1,q1,p);
S1 = zeros(q1,p);
logZ = 0;
for n=1:N
% The canonical function also returns the loglikelihood
[muPost(:,n), SigmaPost, lambda, LL] = ...
varInferLogisticGaussCanonical(Y(:,n), W, b, muPrior, SigmaPrior);
logZ = logZ + LL;

if debug
% Check Tipping's formulas agree with Murphy UAI'99
% Only equivalent if we use the same number of iterations inside varInfer
tic;
[muPost(:,n), SigmaPost, lambda, LL] = ...
varInferLogisticGaussCanonical(Y(:,n), W, b, muPrior, SigmaPrior, ...
'fixedNumIter', true, 'maxIter', 3);
timCan = timCan + toc;

tic;
[muPost2(:,n), SigmaPost2, lambda2] = ...
varInferLogisticGauss(Y(:,n), W, b, muPrior, SigmaPrior, 'maxIter', 3);
timMom = timMom + toc;
assert(approxeq(muPost(:,n), muPost2(:,n)))
assert(approxeq(SigmaPost, SigmaPost2))
assert(approxeq(lambda, lambda2))
end

mu = muPost(:,n);
for i=1:p % can we vectorize this?
Sn = SigmaPost + mu*mu';
Mn = zeros(q1, q1);
Mn(1:q,1:q) = Sn;
Mn(q+1,1:q) = mu';
Mn(1:q,q+1) = mu;
Mn(q+1,q+1) = 1;
S2(:,:,i) = S2(:,:,i) + 2*lambda(i)*Mn;
S1(:,i) = S1(:,i) + (Y(i,n) - 0.5) * [mu; 1];
end

S2 = zeros(q1,q1,p);
S1 = zeros(q1,p);
loglik = 0;
W = model.W; b = model.b;
muPrior = model.muPrior; SigmaPriorInv = inv(model.SigmaPrior);
for n=1:N

[muPost, SigmaPost, logZ, lambda] = ...
varInferLogisticGauss(Y(:,n), W, b, muPrior, SigmaPriorInv);
loglik = loglik + logZ;

if debug
% Check Tipping's formulas agree with Murphy UAI'99
% Only equivalent if we use the same number of iterations inside
% varInfer and if we initialize the var params in the same way
[muPost, SigmaPost, logZ] = ...
varInferLogisticGaussCanonical(Y(:,n), W, b, muPrior, SigmaPriorInv, 'maxIter', 3);
[muPost2, SigmaPost2, lambda2, logZ2] = ...
varInferLogisticGauss(Y(:,n), W, b, muPrior, SigmaPriorInv, 'maxIter', 3);
assert(approxeq(muPost, muPost2))
assert(approxeq(SigmaPost, SigmaPost2))
assert(approxeq(lambda, lambda2))
assert(approxeq(logZ, logZ2))
end

for i=1:p % can we vectorize this?
Sn = SigmaPost + muPost*muPost';
Mn = zeros(q1, q1);
Mn(1:q,1:q) = Sn;
Mn(q+1,1:q) = muPost';
Mn(1:q,q+1) = muPost;
Mn(q+1,q+1) = 1;
S2(:,:,i) = S2(:,:,i) + 2*lambda(i)*Mn;
S1(:,i) = S1(:,i) + (Y(i,n) - 0.5) * [muPost; 1];
end

end
ess.S1 = S1; ess.S2 = S2;
loglik = logZ;
end

function model = mstep(model, ess)
@@ -91,102 +91,3 @@
model.b(i) = what(q+1);
end
end

%




function [W, b, muPost, logZ] = tippingEM(Y, q)

% Y(i,n) is the n'th example - each COLUMN is an example
% q is the number of latent variables

% This file is from pmtk3.googlecode.com


[p N] = size(Y);

% initialization
W = 0.1*randn(q,p); % W(j,i) for Xj -> Yi
b = 0.01*randn(p,1);
muPrior = zeros(q,1);
SigmaPrior = eye(q);

iter = 1;
thresh = 1e-3;
maxIter = 30;
muPost = zeros(q,N);
logZold = -inf;

debug = false;
timCan = 0; timMom = 0; % timing for the 2 methods

while 1
% E step
q1 = q+1;
S2 = zeros(q1,q1,p);
S1 = zeros(q1,p);
logZ = 0;
for n=1:N
% The canonical function also returns the loglikelihood
[muPost(:,n), SigmaPost, lambda, LL] = ...
varInferLogisticGaussCanonical(Y(:,n), W, b, muPrior, SigmaPrior);
logZ = logZ + LL;

if debug
% Check Tipping's formulas agree with Murphy UAI'99
% Only equivalent if we use the same number of iterations inside varInfer
tic;
[muPost(:,n), SigmaPost, lambda, LL] = ...
varInferLogisticGaussCanonical(Y(:,n), W, b, muPrior, SigmaPrior, ...
'fixedNumIter', true, 'maxIter', 3);
timCan = timCan + toc;

tic;
[muPost2(:,n), SigmaPost2, lambda2] = ...
varInferLogisticGauss(Y(:,n), W, b, muPrior, SigmaPrior, 'maxIter', 3);
timMom = timMom + toc;
assert(approxeq(muPost(:,n), muPost2(:,n)))
assert(approxeq(SigmaPost, SigmaPost2))
assert(approxeq(lambda, lambda2))
end

mu = muPost(:,n);
for i=1:p % can we vectorize this?
Sn = SigmaPost + mu*mu';
Mn = zeros(q1, q1);
Mn(1:q,1:q) = Sn;
Mn(q+1,1:q) = mu';
Mn(1:q,q+1) = mu;
Mn(q+1,q+1) = 1;
S2(:,:,i) = S2(:,:,i) + 2*lambda(i)*Mn;
S1(:,i) = S1(:,i) + (Y(i,n) - 0.5) * [mu; 1];
end
end

% M step
%what = zeros(q1, 1);
for i=1:p
what = -S2(:,:,i) \ S1(:,i);
W(:,i) = what(1:q);
b(i) = what(q+1);
end

% Converged?
delta = logZ - logZold;
%fprintf('EM iter %d, logZ = %10.6f, relative delta = %10.6f\n', iter, logZ, delta/abs(logZold));
if delta < 0
error(sprintf('logZ decreased from %10.6f to %10.6f', logZold, logZ))
end
if (delta/abs(logZold) < thresh) | (iter > maxIter)
break;
end
iter = iter + 1;
logZold = logZ;
end

if debug
timCan
timMom
end
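For reference, the M-step in binaryFAfit above (what = -S2(:,:,i) \ S1(:,i)) solves, for each observed dimension i, the quadratic problem defined by the expected sufficient statistics accumulated in the E-step. A sketch of the update the code implements, assuming the usual variational (Jaakkola-Jordan style) bound computed by varInferLogisticGauss, with lambda_{in} the per-case variational parameters and \tilde z_n = [z_n; 1]:

\[
\hat w_i \;=\; -\Big(\underbrace{\sum_{n=1}^{N} 2\,\lambda_{in}\,\mathbb{E}\big[\tilde z_n \tilde z_n^{\top}\big]}_{S_2(:,:,i)}\Big)^{-1}
\underbrace{\sum_{n=1}^{N}\Big(y_{in}-\tfrac{1}{2}\Big)\,\mathbb{E}\big[\tilde z_n\big]}_{S_1(:,i)},
\qquad
\tilde z_n = \begin{bmatrix} z_n \\ 1 \end{bmatrix},
\]

after which W(:,i) = \hat w_i(1:q) and b(i) = \hat w_i(q+1).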
14 changes: 9 additions & 5 deletions toolbox/LatentVariableModels/binaryFA/binaryFAinferLatent.m
@@ -1,4 +1,4 @@
function [muPost, SigmaPost] = binaryFAinferLatent(model, data, varargin)
function [muPost, SigmaPost, loglik] = binaryFAinferLatent(model, data, varargin)
% Infer distribution over latent factors given observed data
%
% data(n,j) in {0,1} or {1,2}
@@ -15,17 +15,21 @@
W = model.W;
b = model.b;
[K, T2] = size(W);
muPrior = zeros(K,1);
SigmaPrior = eye(K,K);
assert(T==T2);
muPrior = model.muPrior;
SigmaPriorInv = inv(model.SigmaPrior);
muPost = zeros(K,N);
loglik = zeros(1,N);
if nargout >= 2
SigmaPost = zeros(K,K,N);
else
SigmaPost = [];
end
for n=1:N
[muPost(:,n), SigmaPost(:,:,n)] = ...
varInferLogisticGauss(y(n,:), W, b, muPrior, SigmaPrior);
[muPost(:,n), Sigma, logZ] = ...
varInferLogisticGauss(y(n,:)', W, b, muPrior, SigmaPriorInv);
if nargout >= 2, SigmaPost(:,:,n) = Sigma; end
loglik(n) = logZ;
end
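
A minimal sketch of the updated three-output interface (the data matrix is hypothetical; shapes follow the preallocations above):

% data is N x T with binary entries; model comes from binaryFAfit
[muPost, SigmaPost, loglik] = binaryFAinferLatent(model, data);
% muPost    : K x N      posterior means of the latent factors
% SigmaPost : K x K x N  posterior covariances (filled only when a second output is requested)
% loglik    : 1 x N      per-case variational lower bounds (logZ) on log p(y_n | model)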



