diff --git a/demos/binaryPcaDemoImpute.m b/demos/binaryPcaDemoImpute.m
deleted file mode 100644
index 7a53a90ef..000000000
--- a/demos/binaryPcaDemoImpute.m
+++ /dev/null
@@ -1,48 +0,0 @@
-
-
-[W,b,proto] = tippingDemo();
-
-setSeed(1);
-[L p]= size(W); % L is size of latent space
-src = [1 2 3 1];
-Ntest = length(src);
-% each example is a ROW vector
-
-% This file is from pmtk3.googlecode.com
-
-
-dataTestNoisy = zeros(Ntest, p);
-dataTestClean = zeros(Ntest, p);
-dataTestMissing = zeros(Ntest, p);
-for n=1:Ntest
-  dataTestClean(n,:) = proto(:,src(n))';
-  noise = rand(p,1) < 0.05;
-  dataTestNoisy(n,:) = dataTestClean(n,:);
-  dataTestNoisy(n,noise) = 1-dataTestNoisy(n,noise); % flip bits
-  missing = rand(p,1) < 0.5;
-  dataTestMissing(n,:) = dataTestNoisy(n,:);
-  dataTestMissing(n,missing) = NaN;
-end
-
-yhat = zeros(Ntest, p);
-postPred = zeros(Ntest, p);
-logprob = zeros(Ntest,1);
-hammingErr = zeros(Ntest, 1);
-for n=1:Ntest
-  y = dataTestMissing(n,:);
-  [yhat(n,:), postPred(n,:), loglikV] = imputeBinaryVectorPCA(y, W, b);
-  % Compute p(h|v) where v is like action, h is like response
-  yFullObs = dataTestNoisy(n,:);
-  [muPost, SigmaPost, lambda, loglikHV] = varInferLogisticGaussCanonical(yFullObs, W, b);
-  logprob(n) = loglikHV - loglikV;
-  hammingErr(n) = sum(yhat(n,:) ~= yFullObs);
-end
-logprob
-hammingErr
-
-figure; image_rgb(dataTestNoisy+1); colorbar; title('observed noisy');
-Y = dataTestMissing+1; Y(isnan(Y))=3;
-figure;image_rgb(Y); colorbar; title('observed missing');
-figure; imagesc(postPred); colorbar; title('postpred');
-figure; image_rgb(yhat+1); colorbar; title('map')
-
diff --git a/demos/binaryPcaDemoTipping.m b/demos/binaryPcaDemoTipping.m
index 29150e529..9524ae3bb 100644
--- a/demos/binaryPcaDemoTipping.m
+++ b/demos/binaryPcaDemoTipping.m
@@ -1,32 +1,39 @@
-% Demo of factor analysis applied to some synthetic binary data
+% Demo of binary factor analysis with a 2d latent space, applied to synthetic binary data
 
 % This file is from pmtk3.googlecode.com
 
-%function [W, b, proto] = tippingDemo();
-p = 16;
+setSeed(0);
+D = 16;
 K = 3;
-proto = rand(p,K) < 0.5;
-data = [];
-dataClean = [];
+proto = rand(D,K) < 0.5;
 M = 50;
 source = [1*ones(1,M) 2*ones(1,M) 3*ones(1,M)];
-for k=1:K
-  tmp = repmat(proto(:,k), 1, M);
-  dataClean = [dataClean tmp];
-  noise = rand(p, M) < 0.05;
-  tmp(noise) = 1-tmp(noise);
-  data = [data tmp];
+N = numel(source);
+dataClean = zeros(N, D);
+for n=1:N
+  src = source(n);
+  dataClean(n, :) = proto(:, src)';
 end
+noiseLevel = 0.05;
+flipMask = rand(N,D) < noiseLevel;
+dataNoisy = dataClean;
+dataNoisy(flipMask) = 1-dataClean(flipMask);
+dataMissing = dataClean;
+dataMissing(flipMask) = nan; % hide exactly the bits that were flipped in dataNoisy
+
+figure; imagesc(dataNoisy); colormap(gray);
+title('noisy binary data')
+printPmtkFigure('binaryPCAinput');
 
-figure; imagesc(data'); colormap(gray);printPmtkFigure('binaryPCAinput');
-figure; imagesc(dataClean'); colormap(gray)
+figure; imagesc(dataClean); colormap(gray); title('hidden truth')
 
-q = 2;
-%[W, b, muPost] = binaryPcaFitVarEm(data, 2);
-model = binaryFAfit(data, 2);
-muPost = binaryFAinferLatent(model, data);
+% Fit model
+[model, loglikHist] = binaryFAfit(dataNoisy, 2, 'maxIter', 10, 'verbose', true);
+figure; plot(loglikHist); title('(lower bound on) loglik vs iter for EM')
 
+% Latent 2d embedding
+muPost = binaryFAinferLatent(model, dataNoisy);
 figure;
 symbols = {'ro', 'gs', 'k*'};
 for k=1:K
@@ -34,4 +41,15 @@
   plot(muPost(1,ndx), muPost(2,ndx), symbols{k});
   hold on
 end
+title('latent embedding')
 printPmtkFigure('binaryPCAoutput')
+
+% Denoising
+[postPred] = binaryFApredictMissing(model, dataNoisy);
+yhat = postPred > 0.5;
+figure; imagesc(yhat); colormap(gray); title('prediction given noisy data')
+
+% Imputation
+[postPred] = binaryFApredictMissing(model, dataMissing);
+yhat = postPred > 0.5;
+figure; imagesc(yhat); colormap(gray); title('prediction given missing data')
diff --git a/toolbox/LatentVariableModels/ppca/sub/screeplotChooseDim.m b/matlabTools/stats/screeplotChooseDim.m
similarity index 100%
rename from toolbox/LatentVariableModels/ppca/sub/screeplotChooseDim.m
rename to matlabTools/stats/screeplotChooseDim.m
diff --git a/toolbox/BasicModels/gauss/sub/gaussLogprobMissingData.m b/toolbox/BasicModels/gauss/sub/gaussLogprobMissingData.m
index 7ef166afe..00e31b98a 100644
--- a/toolbox/BasicModels/gauss/sub/gaussLogprobMissingData.m
+++ b/toolbox/BasicModels/gauss/sub/gaussLogprobMissingData.m
@@ -33,7 +33,7 @@
 logp = NaN(n,1);
 logp(~missRows) = gaussLogprob(mu, Sigma, X(~missRows,:));
 
-XmissCell = mat2Cell(X(missRows,:), ones(1,nMiss), d);
+XmissCell = mat2cell(X(missRows,:), ones(1,nMiss), d);
 % XmissCell{i} is a 1xd vector with some NaNs
 logp(missRows) = cellfun(@(y)lognormpdfNaN(mu, Sigma, y), XmissCell);
 
diff --git a/toolbox/LatentVariableModels/binaryFA/binaryFAfit.m b/toolbox/LatentVariableModels/binaryFA/binaryFAfit.m
index 2f724b1a5..f515a0e8e 100644
--- a/toolbox/LatentVariableModels/binaryFA/binaryFAfit.m
+++ b/toolbox/LatentVariableModels/binaryFA/binaryFAfit.m
@@ -12,20 +12,20 @@
 EMargs = varargin;
+Y = canonizeLabels(Y) - 1; % {0,1}
 [N,T] = size(Y);
 model.type = 'binaryFA';
 model.K = K;
 model.T = T;
-[model, loglikHist] = emAlgo(model, data, initFn, @estep, @mstep , ...
-  'verbose', true, EMargs{:});
+[model, loglikHist] = emAlgo(model, Y, @initFn, @estep, @mstep, EMargs{:});
 end
 
-function model = initFn(model, Y, restartNum) %#ok
-K = model.K;
-T = model.T;
+function model = initFn(model, Y, restartNum) %#ok ignores Y
+K = model.K; % num latent
+T = model.T; % num observed
 model.W = 0.1*randn(K, T);
-model.b = 0.01*randn(K, 1);
+model.b = 0.01*randn(T, 1); % bias on observed nodes
 model.muPrior = zeros(K,1);
 model.SigmaPrior = eye(K,K);
 
@@ -34,52 +34,48 @@
 function [ess, loglik] = estep(model, data)
 q = model.K;
-Y = data;
-debug = true;
-timCan = 0; timMom = 0; % timing for the 2 methods
+Y = data';
+[p,N] = size(Y);
+debug = false;
 q1 = q+1;
-  S2 = zeros(q1,q1,p);
-  S1 = zeros(q1,p);
-  logZ = 0;
-  for n=1:N
-    % The canonical function also returns the loglikelihood
-    [muPost(:,n), SigmaPost, lambda, LL] = ...
-      varInferLogisticGaussCanonical(Y(:,n), W, b, muPrior, SigmaPrior);
-    logZ = logZ + LL;
-
-    if debug
-      % Check Tippings formulas agree with Murphy UAI'99
-      % Only equivalent if we use the same number of iterations inside varInfer
-      tic;
-      [muPost(:,n), SigmaPost, lambda, LL] = ...
-        varInferLogisticGaussCanonical(Y(:,n), W, b, muPrior, SigmaPrior, ...
-        'fixedNumIter', true, 'maxIter', 3);
-      timCan = timCan + toc;
-
-      tic;
-      [muPost2(:,n), SigmaPost2, lambda2] = ...
-        varInferLogisticGauss(Y(:,n), W, b, muPrior, SigmaPrior, 'maxIter', 3);
-      timMom = timMom + toc;
-      assert(approxeq(muPost(:,n), muPost2(:,n)))
-      assert(approxeq(SigmaPost, SigmaPost2))
-      assert(approxeq(lambda, lambda2))
-    end
-
-    mu = muPost(:,n);
-    for i=1:p % can we vectorize this?
-      Sn = SigmaPost + mu*mu';
-      Mn = zeros(q1, q1);
-      Mn(1:q,1:q) = Sn;
-      Mn(q+1,1:q) = mu';
-      Mn(1:q,q+1) = mu;
-      Mn(q+1,q+1) = 1;
-      S2(:,:,i) = S2(:,:,i) + 2*lambda(i)*Mn;
-      S1(:,i) = S1(:,i) + (Y(i,n) - 0.5) * [mu; 1];
-    end
-
+S2 = zeros(q1,q1,p);
+S1 = zeros(q1,p);
+loglik = 0;
+W = model.W; b = model.b;
+muPrior = model.muPrior; SigmaPriorInv = inv(model.SigmaPrior);
+for n=1:N
+
+    [muPost, SigmaPost, logZ, lambda] = ...
+        varInferLogisticGauss(Y(:,n), W, b, muPrior, SigmaPriorInv);
+    loglik = loglik + logZ;
+
+    if debug
+        % Check Tipping's formulas agree with Murphy UAI'99
+        % Only equivalent if we use the same number of iterations inside
+        % varInfer and if we initialize the var params in the same way
+        [muPostC, SigmaPostC, logZC, lambdaC] = ...
+            varInferLogisticGaussCanonical(Y(:,n), W, b, muPrior, SigmaPriorInv, 'maxIter', 3);
+        [muPost2, SigmaPost2, logZ2, lambda2] = ...
+            varInferLogisticGauss(Y(:,n), W, b, muPrior, SigmaPriorInv, 'maxIter', 3);
+        assert(approxeq(muPostC, muPost2))
+        assert(approxeq(SigmaPostC, SigmaPost2))
+        assert(approxeq(lambdaC, lambda2))
+        assert(approxeq(logZC, logZ2))
+    end
+
+    for i=1:p % can we vectorize this?
+        Sn = SigmaPost + muPost*muPost';
+        Mn = zeros(q1, q1);
+        Mn(1:q,1:q) = Sn;
+        Mn(q+1,1:q) = muPost';
+        Mn(1:q,q+1) = muPost;
+        Mn(q+1,q+1) = 1;
+        S2(:,:,i) = S2(:,:,i) + 2*lambda(i)*Mn;
+        S1(:,i) = S1(:,i) + (Y(i,n) - 0.5) * [muPost; 1];
+    end
+
+end
 ess.S1 = S1;
 ess.S2 = S2;
-loglik = logZ;
 end
 
 function model = mstep(model, ess)
@@ -91,102 +87,3 @@
 model.b(i) = what(q+1);
 end
 end
-
-%
-
-
-
-
-function [W, b, muPost, logZ] = tippingEM(Y, q)
-
-% Y(i,n) is the n'th example - each COLUMN is an example
-% q is the number of latent variables
-
-% This file is from pmtk3.googlecode.com
-
-
-[p N] = size(Y);
-
-% initialization
-W = 0.1*randn(q,p); % W(j,i) for Xj -> Yi
-b = 0.01*randn(p,1);
-muPrior = zeros(q,1);
-SigmaPrior = eye(q);
-
-iter = 1;
-thresh = 1e-3;
-maxIter = 30;
-muPost = zeros(q,N);
-logZold = -inf;
-
-debug = false;
-timCan = 0; timMom = 0; % timing for the 2 methods
-
-while 1
-  % E step
-  q1 = q+1;
-  S2 = zeros(q1,q1,p);
-  S1 = zeros(q1,p);
-  logZ = 0;
-  for n=1:N
-    % The canonical function also returns the loglikelihood
-    [muPost(:,n), SigmaPost, lambda, LL] = ...
-      varInferLogisticGaussCanonical(Y(:,n), W, b, muPrior, SigmaPrior);
-    logZ = logZ + LL;
-
-    if debug
-      % Check Tippings formulas agree with Murphy UAI'99
-      % Only equivalent if we use the same number of iterations inside varInfer
-      tic;
-      [muPost(:,n), SigmaPost, lambda, LL] = ...
-        varInferLogisticGaussCanonical(Y(:,n), W, b, muPrior, SigmaPrior, ...
-        'fixedNumIter', true, 'maxIter', 3);
-      timCan = timCan + toc;
-
-      tic;
-      [muPost2(:,n), SigmaPost2, lambda2] = ...
-        varInferLogisticGauss(Y(:,n), W, b, muPrior, SigmaPrior, 'maxIter', 3);
-      timMom = timMom + toc;
-      assert(approxeq(muPost(:,n), muPost2(:,n)))
-      assert(approxeq(SigmaPost, SigmaPost2))
-      assert(approxeq(lambda, lambda2))
-    end
-
-    mu = muPost(:,n);
-    for i=1:p % can we vectorize this?
-      Sn = SigmaPost + mu*mu';
-      Mn = zeros(q1, q1);
-      Mn(1:q,1:q) = Sn;
-      Mn(q+1,1:q) = mu';
-      Mn(1:q,q+1) = mu;
-      Mn(q+1,q+1) = 1;
-      S2(:,:,i) = S2(:,:,i) + 2*lambda(i)*Mn;
-      S1(:,i) = S1(:,i) + (Y(i,n) - 0.5) * [mu; 1];
-    end
-  end
-
-  % M step
-  %what = zeros(q1, 1);
-  for i=1:p
-    what = -S2(:,:,i) \ S1(:,i);
-    W(:,i) = what(1:q);
-    b(i) = what(q+1);
-  end
-
-  % Converged?
-  delta = logZ - logZold;
-  %fprintf('EM iter %d, logZ = %10.6f, relative delta = %10.6f\n', iter, logZ, delta/abs(logZold));
-  if delta < 0
-    error(sprintf('logZ decreased from %10.6f to %10.6f', logZold, logZ))
-  end
-  if (delta/abs(logZold) < thresh) | (iter > maxIter)
-    break;
-  end
-  iter = iter + 1;
-  logZold = logZ;
-end
-
-if debug
-  timCan
-  timMom
-end
diff --git a/toolbox/LatentVariableModels/binaryFA/binaryFAinferLatent.m b/toolbox/LatentVariableModels/binaryFA/binaryFAinferLatent.m
index 6a6046de9..bc669b0ee 100644
--- a/toolbox/LatentVariableModels/binaryFA/binaryFAinferLatent.m
+++ b/toolbox/LatentVariableModels/binaryFA/binaryFAinferLatent.m
@@ -1,4 +1,4 @@
-function [muPost, SigmaPost] = binaryFAinferLatent(model, data, varargin)
+function [muPost, SigmaPost, loglik] = binaryFAinferLatent(model, data, varargin)
 % Infer distribution over latent factors given observed data
 %
 % data(n,j) in {0,1} or {1,2}
@@ -15,17 +15,21 @@
 W = model.W; b = model.b;
 [K, T2] = size(W);
-muPrior = zeros(K,1);
-SigmaPrior = eye(K,K);
+assert(T==T2);
+muPrior = model.muPrior;
+SigmaPriorInv = inv(model.SigmaPrior);
 muPost = zeros(K,N);
+loglik = zeros(1,N);
 if nargout >= 2
   SigmaPost = zeros(K,K,N);
 else
   SigmaPost = [];
 end
 for n=1:N
-  [muPost(:,n), SigmaPost(:,:,n)] = ...
-    varInferLogisticGauss(y(n,:), W, b, muPrior, SigmaPrior);
+  [muPost(:,n), Sigma, logZ] = ...
+    varInferLogisticGauss(y(n,:)', W, b, muPrior, SigmaPriorInv);
+  if nargout >= 2, SigmaPost(:,:,n) = Sigma; end
+  loglik(n) = logZ;
 end
diff --git a/toolbox/LatentVariableModels/binaryFA/binaryFApredictMissing.m b/toolbox/LatentVariableModels/binaryFA/binaryFApredictMissing.m
index de10c2916..93c7bde93 100644
--- a/toolbox/LatentVariableModels/binaryFA/binaryFApredictMissing.m
+++ b/toolbox/LatentVariableModels/binaryFA/binaryFApredictMissing.m
@@ -1,19 +1,26 @@
-function [yhat, postPred,loglik] = imputeBinaryVectorPCA(y, W, b)
-% y(i) in {0,1,NaN} where NaN represents missing data
-% W,b are learned using tippingEM (from fully observed bit vectors)
-% yhat(i) in {0,1}
-% postPred(i) = p(y(i)=1)
-% loglik = log p(yobs)
+function [postPred] = binaryFApredictMissing(model, y)
+% Compute postPred(n,t) = p(y(n,t)=1 | y(n,:)),
+% where y(n,t) in {0,1,NaN}; NaN represents missing data
 
 % This file is from pmtk3.googlecode.com
 
-
-[mu, Sigma, lambda, loglik] = varInferLogisticGaussCanonical(y(:), W, b);
+y = canonizeLabels(y) - 1; % ensure {0,1}
+[N,T] = size(y);
+postPred = zeros(N,T);
+W = model.W; b = model.b;
 [L p]= size(W);
-X = [b(:) W']; % p * (L+1)
-mu1 = [1;mu];
-Sigma1 = zeros(L+1,L+1);
-Sigma1(2:end,2:end) = Sigma;
-postPred = sigmoidTimesGauss(X, mu1, Sigma1);
-yhat = postPred > 0.5;
+B = [b(:) W']; % p * (L+1)
+muPrior = model.muPrior; SigmaPriorInv = inv(model.SigmaPrior);
+for n=1:N
+  [muPost, SigmaPost] = varInferLogisticGauss(y(n,:)', W, b, muPrior, SigmaPriorInv);
+  mu1 = [1;muPost];
+  Sigma1 = zeros(L+1,L+1);
+  Sigma1(2:end,2:end) = SigmaPost;
+  postPred(n,:) = sigmoidTimesGauss(B, mu1, Sigma1);
+  visNdx = ~isnan(y(n,:));
+  %postPred(n,visNdx) = y(n, visNdx); % this would restore the noise!
+end + + + diff --git a/toolbox/LatentVariableModels/binaryFA/varInferLogisticGauss.m b/toolbox/LatentVariableModels/binaryFA/varInferLogisticGauss.m index 637feb6b6..c28b3dfc9 100644 --- a/toolbox/LatentVariableModels/binaryFA/varInferLogisticGauss.m +++ b/toolbox/LatentVariableModels/binaryFA/varInferLogisticGauss.m @@ -1,4 +1,4 @@ -function [muPost, SigmaPost, lambda] = varInferLogisticGauss(y, W, b, muPrior, SigmaPrior, varargin) +function [muPost, SigmaPost, logZ, lambda] = varInferLogisticGauss(y, W, b, muPrior, SigmaPriorInv, varargin) % Use a variational approximation to infer a Gaussian posterior % given a Gaussian prior and a logistic likelihood for single case % @@ -14,23 +14,29 @@ % This file is from pmtk3.googlecode.com +y = colvec(y); +if any(isnan(y)) + [muPost, SigmaPost, logZ, lambda] = varInferLogisticGaussMissing(y, W, b, muPrior, SigmaPriorInv, varargin{:}); + return; +end [maxIter] = process_options(varargin, 'maxIter', 3); [q p] = size(W); -debug = 1; +debug = 0; % initialize variational param xi = (2*y-1) .* (W'*muPrior + b); ndx = find(xi==0); xi(ndx) = 0.01*rand(size(ndx)); -SigmaInv = inv(SigmaPrior); +SigmaInv = SigmaPriorInv; %inv(SigmaPrior); for iter=1:maxIter lambda = (0.5-sigmoid(xi)) ./ (2*xi); tmp = W*diag(lambda)*W'; SigmaPost = inv(SigmaInv - 2*tmp); + %{ if debug tmpB = zeros(q,q); for i=1:p @@ -38,11 +44,13 @@ end assert(approxeq(tmp, tmpB)) end - + %} + tmp = y-0.5 + 2*lambda.*b; tmp2 = sum(W*diag(tmp), 2); muPost = SigmaPost*(SigmaInv*muPrior + tmp2); + %{ if debug tmp2B = zeros(q,1); for i=1:p @@ -50,11 +58,13 @@ end assert(approxeq(tmp2, tmp2B)) end + %} tmp = diag(W'*(SigmaPost + muPost*muPost') * W); tmp2 = 2*(W*diag(b))'*muPost; xi = sqrt(tmp + tmp2 + b.^2); + %{ if debug tmptmp = SigmaPost + muPost*muPost'; tmpB = zeros(p,1); @@ -66,6 +76,23 @@ assert(approxeq(tmp, tmpB)) assert(approxeq(tmp2, tmp2B)) end + %} + + % Normalization constant + lam = -lambda; + % -ve sign needed because Tipping + % uses different sign convention for lambda to Emt/Bishop/Murphy + A = diag(2*lam); + invA = diag(1./(2*lam)); + bb = -0.5*ones(p,1); + c = -lam .* xi.^2 - 0.5*xi + log(1+exp(xi)); + ytilde = invA*(bb + y); + B = W'; % T*K + logconst1 = -0.5*sum(log(lam/pi)); + %assert(approxeq(logconst1, 0.5*logdet(2*pi*invA))) + logconst2 = 0.5*ytilde'*A*ytilde - sum(c); + logconst3 = gaussLogprob(B*muPrior + b, invA + B*SigmaPost*B', rowvec(ytilde)); + logZ = logconst1 + logconst2 + logconst3; end %%%%%%% diff --git a/toolbox/LatentVariableModels/binaryFA/varInferLogisticGaussCanonical.m b/toolbox/LatentVariableModels/binaryFA/varInferLogisticGaussCanonical.m index f15f9fbec..28e956dfb 100644 --- a/toolbox/LatentVariableModels/binaryFA/varInferLogisticGaussCanonical.m +++ b/toolbox/LatentVariableModels/binaryFA/varInferLogisticGaussCanonical.m @@ -1,5 +1,5 @@ -function [muPost, SigmaPost, lambda, logZ] = varInferLogisticGaussCanonical(... - y, W, b, muPrior, SigmaPrior, varargin) +function [muPost, SigmaPost, logZ, lambda] = varInferLogisticGaussCanonical(... 
+ y, W, b, muPrior, SigmaPriorInv, varargin) % Use a variational approximation to infer a Gaussian posterior % given a Gaussian prior and a logistic likelihood % @@ -49,7 +49,7 @@ ndx = find(xi==0); xi(ndx) = 0.01*rand(size(ndx)); -Kprior = inv(SigmaPrior); +Kprior = SigmaPriorInv; % inv(SigmaPrior); hprior = Kprior*muPrior; gprior = 0.5*logdet(Kprior) -(q/2)*log2pi -0.5*muPrior'*Kprior*muPrior; diff --git a/toolbox/LatentVariableModels/binaryFA/varInferLogisticGaussMissing.m b/toolbox/LatentVariableModels/binaryFA/varInferLogisticGaussMissing.m new file mode 100644 index 000000000..5f58ee7ef --- /dev/null +++ b/toolbox/LatentVariableModels/binaryFA/varInferLogisticGaussMissing.m @@ -0,0 +1,66 @@ +function [muPost, SigmaPost, logZ, lambda] = varInferLogisticGaussMissing(y, W, b, muPrior, SigmaPriorInv, varargin) +% Just like varInferLogisticGauss except y(t) may be NaN + +% This file is from pmtk3.googlecode.com + + +[maxIter] = process_options(varargin, 'maxIter', 3); +y = colvec(y); +p = numel(y); +visNdx = ~isnan(y); + +% initialize variational param +xi = zeros(p,1); +xi(visNdx) = (2*y(visNdx)-1) .* (W(:,visNdx)'*muPrior + b(visNdx)); +ndx = find(xi==0); +xi(ndx) = 0.01*rand(size(ndx)); + +SigmaInv = SigmaPriorInv; %inv(SigmaPrior); +for iter=1:maxIter + lambda = zeros(p,1); + lambda(visNdx) = (0.5-sigmoid(xi(visNdx))) ./ (2*xi(visNdx)); + + tmp = W*diag(lambda)*W'; % missing entries have lambda=0 + SigmaPost = inv(SigmaInv - 2*tmp); + + tmp = zeros(p,1); + tmp(visNdx) = y(visNdx)-0.5 + 2*lambda(visNdx).*b(visNdx); + tmp2 = sum(W*diag(tmp), 2); % missing entries have tmp=0 + muPost = SigmaPost*(SigmaInv*muPrior + tmp2); + + + tmp = diag(W'*(SigmaPost + muPost*muPost') * W); + tmp2 = 2*(W*diag(b))'*muPost; + xi(visNdx) = sqrt(tmp(visNdx) + tmp2(visNdx) + b(visNdx).^2); + + + % Normalization constant + lam = -lambda; + % -ve sign needed because Tipping + % uses different sign convention for lambda to Emt/Bishop/Murphy + A = diag(2*lam); + invA = diag(1./(2*lam)); + hidNdx = find(~visNdx); + ndx = sub2ind(size(A), hidNdx, hidNdx); + invA(ndx) = 0; % set diagonals to 0 for missing entries + + bb = -0.5*ones(p,1); + c = -lam .* xi.^2 - 0.5*xi + log(1+exp(xi)); + ytilde = zeros(p,1); + ytilde(visNdx) = invA(visNdx,visNdx)*(bb(visNdx) + y(visNdx)); % ytilde is 0 for missing entries + B = W'; % T*K + logconst1 = -0.5*sum(log(lam(visNdx)/pi)); + %assert(approxeq(logconst1, 0.5*logdet(2*pi*invA))) + logconst2 = 0.5*ytilde'*A*ytilde - sum(c(visNdx)); + predMu = B*muPrior + b; + predPost = invA + B*SigmaPost*B'; + logconst3 = gaussLogprob(predMu(visNdx), predPost(visNdx,visNdx), rowvec(ytilde(visNdx))); + logZ = logconst1 + logconst2 + logconst3; + assert(~isnan(logZ)) +end + +%%%%%%% + +function y=sigmoid(x) + +y = 1./(1+exp(-x));
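The snippet below is a minimal usage sketch of the refactored binaryFA API, condensed from the binaryPcaDemoTipping.m changes above; it is not part of the patch. The toy data here (unstructured random bits, N=150, D=16) is an assumption chosen only to exercise the calls, and the snippet assumes the pmtk3 utilities used by these functions (setSeed, emAlgo, canonizeLabels, sigmoidTimesGauss, ...) are on the MATLAB path.

% Usage sketch for the refactored binaryFA API (assumptions noted above)
setSeed(0);
Y = double(rand(150, 16) < 0.5);                  % N x D binary data, one case per row
[model, loglikHist] = binaryFAfit(Y, 2, 'maxIter', 10);  % 2d latent space, EM fit
muPost = binaryFAinferLatent(model, Y);           % 2 x N posterior means of the latents
Ymiss = Y; Ymiss(rand(size(Y)) < 0.3) = nan;      % hide 30% of the entries
postPred = binaryFApredictMissing(model, Ymiss);  % postPred(n,t) = p(y(n,t)=1)
yhat = postPred > 0.5;                            % MAP imputation of the hidden bits

The same three calls (fit, inferLatent, predictMissing) cover denoising as well: passing the fully observed noisy matrix instead of Ymiss reproduces the "Denoising" step of the demo.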