Merge pull request SheffieldML#492 from pgmoren/devel

We did some benchmarking on classification. These changes should be fine. Let's merge it in.

zhenwendai authored Mar 27, 2017
2 parents cc4606b + 0c248e7 commit 2f7ab3f
Showing 7 changed files with 523 additions and 118 deletions.
286 changes: 199 additions & 87 deletions GPy/inference/latent_function_inference/expectation_propagation.py

Large diffs are not rendered by default.
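Since GitHub does not render this diff inline, here is a minimal sketch of the damped EP sweep that a file like expectation_propagation.py implements for a factorising likelihood. It is an illustration built around the `moments_match_ep` interface exercised in the changes below, not the file's actual code; the dense posterior refresh and the jitter constant are assumptions.

```python
import numpy as np

def ep_sweep(K, Y, likelihood, max_iters=30, delta=0.5):
    """Sketch of a damped EP loop for a GP with a factorising likelihood.

    Illustration only -- GPy's implementation organises the updates,
    caching and convergence checks differently.
    K: (N, N) prior covariance; Y: (N,) observations; delta: damping factor.
    """
    N = Y.shape[0]
    tau_tilde = np.zeros(N)   # site precisions
    v_tilde = np.zeros(N)     # site precision-scaled means
    Sigma, mu = K.copy(), np.zeros(N)
    for _ in range(max_iters):
        for i in range(N):
            # cavity: remove site i from the current marginal
            tau_cav = 1.0 / Sigma[i, i] - tau_tilde[i]
            v_cav = mu[i] / Sigma[i, i] - v_tilde[i]
            # match the moments of cavity x likelihood term
            # (Z_hat feeds the EP log marginal; unused in this sketch)
            Z_hat, mu_hat, sigma2_hat = likelihood.moments_match_ep(Y[i], tau_cav, v_cav)
            # damped update of the site parameters
            tau_tilde[i] += delta * (1.0 / sigma2_hat - tau_cav - tau_tilde[i])
            v_tilde[i] += delta * (mu_hat / sigma2_hat - v_cav - v_tilde[i])
        # naive O(N^3) refresh of the Gaussian approximation from the sites
        Sigma = np.linalg.inv(np.linalg.inv(K + 1e-8 * np.eye(N)) + np.diag(tau_tilde))
        mu = Sigma.dot(v_tilde)
    return mu, Sigma, tau_tilde, v_tilde
```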

47 changes: 41 additions & 6 deletions GPy/inference/latent_function_inference/posterior.py
@@ -192,7 +192,7 @@ def K_chol(self):
def _raw_predict(self, kern, Xnew, pred_var, full_cov=False):
woodbury_vector = self.woodbury_vector
woodbury_inv = self.woodbury_inv

if not isinstance(Xnew, VariationalPosterior):
Kx = kern.K(pred_var, Xnew)
mu = np.dot(Kx.T, woodbury_vector)
@@ -220,7 +220,7 @@ def _raw_predict(self, kern, Xnew, pred_var, full_cov=False):
else:
psi0_star = kern.psi0(pred_var, Xnew)
psi1_star = kern.psi1(pred_var, Xnew)
psi2_star = kern.psi2n(pred_var, Xnew)
la = woodbury_vector
mu = np.dot(psi1_star, la) # TODO: dimensions?
N,M,D = psi0_star.shape[0],psi1_star.shape[1], la.shape[1]
@@ -231,18 +231,19 @@ def _raw_predict(self, kern, Xnew, pred_var, full_cov=False):
di = np.diag_indices(la.shape[1])
else:
tmp = psi2_star - psi1_star[:,:,None]*psi1_star[:,None,:]
var = (tmp.reshape(-1,M).dot(la).reshape(N,M,D)*la[None,:,:]).sum(1) + psi0_star[:,None]
if woodbury_inv.ndim==2:
var += -psi2_star.reshape(N,-1).dot(woodbury_inv.flat)[:,None]
else:
var += -psi2_star.reshape(N,-1).dot(woodbury_inv.reshape(-1,D))
var = np.clip(var,1e-15,np.inf)
return mu, var
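For reference, the uncertain-input branch above appears to evaluate the usual psi-statistics predictive density, with w the Woodbury vector and W⁻¹ the Woodbury inverse (per output column in the multi-output case) — a reading of the code, not a statement from the PR:

```latex
\mu_{*,n} = \psi_1^{(n)\top} w, \qquad
\sigma^2_{*,n} = \psi_0^{(n)}
  + w^{\top}\!\left(\Psi_2^{(n)} - \psi_1^{(n)}\psi_1^{(n)\top}\right) w
  - \operatorname{tr}\!\left(W^{-1}\Psi_2^{(n)}\right)
```

with the result clipped below at 1e-15 to keep the variance positive.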



class PosteriorExact(Posterior):

def _raw_predict(self, kern, Xnew, pred_var, full_cov=False):

Kx = kern.K(pred_var, Xnew)
mu = np.dot(Kx.T, self.woodbury_vector)
if len(mu.shape)==1:
@@ -270,3 +271,37 @@ def _raw_predict(self, kern, Xnew, pred_var, full_cov=False):
var[:, i] = (Kxx - np.square(tmp).sum(0))
var = var
return mu, var

class PosteriorEP(Posterior):

def _raw_predict(self, kern, Xnew, pred_var, full_cov=False):

Kx = kern.K(pred_var, Xnew)
mu = np.dot(Kx.T, self.woodbury_vector)
if len(mu.shape)==1:
mu = mu.reshape(-1,1)

if full_cov:
Kxx = kern.K(Xnew)
if self._woodbury_inv.ndim == 2:
tmp = np.dot(Kx.T,np.dot(self._woodbury_inv, Kx))
var = Kxx - tmp
elif self._woodbury_inv.ndim == 3: # Missing data
var = np.empty((Kxx.shape[0],Kxx.shape[1],self._woodbury_inv.shape[2]))
for i in range(var.shape[2]):
tmp = np.dot(Kx.T,np.dot(self._woodbury_inv[:,:,i], Kx))
var[:, :, i] = (Kxx - tmp)
var = var
else:
Kxx = kern.Kdiag(Xnew)
if self._woodbury_inv.ndim == 2:
tmp = (np.dot(self._woodbury_inv, Kx) * Kx).sum(0)
var = (Kxx - tmp)[:,None]
elif self._woodbury_inv.ndim == 3: # Missing data
var = np.empty((Kxx.shape[0],self._woodbury_inv.shape[2]))
for i in range(var.shape[1]):
tmp = (Kx * np.dot(self._woodbury_inv[:,:,i], Kx)).sum(0)
var[:, i] = (Kxx - tmp)
var = var

return mu, var
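Both PosteriorExact and the new PosteriorEP predict through the same Woodbury quantities; only how those quantities are built differs (exact Gaussian algebra vs. EP site parameters). A minimal numpy illustration of the convention for exact regression, where W⁻¹ = (K + σ²I)⁻¹ and the Woodbury vector is W⁻¹y; the toy data and the `rbf` helper are made up for this sketch:

```python
import numpy as np

# toy 1-D regression data, made up for illustration
rng = np.random.RandomState(0)
X = rng.rand(20, 1)
y = np.sin(6 * X) + 0.1 * rng.randn(20, 1)

def rbf(A, B, variance=1.0, lengthscale=0.2):
    # squared-exponential kernel in GPy.kern.RBF's parameterisation
    r2 = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
    return variance * np.exp(-0.5 * r2 / lengthscale ** 2)

noise = 0.1 ** 2
K = rbf(X, X)
woodbury_inv = np.linalg.inv(K + noise * np.eye(len(X)))  # W^{-1}
woodbury_vector = woodbury_inv.dot(y)                     # W^{-1} y

Xnew = np.linspace(0, 1, 5)[:, None]
Kx = rbf(X, Xnew)                          # kern.K(pred_var, Xnew)
mu = Kx.T.dot(woodbury_vector)             # posterior mean, as in _raw_predict
var = (rbf(Xnew, Xnew).diagonal()          # kern.Kdiag(Xnew)
       - (woodbury_inv.dot(Kx) * Kx).sum(0))[:, None]
```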
4 changes: 4 additions & 0 deletions GPy/inference/latent_function_inference/var_dtc.py
@@ -88,6 +88,10 @@ def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, mean_function=None,
Kmm = kern.K(Z).copy()
diag.add(Kmm, self.const_jitter)
Lm = jitchol(Kmm)
else:
Kmm = tdot(Lm)
symmetrify(Kmm)


# The rather complex computations of A, and the psi stats
if uncertain_inputs:
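The new else branch rebuilds Kmm from a caller-supplied Cholesky factor instead of recomputing and refactorising the kernel matrix, assuming GPy's tdot(A) computes A·Aᵀ and symmetrify enforces exact symmetry. A quick illustration with plain numpy stand-ins for jitchol/tdot:

```python
import numpy as np

rng = np.random.RandomState(0)
A = rng.randn(5, 5)
Kmm = A.dot(A.T) + 5 * np.eye(5)      # symmetric positive-definite

Lm = np.linalg.cholesky(Kmm)          # stand-in for jitchol(Kmm)
Kmm_rebuilt = Lm.dot(Lm.T)            # stand-in for tdot(Lm): L L^T

assert np.allclose(Kmm, Kmm_rebuilt)  # exact up to round-off
```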
24 changes: 12 additions & 12 deletions GPy/likelihoods/bernoulli.py
Expand Up @@ -2,7 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import numpy as np
from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf
from ..util.univariate_Gaussian import std_norm_pdf, std_norm_cdf, derivLogCdfNormal, logCdfNormal
from . import link_functions
from .likelihood import Likelihood

@@ -59,24 +59,24 @@ def moments_match_ep(self, Y_i, tau_i, v_i, Y_metadata_i=None):
raise ValueError("bad value for Bernoulli observation (0, 1)")
if isinstance(self.gp_link, link_functions.Probit):
z = sign*v_i/np.sqrt(tau_i**2 + tau_i)
Z_hat = std_norm_cdf(z)
Z_hat = np.where(Z_hat==0, 1e-15, Z_hat)
phi = std_norm_pdf(z)
phi_div_Phi = derivLogCdfNormal(z)
log_Z_hat = logCdfNormal(z)

mu_hat = v_i/tau_i + sign*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
mu_hat = v_i/tau_i + sign*phi_div_Phi/np.sqrt(tau_i**2 + tau_i)
sigma2_hat = 1./tau_i - (phi_div_Phi/(tau_i**2+tau_i))*(z+phi_div_Phi)

elif isinstance(self.gp_link, link_functions.Heaviside):
a = sign*v_i/np.sqrt(tau_i)
Z_hat = np.max(1e-13, std_norm_cdf(z))
N = std_norm_pdf(a)
mu_hat = v_i/tau_i + sign*N/Z_hat/np.sqrt(tau_i)
sigma2_hat = (1. - a*N/Z_hat - np.square(N/Z_hat))/tau_i
z = sign*v_i/np.sqrt(tau_i)
phi_div_Phi = derivLogCdfNormal(z)
log_Z_hat = logCdfNormal(z)
mu_hat = v_i/tau_i + sign*phi_div_Phi/np.sqrt(tau_i)
sigma2_hat = (1. - a*phi_div_Phi - np.square(phi_div_Phi))/tau_i
else:
#TODO: do we want to revert to numerical quadrature here?
raise ValueError("Exact moment matching not available for link {}".format(self.gp_link.__name__))

return Z_hat, mu_hat, sigma2_hat
# TODO: Output log_Z_hat instead of Z_hat (this needs to be changed in all other likelihoods)
return np.exp(log_Z_hat), mu_hat, sigma2_hat

def variational_expectations(self, Y, m, v, gh_points=None, Y_metadata=None):
if isinstance(self.gp_link, link_functions.Probit):
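The point of this bernoulli.py change: the old code divided φ(z) by Φ(z) directly, which underflows once Φ(z) rounds to zero (hence the old 1e-15 clipping), while working with log Φ(z) and the ratio φ/Φ stays finite. A sketch of the identity, using scipy as a stand-in assumed equivalent to GPy's derivLogCdfNormal/logCdfNormal:

```python
import numpy as np
from scipy.stats import norm

def deriv_log_cdf_normal(z):
    # phi(z)/Phi(z) evaluated in the log domain; assumed equivalent to
    # GPy.util.univariate_Gaussian.derivLogCdfNormal
    return np.exp(norm.logpdf(z) - norm.logcdf(z))

z = -40.0
naive = norm.pdf(z) / norm.cdf(z)   # both factors underflow to 0 -> nan
stable = deriv_log_cdf_normal(z)    # ~40.02, near the asymptote -z
print(naive, stable)
```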
39 changes: 38 additions & 1 deletion GPy/testing/inference_tests.py
@@ -49,6 +49,43 @@ def test_inferenceX_GPLVM_RBF(self):
m.optimize()
x, mi = m.infer_newX(m.Y, optimize=True)
np.testing.assert_array_almost_equal(m.X, mi.X, decimal=2)

class InferenceGPEP(unittest.TestCase):

def genData(self):
np.random.seed(1)
k = GPy.kern.RBF(1, variance=7., lengthscale=0.2)
X = np.random.rand(200,1)
f = np.random.multivariate_normal(np.zeros(200), k.K(X) + 1e-5 * np.eye(X.shape[0]))
lik = GPy.likelihoods.Bernoulli()
p = lik.gp_link.transf(f) # squash the latent function
Y = lik.samples(f).reshape(-1,1)
return X, Y

def test_inference_EP(self):
from paramz import ObsAr
X, Y = self.genData()
lik = GPy.likelihoods.Bernoulli()
k = GPy.kern.RBF(1, variance=7., lengthscale=0.2)
inf = GPy.inference.latent_function_inference.expectation_propagation.EP(max_iters=30, delta=0.5)
self.model = GPy.core.GP(X=X,
Y=Y,
kernel=k,
inference_method=inf,
likelihood=lik)
K = self.model.kern.K(X)
mu, Sigma, mu_tilde, tau_tilde, log_Z_tilde = self.model.inference_method.expectation_propagation(K, ObsAr(Y), lik, None)

v_tilde = mu_tilde * tau_tilde
p, m, d = self.model.inference_method._inference(K, tau_tilde, v_tilde, lik, Y_metadata=None, Z_tilde=log_Z_tilde.sum())
p0, m0, d0 = super(GPy.inference.latent_function_inference.expectation_propagation.EP, inf).inference(k, X, lik, mu_tilde[:,None], mean_function=None, variance=1./tau_tilde, K=K, Z_tilde=log_Z_tilde.sum() + np.sum(-0.5*np.log(tau_tilde) + 0.5*(v_tilde*v_tilde*1./tau_tilde)))

# the two EP parameterisations must yield the same posterior and gradients
assert np.sum(np.abs(np.array([m - m0,
np.sum(d['dL_dK'] - d0['dL_dK']),
np.sum(d['dL_dthetaL'] - d0['dL_dthetaL']),
np.sum(d['dL_dm'] - d0['dL_dm']),
np.sum(p._woodbury_vector - p0._woodbury_vector),
np.sum(p.woodbury_inv - p0.woodbury_inv)]))) < 1e-6
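
For readers who want to exercise the merged EP path end to end, a minimal usage sketch mirroring the test's construction; the synthetic labels and prediction grid are made up, and with a Bernoulli likelihood `predict` is expected to return the predictive p(y=1) as its mean:

```python
import numpy as np
import GPy

X = np.random.rand(100, 1)
Y = (np.sin(12 * X) > 0).astype(float)   # synthetic binary labels

inf = GPy.inference.latent_function_inference.expectation_propagation.EP(
    max_iters=30, delta=0.5)
m = GPy.core.GP(X=X, Y=Y,
                kernel=GPy.kern.RBF(1),
                inference_method=inf,
                likelihood=GPy.likelihoods.Bernoulli())
m.optimize()                             # type-II ML for the kernel parameters
p, _ = m.predict(np.linspace(0, 1, 10)[:, None])  # predictive class probabilities
```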


class HMCSamplerTest(unittest.TestCase):

@@ -64,7 +101,7 @@ def test_sampling(self):

hmc = GPy.inference.mcmc.HMC(m,stepsize=1e-2)
s = hmc.sample(num_samples=3)

class MCMCSamplerTest(unittest.TestCase):

def test_sampling(self):
80 changes: 68 additions & 12 deletions GPy/testing/util_tests.py
@@ -1,21 +1,21 @@
#===============================================================================
# Copyright (c) 2016, Max Zwiessele, Alan Saul
# All rights reserved.
#
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
#
# * Neither the name of GPy.testing.util_tests nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -36,7 +36,7 @@ def test_checkFinite(self):
from GPy.util.debug import checkFinite
array = np.random.normal(0, 1, 100).reshape(25,4)
self.assertTrue(checkFinite(array, name='test'))

array[np.random.binomial(1, .3, array.shape).astype(bool)] = np.nan
self.assertFalse(checkFinite(array))

@@ -45,10 +45,10 @@ def test_checkFullRank(self):
from GPy.util.linalg import tdot
array = np.random.normal(0, 1, 100).reshape(25,4)
self.assertFalse(checkFullRank(tdot(array), name='test'))

array = np.random.normal(0, 1, (25,25))
self.assertTrue(checkFullRank(tdot(array)))

def test_fixed_inputs_median(self):
""" test fixed_inputs convenience function """
from GPy.plotting.matplot_dep.util import fixed_inputs
@@ -113,8 +113,8 @@ def test_offset_cluster(self):
#test data set. Not using random noise just in case it occasionally
#causes it not to cluster correctly.
#groundtruth cluster identifiers are: [0,1,1,0]
#data contains a list of the four sets of time series (3 per data point)

data = [np.array([[ 2.18094245, 1.96529789, 2.00265523, 2.18218742, 2.06795428],
[ 1.62254829, 1.75748448, 1.83879347, 1.87531326, 1.52503496],
@@ -130,7 +130,7 @@ def test_offset_cluster(self):
[ 1.69336013, 1.72285186, 1.6339506 , 1.61212022, 1.39198698]])]

#inputs contains their associated X values

inputs = [np.array([[ 0. ],
[ 0.68040097],
[ 1.20316795],
@@ -148,10 +148,66 @@ def test_offset_cluster(self):
[ 1.71881079],
[ 2.67162871],
[ 3.23761907]])]

#try doing the clustering
active = GPy.util.cluster_with_offset.cluster(data,inputs)
#check to see that the clustering has correctly clustered the time series.
clusters = set([frozenset(cluster) for cluster in active])
assert set([1,2]) in clusters, "Offset Clustering algorithm failed"
assert set([0,3]) in clusters, "Offset Clustering algorithm failed"


class TestUnivariateGaussian(unittest.TestCase):
def setUp(self):
self.zz = [-5.0, -0.8, 0.0, 0.5, 2.0, 10.0]

def test_logPdfNormal(self):
from GPy.util.univariate_Gaussian import logPdfNormal
pySols = [-13.4189385332,
-1.2389385332,
-0.918938533205,
-1.0439385332,
-2.9189385332,
-50.9189385332]
diff = 0.0
for i in range(len(pySols)):
diff += abs(logPdfNormal(self.zz[i]) - pySols[i])
self.assertTrue(diff < 1e-10)

def test_cdfNormal(self):
from GPy.util.univariate_Gaussian import cdfNormal
pySols = [2.86651571879e-07,
0.211855398583,
0.5,
0.691462461274,
0.977249868052,
1.0]
diff = 0.0
for i in range(len(pySols)):
diff += abs(cdfNormal(self.zz[i]) - pySols[i])
self.assertTrue(diff < 1e-10)

def test_logCdfNormal(self):
from GPy.util.univariate_Gaussian import logCdfNormal
pySols = [-15.064998394,
-1.55185131919,
-0.69314718056,
-0.368946415289,
-0.023012909329,
0.0]
diff = 0.0
for i in range(len(pySols)):
diff += abs(logCdfNormal(self.zz[i]) - pySols[i])
self.assertTrue(diff < 1e-10)

def test_derivLogCdfNormal(self):
from GPy.util.univariate_Gaussian import derivLogCdfNormal
pySols = [5.18650396941,
1.3674022693,
0.79788456081,
0.50916043387,
0.0552478626962,
0.0]
diff = 0.0
for i in range(len(pySols)):
diff += abs(derivLogCdfNormal(self.zz[i]) - pySols[i])
self.assertTrue(diff < 1e-8)
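
The hard-coded pySols above can be reproduced (to the tested tolerances) with scipy, which is presumably how they were generated; a quick cross-check:

```python
import numpy as np
from scipy.stats import norm

zz = np.array([-5.0, -0.8, 0.0, 0.5, 2.0, 10.0])
print(norm.logpdf(zz))                            # test_logPdfNormal
print(norm.cdf(zz))                               # test_cdfNormal
print(norm.logcdf(zz))                            # test_logCdfNormal
print(np.exp(norm.logpdf(zz) - norm.logcdf(zz)))  # test_derivLogCdfNormal
```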
