added speed comparisons for pymc and mcex

jsalvatier · Jul 25, 2012 · 3a82fab · 3a82fab
1 parent 08d4781
commit 3a82fab
Show file tree

Hide file tree

Showing 2 changed files with 111 additions and 0 deletions.
diff --git a/examples/speedtest_mcex.py b/examples/speedtest_mcex.py
@@ -0,0 +1,75 @@
+"""
+From the PyMC example list
+latent_occupancy.py
+
+Simple model demonstrating the estimation of occupancy, using latent variables. Suppose
+a population of n sites, with some proportion pi being occupied. Each site is surveyed, 
+yielding an array of counts, y:
+
+y = [3, 0, 0, 2, 1, 0, 1, 0, ..., ]
+
+This is a classic zero-inflated count problem, where more zeros appear in the data than would
+be predicted by a simple Poisson model. We have, in fact, a mixture of models; one, conditional
+on occupancy, with a poisson mean of theta, and another, conditional on absence, with mean zero. 
+One way to tackle the problem is to model the latent state of 'occupancy' as a Bernoulli variable 
+at each site, with some unknown probability:
+
+z_i ~ Bern(pi)
+
+These latent variables can then be used to generate an array of Poisson parameters:
+
+t_i = theta (if z_i=1) or 0 (if z_i=0)
+
+Hence, the likelihood is just:
+
+y_i = Poisson(t_i)
+
+(Note in this elementary model, we are ignoring the issue of imperfect detection.)
+
+Created by Chris Fonnesbeck on 2008-07-28.
+Copyright (c) 2008 University of Otago. All rights reserved.
+"""
+
+# Import statements
+from mcex import *
+from numpy import random, array, arange, ones 
+import theano.tensor as t
+# Sample size
+n = 100000
+# True mean count, given occupancy
+theta = 2.1
+# True occupancy
+pi = 0.4
+
+# Simulate some data data
+y = array([(random.random()<pi) * random.poisson(theta) for i in range(n)])
+
+
+model = Model()
+# Estimated occupancy
+
+p = AddVar(model, 'p', Beta(1,1))
+
+# Latent variable for occupancy
+z = AddVar(model, 'z', Bernoulli(p) , y.shape, dtype = 'int8')
+
+# Estimated mean count
+theta = AddVar( model, 'theta', Uniform(0, 100))
+
+# Poisson likelihood
+
+
+
+AddData(model, y, ZeroInflatedPoisson(theta, z))
+
+point = {'p' : np.array([.5]),
+         'z' : (y > 0)*1,
+         'theta' : np.array([10.])}
+
+_mcex_logp = model_logp(model)
+def mcex_logp():
+    _mcex_logp(point) 
+_mcex_dlogp = model_dlogp(model)
+
+def mcex_dlogp():
+    _mcex_dlogp(point) 
diff --git a/examples/speedtest_pymc.py b/examples/speedtest_pymc.py
@@ -0,0 +1,36 @@
+#from https://github.com/pymc-devs/pymc/wiki/LatentOccupancy
+
+# Import statements
+from numpy import random, array
+from pymc import MCMC, Matplot, Beta, Bernoulli, Lambda, Poisson, Uniform, deterministic, logp_of_set, logp_gradient_of_set
+
+n = 100000
+theta = 2
+pi = 0.4
+y = [(random.random()<pi) * random.poisson(theta) for i in range(n)]
+
+def remcache(s):
+    s._cache_depth = 0
+    s.gen_lazy_function()
+
+p = Beta('p', 1, 1)
+
+z = Bernoulli('z', p, value=array(y)>0, plot=False)
+
+theta_hat = Uniform('theta_hat', 0, 100, value=3)
+
+
+t = z*theta
+counts = Poisson('counts', t, value=y, observed=True)
+model = [p, z, theta_hat, counts]
+
+#disable caching for all the nodes
+v = model + [t]
+for s in v:
+    remcache(s)
+
+def pymc_logp():
+    return logp_of_set(model)
+
+def pymc_dlogp():
+    return logp_gradient_of_set(model)