Skip to content

Commit

Permalink
added speed comparisons for pymc and mcex
Browse files Browse the repository at this point in the history
  • Loading branch information
jsalvatier committed Jul 25, 2012
1 parent 08d4781 commit 3a82fab
Show file tree
Hide file tree
Showing 2 changed files with 111 additions and 0 deletions.
75 changes: 75 additions & 0 deletions examples/speedtest_mcex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"""
From the PyMC example list
latent_occupancy.py
Simple model demonstrating the estimation of occupancy, using latent variables. Suppose
a population of n sites, with some proportion pi being occupied. Each site is surveyed,
yielding an array of counts, y:
y = [3, 0, 0, 2, 1, 0, 1, 0, ..., ]
This is a classic zero-inflated count problem, where more zeros appear in the data than would
be predicted by a simple Poisson model. We have, in fact, a mixture of models; one, conditional
on occupancy, with a poisson mean of theta, and another, conditional on absence, with mean zero.
One way to tackle the problem is to model the latent state of 'occupancy' as a Bernoulli variable
at each site, with some unknown probability:
z_i ~ Bern(pi)
These latent variables can then be used to generate an array of Poisson parameters:
t_i = theta (if z_i=1) or 0 (if z_i=0)
Hence, the likelihood is just:
y_i = Poisson(t_i)
(Note in this elementary model, we are ignoring the issue of imperfect detection.)
Created by Chris Fonnesbeck on 2008-07-28.
Copyright (c) 2008 University of Otago. All rights reserved.
"""

# Import statements
from mcex import *
from numpy import random, array, arange, ones
import theano.tensor as t
# Sample size
n = 100000
# True mean count, given occupancy
theta = 2.1
# True occupancy
pi = 0.4

# Simulate some data data
y = array([(random.random()<pi) * random.poisson(theta) for i in range(n)])


model = Model()
# Estimated occupancy

p = AddVar(model, 'p', Beta(1,1))

# Latent variable for occupancy
z = AddVar(model, 'z', Bernoulli(p) , y.shape, dtype = 'int8')

# Estimated mean count
theta = AddVar( model, 'theta', Uniform(0, 100))

# Poisson likelihood



AddData(model, y, ZeroInflatedPoisson(theta, z))

point = {'p' : np.array([.5]),
'z' : (y > 0)*1,
'theta' : np.array([10.])}

_mcex_logp = model_logp(model)
def mcex_logp():
_mcex_logp(point)
_mcex_dlogp = model_dlogp(model)

def mcex_dlogp():
_mcex_dlogp(point)
36 changes: 36 additions & 0 deletions examples/speedtest_pymc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#from https://github.com/pymc-devs/pymc/wiki/LatentOccupancy

# Import statements
from numpy import random, array
from pymc import MCMC, Matplot, Beta, Bernoulli, Lambda, Poisson, Uniform, deterministic, logp_of_set, logp_gradient_of_set

n = 100000
theta = 2
pi = 0.4
y = [(random.random()<pi) * random.poisson(theta) for i in range(n)]

def remcache(s):
s._cache_depth = 0
s.gen_lazy_function()

p = Beta('p', 1, 1)

z = Bernoulli('z', p, value=array(y)>0, plot=False)

theta_hat = Uniform('theta_hat', 0, 100, value=3)


t = z*theta
counts = Poisson('counts', t, value=y, observed=True)
model = [p, z, theta_hat, counts]

#disable caching for all the nodes
v = model + [t]
for s in v:
remcache(s)

def pymc_logp():
return logp_of_set(model)

def pymc_dlogp():
return logp_gradient_of_set(model)

0 comments on commit 3a82fab

Please sign in to comment.