added speed comparisons for pymc and mcex
1 parent 08d4781 · commit 3a82fab
Showing 2 changed files with 111 additions and 0 deletions.
@@ -0,0 +1,75 @@
""" | ||
From the PyMC example list | ||
latent_occupancy.py | ||
Simple model demonstrating the estimation of occupancy, using latent variables. Suppose | ||
a population of n sites, with some proportion pi being occupied. Each site is surveyed, | ||
yielding an array of counts, y: | ||
y = [3, 0, 0, 2, 1, 0, 1, 0, ..., ] | ||
This is a classic zero-inflated count problem, where more zeros appear in the data than would | ||
be predicted by a simple Poisson model. We have, in fact, a mixture of models; one, conditional | ||
on occupancy, with a poisson mean of theta, and another, conditional on absence, with mean zero. | ||
One way to tackle the problem is to model the latent state of 'occupancy' as a Bernoulli variable | ||
at each site, with some unknown probability: | ||
z_i ~ Bern(pi) | ||
These latent variables can then be used to generate an array of Poisson parameters: | ||
t_i = theta (if z_i=1) or 0 (if z_i=0) | ||
Hence, the likelihood is just: | ||
y_i = Poisson(t_i) | ||
(Note in this elementary model, we are ignoring the issue of imperfect detection.) | ||
Created by Chris Fonnesbeck on 2008-07-28. | ||
Copyright (c) 2008 University of Otago. All rights reserved. | ||
""" | ||
# Import statements
from mcex import *
from numpy import random, array, arange, ones
import theano.tensor as t
import numpy as np

# Sample size
n = 100000
# True mean count, given occupancy
theta = 2.1
# True occupancy
pi = 0.4

# Simulate some data
y = array([(random.random() < pi) * random.poisson(theta) for i in range(n)])
model = Model()

# Estimated occupancy
p = AddVar(model, 'p', Beta(1, 1))

# Latent variable for occupancy
z = AddVar(model, 'z', Bernoulli(p), y.shape, dtype='int8')

# Estimated mean count
theta = AddVar(model, 'theta', Uniform(0, 100))

# Poisson likelihood
AddData(model, y, ZeroInflatedPoisson(theta, z))
# Fixed point at which the log-posterior and its gradient are evaluated
point = {'p': np.array([.5]),
         'z': (y > 0) * 1,
         'theta': np.array([10.])}

# Build the log-probability function and its gradient once; the zero-argument
# wrappers evaluate them at the fixed point above so they can be timed directly
_mcex_logp = model_logp(model)
def mcex_logp():
    _mcex_logp(point)

_mcex_dlogp = model_dlogp(model)
def mcex_dlogp():
    _mcex_dlogp(point)
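The commit message advertises speed comparisons, but no timing harness appears in this diff. A minimal sketch of how the zero-argument wrappers above might be timed with the standard-library timeit module; the repetition count, the __main__ guard, and the output format are assumptions, not part of the commit:

import timeit

if __name__ == '__main__':
    # Average the per-call cost of repeated evaluations at the same fixed point
    reps = 1000
    print('mcex logp:  %.3e s/call' % (timeit.timeit(mcex_logp, number=reps) / reps))
    print('mcex dlogp: %.3e s/call' % (timeit.timeit(mcex_dlogp, number=reps) / reps))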
@@ -0,0 +1,36 @@
# from https://github.com/pymc-devs/pymc/wiki/LatentOccupancy

# Import statements
from numpy import random, array
from pymc import MCMC, Matplot, Beta, Bernoulli, Lambda, Poisson, Uniform, deterministic, logp_of_set, logp_gradient_of_set

# Sample size
n = 100000
# True mean count, given occupancy
theta = 2
# True occupancy
pi = 0.4

# Simulate some data
y = [(random.random() < pi) * random.poisson(theta) for i in range(n)]
def remcache(s):
    # Disable caching on a PyMC node and rebuild its lazy logp function,
    # so every evaluation recomputes from scratch
    s._cache_depth = 0
    s.gen_lazy_function()

# Estimated occupancy
p = Beta('p', 1, 1)

# Latent variable for occupancy
z = Bernoulli('z', p, value=array(y) > 0, plot=False)

# Estimated mean count
theta_hat = Uniform('theta_hat', 0, 100, value=3)

# Per-site Poisson means: theta_hat where occupied, 0 where absent
t = z * theta_hat
counts = Poisson('counts', t, value=y, observed=True)
model = [p, z, theta_hat, counts]
# Disable caching for all the nodes
v = model + [t]
for s in v:
    remcache(s)

def pymc_logp():
    return logp_of_set(model)

def pymc_dlogp():
    return logp_gradient_of_set(model)
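The same hypothetical timeit harness would apply to the PyMC wrappers; again the repetition count and the printed format are assumed for illustration, not part of the commit:

import timeit

if __name__ == '__main__':
    reps = 100
    print('pymc logp:  %.3e s/call' % (timeit.timeit(pymc_logp, number=reps) / reps))
    print('pymc dlogp: %.3e s/call' % (timeit.timeit(pymc_dlogp, number=reps) / reps))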