diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..c4f8cdf --- /dev/null +++ b/.travis.yml @@ -0,0 +1,38 @@ +sudo: required + +language: r + +os: + - linux + + +# Setup anaconda +before_install: + - wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh + - chmod +x miniconda.sh + - ./miniconda.sh -b -p /home/travis/miniconda3 + - export PATH=/home/travis/miniconda3/bin:$PATH + - export LD_LIBRARY_PATH=/home/travis/R-bin/lib/R/lib:/home/travis/miniconda3/lib:$LD_LIBRARY_PATH + - R -e 'install.packages(c("Matrix", "RcppEigen", "numDeriv"), repos="http://cran.us.r-project.org", dependencies=T)' + +## Cache R packages +cache: packages + +## Set up conda environment, TMB and PyMB +install: + - conda create -c conda-forge -n pymb_test -y python=3.6 numpy scipy libiconv libxml2 lxml scikit-sparse + - source activate pymb_test + - which -a python && pip install rpy2 && python -c "import rpy2.rinterface as rinterface" + - git clone https://github.com/kaskr/adcomp.git && cd adcomp && make install-metis-full && cd .. + - cd ../PyMB && python setup.py install + +jobs: + include: + - stage: Run PyMB tests + script: + - python Examples/linreg.py + +notifications: + email: + on_success: never + on_failure: never diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..79183cb --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1 @@ +Package: PyMB diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..b91bc02 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,44 @@ +FROM continuumio/miniconda3 + +## Get all the Linux dependencies +RUN apt-get update --fix-missing && \ + apt-get install --yes \ + wget \ + curl \ + bzip2 \ + ca-certificates \ + libglib2.0-0 \ + libxext6 \ + libsm6 \ + libxrender1 \ + git \ + gcc \ + g++ \ + libreadline-dev \ + libeigen3-dev \ + tar \ + build-essential && \ + apt-get purge && \ + apt-get clean + +## Install Python dependencies +## NOTE: DO NOT install rpy2 from conda-forge, got issues +RUN conda install -c conda-forge -y numpy scipy && \ + scikit-sparse libiconv libxml2 lxml + +## Install R and rpy2 +RUN conda install -c r -y \ + r-base=3.5.0 && \ + pip install rpy2 + + +## Build TMB and it's dependencies +RUN R -e 'install.packages(c("Matrix", "numDeriv", "RcppEigen"), repos="http://cran.us.r-project.org", dependencies=T)' && \ + cd /opt && \ + git clone https://github.com/kaskr/adcomp.git && cd adcomp && \ + make install-metis-full + +## Install PyMB +RUN cd /opt && \ + git clone https://github.com/kforeman/PyMB.git && \ + cd PyMB && python setup.py install \ No newline at end of file diff --git a/Examples/linreg.py b/Examples/linreg.py new file mode 100644 index 0000000..80f4df4 --- /dev/null +++ b/Examples/linreg.py @@ -0,0 +1,68 @@ +import numpy as np +import os + + +import PyMB +import rpy2.robjects as ro +import rpy2.rinterface as rin + +# create an empty model +m = PyMB.model(name='linreg_code') + +# code for a simple linear regression +linreg_code = ''' +#include +template +Type objective_function::operator() (){ +// DATA + DATA_VECTOR(Y); + DATA_VECTOR(x); + +// PARAMETERS + PARAMETER(alpha); + PARAMETER(Beta); + PARAMETER(logSigma); + +// MODEL + vector Y_hat = alpha + Beta*x; + REPORT(Y_hat); + Type nll = -sum(dnorm(Y, Y_hat, exp(logSigma), true)); + return nll; +} +''' + +# Get the necessary paths to compile TMB code +tmbinclude = ro.r('paste0(find.package("TMB"), "/include")')[0] +# eigeninclude = ro.r('paste0(find.package("RcppEigen"), "/include")')[0] +# tmbinclude = tmbinclude + ' -I' + eigeninclude +rinclude = rin.R_HOME + "/include" +rlib = rin.R_HOME + "/lib" + +# rpy2.rinterface.R_HOME + +# compile the model +m.compile(codestr=linreg_code, + cc='g++', + R=rinclude, + TMB=tmbinclude, + LR=rlib, + verbose=True, + load=True) + +# simulate data +N = 100 +m.data['x'] = np.arange(N) +m.data['Y'] = m.data['x'] + 0.5 + np.random.rand(N) + +# set initial parameter values +m.init['alpha'] = 0. +m.init['Beta'] = 0. +m.init['logSigma'] = 0. + +# set random parameters +m.random = ['alpha', 'Beta'] + +# fit the model +m.optimize(draws=500) +print(m.report('Y_hat')) +m.print_parameters() diff --git a/PyMB/__init__.py b/PyMB/__init__.py new file mode 100644 index 0000000..dad5f04 --- /dev/null +++ b/PyMB/__init__.py @@ -0,0 +1 @@ +from PyMB.model import * diff --git a/magic/PyMB_magic.py b/PyMB/magic/PyMB_magic.py similarity index 97% rename from magic/PyMB_magic.py rename to PyMB/magic/PyMB_magic.py index 1e1d49a..c65f37c 100644 --- a/magic/PyMB_magic.py +++ b/PyMB/magic/PyMB_magic.py @@ -100,7 +100,8 @@ def PyMB(self, line, cell): code += '}\n' # compile model - model.compile(codestr=code, output_dir=args.OUTPUT_DIR, cc=args.CCOMPILER, R=args.R_DIR, TMB=args.TMB_DIR, LR=args.R_LIB, verbose=args.VERBOSE) + model.compile(codestr=code, output_dir=args.OUTPUT_DIR, cc=args.CCOMPILER, R=args.R_DIR, + TMB=args.TMB_DIR, LR=args.R_LIB, verbose=args.VERBOSE) # print if verbose if args.VERBOSE == True: diff --git a/magic/__init__.py b/PyMB/magic/__init__.py similarity index 100% rename from magic/__init__.py rename to PyMB/magic/__init__.py diff --git a/model.py b/PyMB/model.py similarity index 70% rename from model.py rename to PyMB/model.py index a0b1031..844cf13 100644 --- a/model.py +++ b/PyMB/model.py @@ -1,9 +1,30 @@ -import numpy as np -import time -import warnings + +from pprint import pprint import os +import re import readline +import shutil +import subprocess +import time +import warnings + +import numpy as np +from rpy2.robjects.packages import importr +from rpy2 import robjects as ro +import rpy2.robjects.numpy2ri from scipy.sparse import csc_matrix +from scipy.sparse.linalg import spsolve +try: + from sksparse.cholmod import cholesky +except: + from scikits.sparse.cholmod import cholesky + + + + + +__all__ = ['get_R_attr', 'model'] + def get_R_attr(obj, attr): ''' @@ -12,6 +33,7 @@ def get_R_attr(obj, attr): ''' return obj[obj.names.index(attr)] + class model: def __init__(self, name=None, filepath=None, codestr=None, **kwargs): ''' @@ -35,18 +57,16 @@ def __init__(self, name=None, filepath=None, codestr=None, **kwargs): raise Exception('"name" cannot contain hyphens.') # set model name - self.name = name if name else 'TMB_{}'.format(np.random.randint(1e10,9e10)) + self.name = name if name else 'TMB_{}'.format( + np.random.randint(1e10, 9e10)) # initiate R session - from rpy2 import robjects as ro self.R = ro # turn on numpy to R conversion - import rpy2.robjects.numpy2ri rpy2.robjects.numpy2ri.activate() # create TMB link - from rpy2.robjects.packages import importr self.TMB = importr('TMB') importr('Matrix') @@ -58,9 +78,14 @@ def __init__(self, name=None, filepath=None, codestr=None, **kwargs): if filepath or codestr: self.compile(filepath=filepath, codestr=codestr, **kwargs) - def compile(self, filepath=None, codestr=None, output_dir='tmb_tmp', - cc='g++', R='/usr/share/R/include', - TMB='/usr/local/lib/R/site-library/TMB/include', LR='/usr/lib/R/lib', verbose=False, load=True): + def compile(self, filepath=None, codestr=None, + output_dir='tmb_tmp', + cc='g++', + R='/usr/share/R/include', + TMB='/usr/local/lib/R/site-library/TMB/include', + LR='/usr/lib/R/lib', + verbose=False, + load=True): ''' Compile TMB C++ code and load into R Parameters @@ -81,7 +106,7 @@ def compile(self, filepath=None, codestr=None, output_dir='tmb_tmp', location of TMB library LR : str, default '/usr/lib/R/lib' location of R's library files - verbose : boolean, default False + verbose : boolean, default False print compiler warnings load : boolean, default True load the model into Python after compilation @@ -99,42 +124,52 @@ def compile(self, filepath=None, codestr=None, output_dir='tmb_tmp', # if given just a filepath, copy the code into the output directory if filepath: - self.filepath = os.path.join(output_dir, '{name}.cpp'.format(name=self.name)) - import shutil + self.filepath = os.path.join( + output_dir, '{name}.cpp'.format(name=self.name)) shutil.copy(filepath, self.filepath) if codestr: - warnings.warn('Both filepath and codestr specified. Ignoring codestr.') + warnings.warn( + 'Both filepath and codestr specified. Ignoring codestr.') # otherwise write code to file elif codestr: - self.filepath = '{output_dir}/{name}.cpp'.format(output_dir=output_dir, name=self.name) + self.filepath = '{output_dir}/{name}.cpp'.format( + output_dir=output_dir, name=self.name) # only rewrite cpp if identical code found - if os.path.isfile(self.filepath) == False or file(self.filepath, 'r').read() != codestr: + if os.path.isfile(self.filepath) == False or \ + open(self.filepath, 'r').read() != codestr: print('Saving model to {}.'.format(self.filepath)) - with file(self.filepath, 'w') as f: + with open(self.filepath, 'w') as f: f.write(codestr) else: print('Using {}.'.format(self.filepath)) # compile the model - # NOTE: cannot just call TMB.compile unfortunately - something about shared libraries not being hooked up correctly inside of embedded R sessions + # NOTE: cannot just call TMB.compile unfortunately - + # something about shared libraries + # not being hooked up correctly inside of embedded R sessions # TODO: skip recompiling when model has not changed - import subprocess - from pprint import pprint + # compile cpp comp = '{cc} {include} {options} {f} -o {o}'.format( cc=cc, include='-I{R} -I{TMB}'.format(R=R, TMB=TMB), - options='-DNDEBUG -DTMB_SAFEBOUNDS -DLIB_UNLOAD=R_unload_{} -fpic -O3 -pipe -g -c'.format(self.name), - f='{output_dir}/{name}.cpp'.format(output_dir=output_dir, name=self.name), - o='{output_dir}/{name}.o'.format(output_dir=output_dir, name=self.name)) + options='-DNDEBUG -DTMB_SAFEBOUNDS -DLIB_UNLOAD=' + + 'R_unload_{} -fpic -O3 -pipe -g -c'.format( + self.name), + f='{output_dir}/{name}.cpp'.format( + output_dir=output_dir, name=self.name), + o='{output_dir}/{name}.o'.format( + output_dir=output_dir, name=self.name)) try: - cmnd_output = subprocess.check_output(comp, stderr=subprocess.STDOUT, shell=True) + cmnd_output = subprocess.check_output( + comp, stderr=subprocess.STDOUT, shell=True) except subprocess.CalledProcessError as exc: print(comp) print(exc.output) - raise Exception('Your TMB code could not compile. See error above.') + raise Exception( + 'Your TMB code could not compile. See error above.') if verbose: print(comp) print(cmnd_output) @@ -142,38 +177,48 @@ def compile(self, filepath=None, codestr=None, output_dir='tmb_tmp', link = '{cc} {options} -o {so} {o} {link}'.format( cc=cc, options='-shared', - so='{output_dir}/{name}.so'.format(output_dir=output_dir, name=self.name), - o='{output_dir}/{name}.o'.format(output_dir=output_dir, name=self.name), + so='{output_dir}/{name}.so'.format( + output_dir=output_dir, name=self.name), + o='{output_dir}/{name}.o'.format( + output_dir=output_dir, name=self.name), link='-L{LR} -lR'.format(LR=LR)) try: - cmnd_output = subprocess.check_output(link, stderr=subprocess.STDOUT, shell=True) + cmnd_output = subprocess.check_output( + link, stderr=subprocess.STDOUT, shell=True) except subprocess.CalledProcessError as exc: print(link) print(exc.output) - raise Exception('Your TMB code could not be linked. See error above.') + raise Exception( + 'Your TMB code could not be linked. See error above.') - # if a module of the same name has already been loaded, must unload R entirely it seems + # if a module of the same name has already been loaded, + # must unload R entirely it seems """ - TODO: fix this so R doesn't have to be restarted, potentially losing things the user has already loaded into R + TODO: fix this so R doesn't have to be restarted, potentially + losing things the user has already loaded into R judging by https://github.com/kaskr/adcomp/issues/27 this should work: - self.R.r('try(dyn.unload("{output_dir}/{name}.so"), silent=TRUE)'.format(output_dir=output_dir, name=self.name)) + self.R.r('try(dyn.unload("{output_dir}/{name}.so"), silent=TRUE)'.format( + output_dir=output_dir, name=self.name)) but it doesn't - gives odd vector errors when trying to optimize """ - if self.name in [str(get_R_attr(i, 'name')[0]) for i in self.R.r('getLoadedDLLs()')]: - warnings.warn('A model has already been loaded into TMB. Restarting R and reloading model to prevent conflicts.') + if self.name in [str(get_R_attr(i, 'name')[0]) + for i in self.R.r('getLoadedDLLs()')]: + warnings.warn('A model has already been loaded into TMB.') + warnings.warn( + 'Restarting R and reloading model to prevent conflicts.') self.R.r('sink("/dev/null")') - self.R.r('try(dyn.unload("{output_dir}/{name}.so"), silent=TRUE)'.format(output_dir=output_dir, name=self.name)) + self.R.r('try(dyn.unload("{output_dir}/{name}.so"), silent=TRUE)'.format( + output_dir=output_dir, name=self.name)) self.R.r('sink()') del self.R - from rpy2 import robjects as ro self.R = ro del self.TMB - from rpy2.robjects.packages import importr self.TMB = importr('TMB') # load the model into R if load: - self.load_model(so_file='{output_dir}/{name}.so'.format(output_dir=output_dir, name=self.name)) + self.load_model( + so_file='{output_dir}/{name}.so'.format(output_dir=output_dir, name=self.name)) # output time print('Compiled in {:.1f}s.\n'.format(time.time()-start)) @@ -183,7 +228,7 @@ def load_model(self, so_file=''): so_file = 'tmb_tmp/{name}.so'.format(name=self.name) if not hasattr(self, 'filepath'): # assume that the cpp file is in the same directory with the same name if it wasn't specified - self.filepath = so_file.replace('.so','.cpp') + self.filepath = so_file.replace('.so', '.cpp') self.R.r('sink("/dev/null")') self.R.r('dyn.load("{so_file}")'.format(so_file=so_file)) self.R.r('sink()') @@ -191,9 +236,8 @@ def load_model(self, so_file=''): self.dll = os.path.splitext(os.path.basename(so_file))[0] def check_inputs(self, thing): - import re missing = [] - with file(self.filepath) as f: + with open(self.filepath) as f: for l in f: if re.match('^PARAMETER' if thing == 'init' else '^{}'.format(thing.upper()), l.strip()): i = re.search(r"\(([A-Za-z0-9_]+)\)", l.strip()).group(1) @@ -218,7 +262,8 @@ def build_objective_function(self, random=[], hessian=True, **kwargs): ''' # first check to make sure everything necessary has been loaded if not hasattr(self, 'model_loaded'): - raise Exception('Model not yet compiled/loaded. See TMB_model.compile().') + raise Exception( + 'Model not yet compiled/loaded. See TMB_model.compile().') self.check_inputs('data') self.check_inputs('init') @@ -226,7 +271,8 @@ def build_objective_function(self, random=[], hessian=True, **kwargs): if hasattr(self, 'obj_fun_built'): try: del self.TMB.model - self.R.r('dyn.load("{filepath}")'.format(filepath=self.filepath.replace('.cpp','.so'))) + self.R.r('dyn.load("{filepath}")'.format( + filepath=self.filepath.replace('.cpp', '.so'))) except: pass @@ -244,14 +290,14 @@ def build_objective_function(self, random=[], hessian=True, **kwargs): # build the objective function self.TMB.model = self.TMB.MakeADFun(data=self.R.ListVector(self.data), - parameters=self.R.ListVector(self.init), hessian=hessian, - DLL=self.dll, **kwargs) + parameters=self.R.ListVector(self.init), hessian=hessian, + DLL=self.dll, **kwargs) # set obj_fun_built self.obj_fun_built = True - - def optimize(self, opt_fun='nlminb', method='L-BFGS-B', draws=100, verbose=False, random=None, quiet=False, params=[], noparams=False, constrain=False, warning=True, **kwargs): + def optimize(self, opt_fun='nlminb', method='L-BFGS-B', draws=100, verbose=False, + random=None, quiet=False, params=[], noparams=False, constrain=False, warning=True, **kwargs): ''' Optimize the model and store results in TMB_Model.TMB.fit Parameters @@ -307,24 +353,30 @@ def optimize(self, opt_fun='nlminb', method='L-BFGS-B', draws=100, verbose=False # fit the model if quiet: self.R.r('sink("/dev/null")') - self.TMB.fit = self.R.r[opt_fun](start=get_R_attr(self.TMB.model, 'par'), objective=get_R_attr(self.TMB.model, 'fn'), - gradient=get_R_attr(self.TMB.model, 'gr'), method=method, **kwargs) + self.TMB.fit = self.R.r[opt_fun](start=get_R_attr(self.TMB.model, 'par'), + objective=get_R_attr(self.TMB.model, 'fn'), + gradient=get_R_attr(self.TMB.model, 'gr'), + method=method, **kwargs) if quiet: self.R.r('sink()') else: - print('\nModel optimization complete in {:.1f}s.\n'.format(time.time()-start)) + print('\nModel optimization complete in {:.1f}s.\n'.format( + time.time()-start)) # check for convergence - self.convergence = self.TMB.fit[self.TMB.fit.names.index('convergence')][0] + self.convergence = self.TMB.fit[self.TMB.fit.names.index( + 'convergence')][0] if warning and self.convergence != 0: - print '\nThe model did not successfully converge, exited with the following warning message:' - print self.TMB.fit[self.TMB.fit.names.index('message')][0] + '\n' + print( + '\nThe model did not successfully converge, exited with the following warning message:') + print(self.TMB.fit[self.TMB.fit.names.index('message')][0] + '\n') # simulate parameters if not quiet: print('\n{}\n'.format(''.join(['-' for i in range(80)]))) if not noparams: - self.simulate_parameters(draws=draws, quiet=quiet, params=params, constrain=constrain) + self.simulate_parameters( + draws=draws, quiet=quiet, params=params, constrain=constrain) def report(self, name): ''' @@ -368,7 +420,8 @@ def simulate_parameters(self, draws=100, params=[], quiet=False, constrain=False self.sdreport = self.TMB.sdreport(self.TMB.model, getJointPrecision=True, hessian_fixed=get_R_attr(self.TMB.model, 'he')()) else: - self.sdreport = self.TMB.sdreport(self.TMB.model, getJointPrecision=True) + self.sdreport = self.TMB.sdreport( + self.TMB.model, getJointPrecision=True) # extraction convenience function # filters the object down to just a list of desired parameters @@ -380,18 +433,21 @@ def simulate_parameters(self, draws=100, params=[], quiet=False, constrain=False return(obj[ii,ii]); }; }''') - + # extract the joint precision matrix if not self.random: - joint_prec_full = self.R.r('function(m) { return(1./m) }')(get_R_attr(self.sdreport, 'cov.fixed')) + joint_prec_full = self.R.r( + 'function(m) { return(1./m) }')(get_R_attr(self.sdreport, 'cov.fixed')) joint_prec_names = get_R_attr(self.sdreport, 'par.fixed').names - joint_prec_dense = extract_params(joint_prec_full, joint_prec_names, params) + joint_prec_dense = extract_params( + joint_prec_full, joint_prec_names, params) joint_prec_R = self.R.r('as')(joint_prec_dense, 'sparseMatrix') else: joint_prec_full = get_R_attr(self.sdreport, 'jointPrecision') joint_prec_names = self.R.r('row.names')(joint_prec_full) - joint_prec_R = extract_params(joint_prec_full, joint_prec_names, params) - + joint_prec_R = extract_params( + joint_prec_full, joint_prec_names, params) + # convert joint precision matrix to scipy.sparse.csc_matrix # (bypassing dense conversion step that happens if you use rpy2) def get_R_slot(obj, slot): @@ -406,7 +462,7 @@ def get_R_slot(obj, slot): else: ordered_params = np.array(self.R.r('row.names')(joint_prec_R)) - ### extract fixed effects + # extract fixed effects # means fixed_mean_raw = get_R_attr(self.sdreport, 'par.fixed') # sds @@ -415,9 +471,10 @@ def get_R_slot(obj, slot): fixed_names = fixed_mean_raw.names # keep just selected fixed_mean = extract_params(fixed_mean_raw, fixed_names, params) - fixed_sd = extract_params(self.R.r('sqrt')(fixed_sd_raw), fixed_names, params) + fixed_sd = extract_params(self.R.r('sqrt')( + fixed_sd_raw), fixed_names, params) - ### extract random effects + # extract random effects if self.random: # means ran_mean_raw = get_R_attr(self.sdreport, 'par.random') @@ -427,69 +484,74 @@ def get_R_slot(obj, slot): ran_names = ran_mean_raw.names # keep just selected ran_mean = extract_params(ran_mean_raw, ran_names, params) - ran_sd = extract_params(self.R.r('sqrt')(ran_sd_raw), ran_names, params) + ran_sd = extract_params(self.R.r('sqrt')( + ran_sd_raw), ran_names, params) else: ran_names = [] - ### put the means/sds in the right order - ## the reason being that the joint precision matrix is based on order of model specification, ignoring random vs fixed + # put the means/sds in the right order + # the reason being that the joint precision matrix is based on order of + # model specification, ignoring random vs fixed # initialize R vectors - means = self.R.FloatVector([self.R.NA_Real for i in xrange(len(ordered_params))]) - sds = self.R.FloatVector([self.R.NA_Real for i in xrange(len(ordered_params))]) + means = self.R.FloatVector( + [self.R.NA_Real for i in range(len(ordered_params))]) + sds = self.R.FloatVector( + [self.R.NA_Real for i in range(len(ordered_params))]) # loop through and add the parameters to the means/sds in the correct order for p in set(ordered_params): # index in joint precision matrix - i_joint = [ii for ii,pp in enumerate(ordered_params) if pp == p] + i_joint = [ii for ii, pp in enumerate(ordered_params) if pp == p] # index in random effects - i_ran = [ii for ii,pp in enumerate(ran_names) if pp == p] + i_ran = [ii for ii, pp in enumerate(ran_names) if pp == p] # index in fixed effects - i_fixed = [ii for ii,pp in enumerate(fixed_names) if pp == p] + i_fixed = [ii for ii, pp in enumerate(fixed_names) if pp == p] # copy into the appropriate position if i_ran: means[i_joint[0]:i_joint[-1]+1] = ran_mean[i_ran[0]:i_ran[-1]+1] sds[i_joint[0]:i_joint[-1]+1] = ran_sd[i_ran[0]:i_ran[-1]+1] elif i_fixed: - means[i_joint[0]:i_joint[-1]+1] = fixed_mean[i_fixed[0]:i_fixed[-1]+1] + means[i_joint[0]:i_joint[-1] + + 1] = fixed_mean[i_fixed[0]:i_fixed[-1]+1] sds[i_joint[0]:i_joint[-1]+1] = fixed_sd[i_fixed[0]:i_fixed[-1]+1] - ### generate draws + # generate draws # convert mean/sd to python means = np.array(means) sds = np.array(sds) - ## simulation function + # simulation function # see http://en.wikipedia.org/wiki/Multivariate_normal_distribution#Drawing_values_from_the_distribution + def gen_draws(mu, prec, n): - try: - from sksparse.cholmod import cholesky - except: - from scikits.sparse.cholmod import cholesky - from scipy.sparse.linalg import spsolve - z = np.random.normal(size=(mu.shape[0],n)) + + z = np.random.normal(size=(mu.shape[0], n)) chol_jp = cholesky(prec) - ### note: would typically use scikits.sparse.cholmod.cholesky.solve_Lt, - ### but there seems to be a bug there: https://github.com/njsmith/scikits-sparse/issues/9#issuecomment-76862652 - return mu[:,np.newaxis] + chol_jp.apply_Pt(spsolve(chol_jp.L().T, z)) - #### make draws + # note: would typically use scikits.sparse.cholmod.cholesky.solve_Lt, + # but there seems to be a bug there: + # https://github.com/njsmith/scikits-sparse/issues/9#issuecomment-76862652 + return mu[:, np.newaxis] + chol_jp.apply_Pt(spsolve(chol_jp.L().T, z)) + # make draws if draws: param_draws = gen_draws(means, joint_prec, draws) - ### store results + # store results # constrain draws if requested if constrain and draws: # find which draws are more than {constraint} standard deviations from the mean - wacky_draws = np.where(np.any([ \ - np.greater(param_draws.T, means + (constrain * sds)), \ - np.less(param_draws.T, means - (constrain * sds))], axis=0).T) + wacky_draws = np.where(np.any([ + np.greater(param_draws.T, means + (constrain * sds)), + np.less(param_draws.T, means - (constrain * sds))], axis=0).T) # replace those draws with the mean param_draws[wacky_draws] = means[wacky_draws[0]] # add parameters' mean, sd, and optionally draws to the parameters dictionary for p in set(ordered_params): - i = [ii for ii,pp in enumerate(ordered_params) if pp == p] # names will be the same for every item in a vector/matrix, so find all corresponding indices + # names will be the same for every item in a vector/matrix, so find all corresponding indices + i = [ii for ii, pp in enumerate(ordered_params) if pp == p] if draws: if type(self.init[p]) == np.ndarray: - these_draws = param_draws[i,].reshape(list(self.init[p].shape) + [draws], order='F') + these_draws = param_draws[i, ].reshape( + list(self.init[p].shape) + [draws], order='F') else: - these_draws = param_draws[i,] + these_draws = param_draws[i, ] self.parameters[p] = { 'mean': means[i], 'sd': sds[i], @@ -508,9 +570,10 @@ def gen_draws(mu, prec, n): 'name': ordered_params } - ### print results + # print results if not quiet: - print('\nSimulated {n} draws in {t:.1f}s.\n'.format(n=draws, t=time.time()-start)) + print('\nSimulated {n} draws in {t:.1f}s.\n'.format( + n=draws, t=time.time()-start)) self.print_parameters() def draws(self, parameter): @@ -524,16 +587,21 @@ def print_parameters(self): Print summary statistics of the model parameter fits ''' np.set_printoptions(threshold=5, edgeitems=2) - for p,v in self.parameters.iteritems(): + for p, v in self.parameters.items(): if 'draws' in v: d = v['draws'] if d.shape[0] == 1: - print('{p}:\n\tmean\t{m}\n\tsd\t{s}\n\tdraws\t{d}\n\tshape\t{z}'.format(p=p, m=v['mean'], s=v['sd'], d=d, z=d.shape)) + print('{p}:\n\tmean\t{m}\n\tsd\t{s}\n\tdraws\t{d}\n\tshape\t{z}'.format( + p=p, m=v['mean'], s=v['sd'], d=d, z=d.shape)) elif len(d.shape) == 2: - print('{p}:\n\tmean\t{m}\n\tsd\t{s}\n\tdraws\t{d}\n\tshape\t{z}'.format(p=p, m=v['mean'], s=v['sd'], d='[{0},\n\t\t ...,\n\t\t {1}]'.format(d[0,:], d[d.shape[0]-1,:]), z=d.shape)) + print('{p}:\n\tmean\t{m}\n\tsd\t{s}\n\tdraws\t{d}\n\tshape\t{z}'.format( + p=p, m=v['mean'], s=v['sd'], d='[{0},\n\t\t ...,\n\t\t {1}]'.format( + d[0, :], d[d.shape[0]-1, :]), z=d.shape)) else: - print('{p}:\n\tmean\t{m}\n\tsd\t{s}\n\tdraws\t{d}\n\tshape\t{z}'.format(p=p, m=v['mean'], s=v['sd'], d='[[{0},\n\t\t ...,\n\t\t {1}]]'.format(d[0,0,:], d[d.shape[0]-1,d.shape[1]-1,:]), z=d.shape)) + print('{p}:\n\tmean\t{m}\n\tsd\t{s}\n\tdraws\t{d}\n\tshape\t{z}'.format( + p=p, m=v['mean'], s=v['sd'], d='[[{0},\n\t\t ...,\n\t\t {1}]]'.format( + d[0, 0, :], d[d.shape[0]-1, d.shape[1]-1, :]), z=d.shape)) else: - print('{p}:\n\tmean\t{m}\n\tsd\t{s}\n\tdraws\tNone'.format(p=p, m=v['mean'], s=v['sd'])) + print('{p}:\n\tmean\t{m}\n\tsd\t{s}\n\tdraws\tNone'.format( + p=p, m=v['mean'], s=v['sd'])) np.set_printoptions(threshold=1000, edgeitems=3) - diff --git a/README.md b/README.md index 4a93235..55404dd 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,9 @@ # PyMB Python Model Builder - fit statistical models using algorithmic differentiation +[![Build Status](https://travis-ci.org/sadatnfs/PyMB.svg?branch=py36_migration)](https://travis-ci.org/sadatnfs/PyMB) + + ## Concept PyMB uses [algorithmic differentiation](http://en.wikipedia.org/wiki/Automatic_differentiation) from [CppAD](http://www.coin-or.org/CppAD/) to compute derivatives of objective functions, allowing users to quickly find @@ -21,12 +24,14 @@ A demo [iPython notebook](http://ipython.org/notebook.html) can be found at [Examples/Linear Regression.ipynb](Examples/Linear Regression.ipynb) or an online version can be viewed [here](http://nbviewer.ipython.org/github/kforeman/PyMB/blob/master/Examples/Linear%20Regression%20-%20Magic.ipynb). ## Installation -An example Dockerfile that will create a working PyMB install can be found [here](https://gist.github.com/kforeman/4cd9495f0222b6fbfa79ebcee993ae78). +An example Dockerfile that will create a working PyMB install can be found [here](Dockerfile). #### Dependencies * [R](http://www.r-project.org/) * [TMB](https://github.com/kaskr/adcomp) * [rpy2](http://rpy.sourceforge.net/) * [numpy](http://www.numpy.org/) +* [scipy](https://scipy.org/) +* [sksparse](https://github.com/scikit-sparse/scikit-sparse) #### Importing `setup.py` has not yet been added. For now simply `git clone git@github.com:kforeman/PyMB.git` into your working directory, diff --git a/__init__.py b/__init__.py deleted file mode 100644 index 4a10a05..0000000 --- a/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from model import * diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..b69108b --- /dev/null +++ b/setup.py @@ -0,0 +1,31 @@ +from setuptools import setup, find_packages +import numpy +VERSION = 0.1 + +interactive_requirements = [ + 'IPython', + 'jupyter', +] + + +setup( + name='PyMB', + version=VERSION, + # packages=['', 'PyMB.magic'], + packages=find_packages(), + url='https://github.com/kforeman/PyMB', + license='GNU General Public License v2.0', + author='Kyle Foreman', + author_email='kforeman post harvard edu', + description='Python Model Builder', + include_dirs=[ + numpy.get_include(), + ], + install_requires=[ + 'numpy', 'rpy2', 'scipy', 'scikit-sparse', + ], + extras_require={ + 'interactive': interactive_requirements, + } + +)