Skip to content

Commit

Permalink
fixed docstrings, bumped version (#34)
Browse files Browse the repository at this point in the history
* fixed docstrings, bumped version

* fixed bug where classes weren't using custom inputs

Co-authored-by: rkobrosly <[email protected]>
  • Loading branch information
ronikobrosly and rkobrosly authored Jan 17, 2021
1 parent 25c4e15 commit 0610924
Show file tree
Hide file tree
Showing 14 changed files with 317 additions and 248 deletions.
2 changes: 1 addition & 1 deletion causal_curve/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class Core:
"""Base class for causal_curve module"""

def __init__(self):
__version__ = "1.0.1"
__version__ = "1.0.2"

def get_params(self):
"""Returns a dict of all of the object's user-facing parameters
Expand Down
42 changes: 29 additions & 13 deletions causal_curve/gps_classifier.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Defines the Generalized Propensity Score (GPS) classifier model class
"""
from pprint import pprint

import numpy as np
from scipy.special import logit
Expand All @@ -12,6 +13,15 @@ class GPS_Classifier(GPS_Core):
"""
A GPS tool that handles binary outcomes. Inherits the GPS_Core
base class. See that base class's docstring for more details.
Methods
----------
estimate_log_odds: (self, T)
Calculates the predicted log odds of the highest integer class. Can
only be used when the outcome is binary.
"""

def __init__(
Expand All @@ -27,19 +37,25 @@ def __init__(
random_seed=None,
verbose=False,
):
GPS_Core.__init__(
self,
gps_family=None,
treatment_grid_num=100,
lower_grid_constraint=0.01,
upper_grid_constraint=0.99,
spline_order=3,
n_splines=30,
lambda_=0.5,
max_iter=100,
random_seed=None,
verbose=False,
)

self.gps_family = gps_family
self.treatment_grid_num = treatment_grid_num
self.lower_grid_constraint = lower_grid_constraint
self.upper_grid_constraint = upper_grid_constraint
self.spline_order = spline_order
self.n_splines = n_splines
self.lambda_ = lambda_
self.max_iter = max_iter
self.random_seed = random_seed
self.verbose = verbose

# Validate the params
self._validate_init_params()
self.rand_seed_wrapper()

self.if_verbose_print("Using the following params for GPS model:")
if self.verbose:
pprint(self.get_params(), indent=4)

def _cdrc_predictions_binary(self, ci):
"""Returns the predictions of CDRC for each value of the treatment grid. Essentially,
Expand Down
40 changes: 14 additions & 26 deletions causal_curve/gps_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import contextlib
import io
from pprint import pprint

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -121,31 +120,28 @@ class GPS_Core(Core):
calculate_CDRC: (self, ci)
Calculates the CDRC (and confidence interval) from trained model.
predict: (self, T)
Calculates point estimate within the CDRC given treatment values.
Can only be used when outcome is continuous.
predict_interval: (self, T, ci)
Calculates the prediction confidence interval associated with a point estimate
within the CDRC given treatment values. Can only be used when outcome is continuous.
predict_log_odds: (self, T)
Calculates the predicted log odds of the highest integer class. Can
only be used when the outcome is binary.
print_gam_summary: (self)
Prints pyGAM text summary of GAM predicting outcome from the treatment and the GPS.
Examples
--------
>>> from causal_curve import GPS
>>> gps = GPS(treatment_grid_num = 200, random_seed = 512)
>>> # With continuous outcome
>>> from causal_curve import GPS_Regressor
>>> gps = GPS_Regressor(treatment_grid_num = 200, random_seed = 512)
>>> gps.fit(T = df['Treatment'], X = df[['X_1', 'X_2']], y = df['Outcome'])
>>> gps_results = gps.calculate_CDRC(0.95)
>>> treatment_points = np.array([10,15,20,25])
>>> preds = gps.predict(treatment_points)
>>> conf_ints = gps.predict_interval(treatment_points, 0.95)
>>> point_estimate = gps.point_estimate(np.array([5.0]))
>>> point_estimate_interval = gps.point_estimate_interval(np.array([5.0]), 0.95)
>>> # With binary outcome
>>> from causal_curve import GPS_Classifier
>>> gps = GPS_Classifier()
>>> gps.fit(T = df['Treatment'], X = df[['X_1', 'X_2']], y = df['Binary_Outcome'])
>>> gps_results = gps.calculate_CDRC(0.95)
>>> log_odds = gps.estimate_log_odds(np.array([5.0]))
References
Expand Down Expand Up @@ -188,14 +184,6 @@ def __init__(
self.random_seed = random_seed
self.verbose = verbose

# Validate the params
self._validate_init_params()
self.rand_seed_wrapper()

self.if_verbose_print("Using the following params for GPS model:")
if self.verbose:
pprint(self.get_params(), indent=4)

def _validate_init_params(self):
"""
Checks that the params used when instantiating GPS model are formatted correctly
Expand Down
45 changes: 32 additions & 13 deletions causal_curve/gps_regressor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Defines the Generalized Propensity Score (GPS) regressor model class
"""
from pprint import pprint

import numpy as np

Expand All @@ -11,6 +12,18 @@ class GPS_Regressor(GPS_Core):
"""
A GPS tool that handles continuous outcomes. Inherits the GPS_Core
base class. See that base class's docstring for more details.
Methods
----------
point_estimate: (self, T)
Calculates point estimate within the CDRC given treatment values.
Can only be used when outcome is continuous.
point_estimate_interval: (self, T, ci)
Calculates the prediction confidence interval associated with a point estimate
within the CDRC given treatment values. Can only be used when outcome is continuous.
"""

def __init__(
Expand All @@ -26,19 +39,25 @@ def __init__(
random_seed=None,
verbose=False,
):
GPS_Core.__init__(
self,
gps_family=None,
treatment_grid_num=100,
lower_grid_constraint=0.01,
upper_grid_constraint=0.99,
spline_order=3,
n_splines=30,
lambda_=0.5,
max_iter=100,
random_seed=None,
verbose=False,
)

self.gps_family = gps_family
self.treatment_grid_num = treatment_grid_num
self.lower_grid_constraint = lower_grid_constraint
self.upper_grid_constraint = upper_grid_constraint
self.spline_order = spline_order
self.n_splines = n_splines
self.lambda_ = lambda_
self.max_iter = max_iter
self.random_seed = random_seed
self.verbose = verbose

# Validate the params
self._validate_init_params()
self.rand_seed_wrapper()

self.if_verbose_print("Using the following params for GPS model:")
if self.verbose:
pprint(self.get_params(), indent=4)

def _cdrc_predictions_continuous(self, ci):
"""Returns the predictions of CDRC for each value of the treatment grid. Essentially,
Expand Down
25 changes: 15 additions & 10 deletions causal_curve/tmle_core.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
"""
Defines the Targeted Maximum Likelihood Estimation (TMLE) model class
"""
from pprint import pprint

import numpy as np
import pandas as pd
from pandas.api.types import is_float_dtype, is_numeric_dtype
Expand Down Expand Up @@ -85,6 +83,14 @@ class TMLE_Core(Core):
Attributes
----------
grid_values: array of shape (treatment_grid_num, )
The gridded values of the treatment variable. Equally spaced.
final_gam: `pygam.LinearGAM` class
trained final model of `LinearGAM` class, from pyGAM library
pseudo_out: array of shape (observations, )
Adjusted, pseudo-outcome observations
Methods
Expand All @@ -99,6 +105,13 @@ class TMLE_Core(Core):
Examples
--------
>>> # With continuous outcome
>>> from causal_curve import TMLE_Regressor
>>> tmle = TMLE_Regressor()
>>> tmle.fit(T = df['Treatment'], X = df[['X_1', 'X_2']], y = df['Outcome'])
>>> tmle_results = tmle.calculate_CDRC(0.95)
>>> point_estimate = tmle.point_estimate(np.array([5.0]))
>>> point_estimate_interval = tmle.point_estimate_interval(np.array([5.0]), 0.95)
References
Expand Down Expand Up @@ -138,14 +151,6 @@ def __init__(
self.random_seed = random_seed
self.verbose = verbose

# Validate the params
self._validate_init_params()
self.rand_seed_wrapper()

self.if_verbose_print("Using the following params for TMLE model:")
if self.verbose:
pprint(self.get_params(), indent=4)

def _validate_init_params(self):
"""
Checks that the params used when instantiating TMLE model are formatted correctly
Expand Down
39 changes: 39 additions & 0 deletions causal_curve/tmle_regressor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Defines the Targeted Maximum Likelihood Estimation (TMLE) regressor model class
"""
from pprint import pprint

import numpy as np

Expand All @@ -11,8 +12,46 @@ class TMLE_Regressor(TMLE_Core):
"""
A TMLE tool that handles continuous outcomes. Inherits the TMLE_Core
base class. See that base class's docstring for more details.
Methods
----------
point_estimate: (self, T)
Calculates point estimate within the CDRC given treatment values.
Can only be used when outcome is continuous.
"""

def __init__(
self,
treatment_grid_num=100,
lower_grid_constraint=0.01,
upper_grid_constraint=0.99,
n_estimators=200,
learning_rate=0.01,
max_depth=3,
bandwidth=0.5,
random_seed=None,
verbose=False,
):

self.treatment_grid_num = treatment_grid_num
self.lower_grid_constraint = lower_grid_constraint
self.upper_grid_constraint = upper_grid_constraint
self.n_estimators = n_estimators
self.learning_rate = learning_rate
self.max_depth = max_depth
self.bandwidth = bandwidth
self.random_seed = random_seed
self.verbose = verbose

# Validate the params
self._validate_init_params()
self.rand_seed_wrapper()

self.if_verbose_print("Using the following params for TMLE model:")
if self.verbose:
pprint(self.get_params(), indent=4)

def _cdrc_predictions_continuous(self, ci):
"""Returns the predictions of CDRC for each value of the treatment grid. Essentially,
we're making predictions using the original treatment against the pseudo-outcome.
Expand Down
7 changes: 7 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@ Change Log
==========


Version 1.0.2
-------------
- Updated end-to-end example notebook in `/examples` folder
- Fixed various class docstrings that still referenced the old v0.5.2 API
- Fixed bug where custom class input parameters weren't being used


Version 1.0.1
-------------
- Added to TMLE overview in the docs (including plot)
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
author = "Roni Kobrosly"

# The full version, including alpha/beta/rc tags
release = "1.0.1"
release = "1.0.2"

# -- General configuration ---------------------------------------------------

Expand Down
37 changes: 16 additions & 21 deletions examples/NHANES_BLL_example.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="causal-curve",
version="1.0.1",
version="1.0.2",
author="Roni Kobrosly",
author_email="[email protected]",
description="A python library with tools to perform causal inference using \
Expand Down
Loading

0 comments on commit 0610924

Please sign in to comment.