fixed docstrings, bumped version (#34)

* fixed docstrings, bumped version
* fixed bug where classes weren't using custom inputs

Co-authored-by: rkobrosly <[email protected]>
ronikobrosly · Jan 17, 2021 · 0610924 · 0610924
1 parent 25c4e15
commit 0610924
Show file tree

Hide file tree

Showing 14 changed files with 317 additions and 248 deletions.
diff --git a/causal_curve/core.py b/causal_curve/core.py
@@ -10,7 +10,7 @@ class Core:
     """Base class for causal_curve module"""
 
     def __init__(self):
-        __version__ = "1.0.1"
+        __version__ = "1.0.2"
 
     def get_params(self):
         """Returns a dict of all of the object's user-facing parameters

diff --git a/causal_curve/gps_classifier.py b/causal_curve/gps_classifier.py
@@ -1,6 +1,7 @@
 """
 Defines the Generalized Prospensity Score (GPS) classifier model class
 """
+from pprint import pprint
 
 import numpy as np
 from scipy.special import logit
@@ -12,6 +13,15 @@ class GPS_Classifier(GPS_Core):
     """
     A GPS tool that handles binary outcomes. Inherits the GPS_core
     base class. See that base class code its docstring for more details.
+
+
+    Methods
+    ----------
+
+    estimate_log_odds: (self, T)
+        Calculates the predicted log odds of the highest integer class. Can
+        only be used when the outcome is binary.
+
     """
 
     def __init__(
@@ -27,19 +37,25 @@ def __init__(
         random_seed=None,
         verbose=False,
     ):
-        GPS_Core.__init__(
-            self,
-            gps_family=None,
-            treatment_grid_num=100,
-            lower_grid_constraint=0.01,
-            upper_grid_constraint=0.99,
-            spline_order=3,
-            n_splines=30,
-            lambda_=0.5,
-            max_iter=100,
-            random_seed=None,
-            verbose=False,
-        )
+
+        self.gps_family = gps_family
+        self.treatment_grid_num = treatment_grid_num
+        self.lower_grid_constraint = lower_grid_constraint
+        self.upper_grid_constraint = upper_grid_constraint
+        self.spline_order = spline_order
+        self.n_splines = n_splines
+        self.lambda_ = lambda_
+        self.max_iter = max_iter
+        self.random_seed = random_seed
+        self.verbose = verbose
+
+        # Validate the params
+        self._validate_init_params()
+        self.rand_seed_wrapper()
+
+        self.if_verbose_print("Using the following params for GPS model:")
+        if self.verbose:
+            pprint(self.get_params(), indent=4)
 
     def _cdrc_predictions_binary(self, ci):
         """Returns the predictions of CDRC for each value of the treatment grid. Essentially,

diff --git a/causal_curve/gps_core.py b/causal_curve/gps_core.py
@@ -4,7 +4,6 @@
 
 import contextlib
 import io
-from pprint import pprint
 
 import numpy as np
 import pandas as pd
@@ -121,31 +120,28 @@ class GPS_Core(Core):
     calculate_CDRC: (self, ci)
         Calculates the CDRC (and confidence interval) from trained model.
 
-    predict: (self, T)
-        Calculates point estimate within the CDRC given treatment values.
-        Can only be used when outcome is continuous.
-
-    predict_interval: (self, T, ci)
-        Calculates the prediction confidence interval associated with a point estimate
-        within the CDRC given treatment values. Can only be used when outcome is continuous.
-
-    predict_log_odds: (self, T)
-        Calculates the predicted log odds of the highest integer class. Can
-        only be used when the outcome is binary.
-
     print_gam_summary: (self)
         Prints pyGAM text summary of GAM predicting outcome from the treatment and the GPS.
 
 
     Examples
     --------
-    >>> from causal_curve import GPS
-    >>> gps = GPS(treatment_grid_num = 200, random_seed = 512)
+
+    >>> # With continuous outcome
+    >>> from causal_curve import GPS_Regressor
+    >>> gps = GPS_Regressor(treatment_grid_num = 200, random_seed = 512)
     >>> gps.fit(T = df['Treatment'], X = df[['X_1', 'X_2']], y = df['Outcome'])
     >>> gps_results = gps.calculate_CDRC(0.95)
-    >>> treatment_points = np.array([10,15,20,25])
-    >>> preds = gps.predict(treatment_points)
-    >>> conf_ints = gps.predict_interval(treatment_points, 0.95)
+    >>> point_estimate = gps.point_estimate(np.array([5.0]))
+    >>> point_estimate_interval = gps.point_estimate_interval(np.array([5.0]), 0.95)
+
+
+    >>> # With binary outcome
+    >>> from causal_curve import GPS_Classifier
+    >>> gps = GPS_Classifier()
+    >>> gps.fit(T = df['Treatment'], X = df[['X_1', 'X_2']], y = df['Binary_Outcome'])
+    >>> gps_results = gps.calculate_CDRC(0.95)
+    >>> log_odds = gps.estimate_log_odds(np.array([5.0]))
 
 
     References
@@ -188,14 +184,6 @@ def __init__(
         self.random_seed = random_seed
         self.verbose = verbose
 
-        # Validate the params
-        self._validate_init_params()
-        self.rand_seed_wrapper()
-
-        self.if_verbose_print("Using the following params for GPS model:")
-        if self.verbose:
-            pprint(self.get_params(), indent=4)
-
     def _validate_init_params(self):
         """
         Checks that the params used when instantiating GPS model are formatted correctly

diff --git a/causal_curve/gps_regressor.py b/causal_curve/gps_regressor.py
@@ -1,6 +1,7 @@
 """
 Defines the Generalized Prospensity Score (GPS) regressor model class
 """
+from pprint import pprint
 
 import numpy as np
 
@@ -11,6 +12,18 @@ class GPS_Regressor(GPS_Core):
     """
     A GPS tool that handles continuous outcomes. Inherits the GPS_core
     base class. See that base class code its docstring for more details.
+
+    Methods
+    ----------
+
+    point_estimate: (self, T)
+        Calculates point estimate within the CDRC given treatment values.
+        Can only be used when outcome is continuous.
+
+    point_estimate_interval: (self, T, ci)
+        Calculates the prediction confidence interval associated with a point estimate
+        within the CDRC given treatment values. Can only be used when outcome is continuous.
+
     """
 
     def __init__(
@@ -26,19 +39,25 @@ def __init__(
         random_seed=None,
         verbose=False,
     ):
-        GPS_Core.__init__(
-            self,
-            gps_family=None,
-            treatment_grid_num=100,
-            lower_grid_constraint=0.01,
-            upper_grid_constraint=0.99,
-            spline_order=3,
-            n_splines=30,
-            lambda_=0.5,
-            max_iter=100,
-            random_seed=None,
-            verbose=False,
-        )
+
+        self.gps_family = gps_family
+        self.treatment_grid_num = treatment_grid_num
+        self.lower_grid_constraint = lower_grid_constraint
+        self.upper_grid_constraint = upper_grid_constraint
+        self.spline_order = spline_order
+        self.n_splines = n_splines
+        self.lambda_ = lambda_
+        self.max_iter = max_iter
+        self.random_seed = random_seed
+        self.verbose = verbose
+
+        # Validate the params
+        self._validate_init_params()
+        self.rand_seed_wrapper()
+
+        self.if_verbose_print("Using the following params for GPS model:")
+        if self.verbose:
+            pprint(self.get_params(), indent=4)
 
     def _cdrc_predictions_continuous(self, ci):
         """Returns the predictions of CDRC for each value of the treatment grid. Essentially,

diff --git a/causal_curve/tmle_core.py b/causal_curve/tmle_core.py
@@ -1,8 +1,6 @@
 """
 Defines the Targetted Maximum likelihood Estimation (TMLE) model class
 """
-from pprint import pprint
-
 import numpy as np
 import pandas as pd
 from pandas.api.types import is_float_dtype, is_numeric_dtype
@@ -85,6 +83,14 @@ class TMLE_Core(Core):
     Attributes
     ----------
 
+    grid_values: array of shape (treatment_grid_num, )
+        The gridded values of the treatment variable. Equally spaced.
+
+    final_gam: `pygam.LinearGAM` class
+        Trained final model of `LinearGAM` class, from pyGAM library
+
+    pseudo_out: array of shape (observations, )
+        Adjusted, pseudo-outcome observations
 
 
     Methods
@@ -99,6 +105,13 @@ class TMLE_Core(Core):
     Examples
     --------
 
+    >>> # With continuous outcome
+    >>> from causal_curve import TMLE_Regressor
+    >>> tmle = TMLE_Regressor()
+    >>> tmle.fit(T = df['Treatment'], X = df[['X_1', 'X_2']], y = df['Outcome'])
+    >>> tmle_results = tmle.calculate_CDRC(0.95)
+    >>> point_estimate = tmle.point_estimate(np.array([5.0]))
+    >>> point_estimate_interval = tmle.point_estimate_interval(np.array([5.0]), 0.95)
 
 
     References
@@ -138,14 +151,6 @@ def __init__(
         self.random_seed = random_seed
         self.verbose = verbose
 
-        # Validate the params
-        self._validate_init_params()
-        self.rand_seed_wrapper()
-
-        self.if_verbose_print("Using the following params for TMLE model:")
-        if self.verbose:
-            pprint(self.get_params(), indent=4)
-
     def _validate_init_params(self):
         """
         Checks that the params used when instantiating TMLE model are formatted correctly

diff --git a/causal_curve/tmle_regressor.py b/causal_curve/tmle_regressor.py
@@ -1,6 +1,7 @@
 """
 Defines the Targetted Maximum likelihood Estimation (TMLE) regressor model class
 """
+from pprint import pprint
 
 import numpy as np
 
@@ -11,8 +12,46 @@ class TMLE_Regressor(TMLE_Core):
     """
     A TMLE tool that handles continuous outcomes. Inherits the TMLE_core
     base class. See that base class code its docstring for more details.
+
+    Methods
+    ----------
+
+    point_estimate: (self, T)
+        Calculates point estimate within the CDRC given treatment values.
+        Can only be used when outcome is continuous.
     """
 
+    def __init__(
+        self,
+        treatment_grid_num=100,
+        lower_grid_constraint=0.01,
+        upper_grid_constraint=0.99,
+        n_estimators=200,
+        learning_rate=0.01,
+        max_depth=3,
+        bandwidth=0.5,
+        random_seed=None,
+        verbose=False,
+    ):
+
+        self.treatment_grid_num = treatment_grid_num
+        self.lower_grid_constraint = lower_grid_constraint
+        self.upper_grid_constraint = upper_grid_constraint
+        self.n_estimators = n_estimators
+        self.learning_rate = learning_rate
+        self.max_depth = max_depth
+        self.bandwidth = bandwidth
+        self.random_seed = random_seed
+        self.verbose = verbose
+
+        # Validate the params
+        self._validate_init_params()
+        self.rand_seed_wrapper()
+
+        self.if_verbose_print("Using the following params for TMLE model:")
+        if self.verbose:
+            pprint(self.get_params(), indent=4)
+
     def _cdrc_predictions_continuous(self, ci):
         """Returns the predictions of CDRC for each value of the treatment grid. Essentially,
         we're making predictions using the original treatment against the pseudo-outcome.

diff --git a/docs/changelog.rst b/docs/changelog.rst
@@ -5,6 +5,13 @@ Change Log
 ==========
 
 
+Version 1.0.2
+-------------
+- Updated end-to-end example notebook in `/examples` folder
+- Fixed various class docstrings that still referenced the old v0.5.2 API
+- Fixed bug where custom class input parameters weren't being used
+
+
 Version 1.0.1
 -------------
 - Added to TMLE overview in the docs (including plot)

diff --git a/docs/conf.py b/docs/conf.py
@@ -23,7 +23,7 @@
 author = "Roni Kobrosly"
 
 # The full version, including alpha/beta/rc tags
-release = "1.0.1"
+release = "1.0.2"
 
 # -- General configuration ---------------------------------------------------
 

diff --git a/examples/NHANES_BLL_example.ipynb b/examples/NHANES_BLL_example.ipynb
diff --git a/setup.py b/setup.py
@@ -5,7 +5,7 @@
 
 setuptools.setup(
     name="causal-curve",
-    version="1.0.1",
+    version="1.0.2",
     author="Roni Kobrosly",
     author_email="[email protected]",
     description="A python library with tools to perform causal inference using \