Commit

Merge branch 'PDEFIND_and_IntegralSINDy' of https://github.com/dynamicslab/pysindy into PDEFIND_and_IntegralSINDy
briandesilva committed Nov 20, 2021
2 parents e6a3f15 + 2e1a61f commit 83f1932
Showing 8 changed files with 72 additions and 80 deletions.
Binary file added docs/JOSS2/Fig3.png
4 changes: 3 additions & 1 deletion docs/JOSS2/paper.md
@@ -91,7 +91,7 @@ In addition, we have extended `PySINDy` to handle more complex modeling scenario

`PySINDy` includes extensive Jupyter notebook tutorials that demonstrate the usage of various features of the package and reproduce nearly the entirety of the examples from the original SINDy paper [@brunton2016pnas], trapping SINDy paper [@kaptanoglu2021promoting], and the PDE-FIND paper [@Rudy2017sciadv].
We include an extended example for the quasiperiodic shear-driven cavity flow [@callaham2021role].
- As a simple illustration of the new functionality, we demonstrate how SINDy can be used to identify the Kuramoto-Sivashinsky (KS) PDE from data. We train the model on the first 60\% of the data from Rudy et al. [@Rudy2017sciadv], which in total contains 1024 spatial grid points and 251 time steps. The KS model is identified correctly and the prediction for $\dot{\mathbf{q}}$ on the remaining testing data indicates strong performance in \autoref{fig:pde_id}.
+ As a simple illustration of the new functionality, we demonstrate how SINDy can be used to identify the Kuramoto-Sivashinsky (KS) PDE from data. We train the model on the first 60\% of the data from Rudy et al. [@Rudy2017sciadv], which in total contains 1024 spatial grid points and 251 time steps. The KS model is identified correctly and the prediction for $\dot{\mathbf{q}}$ on the remaining testing data indicates strong performance in \autoref{fig:pde_id}. Lastly, we provide a useful flow chart in \autoref{fig:flow_chart} so that users can make informed choices about which advanced methods are suitable for their datasets.

# Conclusion
The goal of the `PySINDy` package is to enable anyone with access to measurement data to engage in scientific model discovery. The package is designed to be accessible to inexperienced users, adhere to `scikit-learn` standards, include most of the existing SINDy variations in the literature, and provide a large variety of functionality for more advanced users. We hope that researchers will use and contribute to the code in the future, pushing the boundaries of what is possible in system identification.
@@ -105,4 +105,6 @@ SLB, AAK, KK, and UF acknowledge support from the Army Research Office (ARO W91

![`PySINDy` can now be used for PDE identification; we illustrate this new capability by accurately capturing a set of testing data from the Kuramoto-Sivashinsky system, described by $q_t = -qq_x - q_{xx} - q_{xxxx}$. The identified model is $q_t = -0.98qq_x -0.99q_{xx} - 1.0q_{xxxx}$.\label{fig:pde_id}](Fig2.png)

+ ![This flow chart summarizes how `PySINDy` users can start with a dataset and systematically choose the proper candidate library and sparse regression optimizer that are tailored for a specific scientific task. The `GeneralizedLibrary` class allows for tensoring, concatenating, and otherwise combining many different candidate libraries.\label{fig:flow_chart}](Fig3.png)

# References
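
The paper hunk above describes identifying the KS equation from the Rudy et al. data. Below is a minimal sketch of that PDE-identification workflow; since the KS dataset is not bundled with this page, it uses the 1D heat equation as stand-in data, and the `PDELibrary` keyword names follow the example notebooks from this era of the package, so they may differ in other versions.

```python
import numpy as np
import pysindy as ps

# Simulate the 1D heat equation u_t = u_xx on a periodic domain with an
# explicit finite-difference scheme (stand-in data for the KS example).
nx, nt = 128, 400
x = np.linspace(0, 2 * np.pi, nx, endpoint=False)
dx = x[1] - x[0]
dt = 0.2 * dx**2  # stable explicit time step
u = np.zeros((nx, nt))
u[:, 0] = np.sin(x)
for n in range(nt - 1):
    u_xx = (np.roll(u[:, n], -1) - 2 * u[:, n] + np.roll(u[:, n], 1)) / dx**2
    u[:, n + 1] = u[:, n] + dt * u_xx

# Candidate terms: u, u^2, and spatial derivatives up to second order.
pde_lib = ps.PDELibrary(
    library_functions=[lambda u: u, lambda u: u * u],
    function_names=[lambda u: u, lambda u: u + u],
    derivative_order=2,
    spatial_grid=x,
    is_uniform=True,
)
model = ps.SINDy(feature_library=pde_lib, optimizer=ps.STLSQ(threshold=0.1))
model.fit(u[:, :, np.newaxis], t=dt)  # shape (n_space, n_time, n_features)
model.print()  # should ideally recover u_t ≈ 1.0 u_xx
```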
78 changes: 40 additions & 38 deletions examples/1_feature_overview.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pysindy/optimizers/constrained_sr3.py
@@ -222,11 +222,11 @@ def _update_coef_cvxpy(self, x, y, coef_sparse):
        if self.thresholder.lower() == "l1":
            cost = cost + self.threshold * cp.norm1(xi)
        elif self.thresholder.lower() == "weighted_l1":
-            cost = cost + cp.norm1(np.ravel(self.thresholds) * xi)
+            cost = cost + cp.norm1(np.ravel(self.thresholds) @ xi)
        elif self.thresholder.lower() == "l2":
            cost = cost + self.threshold * cp.norm2(xi)
        elif self.thresholder.lower() == "weighted_l2":
-            cost = cost + cp.norm2(np.ravel(self.thresholds) * xi)
+            cost = cost + cp.norm2(np.ravel(self.thresholds) @ xi)
        if self.use_constraints:
            if self.inequality_constraints:
                prob = cp.Problem(
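
A note on the `*` → `@` swap in the weighted penalty terms above: with a NumPy array on the left-hand side, cvxpy historically parsed `*` as matrix multiplication and has deprecated that spelling, so `@` builds the same inner-product expression with the unambiguous operator. A self-contained sketch under that assumption, with illustrative sizes not taken from pysindy:

```python
import cvxpy as cp
import numpy as np

thresholds = np.full(6, 0.1)  # illustrative per-coefficient weights
xi = cp.Variable(6)

# `thresholds @ xi` is a scalar affine (inner-product) expression; older
# code wrote `thresholds * xi`, which cvxpy also treated as matmul but
# now warns about or rejects.
penalty = cp.norm1(thresholds @ xi)
problem = cp.Problem(cp.Minimize(cp.sum_squares(xi - np.ones(6)) + penalty))
problem.solve()
print(np.round(xi.value, 3))
```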
14 changes: 5 additions & 9 deletions pysindy/optimizers/trapping_sr3.py
@@ -7,8 +7,6 @@
from scipy.linalg import cho_solve
from sklearn.exceptions import ConvergenceWarning

-from ..utils import get_prox
-from ..utils import get_regularization
from ..utils import reorder_constraints
from .sr3 import SR3

@@ -273,8 +271,6 @@ def __init__(
        self.tol = tol
        self.tol_m = tol_m
        self.accel = accel
-        self.reg = get_regularization(thresholder)
-        self.prox = get_prox(thresholder)
        self.A_history_ = []
        self.m_history_ = []
        self.PW_history_ = []
@@ -396,11 +392,11 @@ def _solve_sparse_relax_and_split(self, r, N, x_expanded, y, Pmatrix, A, coef_prev):
        if self.thresholder.lower() == "l1":
            cost = cost + self.threshold * cp.norm1(xi)
        elif self.thresholder.lower() == "weighted_l1":
-            cost = cost + cp.norm1(np.ravel(self.thresholds) * xi)
+            cost = cost + cp.norm1(np.ravel(self.thresholds) @ xi)
        elif self.thresholder.lower() == "l2":
            cost = cost + self.threshold * cp.norm2(xi) ** 2
        elif self.thresholder.lower() == "weighted_l2":
-            cost = cost + cp.norm2(np.ravel(self.thresholds) * xi) ** 2
+            cost = cost + cp.norm2(np.ravel(self.thresholds) @ xi) ** 2
        cost = cost + cp.sum_squares(Pmatrix @ xi - A.flatten()) / self.eta
        if self.use_constraints:
            if self.inequality_constraints:
@@ -483,12 +479,12 @@ def _solve_direct_cvxpy(self, r, N, x_expanded, y, Pmatrix, coef_prev):
        cost = cp.sum_squares(x_expanded @ xi - y.flatten())
        if self.thresholder.lower() == "l1":
            cost = cost + self.threshold * cp.norm1(xi)
-        elif self.thresholder.lower() == "weighted_l1":
-            cost = cost + cp.norm1(np.ravel(self.thresholds) * xi)
+        elif self.thresholder.lower() == "weighted_l1" and np.any(self.thresholds != 0):
+            cost = cost + cp.norm1(np.ravel(self.thresholds) @ xi)
        elif self.thresholder.lower() == "l2":
            cost = cost + self.threshold * cp.norm2(xi) ** 2
        elif self.thresholder.lower() == "weighted_l2":
-            cost = cost + cp.norm2(np.ravel(self.thresholds) * xi) ** 2
+            cost = cost + cp.norm2(np.ravel(self.thresholds) @ xi) ** 2
        cost = cost + cp.lambda_max(cp.reshape(Pmatrix @ xi, (r, r))) / self.eta
        if self.use_constraints:
            if self.inequality_constraints:
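
The extra `np.any(self.thresholds != 0)` condition added in `_solve_direct_cvxpy` skips building a penalty term that would be identically zero. A trivial sketch of the pattern:

```python
import numpy as np

thresholds = np.zeros((3, 9))

# With all-zero weights the weighted_l1 term contributes nothing, so the
# guard falls through to the unpenalized least-squares cost.
if np.any(thresholds != 0):
    print("adding weighted_l1 penalty to the cost")
else:
    print("all thresholds are zero; no penalty term added")
```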
18 changes: 1 addition & 17 deletions pysindy/pysindy.py
@@ -52,20 +52,7 @@ class SINDy(BaseEstimator):
        Feature library object used to specify candidate right-hand side features.
        This must be a class extending
        :class:`pysindy.feature_library.base.BaseFeatureLibrary`.
-        The default option is :class:`PolynomialLibrary`. If
-        inputs_per_library parameter is also passed, the library will use only
-        the subset of the inputs specified in this parameter. If
-        inputs_per_library is more than 1D, then feature_library must be a list
-        of feature library objects with the same dimension as
-        inputs_per_library.
-    inputs_per_library : np.ndarray, shape equal to
-        (number of feature libraries, number of variable inputs)
-        Can be used to specify a subset of the variables to use to generate
-        a feature library. If number of feature libraries > 1, then can be
-        use to generate a large number of libraries, each using their own
-        subsets of the input variables. This requires a feature_library to be
-        a GeneralizedLibrary object.
+        The default option is :class:`PolynomialLibrary`.
    differentiation_method : differentiation object, optional
        Method for differentiating the data. This must be a class extending
@@ -170,15 +157,12 @@ def __init__(
        feature_names=None,
        t_default=1,
        discrete_time=False,
-        inputs_per_library=None,
    ):
        if optimizer is None:
            optimizer = STLSQ()
        self.optimizer = optimizer
        if feature_library is None:
            feature_library = PolynomialLibrary()
-        if inputs_per_library is None:
-            self.inputs_per_library = inputs_per_library
        self.feature_library = feature_library
        if differentiation_method is None:
            differentiation_method = FiniteDifference()
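
The removed `inputs_per_library` argument on `SINDy` is superseded by library-side composition via the `GeneralizedLibrary` class mentioned in the removed docstring and in the paper. A minimal sketch, assuming the constructor accepts a plain list of libraries as in the PR's feature overview notebook (keyword names may differ across versions):

```python
import numpy as np
import pysindy as ps

# Combine two candidate libraries instead of passing inputs_per_library
# to SINDy itself.
lib = ps.GeneralizedLibrary([ps.PolynomialLibrary(degree=2), ps.FourierLibrary()])

t = np.linspace(0, 10, 1000)
x = np.stack([np.sin(t), np.cos(t)], axis=-1)  # toy 2D trajectory

model = ps.SINDy(feature_library=lib)
model.fit(x, t=t)
model.print()
```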
2 changes: 1 addition & 1 deletion pysindy/utils/base.py
@@ -231,7 +231,7 @@ def prox_l1(x, threshold):

def prox_weighted_l1(x, thresholds):
    """Proximal operator for weighted l1 regularization."""
-    return np.sign(x) * np.maximum(np.abs(x) - thresholds, np.ones(x.shape))
+    return np.sign(x) * np.maximum(np.abs(x) - thresholds, np.zeros(x.shape))


def prox_l2(x, threshold):
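
The one-line fix above restores standard elementwise soft thresholding: with the old `np.ones` floor, the operator could never shrink any entry below magnitude one (so it never produced sparsity), whereas with `np.zeros` it is the proximal map of the weighted l1 norm, `prox(x)_i = sign(x_i) * max(|x_i| - t_i, 0)`. A quick numeric check of the corrected operator:

```python
import numpy as np

def prox_weighted_l1(x, thresholds):
    """Proximal operator for weighted l1: sign(x) * max(|x| - t, 0)."""
    return np.sign(x) * np.maximum(np.abs(x) - thresholds, np.zeros(x.shape))

x = np.array([0.05, -0.5, 2.0])
t = np.array([0.1, 0.1, 0.1])

# Entries below their threshold are zeroed; the rest shrink toward zero.
print(prox_weighted_l1(x, t))  # [ 0.  -0.4  1.9]
```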
32 changes: 20 additions & 12 deletions test/optimizers/test_optimizers.py
@@ -1,7 +1,6 @@
"""
Unit tests for optimizers.
"""
-import cvxpy as cp
import numpy as np
import pytest
from numpy.linalg import norm
@@ -31,9 +30,6 @@
from pysindy.utils.odes import enzyme
from pysindy.utils.odes import lorenz

-# ignore cvxpy warnings
-cp.error.disable_warnings()
-
# For reproducibility
np.random.seed(100)

@@ -153,12 +149,12 @@ def test_alternate_parameters(data_derivative_1d, kwargs):
@pytest.mark.parametrize(
    "optimizer",
    [
-        STLSQ(),
-        SSR(),
-        FROLS(),
-        SR3(),
-        ConstrainedSR3(),
-        TrappingSR3(),
+        STLSQ,
+        SSR,
+        FROLS,
+        SR3,
+        ConstrainedSR3,
+        TrappingSR3,
    ],
)
def test_sample_weight_optimizers(data_lorenz, optimizer):

@@ -168,7 +164,7 @@ def test_sample_weight_optimizers(data_lorenz, optimizer):

    sample_weight = np.ones(x[:, 0].shape)
    sample_weight[::2] = 0
-    model = optimizer
+    model = optimizer()
    model.fit(x, x_dot)
    model.fit(x, x_dot, sample_weight=sample_weight)
    model.fit(x, x_dot, sample_weight=sample_weight)
@@ -421,6 +417,7 @@ def test_constrained_sr3_quadratic_library(params):
    model = SINDy(optimizer=opt, feature_library=sindy_library)
    model.fit(x)
    check_is_fitted(model)
+    assert np.allclose((model.coefficients().flatten())[:p], 0.0)

@pytest.mark.parametrize(
@@ -436,7 +433,13 @@
    [
        dict(thresholder="l1", threshold=0),
        dict(thresholder="l1", threshold=1e-5),
-        dict(thresholder="weighted_l1", thresholds=np.zeros((3, 9))),
+        dict(
+            thresholder="weighted_l1",
+            thresholds=np.zeros((3, 9)),
+            eta=1e5,
+            alpha_m=1e4,
+            alpha_A=1e5,
+        ),
        dict(thresholder="weighted_l1", thresholds=1e-5 * np.ones((3, 9))),
        dict(thresholder="l2", threshold=0),
        dict(thresholder="l2", threshold=1e-5),
@@ -447,6 +450,7 @@ def test_trapping_sr3_quadratic_library(
def test_trapping_sr3_quadratic_library(
    params, trapping_sr3_params, data_quadratic_library
):
+    np.random.seed(100)
    PL_shape = (3, 3, 3, 9)
    PQ_shape = (3, 3, 3, 3, 9)
    PL = np.ones(PL_shape)
@@ -480,6 +484,10 @@ def test_trapping_sr3_quadratic_library(
    assert opt.PL.shape == (3, 3, 3, 9)
    assert opt.PQ.shape == (3, 3, 3, 3, 9)
    check_is_fitted(model)
+    # check if the solve was infeasible first
+    if not np.allclose(opt.m_history_[-1], opt.m_history_[0]):
+        zero_inds = [0, 1, 3, 6, 9, 12, 15, 18, 21, 24]
+        assert np.allclose((model.coefficients().flatten())[zero_inds], 0.0, atol=1e-5)


@pytest.mark.parametrize(
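
The parametrization change above passes optimizer classes rather than shared instances, instantiating inside the test body; a fresh instance per case keeps fitted state (`coef_`, history attributes) from leaking between parametrized runs. A sketch of the pattern, with a hypothetical test name and toy data:

```python
import numpy as np
import pytest

from pysindy.optimizers import SR3
from pysindy.optimizers import STLSQ


@pytest.mark.parametrize("optimizer_cls", [STLSQ, SR3])
def test_fresh_optimizer_per_case(optimizer_cls):
    rng = np.random.default_rng(100)
    x = rng.standard_normal((100, 3))
    x_dot = x @ rng.standard_normal((3, 3))  # exactly linear toy system
    opt = optimizer_cls()  # fresh instance for each parametrized case
    opt.fit(x, x_dot)
    assert opt.coef_.shape == (3, 3)
```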
