Commit

Merge branch 'PDEFIND_and_IntegralSINDy' of https://github.com/dynamicslab/pysindy into PDEFIND_and_IntegralSINDy
briandesilva committed Nov 20, 2021
2 parents e6a3f15 + 2e1a61f commit 83f1932
Showing 8 changed files with 72 additions and 80 deletions.
Binary file added docs/JOSS2/Fig3.png
4 changes: 3 additions & 1 deletion docs/JOSS2/paper.md
@@ -91,7 +91,7 @@ In addition, we have extended `PySINDy` to handle more complex modeling scenario

`PySINDy` includes extensive Jupyter notebook tutorials that demonstrate the usage of various features of the package and reproduce nearly the entirety of the examples from the original SINDy paper [@brunton2016pnas], trapping SINDy paper [@kaptanoglu2021promoting], and the PDE-FIND paper [@Rudy2017sciadv].
We include an extended example for the quasiperiodic shear-driven cavity flow [@callaham2021role].
- As a simple illustration of the new functionality, we demonstrate how SINDy can be used to identify the Kuramoto-Sivashinsky (KS) PDE from data. We train the model on the first 60\% of the data from Rudy et al. [@Rudy2017sciadv], which in total contains 1024 spatial grid points and 251 time steps. The KS model is identified correctly and the prediction for $\dot{\mathbf{q}}$ on the remaining testing data indicates strong performance in \autoref{fig:pde_id}.
+ As a simple illustration of the new functionality, we demonstrate how SINDy can be used to identify the Kuramoto-Sivashinsky (KS) PDE from data. We train the model on the first 60\% of the data from Rudy et al. [@Rudy2017sciadv], which in total contains 1024 spatial grid points and 251 time steps. The KS model is identified correctly and the prediction for $\dot{\mathbf{q}}$ on the remaining testing data indicates strong performance in \autoref{fig:pde_id}. Lastly, we provide a useful flow chart in \autoref{fig:flow_chart} so that users can make informed choices about which advanced methods are suitable for their datasets.

# Conclusion
The goal of the `PySINDy` package is to enable anyone with access to measurement data to engage in scientific model discovery. The package is designed to be accessible to inexperienced users, adhere to `scikit-learn` standards, include most of the existing SINDy variations in the literature, and provide a large variety of functionality for more advanced users. We hope that researchers will use and contribute to the code in the future, pushing the boundaries of what is possible in system identification.
@@ -105,4 +105,6 @@ SLB, AAK, KK, and UF acknowledge support from the Army Research Office (ARO W91

![`PySINDy` can now be used for PDE identification; we illustrate this new capability by accurately capturing a set of testing data from the Kuramoto-Sivashinsky system, described by $q_t = -qq_x - q_{xx} - q_{xxxx}$. The identified model is $q_t = -0.98qq_x -0.99q_{xx} - 1.0q_{xxxx}$.\label{fig:pde_id}](Fig2.png)

+ ![This flow chart summarizes how `PySINDy` users can start with a dataset and systematically choose the proper candidate library and sparse regression optimizer that are tailored for a specific scientific task. The `GeneralizedLibrary` class allows for tensoring, concatenating, and otherwise combining many different candidate libraries.\label{fig:flow_chart}](Fig3.png)

# References
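
The paper hunk above describes identifying the KS equation from the Rudy et al. data. Below is a minimal sketch of that PDE-identification workflow; since the KS dataset is not bundled with this page, it uses the 1D heat equation as stand-in data, and the `PDELibrary` keyword names follow the example notebooks from this era of the package, so they may differ in other versions.

```python
import numpy as np
import pysindy as ps

# Simulate the 1D heat equation u_t = u_xx on a periodic domain with an
# explicit finite-difference scheme (stand-in data for the KS example).
nx, nt = 128, 400
x = np.linspace(0, 2 * np.pi, nx, endpoint=False)
dx = x[1] - x[0]
dt = 0.2 * dx**2  # stable explicit time step
u = np.zeros((nx, nt))
u[:, 0] = np.sin(x)
for n in range(nt - 1):
    u_xx = (np.roll(u[:, n], -1) - 2 * u[:, n] + np.roll(u[:, n], 1)) / dx**2
    u[:, n + 1] = u[:, n] + dt * u_xx

# Candidate terms: u, u^2, and spatial derivatives up to second order.
pde_lib = ps.PDELibrary(
    library_functions=[lambda u: u, lambda u: u * u],
    function_names=[lambda u: u, lambda u: u + u],
    derivative_order=2,
    spatial_grid=x,
    is_uniform=True,
)
model = ps.SINDy(feature_library=pde_lib, optimizer=ps.STLSQ(threshold=0.1))
model.fit(u[:, :, np.newaxis], t=dt)  # shape (n_space, n_time, n_features)
model.print()  # should ideally recover u_t ≈ 1.0 u_xx
```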
78 changes: 40 additions & 38 deletions examples/1_feature_overview.ipynb

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions pysindy/optimizers/constrained_sr3.py
@@ -222,11 +222,11 @@ def _update_coef_cvxpy(self, x, y, coef_sparse):
        if self.thresholder.lower() == "l1":
            cost = cost + self.threshold * cp.norm1(xi)
        elif self.thresholder.lower() == "weighted_l1":
-            cost = cost + cp.norm1(np.ravel(self.thresholds) * xi)
+            cost = cost + cp.norm1(np.ravel(self.thresholds) @ xi)
        elif self.thresholder.lower() == "l2":
            cost = cost + self.threshold * cp.norm2(xi)
        elif self.thresholder.lower() == "weighted_l2":
-            cost = cost + cp.norm2(np.ravel(self.thresholds) * xi)
+            cost = cost + cp.norm2(np.ravel(self.thresholds) @ xi)
        if self.use_constraints:
            if self.inequality_constraints:
                prob = cp.Problem(
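
A note on the `*` → `@` swap in the weighted penalty terms above: with a NumPy array on the left-hand side, cvxpy historically parsed `*` as matrix multiplication and has deprecated that spelling, so `@` builds the same inner-product expression with the unambiguous operator. A self-contained sketch under that assumption, with illustrative sizes not taken from pysindy:

```python
import cvxpy as cp
import numpy as np

thresholds = np.full(6, 0.1)  # illustrative per-coefficient weights
xi = cp.Variable(6)

# `thresholds @ xi` is a scalar affine (inner-product) expression; older
# code wrote `thresholds * xi`, which cvxpy also treated as matmul but
# now warns about or rejects.
penalty = cp.norm1(thresholds @ xi)
problem = cp.Problem(cp.Minimize(cp.sum_squares(xi - np.ones(6)) + penalty))
problem.solve()
print(np.round(xi.value, 3))
```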
14 changes: 5 additions & 9 deletions pysindy/optimizers/trapping_sr3.py
@@ -7,8 +7,6 @@
from scipy.linalg import cho_solve
from sklearn.exceptions import ConvergenceWarning

-from ..utils import get_prox
-from ..utils import get_regularization
from ..utils import reorder_constraints
from .sr3 import SR3

@@ -273,8 +271,6 @@ def __init__(
        self.tol = tol
        self.tol_m = tol_m
        self.accel = accel
-        self.reg = get_regularization(thresholder)
-        self.prox = get_prox(thresholder)
        self.A_history_ = []
        self.m_history_ = []
        self.PW_history_ = []
@@ -396,11 +392,11 @@ def _solve_sparse_relax_and_split(self, r, N, x_expanded, y, Pmatrix, A, coef_prev):
        if self.thresholder.lower() == "l1":
            cost = cost + self.threshold * cp.norm1(xi)
        elif self.thresholder.lower() == "weighted_l1":
-            cost = cost + cp.norm1(np.ravel(self.thresholds) * xi)
+            cost = cost + cp.norm1(np.ravel(self.thresholds) @ xi)
        elif self.thresholder.lower() == "l2":
            cost = cost + self.threshold * cp.norm2(xi) ** 2
        elif self.thresholder.lower() == "weighted_l2":
-            cost = cost + cp.norm2(np.ravel(self.thresholds) * xi) ** 2
+            cost = cost + cp.norm2(np.ravel(self.thresholds) @ xi) ** 2
        cost = cost + cp.sum_squares(Pmatrix @ xi - A.flatten()) / self.eta
        if self.use_constraints:
            if self.inequality_constraints:
@@ -483,12 +479,12 @@ def _solve_direct_cvxpy(self, r, N, x_expanded, y, Pmatrix, coef_prev):
        cost = cp.sum_squares(x_expanded @ xi - y.flatten())
        if self.thresholder.lower() == "l1":
            cost = cost + self.threshold * cp.norm1(xi)
-        elif self.thresholder.lower() == "weighted_l1":
-            cost = cost + cp.norm1(np.ravel(self.thresholds) * xi)
+        elif self.thresholder.lower() == "weighted_l1" and np.any(self.thresholds != 0):
+            cost = cost + cp.norm1(np.ravel(self.thresholds) @ xi)
        elif self.thresholder.lower() == "l2":
            cost = cost + self.threshold * cp.norm2(xi) ** 2
        elif self.thresholder.lower() == "weighted_l2":
-            cost = cost + cp.norm2(np.ravel(self.thresholds) * xi) ** 2
+            cost = cost + cp.norm2(np.ravel(self.thresholds) @ xi) ** 2
        cost = cost + cp.lambda_max(cp.reshape(Pmatrix @ xi, (r, r))) / self.eta
        if self.use_constraints:
            if self.inequality_constraints:
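
The extra `np.any(self.thresholds != 0)` condition added in `_solve_direct_cvxpy` skips building a penalty term that would be identically zero. A trivial sketch of the pattern:

```python
import numpy as np

thresholds = np.zeros((3, 9))

# With all-zero weights the weighted_l1 term contributes nothing, so the
# guard falls through to the unpenalized least-squares cost.
if np.any(thresholds != 0):
    print("adding weighted_l1 penalty to the cost")
else:
    print("all thresholds are zero; no penalty term added")
```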
18 changes: 1 addition & 17 deletions pysindy/pysindy.py
@@ -52,20 +52,7 @@ class SINDy(BaseEstimator):
        Feature library object used to specify candidate right-hand side features.
        This must be a class extending
        :class:`pysindy.feature_library.base.BaseFeatureLibrary`.
-        The default option is :class:`PolynomialLibrary`. If
-        inputs_per_library parameter is also passed, the library will use only
-        the subset of the inputs specified in this parameter. If
-        inputs_per_library is more than 1D, then feature_library must be a list
-        of feature library objects with the same dimension as
-        inputs_per_library.
-    inputs_per_library : np.ndarray, shape equal to
-        (number of feature libraries, number of variable inputs)
-        Can be used to specify a subset of the variables to use to generate
-        a feature library. If number of feature libraries > 1, then can be
-        use to generate a large number of libraries, each using their own
-        subsets of the input variables. This requires a feature_library to be
-        a GeneralizedLibrary object.
+        The default option is :class:`PolynomialLibrary`.
    differentiation_method : differentiation object, optional
        Method for differentiating the data. This must be a class extending
@@ -170,15 +157,12 @@ def __init__(
        feature_names=None,
        t_default=1,
        discrete_time=False,
-        inputs_per_library=None,
    ):
        if optimizer is None:
            optimizer = STLSQ()
        self.optimizer = optimizer
        if feature_library is None:
            feature_library = PolynomialLibrary()
-        if inputs_per_library is None:
-            self.inputs_per_library = inputs_per_library
        self.feature_library = feature_library
        if differentiation_method is None:
            differentiation_method = FiniteDifference()
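
The removed `inputs_per_library` argument on `SINDy` is superseded by library-side composition via the `GeneralizedLibrary` class mentioned in the removed docstring and in the paper. A minimal sketch, assuming the constructor accepts a plain list of libraries as in the PR's feature overview notebook (keyword names may differ across versions):

```python
import numpy as np
import pysindy as ps

# Combine two candidate libraries instead of passing inputs_per_library
# to SINDy itself.
lib = ps.GeneralizedLibrary([ps.PolynomialLibrary(degree=2), ps.FourierLibrary()])

t = np.linspace(0, 10, 1000)
x = np.stack([np.sin(t), np.cos(t)], axis=-1)  # toy 2D trajectory

model = ps.SINDy(feature_library=lib)
model.fit(x, t=t)
model.print()
```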
2 changes: 1 addition & 1 deletion pysindy/utils/base.py
@@ -231,7 +231,7 @@ def prox_l1(x, threshold):

def prox_weighted_l1(x, thresholds):
    """Proximal operator for weighted l1 regularization."""
-    return np.sign(x) * np.maximum(np.abs(x) - thresholds, np.ones(x.shape))
+    return np.sign(x) * np.maximum(np.abs(x) - thresholds, np.zeros(x.shape))


def prox_l2(x, threshold):
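
The one-line fix above restores standard elementwise soft thresholding: with the old `np.ones` floor, the operator could never shrink any entry below magnitude one (so it never produced sparsity), whereas with `np.zeros` it is the proximal map of the weighted l1 norm, `prox(x)_i = sign(x_i) * max(|x_i| - t_i, 0)`. A quick numeric check of the corrected operator:

```python
import numpy as np

def prox_weighted_l1(x, thresholds):
    """Proximal operator for weighted l1: sign(x) * max(|x| - t, 0)."""
    return np.sign(x) * np.maximum(np.abs(x) - thresholds, np.zeros(x.shape))

x = np.array([0.05, -0.5, 2.0])
t = np.array([0.1, 0.1, 0.1])

# Entries below their threshold are zeroed; the rest shrink toward zero.
print(prox_weighted_l1(x, t))  # [ 0.  -0.4  1.9]
```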
32 changes: 20 additions & 12 deletions test/optimizers/test_optimizers.py
@@ -1,7 +1,6 @@
"""
Unit tests for optimizers.
"""
-import cvxpy as cp
import numpy as np
import pytest
from numpy.linalg import norm
@@ -31,9 +30,6 @@
from pysindy.utils.odes import enzyme
from pysindy.utils.odes import lorenz

-# ignore cvxpy warnings
-cp.error.disable_warnings()
-
# For reproducibility
np.random.seed(100)

@@ -153,12 +149,12 @@ def test_alternate_parameters(data_derivative_1d, kwargs):
@pytest.mark.parametrize(
    "optimizer",
    [
-        STLSQ(),
-        SSR(),
-        FROLS(),
-        SR3(),
-        ConstrainedSR3(),
-        TrappingSR3(),
+        STLSQ,
+        SSR,
+        FROLS,
+        SR3,
+        ConstrainedSR3,
+        TrappingSR3,
    ],
)
def test_sample_weight_optimizers(data_lorenz, optimizer):

@@ -168,7 +164,7 @@ def test_sample_weight_optimizers(data_lorenz, optimizer):

    sample_weight = np.ones(x[:, 0].shape)
    sample_weight[::2] = 0
-    model = optimizer
+    model = optimizer()
    model.fit(x, x_dot)
    model.fit(x, x_dot, sample_weight=sample_weight)
    model.fit(x, x_dot, sample_weight=sample_weight)
@@ -421,6 +417,7 @@ def test_constrained_sr3_quadratic_library(params):
    model = SINDy(optimizer=opt, feature_library=sindy_library)
    model.fit(x)
    check_is_fitted(model)
+    assert np.allclose((model.coefficients().flatten())[:p], 0.0)

@pytest.mark.parametrize(
@@ -436,7 +433,13 @@
    [
        dict(thresholder="l1", threshold=0),
        dict(thresholder="l1", threshold=1e-5),
-        dict(thresholder="weighted_l1", thresholds=np.zeros((3, 9))),
+        dict(
+            thresholder="weighted_l1",
+            thresholds=np.zeros((3, 9)),
+            eta=1e5,
+            alpha_m=1e4,
+            alpha_A=1e5,
+        ),
        dict(thresholder="weighted_l1", thresholds=1e-5 * np.ones((3, 9))),
        dict(thresholder="l2", threshold=0),
        dict(thresholder="l2", threshold=1e-5),
@@ -447,6 +450,7 @@ def test_trapping_sr3_quadratic_library(
def test_trapping_sr3_quadratic_library(
    params, trapping_sr3_params, data_quadratic_library
):
+    np.random.seed(100)
    PL_shape = (3, 3, 3, 9)
    PQ_shape = (3, 3, 3, 3, 9)
    PL = np.ones(PL_shape)
@@ -480,6 +484,10 @@ def test_trapping_sr3_quadratic_library(
    assert opt.PL.shape == (3, 3, 3, 9)
    assert opt.PQ.shape == (3, 3, 3, 3, 9)
    check_is_fitted(model)
+    # check if the solve was infeasible first
+    if not np.allclose(opt.m_history_[-1], opt.m_history_[0]):
+        zero_inds = [0, 1, 3, 6, 9, 12, 15, 18, 21, 24]
+        assert np.allclose((model.coefficients().flatten())[zero_inds], 0.0, atol=1e-5)


@pytest.mark.parametrize(
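
The parametrization change above passes optimizer classes rather than shared instances, instantiating inside the test body; a fresh instance per case keeps fitted state (`coef_`, history attributes) from leaking between parametrized runs. A sketch of the pattern, with a hypothetical test name and toy data:

```python
import numpy as np
import pytest

from pysindy.optimizers import SR3
from pysindy.optimizers import STLSQ


@pytest.mark.parametrize("optimizer_cls", [STLSQ, SR3])
def test_fresh_optimizer_per_case(optimizer_cls):
    rng = np.random.default_rng(100)
    x = rng.standard_normal((100, 3))
    x_dot = x @ rng.standard_normal((3, 3))  # exactly linear toy system
    opt = optimizer_cls()  # fresh instance for each parametrized case
    opt.fit(x, x_dot)
    assert opt.coef_.shape == (3, 3)
```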
