From 4742f48f92d2cc438df7d18f5621cc423b182084 Mon Sep 17 00:00:00 2001 From: bbayukari <40588568+bbayukari@users.noreply.github.com> Date: Thu, 18 Apr 2024 23:25:29 +0800 Subject: [PATCH] feat: change api of ic and cv (#85) --- docs/source/feature/DataScienceTool.rst | 64 +++++------- pytest/create_test_model.py | 15 ++- pytest/test_args.py | 24 +++-- pytest/test_except.py | 35 ++++--- skscope/__init__.py | 2 +- skscope/base_solver.py | 85 ++++----------- skscope/solver.py | 133 ++++++++++-------------- skscope/utilities.py | 73 +++++++++++++ src/Metric.h | 94 +++++++++-------- src/pywrap.cpp | 14 +-- 10 files changed, 275 insertions(+), 264 deletions(-) diff --git a/docs/source/feature/DataScienceTool.rst b/docs/source/feature/DataScienceTool.rst index f616413..20eea0f 100644 --- a/docs/source/feature/DataScienceTool.rst +++ b/docs/source/feature/DataScienceTool.rst @@ -71,16 +71,6 @@ In other places, we presume the sparsity level would be appropriate set. However Note that when using a list for ``sparsity``, the ``sample_size`` parameter must also be provided to the solver in ``skscope``. - -.. code-block:: python - - solver = ScopeSolver( - dimensionality=p, ## there are p parameters - sparsity=[1, 2, 3, 4, 5], ## the candidate support sizes - sample_size=n, ## the number of samples - ) - - There are two ways to evaluate sparsity levels: `Information Criterion`_ and `Cross Validation`_. @@ -90,19 +80,42 @@ Information Criterion Information criterion is a statistical measure used to assess the goodness of fit of a model while penalizing model complexity. It helps in selecting the optimal model from a set of competing models. In the context of sparsity-constrained optimization, information criterion can be used to evaluate different sparsity levels and identify the most suitable support size. .. There is another way to evaluate sparsity levels, which is information criterion. The larger the information criterion, the better the model. -There are four types of information criterion can be used in ``skscope``: Akaike information criterion `[1]`_, Bayesian information criterion (BIC, `[2]`_), extend BIC `[3]`_, and special information criterion (SIC `[4]`_). +There are four types of information criterion can be implemented in ``skscope.utilities``: Akaike information criterion `[1]`_, Bayesian information criterion (BIC, `[2]`_), extend BIC `[3]`_, and special information criterion (SIC `[4]`_). .. If sparsity is list and ``cv=None``, the solver will use information criterions to evaluate the sparsity level. -The input parameter ``ic`` in the solvers of skscope can be used to choose the information criterion. The default value is ``ic='sic'``. Here is an example using SIC to find the optimal support size. +The input parameter ``ic_method`` in the solvers of skscope can be used to choose the information criterion. It should be a method to compute information criterion which has the same parameters with this example: .. code-block:: python + def SIC( + objective_value: float, + dimensionality: int, + effective_params_num: int, + train_size: int, + ): + return 2 * objective_value + effective_params_num * np.log(np.log(train_size)) * np.log(dimensionality) + + +Here is an example using SIC to find the optimal support size. + +.. code-block:: python + import jax.numpy as jnp + import numpy as np + from skscope.utilities import LinearSIC + + n, p = 100, 10 solver = ScopeSolver( dimensionality=p, sparsity=[1, 2, 3, 4, 5] ## we want to select 1-5 variables sample_size=n, ## the number of samples - ic='sic', ## use SIC to evaluate sparsity levels + ic_method=LinearSIC, ## use SIC to evaluate sparsity levels ) + solver.solve( + lambda params: jnp.sum(jnp.square(np.random.randn(n, p) @ params - np.random.randn(n))), + jit = True, + ) + print(solver.get_result()) +Please note that the effectiveness of information criterion heavily depends on the implementation of the objective function. Before usage, carefully check whether the objective function and the information criterion implementations match. Cross Validation ^^^^^^^^^^^^^^^^^^^^ @@ -149,31 +162,6 @@ To utilizing cross validation `[5]`_, there are some requirements: params = solver.solve(custom_objective, data = (X, y)) -There is a simpler way to use cross validation: let objective function comprise a additional parameter ``index``. In this case, we do not need to set ``split_method``. Below is the illustrative example for this usage. - -.. code-block:: python - - import jax.numpy as jnp - from sklearn.datasets import make_regression - - ## generate data - n, p, k= 10, 5, 3 - X, y, true_params = make_regression(n_samples=n, n_features=p, n_informative=k, coef=True) - - def custom_objective(params, index): - return jnp.sum( - jnp.square(y[index] - X[index,:] @ params) - ) - - solver = ScopeSolver( - dimensionality=p, ## there are p parameters - sparsity=[1, 2, 3, 4, 5] ## we want to select 1-5 variables - sample_size=n, ## the number of samples - cv=10, ## 10-folds use cross validation - ) - - params = solver.solve(custom_objective) - Reference ------------------------------ diff --git a/pytest/create_test_model.py b/pytest/create_test_model.py index ff9bb43..7680a5f 100644 --- a/pytest/create_test_model.py +++ b/pytest/create_test_model.py @@ -22,24 +22,23 @@ def create_linear_model(self): ) def linear_model(params): - return jnp.sum(jnp.square(Y - jnp.matmul(X, params))) / 2 + return jnp.sum(jnp.square(Y - jnp.matmul(X, params))) def linear_model_numpy(params): - return np.sum(np.square(Y - np.matmul(X, params))) / 2 + return np.sum(np.square(Y - np.matmul(X, params))) def grad_linear_model(params): - return -np.matmul(X.T, (Y - np.matmul(X, params))) + return -np.matmul(X.T, (Y - np.matmul(X, params))) * 2 def hess_linear_model(params): - return np.matmul(X.T, X) + return np.matmul(X.T, X) * 2 X_jnp = jnp.array(X) Y_jnp = jnp.array(Y) - def linear_model_data(params, data_indices): - return jnp.sum( - jnp.square(Y_jnp[data_indices] - X_jnp[data_indices,] @ params) - ) + def linear_model_data(params, data): + x, y = data + return jnp.sum(jnp.square(y - x @ params)) return { "n_samples": self.N, diff --git a/pytest/test_args.py b/pytest/test_args.py index 45cf03f..760cbaf 100644 --- a/pytest/test_args.py +++ b/pytest/test_args.py @@ -8,7 +8,14 @@ import pytest from create_test_model import CreateTestModel -from skscope import ScopeSolver, BaseSolver, HTPSolver, GraspSolver, IHTSolver +from skscope import ( + utilities, + ScopeSolver, + BaseSolver, + HTPSolver, + GraspSolver, + IHTSolver, +) import skscope._scope as _scope model_creator = CreateTestModel() @@ -53,12 +60,12 @@ def test_init_params(model, solver_creator): @pytest.mark.parametrize("model", models, ids=models_ids) @pytest.mark.parametrize("solver_creator", solvers, ids=solvers_ids) -@pytest.mark.parametrize("ic_type", ["aic", "bic", "sic", "ebic"]) -def test_ic(model, solver_creator, ic_type): +def test_ic(model, solver_creator): solver = solver_creator( model["n_features"], + [0, model["n_informative"]], sample_size=model["n_samples"], - ic_type=ic_type, + ic_method=utilities.LinearSIC, ) solver.solve(model["loss"], jit=True) @@ -73,8 +80,9 @@ def test_cv_random_split(model, solver_creator): [0, model["n_informative"]], model["n_samples"], cv=2, + split_method=lambda data, indeices: (data[0][indeices], data[1][indeices]), ) - solver.solve(model["loss_data"], jit=True) + solver.solve(model["loss_data"], data=model["data"], jit=True) assert set(model["support_set"]) == set(solver.support_set) @@ -92,8 +100,9 @@ def test_cv_given_split(model, solver_creator): model["n_samples"], cv=n_fold, cv_fold_id=cv_fold_id, + split_method=lambda data, indeices: (data[0][indeices], data[1][indeices]), ) - solver.solve(model["loss_data"], jit=True) + solver.solve(model["loss_data"], data=model["data"], jit=True) assert set(model["support_set"]) == set(solver.support_set) @@ -190,6 +199,7 @@ def test_scope_args(): path_type="gs", sample_size=linear["n_samples"], cv=2, + split_method=lambda data, indeices: (data[0][indeices], data[1][indeices]), file_log_level="error", ) - solver.solve(linear["loss_data"]) + solver.solve(linear["loss_data"], data=linear["data"]) diff --git a/pytest/test_except.py b/pytest/test_except.py index b7396c9..d9b59d6 100644 --- a/pytest/test_except.py +++ b/pytest/test_except.py @@ -9,7 +9,7 @@ import re from create_test_model import CreateTestModel -from skscope import ScopeSolver, BaseSolver +from skscope import utilities, ScopeSolver, BaseSolver import skscope._scope as _scope model_creator = CreateTestModel() @@ -75,13 +75,18 @@ def test_sparsity(model, solver_creator): @pytest.mark.parametrize("model", models, ids=models_ids) @pytest.mark.parametrize("solver_creator", solvers, ids=solvers_ids) def test_ic(model, solver_creator): - solver = solver_creator( - model["n_features"], model["n_informative"], model["n_samples"] - ) - solver.set_config(ic_type="ic") + solver = solver_creator(model["n_features"]) + with pytest.raises( + ValueError, + match=re.escape( + "ic_method should be provided for choosing sparsity level with information criterion." + ), + ): + solver.solve(model["loss"], jit=True) + solver.set_config(ic_method=utilities.LinearSIC) with pytest.raises( ValueError, - match=re.escape("ic_type should be one of ['aic', 'bic', 'sic','ebic']."), + match=re.escape("sample_size should be given when using ic_method."), ): solver.solve(model["loss"], jit=True) @@ -89,30 +94,30 @@ def test_ic(model, solver_creator): @pytest.mark.parametrize("model", models, ids=models_ids) @pytest.mark.parametrize("solver_creator", solvers, ids=solvers_ids) def test_cv(model, solver_creator): - solver = solver_creator( - model["n_features"], model["n_informative"], model["n_samples"] - ) - solver.set_config(cv=1 + model["n_samples"]) + solver = solver_creator(model["n_features"], cv=2) with pytest.raises( ValueError, match=re.escape("cv should not be greater than sample_size.") ): solver.solve(model["loss"], jit=True) - solver.set_config(cv=model["n_samples"]) + solver.set_config(sample_size=model["n_samples"]) with pytest.raises( ValueError, match=re.escape("split_method should be provided when cv > 1.") ): - solver.solve(model["loss"], data=(), jit=True) + solver.solve(model["loss"], jit=True) + solver.set_config( + split_method=lambda data, indeices: (data[0][indeices], data[1][indeices]) + ) solver.set_config(cv_fold_id=np.zeros((1, model["n_samples"]))) with pytest.raises( ValueError, match=re.escape("cv_fold_id should be an 1D array of integers.") ): - solver.solve(model["loss"], jit=True) + solver.solve(model["loss_data"], data=model["data"], jit=True) solver.set_config(cv_fold_id=np.zeros(1 + model["n_samples"])) with pytest.raises( ValueError, match=re.escape("The length of cv_fold_id should be equal to sample_size."), ): - solver.solve(model["loss"], jit=True) + solver.solve(model["loss_data"], data=model["data"], jit=True) solver.set_config(cv_fold_id=np.zeros(model["n_samples"])) with pytest.raises( ValueError, @@ -120,7 +125,7 @@ def test_cv(model, solver_creator): "The number of different elements in cv_fold_id should be equal to cv." ), ): - solver.solve(model["loss"], jit=True) + solver.solve(model["loss_data"], data=model["data"], jit=True) @pytest.mark.parametrize("model", models, ids=models_ids) diff --git a/skscope/__init__.py b/skscope/__init__.py index 92564e6..5bcda60 100644 --- a/skscope/__init__.py +++ b/skscope/__init__.py @@ -4,7 +4,7 @@ # Copyright (C) 2023 abess-team # Licensed under the MIT License. -__version__ = "0.1.6" +__version__ = "0.1.7" __author__ = "Zezhi Wang, Jin Zhu," "Peng Chen," "Junxian Zhu, Xueqin Wang" diff --git a/skscope/base_solver.py b/skscope/base_solver.py index e6676d5..c25d23d 100644 --- a/skscope/base_solver.py +++ b/skscope/base_solver.py @@ -10,7 +10,7 @@ import jax from .numeric_solver import convex_solver_nlopt import math -from . import layer +from . import utilities class BaseSolver(BaseEstimator): @@ -41,9 +41,11 @@ class BaseSolver(BaseEstimator): Here are wrong examples: ``[0,2,1,2]`` (not incremental), ``[1,2,3,3]`` (not start from 0), ``[0,2,2,3]`` (there is a gap). It's worth mentioning that the concept "a variable" means "a group of variables" in fact. For example,``sparsity=[3]`` means there will be 3 groups of variables selected rather than 3 variables, and ``always_include=[0,3]`` means the 0-th and 3-th groups must be selected. - ic_type : {'aic', 'bic', 'sic', 'ebic'}, default='aic' - The type of information criterion for choosing the sparsity level. + ic_method : callable, optional + A function to calculate the information criterion for choosing the sparsity level. + ``ic(loss, p, s, n) -> ic_value``, where ``loss`` is the value of the objective function, ``p`` is the dimensionality, ``s`` is the sparsity level and ``n`` is the sample size. Used only when ``sparsity`` is array and ``cv`` is 1. + Note that ``sample_size`` must be given when using ``ic_method``. cv : int, default=1 The folds number when use the cross-validation method. - If ``cv`` = 1, the sparsity level will be chosen by the information criterion. @@ -55,10 +57,7 @@ class BaseSolver(BaseEstimator): An array indicates different folds in CV, which samples in the same fold should be given the same number. The number of different elements should be equal to ``cv``. Used only when `cv` > 1. - metric_method : callable, optional - A function to calculate the information criterion. - ``metric(loss, p, s, n) -> ic_value``, where ``loss`` is the value of the objective function, ``p`` is the dimensionality, ``s`` is the sparsity level and ``n`` is the sample size. - random_state : int, optional + random_state : int, optional The random seed used for cross-validation. Attributes @@ -82,8 +81,7 @@ def __init__( numeric_solver=convex_solver_nlopt, max_iter=100, group=None, - ic_type="aic", - metric_method=None, + ic_method=None, cv=1, cv_fold_id=None, split_method=None, @@ -95,9 +93,7 @@ def __init__( self.preselect = preselect self.max_iter = max_iter self.group = group - self.ic_type = ic_type - self.ic_coef = 1.0 - self.metric_method = metric_method + self.ic_method = ic_method self.cv = cv self.cv_fold_id = cv_fold_id self.split_method = split_method @@ -243,16 +239,17 @@ def solve( BaseSolver._check_positive_integer(self.cv, "cv") if self.cv == 1: - if self.ic_type not in ["aic", "bic", "sic", "ebic"]: + if self.sparsity.size == 1 and self.ic_method is None: + self.ic_method = utilities.AIC + elif self.sparsity.size > 1 and self.ic_method is None: raise ValueError( - "ic_type should be one of ['aic', 'bic', 'sic','ebic']." + "ic_method should be provided for choosing sparsity level with information criterion." ) - else: + elif self.sample_size <= 1: + raise ValueError("sample_size should be given when using ic_method.") + elif self.cv > 1: if self.cv > self.sample_size: raise ValueError("cv should not be greater than sample_size.") - if data is None and self.split_method is None: - data = np.arange(self.sample_size) - self.split_method = lambda data, index: index if self.split_method is None: raise ValueError("split_method should be provided when cv > 1.") if self.cv_fold_id is None: @@ -357,9 +354,9 @@ def value_and_grad(params, data): group, ) # warm start: use results of previous sparsity as initial value objective_value = loss_fn(init_params, data) - eval = self._metric( + eval = self.ic_method( objective_value, - self.ic_type, + self.dimensionality, s, self.sample_size, ) @@ -466,51 +463,6 @@ def grad_(params, data): return loss_, grad_ - def _metric( - self, - objective_value: float, - method: str, - effective_params_num: int, - train_size: int, - ) -> float: - """ - aic: 2L + 2s - bic: 2L + s * log(n) - sic: 2L + s * log(log(n)) * log(p) - ebic: 2L + s * (log(n) + 2 * log(p)) - """ - if self.metric_method is not None: - return self.metric_method( - objective_value, - self.dimensionality, - effective_params_num, - train_size, - ) - - if method == "aic": - return 2 * objective_value + 2 * effective_params_num - elif method == "bic": - return ( - objective_value - if train_size <= 1.0 - else 2 * objective_value - + self.ic_coef * effective_params_num * np.log(train_size) - ) - elif method == "sic": - return ( - objective_value - if train_size <= 1.0 - else 2 * objective_value - + self.ic_coef - * effective_params_num - * np.log(np.log(train_size)) - * np.log(self.dimensionality) - ) - elif method == "ebic": - return 2 * objective_value + self.ic_coef * effective_params_num * ( - np.log(train_size) + 2 * np.log(self.dimensionality) - ) - def _solve( self, sparsity, @@ -637,11 +589,14 @@ def get_result(self): The support set of the optimal parameters. + ``objective_value`` : float The value of objective function at the optimal parameters. + + ``eval_objective`` : float + The value of information criterion or mean loss of cross-validation. """ return { "params": self.params, "support_set": self.support_set, "objective_value": self.objective_value, + "eval_objective": self.eval_objective, } def get_estimated_params(self): diff --git a/skscope/solver.py b/skscope/solver.py index 914f0c0..85c925f 100644 --- a/skscope/solver.py +++ b/skscope/solver.py @@ -10,7 +10,7 @@ import numpy as np import jax from jax import numpy as jnp -from . import _scope +from . import _scope, utilities from .numeric_solver import convex_solver_nlopt @@ -36,9 +36,11 @@ class ScopeSolver(BaseEstimator): It should have the same interface as ``skscope.convex_solver_nlopt``. max_iter : int, default=20 Maximum number of iterations taken for converging. - ic_type : {'aic', 'bic', 'sic', 'ebic'}, default='sic' - The type of information criterion for choosing the sparsity level. + ic_method : callable, optional + A function to calculate the information criterion for choosing the sparsity level. + ``ic(loss, p, s, n) -> ic_value``, where ``loss`` is the value of the objective function, ``p`` is the dimensionality, ``s`` is the sparsity level and ``n`` is the sample size. Used only when ``sparsity`` is array and ``cv`` is 1. + Note that ``sample_size`` must be given when using ``ic_method``. cv : int, default=1 The folds number when use the cross-validation method. If ``cv`` = 1, the sparsity level will be chosen by the information criterion. If ``cv`` > 1, the sparsity level will be chosen by the cross-validation method. split_method: callable, optional @@ -122,7 +124,7 @@ def __init__( preselect=[], numeric_solver=convex_solver_nlopt, max_iter=20, - ic_type="aic", + ic_method=None, cv=1, split_method=None, cv_fold_id=None, @@ -150,8 +152,7 @@ def __init__( self.preselect = preselect self.numeric_solver = numeric_solver self.max_iter = max_iter - self.ic_type = ic_type - self.ic_coef = 1.0 + self.ic_method = ic_method self.cv = cv self.split_method = split_method self.deleter = None @@ -322,17 +323,6 @@ def solve( # max_exchange_num BaseSolver._check_positive_integer(self.max_exchange_num, "max_exchange_num") - # ic_type - information_criterion_dict = { - "aic": 1, - "bic": 2, - "sic": 3, - "ebic": 4, - } - if self.ic_type not in information_criterion_dict.keys(): - raise ValueError("ic_type should be one of ['aic', 'bic', 'sic','ebic'].") - ic_type = information_criterion_dict[self.ic_type] - # group if self.group is None: group = np.arange(p, dtype="int32") @@ -459,9 +449,7 @@ def solve( if self.cv > n: raise ValueError("cv should not be greater than sample_size.") if self.cv > 1: - if data is None and self.split_method is None: - data = np.arange(n) - self.split_method = lambda data, index: index + ic_method = utilities.AIC if self.ic_method is None else self.ic_method if self.split_method is None: raise ValueError("split_method should be provided when cv > 1.") self.model.set_slice_by_sample(self.split_method) @@ -488,7 +476,16 @@ def solve( ) else: self.cv_fold_id = np.array([], dtype="int32") - + if sparsity.size == 1 and self.ic_method is None: + ic_method = utilities.AIC + elif sparsity.size > 1 and self.ic_method is None: + raise ValueError( + "ic_method should be provided for choosing sparsity level with information criterion." + ) + elif self.sample_size <= 1: + raise ValueError("sample_size should be given when using ic_method.") + else: + ic_method = self.ic_method if gradient is None and len(layers) > 0: if len(layers) == 1: assert layers[0].out_features == self.dimensionality @@ -553,8 +550,7 @@ def solve( self.use_hessian, self.is_dynamic_max_exchange_num, self.warm_start, - ic_type, - self.ic_coef, + ic_method, self.cv, sparsity, np.array([0.0]), @@ -579,9 +575,8 @@ def solve( self.params = layer.transform_params(self.params) self.support_set = np.sort(np.nonzero(self.params)[0]) - self.cv_train_loss = result[1] if self.cv == 1 else 0.0 - self.cv_test_loss = result[2] if self.cv == 1 else 0.0 self.information_criterion = result[3] + self.cross_validation_loss = result[2] if self.cv > 1 else None return self.params @@ -600,20 +595,17 @@ def get_result(self): The support set of the optimal parameters. + ``objective_value`` : float The value of objective function at the optimal parameters. - + ``cv_train_loss`` : float - The average value of objective function on training sets. - + ``cv_test_loss`` : float - The average value of objective function on testing sets. + ``information_criterion`` : float The value of information criterion. + + ``cross_validation_loss`` : float + The mean loss of cross-validation. """ return { "params": self.params, "support_set": self.support_set, "objective_value": self.objective_value, - "cv_train_loss": self.cv_train_loss, - "cv_test_loss": self.cv_test_loss, "information_criterion": self.information_criterion, + "cross_validation_loss": self.cross_validation_loss, } def __set_objective_cpp(self, objective, gradient, hessian): @@ -716,9 +708,11 @@ class HTPSolver(BaseSolver): Here are wrong examples: ``[0,2,1,2]`` (not incremental), ``[1,2,3,3]`` (not start from 0), ``[0,2,2,3]`` (there is a gap). It's worth mentioning that the concept "a variable" means "a group of variables" in fact. For example,``sparsity=[3]`` means there will be 3 groups of variables selected rather than 3 variables, and ``always_include=[0,3]`` means the 0-th and 3-th groups must be selected. - ic_type : {'aic', 'bic', 'sic', 'ebic'}, default='aic' - The type of information criterion for choosing the sparsity level. + ic_method : callable, optional + A function to calculate the information criterion for choosing the sparsity level. + ``ic(loss, p, s, n) -> ic_value``, where ``loss`` is the value of the objective function, ``p`` is the dimensionality, ``s`` is the sparsity level and ``n`` is the sample size. Used only when ``sparsity`` is array and ``cv`` is 1. + Note that ``sample_size`` must be given when using ``ic_method``. cv : int, default=1 The folds number when use the cross-validation method. - If ``cv`` = 1, the sparsity level will be chosen by the information criterion. @@ -730,9 +724,6 @@ class HTPSolver(BaseSolver): An array indicates different folds in CV, which samples in the same fold should be given the same number. The number of different elements should be equal to ``cv``. Used only when `cv` > 1. - metric_method : callable, optional - A function to calculate the information criterion. - `` metric(loss, p, s, n) -> ic_value``, where ``loss`` is the value of the objective function, ``p`` is the dimensionality, ``s`` is the sparsity level and ``n`` is the sample size. random_state : int, optional The random seed used for cross-validation. @@ -762,8 +753,7 @@ def __init__( numeric_solver=convex_solver_nlopt, max_iter=100, group=None, - ic_type="aic", - metric_method=None, + ic_method=None, cv=1, cv_fold_id=None, split_method=None, @@ -777,8 +767,7 @@ def __init__( numeric_solver=numeric_solver, max_iter=max_iter, group=group, - ic_type=ic_type, - metric_method=metric_method, + ic_method=ic_method, cv=cv, cv_fold_id=cv_fold_id, split_method=split_method, @@ -880,9 +869,11 @@ class IHTSolver(HTPSolver): Here are wrong examples: ``[0,2,1,2]`` (not incremental), ``[1,2,3,3]`` (not start from 0), ``[0,2,2,3]`` (there is a gap). It's worth mentioning that the concept "a variable" means "a group of variables" in fact. For example,``sparsity=[3]`` means there will be 3 groups of variables selected rather than 3 variables, and ``always_include=[0,3]`` means the 0-th and 3-th groups must be selected. - ic_type : {'aic', 'bic', 'sic', 'ebic'}, default='aic' - The type of information criterion for choosing the sparsity level. + ic_method : callable, optional + A function to calculate the information criterion for choosing the sparsity level. + ``ic(loss, p, s, n) -> ic_value``, where ``loss`` is the value of the objective function, ``p`` is the dimensionality, ``s`` is the sparsity level and ``n`` is the sample size. Used only when ``sparsity`` is array and ``cv`` is 1. + Note that ``sample_size`` must be given when using ``ic_method``. cv : int, default=1 The folds number when use the cross-validation method. - If ``cv`` = 1, the sparsity level will be chosen by the information criterion. @@ -894,9 +885,6 @@ class IHTSolver(HTPSolver): An array indicates different folds in CV, which samples in the same fold should be given the same number. The number of different elements should be equal to ``cv``. Used only when `cv` > 1. - metric_method : callable, optional - A function to calculate the information criterion. - `` metric(loss, p, s, n) -> ic_value``, where ``loss`` is the value of the objective function, ``p`` is the dimensionality, ``s`` is the sparsity level and ``n`` is the sample size. random_state : int, optional The random seed used for cross-validation. @@ -966,7 +954,6 @@ def _solve( params[support_new] = params_bias[support_new] # final optimization for IHT - params = np.zeros_like(init_params) _, params = self._numeric_solver( loss_fn, value_and_grad, params, support_new, data ) @@ -1002,9 +989,11 @@ class GraspSolver(BaseSolver): Here are wrong examples: ``[0,2,1,2]`` (not incremental), ``[1,2,3,3]`` (not start from 0), ``[0,2,2,3]`` (there is a gap). It's worth mentioning that the concept "a variable" means "a group of variables" in fact. For example,``sparsity=[3]`` means there will be 3 groups of variables selected rather than 3 variables, and ``always_include=[0,3]`` means the 0-th and 3-th groups must be selected. - ic_type : {'aic', 'bic', 'sic', 'ebic'}, default='aic' - The type of information criterion for choosing the sparsity level. + ic_method : callable, optional + A function to calculate the information criterion for choosing the sparsity level. + ``ic(loss, p, s, n) -> ic_value``, where ``loss`` is the value of the objective function, ``p`` is the dimensionality, ``s`` is the sparsity level and ``n`` is the sample size. Used only when ``sparsity`` is array and ``cv`` is 1. + Note that ``sample_size`` must be given when using ``ic_method``. cv : int, default=1 The folds number when use the cross-validation method. - If ``cv`` = 1, the sparsity level will be chosen by the information criterion. @@ -1016,9 +1005,6 @@ class GraspSolver(BaseSolver): An array indicates different folds in CV, which samples in the same fold should be given the same number. The number of different elements should be equal to ``cv``. Used only when `cv` > 1. - metric_method : callable, optional - A function to calculate the information criterion. - `` metric(loss, p, s, n) -> ic_value``, where ``loss`` is the value of the objective function, ``p`` is the dimensionality, ``s`` is the sparsity level and ``n`` is the sample size. random_state : int, optional The random seed used for cross-validation. @@ -1164,9 +1150,11 @@ class FobaSolver(BaseSolver): Here are wrong examples: ``[0,2,1,2]`` (not incremental), ``[1,2,3,3]`` (not start from 0), ``[0,2,2,3]`` (there is a gap). It's worth mentioning that the concept "a variable" means "a group of variables" in fact. For example,``sparsity=[3]`` means there will be 3 groups of variables selected rather than 3 variables, and ``always_include=[0,3]`` means the 0-th and 3-th groups must be selected. - ic_type : {'aic', 'bic', 'sic', 'ebic'}, default='aic' - The type of information criterion for choosing the sparsity level. + ic_method : callable, optional + A function to calculate the information criterion for choosing the sparsity level. + ``ic(loss, p, s, n) -> ic_value``, where ``loss`` is the value of the objective function, ``p`` is the dimensionality, ``s`` is the sparsity level and ``n`` is the sample size. Used only when ``sparsity`` is array and ``cv`` is 1. + Note that ``sample_size`` must be given when using ``ic_method``. cv : int, default=1 The folds number when use the cross-validation method. - If ``cv`` = 1, the sparsity level will be chosen by the information criterion. @@ -1178,9 +1166,6 @@ class FobaSolver(BaseSolver): An array indicates different folds in CV, which samples in the same fold should be given the same number. The number of different elements should be equal to ``cv``. Used only when `cv` > 1. - metric_method : callable, optional - A function to calculate the information criterion. - `` metric(loss, p, s, n) -> ic_value``, where ``loss`` is the value of the objective function, ``p`` is the dimensionality, ``s`` is the sparsity level and ``n`` is the sample size. random_state : int, optional The random seed used for cross-validation. @@ -1212,8 +1197,7 @@ def __init__( numeric_solver=convex_solver_nlopt, max_iter=100, group=None, - ic_type="aic", - metric_method=None, + ic_method=None, cv=1, cv_fold_id=None, split_method=None, @@ -1227,8 +1211,7 @@ def __init__( numeric_solver=numeric_solver, max_iter=max_iter, group=group, - ic_type=ic_type, - metric_method=metric_method, + ic_method=ic_method, cv=cv, cv_fold_id=cv_fold_id, split_method=split_method, @@ -1442,9 +1425,11 @@ class ForwardSolver(FobaSolver): Here are wrong examples: ``[0,2,1,2]`` (not incremental), ``[1,2,3,3]`` (not start from 0), ``[0,2,2,3]`` (there is a gap). It's worth mentioning that the concept "a variable" means "a group of variables" in fact. For example,``sparsity=[3]`` means there will be 3 groups of variables selected rather than 3 variables, and ``always_include=[0,3]`` means the 0-th and 3-th groups must be selected. - ic_type : {'aic', 'bic', 'sic', 'ebic'}, default='aic' - The type of information criterion for choosing the sparsity level. + ic_method : callable, optional + A function to calculate the information criterion for choosing the sparsity level. + ``ic(loss, p, s, n) -> ic_value``, where ``loss`` is the value of the objective function, ``p`` is the dimensionality, ``s`` is the sparsity level and ``n`` is the sample size. Used only when ``sparsity`` is array and ``cv`` is 1. + Note that ``sample_size`` must be given when using ``ic_method``. cv : int, default=1 The folds number when use the cross-validation method. - If ``cv`` = 1, the sparsity level will be chosen by the information criterion. @@ -1456,9 +1441,6 @@ class ForwardSolver(FobaSolver): An array indicates different folds in CV, which samples in the same fold should be given the same number. The number of different elements should be equal to ``cv``. Used only when `cv` > 1. - metric_method : callable, optional - A function to calculate the information criterion. - `` metric(loss, p, s, n) -> ic_value``, where ``loss`` is the value of the objective function, ``p`` is the dimensionality, ``s`` is the sparsity level and ``n`` is the sample size. random_state : int, optional The random seed used for cross-validation. @@ -1485,8 +1467,7 @@ def __init__( numeric_solver=convex_solver_nlopt, max_iter=100, group=None, - ic_type="aic", - metric_method=None, + ic_method=None, cv=1, cv_fold_id=None, split_method=None, @@ -1504,8 +1485,7 @@ def __init__( numeric_solver=numeric_solver, max_iter=max_iter, group=group, - ic_type=ic_type, - metric_method=metric_method, + ic_method=ic_method, cv=cv, cv_fold_id=cv_fold_id, split_method=split_method, @@ -1593,9 +1573,11 @@ class OMPSolver(ForwardSolver): Here are wrong examples: ``[0,2,1,2]`` (not incremental), ``[1,2,3,3]`` (not start from 0), ``[0,2,2,3]`` (there is a gap). It's worth mentioning that the concept "a variable" means "a group of variables" in fact. For example,``sparsity=[3]`` means there will be 3 groups of variables selected rather than 3 variables, and ``always_include=[0,3]`` means the 0-th and 3-th groups must be selected. - ic_type : {'aic', 'bic', 'sic', 'ebic'}, default='aic' - The type of information criterion for choosing the sparsity level. + ic_method : callable, optional + A function to calculate the information criterion for choosing the sparsity level. + ``ic(loss, p, s, n) -> ic_value``, where ``loss`` is the value of the objective function, ``p`` is the dimensionality, ``s`` is the sparsity level and ``n`` is the sample size. Used only when ``sparsity`` is array and ``cv`` is 1. + Note that ``sample_size`` must be given when using ``ic_method``. cv : int, default=1 The folds number when use the cross-validation method. - If ``cv`` = 1, the sparsity level will be chosen by the information criterion. @@ -1607,9 +1589,6 @@ class OMPSolver(ForwardSolver): An array indicates different folds in CV, which samples in the same fold should be given the same number. The number of different elements should be equal to ``cv``. Used only when `cv` > 1. - metric_method : callable, optional - A function to calculate the information criterion. - `` metric(loss, p, s, n) -> ic_value``, where ``loss`` is the value of the objective function, ``p`` is the dimensionality, ``s`` is the sparsity level and ``n`` is the sample size. random_state : int, optional The random seed used for cross-validation. @@ -1639,8 +1618,7 @@ def __init__( numeric_solver=convex_solver_nlopt, max_iter=100, group=None, - ic_type="aic", - metric_method=None, + ic_method=None, cv=1, cv_fold_id=None, split_method=None, @@ -1656,8 +1634,7 @@ def __init__( numeric_solver=numeric_solver, max_iter=max_iter, group=group, - ic_type=ic_type, - metric_method=metric_method, + ic_method=ic_method, cv=cv, cv_fold_id=cv_fold_id, split_method=split_method, diff --git a/skscope/utilities.py b/skscope/utilities.py index d779f34..5968469 100644 --- a/skscope/utilities.py +++ b/skscope/utilities.py @@ -99,3 +99,76 @@ def check_array_survival(X, y): event, time = check_y_survival(y) check_consistent_length(X, event, time) return event, time + + +def AIC( + objective_value: float, + dimensionality: int, + effective_params_num: int, + train_size: int, +): + return 2 * objective_value + 2 * effective_params_num + + +def BIC( + objective_value: float, + dimensionality: int, + effective_params_num: int, + train_size: int, +): + return 2 * objective_value + effective_params_num * np.log(train_size) + + +def SIC( + objective_value: float, + dimensionality: int, + effective_params_num: int, + train_size: int, +): + return 2 * objective_value + effective_params_num * np.log( + np.log(train_size) + ) * np.log(dimensionality) + + +def GIC( + objective_value: float, + dimensionality: int, + effective_params_num: int, + train_size: int, +): + return 2 * objective_value + effective_params_num * np.log( + np.log(train_size) + ) * np.log(dimensionality) + + +def EBIC( + objective_value: float, + dimensionality: int, + effective_params_num: int, + train_size: int, +): + return 2 * objective_value + effective_params_num * ( + np.log(train_size) + 2 * np.log(dimensionality) + ) + + +def LinearSIC( + objective_value: float, + dimensionality: int, + effective_params_num: int, + train_size: int, +): + return train_size * np.log(objective_value) + 2 * effective_params_num * np.log( + np.log(train_size) + ) * np.log(dimensionality) + + +def LinearGIC( + objective_value: float, + dimensionality: int, + effective_params_num: int, + train_size: int, +): + return train_size * np.log(objective_value) + 2 * effective_params_num * np.log( + np.log(train_size) + ) * np.log(dimensionality) diff --git a/src/Metric.h b/src/Metric.h index 07a0a37..f85a24e 100644 --- a/src/Metric.h +++ b/src/Metric.h @@ -13,12 +13,11 @@ #include "Data.h" #include "utilities.h" - -class Metric { - public: +class Metric +{ +public: bool is_cv; int Kfold; - int ic_type; // Eigen::Matrix cv_initial_model_param; // Eigen::Matrix cv_initial_coef0; @@ -39,16 +38,14 @@ class Metric { // std::vector> group_XTX_list; - double ic_coef; - - Metric() = default; + std::function ic_method; - Metric(int ic_type, double ic_coef = 1.0, int Kfold = 5) { + Metric(std::function ic_method, int Kfold = 5) : ic_method(ic_method) + { this->is_cv = Kfold > 1; - this->ic_type = ic_type; this->Kfold = Kfold; - this->ic_coef = ic_coef; - if (is_cv) { + if (is_cv) + { cv_init_fit_arg.resize(Kfold); train_X_list.resize(Kfold); test_X_list.resize(Kfold); @@ -59,8 +56,10 @@ class Metric { } }; - void set_cv_init_fit_arg(int beta_size, int M) { - for (int i = 0; i < this->Kfold; i++) { + void set_cv_init_fit_arg(int beta_size, int M) + { + for (int i = 0; i < this->Kfold; i++) + { Eigen::VectorXd beta_init; Eigen::VectorXd coef0_init; coef_set_zero(beta_size, M, beta_init, coef0_init); @@ -107,38 +106,47 @@ class Metric { // this->cv_initial_coef0[k] = coef0; // } - void set_cv_train_test_mask(Data &data, int n, Eigen::VectorXi &cv_fold_id) { + void set_cv_train_test_mask(Data &data, int n, Eigen::VectorXi &cv_fold_id) + { Eigen::VectorXi index_list = Eigen::VectorXi::LinSpaced(n, 0, n - 1); - auto rule = [&cv_fold_id](int i, int j) -> bool { return cv_fold_id(i) < cv_fold_id(j); }; + auto rule = [&cv_fold_id](int i, int j) -> bool + { return cv_fold_id(i) < cv_fold_id(j); }; std::sort(index_list.data(), index_list.data() + index_list.size(), rule); int k = 0, st = 0, ed = 1; std::vector group_list((unsigned int)this->Kfold); - while (k < this->Kfold && ed < n) { + while (k < this->Kfold && ed < n) + { int mask = cv_fold_id(index_list(st)); - while (ed < n && mask == cv_fold_id(index_list(ed))) ed++; + while (ed < n && mask == cv_fold_id(index_list(ed))) + ed++; group_list[k] = index_list.segment(st, ed - st); st = ed; ed++; k++; } - - for (int k = 0; k < this->Kfold; k++) { + + for (int k = 0; k < this->Kfold; k++) + { std::sort(group_list[k].data(), group_list[k].data() + group_list[k].size()); } // cv train-test partition: std::vector train_mask_list_tmp((unsigned int)this->Kfold); std::vector test_mask_list_tmp((unsigned int)this->Kfold); - for (int k = 0; k < this->Kfold; k++) { + for (int k = 0; k < this->Kfold; k++) + { int train_x_size = n - group_list[k].size(); // get train_mask Eigen::VectorXi train_mask(train_x_size); int i = 0; - for (int j = 0; j < this->Kfold; j++) { - if (j != k) { - for (int s = 0; s < group_list[j].size(); s++) { + for (int j = 0; j < this->Kfold; j++) + { + if (j != k) + { + for (int s = 0; s < group_list[j].size(); s++) + { train_mask(i) = group_list[j](s); i++; } @@ -182,24 +190,15 @@ class Metric { // this->group_XTX_list = group_XTX_list_tmp; // } - double ic(int train_n, int M, int N, Algorithm *algorithm) { + double ic(int train_n, int M, int N, Algorithm *algorithm) + { double loss = 2 * (algorithm->get_train_loss() - algorithm->lambda_level * algorithm->beta.cwiseAbs2().sum()); - - if (ic_type == 1) { - return loss + 2.0 * algorithm->get_effective_number(); - } else if (ic_type == 2) { - return loss + this->ic_coef * log(double(train_n)) * algorithm->get_effective_number(); - } else if (ic_type == 3) { - return loss + - this->ic_coef * log(double(N)) * log(log(double(train_n))) * algorithm->get_effective_number(); - } else if (ic_type == 4) { - return loss + - this->ic_coef * (log(double(train_n)) + 2 * log(double(N))) * algorithm->get_effective_number(); - }; + return this->ic_method(loss, N, algorithm->get_effective_number(), train_n); }; double loss_function(UniversalData &train_x, Eigen::MatrixXd &train_y, Eigen::VectorXd &train_weight, Eigen::VectorXi &g_index, - Eigen::VectorXi &g_size, int train_n, int p, int N, Algorithm *algorithm) { + Eigen::VectorXi &g_size, int train_n, int p, int N, Algorithm *algorithm) + { Eigen::VectorXi A = algorithm->get_A_out(); Eigen::VectorXd beta = algorithm->get_beta(); Eigen::VectorXd coef0 = algorithm->get_coef0(); @@ -220,10 +219,12 @@ class Metric { // to do Eigen::VectorXd fit_and_evaluate_in_metric(std::vector algorithm_list, - Data &data, FIT_ARG &fit_arg) { + Data &data, FIT_ARG &fit_arg) + { Eigen::VectorXd loss_list(this->Kfold); - if (!is_cv) { + if (!is_cv) + { algorithm_list[0]->update_sparsity_level(fit_arg.support_size); algorithm_list[0]->update_lambda_level(fit_arg.lambda); algorithm_list[0]->update_beta_init(fit_arg.beta_init); @@ -233,14 +234,17 @@ class Metric { algorithm_list[0]->fit(data.x, data.y, data.weight, data.g_index, data.g_size, data.n, data.p, data.g_num); - if (algorithm_list[0]->get_warm_start()) { + if (algorithm_list[0]->get_warm_start()) + { fit_arg.beta_init = algorithm_list[0]->get_beta(); fit_arg.coef0_init = algorithm_list[0]->get_coef0(); fit_arg.bd_init = algorithm_list[0]->get_bd(); } loss_list(0) = this->ic(data.n, data.M, data.g_num, algorithm_list[0]); - } else { + } + else + { Eigen::VectorXi g_index = data.g_index; Eigen::VectorXi g_size = data.g_size; int p = data.p; @@ -248,7 +252,8 @@ class Metric { #pragma omp parallel for // parallel - for (int k = 0; k < this->Kfold; k++) { + for (int k = 0; k < this->Kfold; k++) + { // get test_x, test_y int test_n = this->test_mask_list[k].size(); int train_n = this->train_mask_list[k].size(); @@ -278,7 +283,8 @@ class Metric { algorithm_list[k]->fit(this->train_X_list[k], this->train_y_list[k], this->train_weight_list[k], g_index, g_size, train_n, p, N); - if (algorithm_list[k]->get_warm_start()) { + if (algorithm_list[k]->get_warm_start()) + { this->cv_init_fit_arg[k].beta_init = algorithm_list[k]->get_beta(); this->cv_init_fit_arg[k].coef0_init = algorithm_list[k]->get_coef0(); this->cv_init_fit_arg[k].bd_init = algorithm_list[k]->get_bd(); @@ -296,5 +302,3 @@ class Metric { return loss_list; }; }; - - diff --git a/src/pywrap.cpp b/src/pywrap.cpp index 3467d74..4f9000b 100644 --- a/src/pywrap.cpp +++ b/src/pywrap.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include "Algorithm.h" #include "UniversalData.h" @@ -47,8 +48,7 @@ using namespace std; * s_max), where the specific support size to be considered is determined by golden section. * @param is_warm_start When tuning the optimal parameter combination, whether to use the last solution * as a warm start to accelerate the iterative convergence of the splicing algorithm. - * @param ic_type The type of criterion for choosing the support size. Available options are - * "sic", "ebic", "bic", "aic". + * @param ic_method A function pointer to the information criterion function. * @param Kfold The folds number to use the Cross-validation method. If Kfold=1, * Cross-validation will not be used. * @param sequence An integer vector representing the alternative support sizes. Only used for @@ -88,7 +88,7 @@ using namespace std; * @param algorithm_type type of algorithm * @param path_type type of path: 1 for sequencial search and 2 for golden section search * @param is_warm_start whether enable warm-start - * @param ic_type type of information criterion, used for not CV + * @param ic_method a function pointer to the information criterion function, used for not CV * @param Kfold number of folds, used for CV * @param parameters parameters to be selected, including `support_size`, `lambda` * @param screening_size size of screening @@ -104,7 +104,7 @@ using namespace std; tuple pywrap_Universal(pybind11::object universal_data, UniversalModel universal_model, ConvexSolver convex_solver, int model_size, int sample_size, int aux_para_size, int max_iter, - int exchange_num, int path_type, bool is_greedy, bool use_hessian, bool is_dynamic_exchange_num, bool is_warm_start, int ic_type, double ic_coef, int Kfold, VectorXi sequence, + int exchange_num, int path_type, bool is_greedy, bool use_hessian, bool is_dynamic_exchange_num, bool is_warm_start, std::function ic_method, int Kfold, VectorXi sequence, VectorXd lambda_seq, int s_min, int s_max, int screening_size, VectorXi g_index, VectorXi always_select, int thread, int splicing_type, int sub_search, VectorXi cv_fold_id, VectorXi A_init, VectorXd beta_init, VectorXd coef0_init) { @@ -150,14 +150,14 @@ pywrap_Universal(pybind11::object universal_data, UniversalModel universal_model if (screening_size >= 0) { screening_A = screening(data, algorithm_list, screening_size, beta_size, - parameters.lambda_list(0), A_init); + parameters.lambda_list(0), A_init); } // Prepare for CV: // if CV is enable, // specify train and test data, // and initialize the fitting argument inside each fold. - Metric *metric = new Metric(ic_type, ic_coef, Kfold); + Metric *metric = new Metric(ic_method, Kfold); if (Kfold > 1) { metric->set_cv_train_test_mask(data, data.n, cv_fold_id); @@ -292,7 +292,7 @@ PYBIND11_MODULE(_scope, m) m.def("pywrap_Universal", &pywrap_Universal); pybind11::class_(m, "UniversalModel").def(pybind11::init<>()).def("set_loss_of_model", &UniversalModel::set_loss_of_model).def("set_gradient_autodiff", &UniversalModel::set_gradient_autodiff).def("set_hessian_autodiff", &UniversalModel::set_hessian_autodiff).def("set_gradient_user_defined", &UniversalModel::set_gradient_user_defined).def("set_hessian_user_defined", &UniversalModel::set_hessian_user_defined).def("set_slice_by_sample", &UniversalModel::set_slice_by_sample).def("set_deleter", &UniversalModel::set_deleter); m.def("init_spdlog", &init_spdlog); - //pybind11::class_(m, "NloptConfig").def(pybind11::init()); + // pybind11::class_(m, "NloptConfig").def(pybind11::init()); pybind11::class_(m, "QuadraticData") .def(pybind11::init()); m.def("quadratic_loss", &quadratic_loss);