
[python-package] require scikit-learn>=0.24.2, make scikit-learn estimators compatible with scikit-learn>=1.6.0dev #6651

Merged

Changes from 1 commit (42 commits total)
1adb77b
__sklearn_tags__ replacing sklearn's BaseEstimator._more_tags_
vnherdeiro Sep 11, 2024
8ed87d2
fixing tags dict -> dataclass
vnherdeiro Sep 11, 2024
32ec431
fixing wrong import
vnherdeiro Sep 11, 2024
ade9798
remove type hint
vnherdeiro Sep 11, 2024
2085a12
remove type hint
vnherdeiro Sep 11, 2024
a9ec348
fix linting
vnherdeiro Sep 11, 2024
fcc4e12
triggering new CI (scikit-learn dev has changed)
vnherdeiro Sep 14, 2024
3b15646
bringing back _more_tags, adding convertsion from more_tags to sklear…
vnherdeiro Sep 15, 2024
34d9eb4
lint fix
vnherdeiro Sep 15, 2024
6d20ef8
Update python-package/lightgbm/sklearn.py
vnherdeiro Sep 16, 2024
d715311
adressing PR comments
vnherdeiro Sep 16, 2024
c4ec9a4
move comment
jameslamb Sep 16, 2024
b0a4703
updates
jameslamb Sep 21, 2024
7eb861a
remove uses of super()
jameslamb Sep 24, 2024
b137ba2
fix version constraint in lint job, add one more comment
jameslamb Sep 24, 2024
d1915c0
Update python-package/lightgbm/sklearn.py
jameslamb Sep 24, 2024
6cf2158
Merge branch 'master' into fix_sklearn_more_tags_deprecation
jameslamb Sep 26, 2024
b5663aa
Merge branch 'fix_sklearn_more_tags_deprecation' of github.com:vnherd…
jameslamb Sep 26, 2024
118efd9
use scikit-learn 1.6 nightlies again, move some code to compat.py, re…
jameslamb Oct 2, 2024
4fb82f3
optionally use validate_data(), starting in scikit-learn 1.6
jameslamb Oct 3, 2024
c42c53d
fix validate_data() for older versions, update tests
jameslamb Oct 4, 2024
58d77e7
Merge branch 'master' of github.com:microsoft/LightGBM into fix_sklea…
jameslamb Oct 4, 2024
33fb5b6
more changes
jameslamb Oct 4, 2024
6689faa
fix n_features_in setting
jameslamb Oct 5, 2024
9a05670
fix return type
jameslamb Oct 5, 2024
815433f
remove now-unnecessary _LGBMCheckXY()
jameslamb Oct 5, 2024
ffebe41
correct comment
jameslamb Oct 5, 2024
722474d
Merge branch 'master' of github.com:microsoft/LightGBM into fix_sklea…
jameslamb Oct 6, 2024
f2cb2fe
Apply suggestions from code review
jameslamb Oct 6, 2024
86b5ab3
move __version__ import to compat.py, test with all ML tasks
jameslamb Oct 6, 2024
125f4ea
just set the setters and deleters
jameslamb Oct 6, 2024
4233d70
set floor of scikit-learn>=0.24.2, fix ordering of n_features_in_ set…
jameslamb Oct 6, 2024
330df3f
fix conflicts
jameslamb Oct 6, 2024
e8e4cdb
Update python-package/lightgbm/sklearn.py
jameslamb Oct 6, 2024
0b0ea24
Merge branch 'master' into fix_sklearn_more_tags_deprecation
jameslamb Oct 6, 2024
f22e494
forgot to commit ... fix comment
jameslamb Oct 7, 2024
b124797
Merge branch 'master' of github.com:microsoft/LightGBM into fix_sklea…
jameslamb Oct 7, 2024
beab71c
Merge branch 'fix_sklearn_more_tags_deprecation' of github.com:vnherd…
jameslamb Oct 7, 2024
c6e6fad
Apply suggestions from code review
jameslamb Oct 8, 2024
e3eabac
Merge branch 'master' into fix_sklearn_more_tags_deprecation
jameslamb Oct 9, 2024
d8762e5
predict_proba() shape is (num_data, num_classes) for multi-class
jameslamb Oct 9, 2024
8ef1deb
pass ensure_min_samples=1 at predict() time too
jameslamb Oct 9, 2024
remove uses of super()
jameslamb committed Sep 24, 2024
commit 7eb861addebd6d446be511c35a00b85158d11fa9
1 change: 1 addition & 0 deletions .ci/test.sh
@@ -103,6 +103,7 @@ if [[ $TASK == "lint" ]]; then
'mypy>=1.11.1' \
'pre-commit>=3.8.0' \
'pyarrow-core>=17.0' \
'scikit-learn>=1.15.0' \
Collaborator:

This is to ensure that mypy checks scikit-learn imports. Extra important now that I'm proposing adding an optional type hint on this new sklearn.utils.Tags.

'r-lintr>=3.1.2'
source activate $CONDA_ENV
echo "Linting Python code"
77 changes: 47 additions & 30 deletions python-package/lightgbm/sklearn.py
@@ -4,7 +4,7 @@
import copy
from inspect import signature
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union

import numpy as np
import scipy.sparse
@@ -46,6 +46,12 @@
)
from .engine import train

if TYPE_CHECKING:
try:
from sklearn.utils import Tags as _sklearn_Tags
except ImportError:
_sklearn_Tags = None

__all__ = [
"LGBMClassifier",
"LGBMModel",
@@ -673,41 +679,45 @@ def _more_tags(self) -> Dict[str, Any]:
"_xfail_checks": {
"check_no_attributes_set_in_init": "scikit-learn incorrectly asserts that private attributes "
"cannot be set in __init__: "
"(see https://github.com/microsoft/LightGBM/issues/2628)"
"(see https://github.com/microsoft/LightGBM/issues/2628)",
"check_n_features_in_after_fitting": (
"validate_data() was first added in scikit-learn 1.6 and lightgbm"
"supports much older versions than that"
Collaborator:

I think we should add in this comment that LightGBM supports predict_disable_shape_check=True and we won't call validate_data() even after minimum sklearn version bump.

Collaborator:

hmmm I don't agree with that suggestion.

If lightgbm's minimum floor on scikit-learn was >=1.6, I think we could safely consider calling validate_data(), maybe like:

if not params.get("predict_disable_shape_check", False):
    self.validate_data(X)

LightGBM offering the ability to opt out of this validation wouldn't change that fact that the check_n_features_in_after_fitting test from scikit-learn would pass by default.

Collaborator:

Didn't think about such conditional approach. I like it!

Then, I think we can adopt validate_data() right now like we do with _check_sample_weight():

try:
    from sklearn.utils.validation import _check_sample_weight
except ImportError:
    from sklearn.utils.validation import check_consistent_length

    # dummy function to support older version of scikit-learn
    def _check_sample_weight(sample_weight: Any, X: Any, dtype: Any = None) -> Any:
        check_consistent_length(sample_weight, X)
        return sample_weight

We can try something like the following:

try:
    from sklearn.utils.validation import validate_data
except ImportError:
    # dummy function to support older version of scikit-learn
    def validate_data(_estimator, /, X='no_validation', y='no_validation', reset=True, validate_separately=False, skip_check_array=False, **check_params):
        check_array(X, y)
        check_X_y(X, y)
        _estimator._n_features_in = _estimator._n_features
        return X, y

Collaborator:

oh yeah, good idea!! I've started on this, still a bit of local testing to do... will try to push something here tomorrow.

Collaborator:

While testing this, I realized that LightGBM's scikit-learn interface already does not support the predict_disable_shape_check mechanism.

See this in LGBMModel.predict().

n_features = X.shape[1]
if self._n_features != n_features:
    raise ValueError(
        "Number of features of the model must "
        f"match the input. Model n_features_ is {self._n_features} and "
        f"input n_features is {n_features}"
    )

I think that behavior should be kept... if scikit-learn's API does not support passing inputs to predict() with a different number of features than were present when the model was fit(), then neither should LightGBM's scikit-learn estimators.

Collaborator:

I think that behavior should be kept... if scikit-learn's API does not support passing inputs to predict() with a different number of features than were present when the model was fit(), then neither should LightGBM's scikit-learn estimators.

Agree!

Collaborator:

In recent commits, I've done the following:

  • moved that number-of-features validation code out of fit() and into the validate_data() now defined in compat.py
  • reworded the error message so it's identical to the one sklearn.utils.validation.validate_data() raises
  • added a unit tests confirming that that error is raised, with that exact message, to be sure we'll find out if scikit-learn removes that check in the internals of validate_data() in the future

),
Collaborator:

On the 1.6.dev nightlies, scikit-learn is raising this error:

E AssertionError: LGBMRegressor.predict() does not check for consistency between input number
E of features with LGBMRegressor.fit(), via the n_features_in_ attribute.
E You might want to use sklearn.utils.validation.validate_data instead
E of check_array in LGBMRegressor.fit() and LGBMRegressor.predict()`. This can be done
E like the following:
E from sklearn.utils.validation import validate_data

We should ignore this check here in LightGBM... validate_data() will be added for the first time in scikit-learn 1.6.

We have other mechanisms further down in LightGBM to check shape mismatches between training data and the data provided at scoring time. I'd rather rely on those than take on the complexity of try-catching a call to this new-in-v1.6 validate_data() function.

Comment:

Understand you don't want to use validate_data here, but you can still conform to the API with your own tools.

You probably also want to make sure you store n_feature_in_ as well, to better imitate sklearn's behavior.

I would personally go down the fixes.py path though.

Collaborator:

Understand you don't want to use validate_data here, but you can still conform to the API with your own tools.

How could we avoid the check_n_features_in_after_fitting check failing without calling validate_data()? Could you point to a doc I could reference?

You probably also want to make sure you store n_feature_in_ as well, to better imitate sklearn's behavior.

We do.

@property
def n_features_in_(self) -> int:
    """:obj:`int`: The number of features of fitted model."""
    if not self.__sklearn_is_fitted__():
        raise LGBMNotFittedError("No n_features_in found. Need to call fit beforehand.")
    return self._n_features_in

},
}

@staticmethod
def _update_sklearn_tags_from_dict(
*,
tags: "sklearn.utils.Tags",
tags_dict: Dict[str, Any]
) -> "sklearn.utils.Tags":
"""
scikit-learn 1.6 introduced a dataclass-based interface for estimator tags.
tags: "_sklearn_Tags",
tags_dict: Dict[str, Any],
) -> "_sklearn_Tags":
"""Update ``sklearn.utils.Tags`` inherited from ``scikit-learn`` base classes.

``scikit-learn`` 1.6 introduced a dataclass-based interface for estimator tags.
ref: https://github.com/scikit-learn/scikit-learn/pull/29677

That interface means that each
This method handles updating that instance based on the value in ``self._more_tags()``.
"""
tags.input_tags.allow_nan = more_tags["allow_nan"]
tags.input_tags.sparse = "sparse" in more_tags["X_types"]
tags.target_tags.one_d_labels = "1dlabels" in more_tags["X_types"]
tags._xfail_checks = more_tags["_xfail_checks"]
tags.input_tags.allow_nan = tags_dict["allow_nan"]
tags.input_tags.sparse = "sparse" in tags_dict["X_types"]
tags.target_tags.one_d_labels = "1dlabels" in tags_dict["X_types"]
tags._xfail_checks = tags_dict["_xfail_checks"]
return tags

def __sklearn_tags__(self):
# super().__sklearn_tags__() cannot be called unconditionally,
def __sklearn_tags__(self) -> Optional["_sklearn_Tags"]:
# _LGBMModelBase.__sklearn_tags__() cannot be called unconditionally,
# because that method isn't defined for scikit-learn<1.6
if not callable(getattr(super(), "__sklearn_tags__", None)):
if not callable(getattr(_LGBMModelBase, "__sklearn_tags__", None)):
return None

# take whatever tags are provided by BaseEstimator, then modify
# them with LightGBM-specific values
tags = self._update_sklearn_tags_from_dict(
tags=super().__sklearn_tags__(),
tags_dict=self._more_tags()
return self._update_sklearn_tags_from_dict(
tags=_LGBMModelBase.__sklearn_tags__(self),
tags_dict=self._more_tags(),
)
return tags

def __sklearn_is_fitted__(self) -> bool:
return getattr(self, "fitted_", False)
@@ -1206,15 +1216,17 @@ class LGBMRegressor(_LGBMRegressorBase, LGBMModel):
"""LightGBM regressor."""

def _more_tags(self) -> Dict[str, Any]:
tags = super(LGBMModel, self)._more_tags()
tags.update(super(_LGBMRegressorBase, self)._more_tags())
# handle the case where ClassifierMixin possibly provides _more_tags()
if callable(getattr(_LGBMClassifierBase, "_more_tags", None)):
tags = _LGBMClassifierBase._more_tags(self)
Collaborator:

Proposing all these uses of {some_class}.{some_method} instead of super().{some_method} because we follow this advice from scikit-learn's docs (https://scikit-learn.org/stable/developers/develop.html#rolling-your-own-estimator):

...mixins should be “on the left” while the BaseEstimator should be “on the right” in the inheritance list for proper MRO.

Using super() would get the _more_tags() / __sklearn_tags__() from e.g. sklearn.base.RegressorMixin, but we want to use LightGBM's specific tags.
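The MRO point can be illustrated with a toy hierarchy (all class names here are hypothetical; `Mixin` plays the role of a scikit-learn mixin and `Base` the role of LGBMModel):

```python
# With the mixin "on the left" in the bases list, super() inside the
# estimator resolves to the mixin's method before the base class's,
# which is why an explicit Class.method(self) call is used instead.
class Base:
    def _more_tags(self):
        return {"source": "Base"}

class Mixin(Base):
    def _more_tags(self):
        return {"source": "Mixin"}

class Model(Mixin, Base):
    def tags_via_super(self):
        return super()._more_tags()   # MRO picks Mixin first

    def tags_via_explicit(self):
        return Base._more_tags(self)  # pins the Base implementation

m = Model()
print(m.tags_via_super()["source"])     # Mixin
print(m.tags_via_explicit()["source"])  # Base
```

Here `Model.__mro__` is `Model -> Mixin -> Base -> object`, so `super()` would return the mixin's tags; the explicit call is how the PR keeps LightGBM-specific values in control.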

else:
tags = {}
# override those with LightGBM-specific preferences
tags.update(LGBMModel._more_tags(self))
return tags

def __sklearn_tags__(self):
tags = super().__sklearn_tags__()
if tags is None:
return None

def __sklearn_tags__(self) -> Optional["_sklearn_Tags"]:
return LGBMModel.__sklearn_tags__(self)
Collaborator:

We need __sklearn_tags__() in LGBMRegressor and LGBMClassifier due to MRO again, right?

Collaborator:

Yeah exactly. Because the scikit-learn mixin classes come first (#6651 (comment)), if/when they define a __sklearn_tags__(), it would take precedence over the one from LGBMModel.

Comment:

__sklearn_tags__ doesn't have any corresponding manual MRO walk the same way that _more_tags did. You should treat it like any other normal overriding of a method in python's OOP model.

Collaborator:

I don't understand what you mean by this comment. Would appreciate your thoughts on #6651 (comment) if you have time.


def fit( # type: ignore[override]
self,
@@ -1263,12 +1275,17 @@ class LGBMClassifier(_LGBMClassifierBase, LGBMModel):
"""LightGBM classifier."""

def _more_tags(self) -> Dict[str, Any]:
tags = super(LGBMModel, self)._more_tags()
tags.update(super(_LGBMClassifierBase, self)._more_tags())
# handle the case where ClassifierMixin possibly provides _more_tags()
if callable(getattr(_LGBMClassifierBase, "_more_tags", None)):
tags = _LGBMClassifierBase._more_tags(self)
else:
tags = {}
# override those with LightGBM-specific preferences
tags.update(LGBMModel._more_tags(self))
return tags

def __sklearn_tags__(self):
return super().__
def __sklearn_tags__(self) -> Optional["_sklearn_Tags"]:
return LGBMModel.__sklearn_tags__(self)

def fit( # type: ignore[override]
self,
13 changes: 9 additions & 4 deletions tests/python_package_test/test_sklearn.py
@@ -1441,11 +1441,16 @@ def test_sklearn_integration(estimator, check):
def test_sklearn_tags_should_correctly_reflect_lightgbm_specific_values(estimator_class):
est = estimator_class()
more_tags = est._more_tags()
assert (
more_tags["X_types"] == ["2darray", "sparse", "1dlabels"],
"List of supported X_types has changed. Update LGBMModel.__sklearn__tags() to match.",
)
err_msg = "List of supported X_types has changed. Update LGBMModel.__sklearn__tags() to match."
assert more_tags["X_types"] == ["2darray", "sparse", "1dlabels"], err_msg
sklearn_tags = est.__sklearn_tags__()
# these tests should be run unconditionally (no 'if') once lightgbm's
# minimum scikit-learn version is 1.6 or higher
if sklearn_tags is not None:
assert sklearn_tags.input_tags.allow_nan is True
assert sklearn_tags.input_tags.sparse is True
assert sklearn_tags.target_tags.one_d_labels is True
assert sklearn_tags._xfail_checks == more_tags["_xfail_checks"]


@pytest.mark.parametrize("task", ["binary-classification", "multiclass-classification", "ranking", "regression"])