Reduce requirements (automl#516)

* Drop unnecessary requirements: sphinx, nose, six, statsmodels, cython
* Remove optional dependencies from requirements.txt, move them to setup.py
* Make a symlink to requirements.txt in the smac package and drop the copy-pasted __MANDATORY_PACKAGES__
* Split out optional dependencies as extras_require
* Determine at runtime what optional dependencies are installed
* Add lazy_import and use it to import lazily all optional dependencies at package init
* SMBO: Determine default model hyperparameter based on extras_installed
* Skip tests that require specific extras
* Add an 'all' extra for installing all extra requirements
* Update README and ci scripts
* Fix .travis.yml to install optional dependencies
* Update documentation building dependencies
* Lazy import of scikit-optimize
* Change sobol dependency from optional to required
mens-artis · Jul 23, 2019 · f659800 · f659800
1 parent 281e4d9
commit f659800
Show file tree

Hide file tree

Showing 25 changed files with 149 additions and 75 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -60,7 +60,7 @@ before_install:
 install:
   - pip install pep8 codecov mypy flake8
   - cat requirements.txt | xargs -n 1 -L 1 pip install
-  - python setup.py install
+  - pip install .[all]
 
 script:
   - ci_scripts/$TESTSUITE

diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1 +1,2 @@
-include requirements.txt
+include requirements.txt
+include extras_require.json
diff --git a/README.md b/README.md
@@ -54,7 +54,7 @@ used in SMAC3 requires SWIG (>= 3.0).
 
 ## Installation via pip
 
-SMAC3 is available on pipy.
+SMAC3 is available on PyPI.
 
 ```pip install smac```
 
@@ -63,7 +63,7 @@ SMAC3 is available on pipy.
 ```
 git clone https://github.com/automl/SMAC3.git && cd SMAC3
 cat requirements.txt | xargs -n 1 -L 1 pip install
-python setup.py install
+pip install .
 ```
 
 ## Installation in Anaconda
@@ -73,6 +73,29 @@ packages **before** you can install SMAC:
 
 ```conda install gxx_linux-64 gcc_linux-64 swig```
 
+## Optional dependencies
+
+SMAC3 comes with a set of optional dependencies that can be installed using
+[setuptools extras](https://setuptools.readthedocs.io/en/latest/setuptools.html#declaring-extras-optional-features-with-their-own-dependencies):
+
+- `lhd`: Latin hypercube design
+- `gp`: Gaussian process models
+
+These can be installed from PyPI or manually:
+
+```
+# from PyPI
+pip install smac[gp]
+
+# manually
+pip install .[gp,lhd]
+```
+
+For convenience, there is also an `all` extra that installs all optional dependencies:
+```
+pip install smac[all]
+```
+
 # License
 
 This program is free software: you can redistribute it and/or modify

diff --git a/ci_scripts/circle_install.sh b/ci_scripts/circle_install.sh
@@ -8,12 +8,9 @@ source activate testenv
 
 # install documentation building dependencies
 pip install --upgrade numpy
-pip install --upgrade matplotlib setuptools nose coverage sphinx pillow sphinx-gallery sphinx_bootstrap_theme cython numpydoc
-# And finally, all other dependencies
-cat requirements.txt | xargs -n 1 -L 1 pip install
+pip install --upgrade matplotlib setuptools nose coverage sphinx pillow sphinx-gallery sphinx_bootstrap_theme numpydoc
 
-python setup.py clean
-python setup.py develop
+pip install -e .[all]
 
 # pipefail is necessary to propagate exit codes
 set -o pipefail && cd doc && make html 2>&1 | tee ~/log.txt
diff --git a/ci_scripts/create_doc.sh b/ci_scripts/create_doc.sh
@@ -9,7 +9,7 @@ if ! [[ -z ${DOCPUSH+x} ]]; then
     if [[ "$DOCPUSH" == "true" ]]; then
 
         # install documentation building dependencies
-        pip install --upgrade matplotlib seaborn setuptools nose coverage sphinx pillow sphinx-gallery sphinx_bootstrap_theme cython numpydoc nbformat nbconvert mock
+        pip install --upgrade sphinx_rtd_theme
 
         # $1 is the branch name
         # $2 is the global variable where we set the script status

diff --git a/doc/installation.rst b/doc/installation.rst
@@ -54,4 +54,4 @@ command line:
     git clone https://github.com/automl/SMAC3
     cd SMAC3
     cat requirements.txt | xargs -n 1 -L 1 pip install
-    python setup.py install
+    pip install .
diff --git a/extras_require.json b/extras_require.json
@@ -0,0 +1,9 @@
+{
+  "gp": [
+    "emcee>=2.1.0",
+    "scikit-optimize"
+  ],
+  "lhd": [
+    "pyDOE"
+  ]
+}
diff --git a/requirements.txt b/requirements.txt
@@ -1,19 +1,10 @@
-setuptools
-cython
 numpy>=1.7.1
 scipy>=0.18.1
-six
 psutil
 pynisher>=0.4.1
 ConfigSpace>=0.4.6,<0.5
 scikit-learn>=0.18.0
 pyrfr>=0.8.0
-sphinx
-sphinx_rtd_theme
-joblib
-nose>=1.3.0
-pyDOE
 sobol_seq
-statsmodels
-emcee>=2.1.0
-scikit-optimize
+joblib
+lazy_import
diff --git a/setup.py b/setup.py
@@ -1,12 +1,14 @@
 #!/usr/bin/env python3
+import json
 import os
 from setuptools import setup
 
 
 with open('requirements.txt') as fh:
-    requirements = fh.read()
-requirements = requirements.split('\n')
-requirements = [requirement.strip() for requirement in requirements]
+    requirements = [line.strip() for line in fh.readlines()]
+with open('extras_require.json') as fh:
+    extras_require = json.load(fh)
+    extras_require['all'] = set(sum(extras_require.values(), []))
 
 
 def get_version():
@@ -30,6 +32,8 @@ def get_author():
 setup(
     python_requires=">=3.5.2",
     install_requires=requirements,
+    extras_require=extras_require,
+    package_data={'smac': ['requirements.txt', 'extras_require.json']},
     author=get_author(),
     version=get_version(),
     test_suite="nose.collector",

diff --git a/smac/__init__.py b/smac/__init__.py
@@ -1,23 +1,29 @@
+import json
 import os
 import sys
 
+import lazy_import
 from smac.utils import dependencies
 
 __version__ = '0.10.1.dev'
 __author__ = 'Marius Lindauer, Matthias Feurer, Katharina Eggensperger, Joshua Marben, André Biedenkapp, Aaron Klein, Stefan Falkner and Frank Hutter'
 
-__MANDATORY_PACKAGES__ = """
-numpy>=1.7.1
-scipy>=0.18.1
-six
-psutil
-pynisher>=0.4.1
-ConfigSpace>=0.4.6,<0.5
-scikit-learn>=0.18.0
-pyrfr>=0.5.0
-joblib
-"""
-dependencies.verify_packages(__MANDATORY_PACKAGES__)
+
+with open(os.path.join(os.path.dirname(__file__), 'requirements.txt')) as fh:
+    dependencies.verify_packages(fh.read())
+
+with open(os.path.join(os.path.dirname(__file__), 'extras_require.json')) as fh:
+    extras_require = json.load(fh)
+
+extras_installed = set()
+for name, requirements in extras_require.items():
+    if dependencies.are_valid_packages(requirements):
+        extras_installed.add(name)
+    for requirement in requirements:
+        package_name = dependencies.RE_PATTERN.match(requirement).group('name')
+        if package_name == 'scikit-optimize':
+            package_name = 'skopt'
+        lazy_import.lazy_module(package_name)
 
 if sys.version_info < (3, 5, 2):
     raise ValueError("SMAC requires Python 3.5.2 or newer.")

diff --git a/smac/epm/gaussian_process.py b/smac/epm/gaussian_process.py
@@ -2,15 +2,17 @@
 import typing
 
 import numpy as np
-import skopt.learning.gaussian_process
-import skopt.learning.gaussian_process.kernels
+from lazy_import import lazy_callable
 from scipy import optimize
 
 from smac.configspace import ConfigurationSpace
 from smac.epm.base_gp import BaseModel
 from smac.utils.constants import VERY_SMALL_NUMBER
 
 logger = logging.getLogger(__name__)
+Kernel = lazy_callable('skopt.learning.gaussian_process.kernels.Kernel')
+GaussianProcessRegressor = lazy_callable(
+    'skopt.learning.gaussian_process.GaussianProcessRegressor')
 
 
 class GaussianProcess(BaseModel):
@@ -54,7 +56,7 @@ def __init__(
         types: np.ndarray,
         bounds: typing.List[typing.Tuple[float, float]],
         seed: int,
-        kernel: skopt.learning.gaussian_process.kernels.Kernel,
+        kernel: Kernel,
         normalize_y: bool=True,
         n_opt_restarts=10,
         **kwargs
@@ -99,7 +101,7 @@ def _train(self, X: np.ndarray, y: np.ndarray, do_optimize: bool=True):
         n_tries = 10
         for i in range(n_tries):
             try:
-                self.gp = skopt.learning.gaussian_process.GaussianProcessRegressor(
+                self.gp = GaussianProcessRegressor(
                     kernel=self.kernel,
                     normalize_y=False,
                     optimizer=None,

diff --git a/smac/epm/gaussian_process_mcmc.py b/smac/epm/gaussian_process_mcmc.py
@@ -4,14 +4,16 @@
 
 import emcee
 import numpy as np
-import skopt.learning.gaussian_process
-import skopt.learning.gaussian_process.kernels
+from lazy_import import lazy_callable
 
 from smac.configspace import ConfigurationSpace
 from smac.epm.base_gp import BaseModel
 from smac.epm.gaussian_process import GaussianProcess
 
 logger = logging.getLogger(__name__)
+Kernel = lazy_callable('skopt.learning.gaussian_process.kernels.Kernel')
+GaussianProcessRegressor = lazy_callable(
+    'skopt.learning.gaussian_process.GaussianProcessRegressor')
 
 
 class GaussianProcessMCMC(BaseModel):
@@ -22,7 +24,7 @@ def __init__(
         types: np.ndarray,
         bounds: typing.List[typing.Tuple[float, float]],
         seed: int,
-        kernel: skopt.learning.gaussian_process.kernels.Kernel,
+        kernel: Kernel,
         n_mcmc_walkers: int = 20,
         chain_length: int = 50,
         burnin_steps: int = 50,
@@ -115,7 +117,7 @@ def _train(self, X: np.ndarray, y: np.ndarray, do_optimize: bool=True):
             hyperparameter specified in the kernel.
         """
         X = self._impute_inactive(X)
-        self.gp = skopt.learning.gaussian_process.GaussianProcessRegressor(
+        self.gp = GaussianProcessRegressor(
             kernel=self.kernel,
             normalize_y=self.normalize_y,
             optimizer=None,

diff --git a/smac/epm/gp_kernels.py b/smac/epm/gp_kernels.py
@@ -4,11 +4,12 @@
 
 import numpy as np
 import sklearn.gaussian_process.kernels
-import skopt.learning.gaussian_process.kernels
 import scipy.optimize
 import scipy.spatial.distance
 import scipy.special
 
+from lazy_import import lazy_module
+kernels = lazy_module('skopt.learning.gaussian_process.kernels')
 
 # This file contains almost no type annotations to simplify comparing it to the original scikit-learn version!
 
@@ -63,22 +64,22 @@ def __call__(self, X, Y=None, eval_gradient=False, active=None):
         return rval
 
     def __add__(self, b):
-        if not isinstance(b, skopt.learning.gaussian_process.kernels.Kernel):
+        if not isinstance(b, kernels.Kernel):
             return Sum(self, ConstantKernel(b))
         return Sum(self, b)
 
     def __radd__(self, b):
-        if not isinstance(b, skopt.learning.gaussian_process.kernels.Kernel):
+        if not isinstance(b, kernels.Kernel):
             return Sum(ConstantKernel(b), self)
         return Sum(b, self)
 
     def __mul__(self, b):
-        if not isinstance(b, skopt.learning.gaussian_process.kernels.Kernel):
+        if not isinstance(b, kernels.Kernel):
             return Product(self, ConstantKernel(b))
         return Product(self, b)
 
     def __rmul__(self, b):
-        if not isinstance(b, skopt.learning.gaussian_process.kernels.Kernel):
+        if not isinstance(b, kernels.Kernel):
             return Product(ConstantKernel(b), self)
         return Product(b, self)
 
@@ -179,7 +180,7 @@ def set_active_dims(self, operate_on=None):
             self.len_active = None
 
 
-class Sum(MagicMixin, skopt.learning.gaussian_process.kernels.Sum):
+class Sum(MagicMixin, kernels.Sum):
 
     def __init__(self, k1, k2, operate_on=None, has_conditions=False):
         super(Sum, self).__init__(k1=k1, k2=k2)
@@ -223,7 +224,7 @@ def _call(self, X, Y=None, eval_gradient=False, active=None):
             return self.k1(X, Y, active=active) + self.k2(X, Y, active=active)
 
 
-class Product(MagicMixin, skopt.learning.gaussian_process.kernels.Product):
+class Product(MagicMixin, kernels.Product):
 
     def __init__(self, k1, k2, operate_on=None, has_conditions=False):
         super(Product, self).__init__(k1=k1, k2=k2)
@@ -268,7 +269,7 @@ def _call(self, X, Y=None, eval_gradient=False, active=None):
             return self.k1(X, Y, active=active) * self.k2(X, Y, active=active)
 
 
-class ConstantKernel(MagicMixin, skopt.learning.gaussian_process.kernels.ConstantKernel):
+class ConstantKernel(MagicMixin, kernels.ConstantKernel):
 
     def __init__(
             self,
@@ -331,7 +332,7 @@ def _call(self, X, Y=None, eval_gradient=False, active=None):
             return K
 
 
-class Matern(MagicMixin, skopt.learning.gaussian_process.kernels.Matern):
+class Matern(MagicMixin, kernels.Matern):
 
     def __init__(
         self,
@@ -439,7 +440,7 @@ def _call(self, X, Y=None, eval_gradient=False, active=None):
             return K
 
 
-class RBF(MagicMixin, skopt.learning.gaussian_process.kernels.RBF):
+class RBF(MagicMixin, kernels.RBF):
 
     def __init__(
         self,
@@ -514,7 +515,7 @@ def _call(self, X, Y=None, eval_gradient=False, active=None):
             return K
 
 
-class WhiteKernel(MagicMixin, skopt.learning.gaussian_process.kernels.WhiteKernel):
+class WhiteKernel(MagicMixin, kernels.WhiteKernel):
 
     def __init__(
         self,
@@ -576,7 +577,7 @@ def _call(self, X, Y=None, eval_gradient=False, active=None):
             return np.zeros((X.shape[0], Y.shape[0]))
 
 
-class HammingKernel(MagicMixin, skopt.learning.gaussian_process.kernels.HammingKernel):
+class HammingKernel(MagicMixin, kernels.HammingKernel):
 
     def __init__(
         self,

diff --git a/smac/extras_require.json b/smac/extras_require.json
@@ -0,0 +1 @@
+../extras_require.json
diff --git a/smac/facade/smac_bo_facade.py b/smac/facade/smac_bo_facade.py
@@ -3,7 +3,6 @@
 from smac.facade.smac_ac_facade import SMAC4AC
 from smac.epm.gaussian_process_mcmc import GaussianProcessMCMC, GaussianProcess
 from smac.epm.gp_base_prior import HorseshoePrior, LognormalPrior
-from smac.epm.gp_kernels import ConstantKernel, Matern, WhiteKernel, HammingKernel
 from smac.epm.util_funcs import get_types, get_rng
 from smac.initial_design.sobol_design import SobolDesign
 from smac.runhistory.runhistory2epm import RunHistory2EPM4LogScaledCost
@@ -49,6 +48,8 @@ def __init__(self, model_type='gp_mcmc', **kwargs):
         kwargs['initial_design_kwargs'] = init_kwargs
 
         if kwargs.get('model') is None:
+            from smac.epm.gp_kernels import ConstantKernel, Matern, WhiteKernel, HammingKernel
+
             model_kwargs = kwargs.get('model_kwargs', dict())
 
             _, rng = get_rng(rng=kwargs.get("rng", None), run_id=kwargs.get("run_id", None), logger=None)