Skip to content

Commit

Permalink
Merge pull request scikit-optimize#642 from kejiashi/refactor
Browse files Browse the repository at this point in the history
[MRG+1] Refactor initialization of Optimizer and improve readability of comments in the base optimizer
  • Loading branch information
iaroslav-ai authored Mar 23, 2018
2 parents 0b49e4e + 8e06cde commit 9dd7a03
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 70 deletions.
44 changes: 19 additions & 25 deletions skopt/optimizer/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,85 +175,79 @@ def base_minimize(func, dimensions, base_estimator,
"n_jobs": n_jobs}
acq_func_kwargs = {"xi": xi, "kappa": kappa}

# Initialize with provided points (x0 and y0) and/or random points
# Initialize optimization
# Suppose there are points provided (x0 and y0), record them

# check x0: list-like, requirement of minimal points
if x0 is None:
x0 = []
elif not isinstance(x0[0], (list, tuple)):
x0 = [x0]

if not isinstance(x0, list):
raise ValueError("`x0` should be a list, but got %s" % type(x0))

if n_random_starts == 0 and not x0:
if n_random_starts <= 0 and not x0:
raise ValueError("Either set `n_random_starts` > 0,"
" or provide `x0`")

# check y0: list-like, requirement of maximal calls
if isinstance(y0, Iterable):
y0 = list(y0)
elif isinstance(y0, numbers.Number):
y0 = [y0]

# is the budget for calling `func` large enough?
required_calls = n_random_starts + (len(x0) if not y0 else 0)
if n_calls < required_calls:
raise ValueError(
"Expected `n_calls` >= %d, got %d" % (required_calls, n_calls))

# Number of points the user wants to evaluate before it makes sense to
# fit a surrogate model
# calculate the total number of initial points
n_initial_points = n_random_starts + len(x0)

# Build optimizer

# create optimizer class
optimizer = Optimizer(dimensions, base_estimator,
n_initial_points=n_initial_points,
acq_func=acq_func, acq_optimizer=acq_optimizer,
random_state=random_state,
acq_optimizer_kwargs=acq_optimizer_kwargs,
acq_func_kwargs=acq_func_kwargs)

# check x0: element-wise data type, dimensionality
assert all(isinstance(p, Iterable) for p in x0)

if not all(len(p) == optimizer.space.n_dims for p in x0):
raise RuntimeError("Optimization space (%s) and initial points in x0 "
"use inconsistent dimensions." % optimizer.space)

# check callback
callbacks = check_callback(callback)
if verbose:
callbacks.append(VerboseCallback(
n_init=len(x0) if not y0 else 0,
n_random=n_random_starts,
n_total=n_calls))

# setting the scope for these variables
result = None
# Record provided points

# User suggested points at which to evaluate the objective first
# create return object
result = None
# evaluate y0 if only x0 is provided
if x0 and y0 is None:
y0 = list(map(func, x0))
n_calls -= len(y0)

# Pass user suggested initialisation points to the optimizer
# record through tell function
if x0:
if not (isinstance(y0, Iterable) or isinstance(y0, numbers.Number)):
raise ValueError(
"`y0` should be an iterable or a scalar, got %s" % type(y0))

if len(x0) != len(y0):
raise ValueError("`x0` and `y0` should have the same length")


result = optimizer.tell(x0, y0)
result.specs = specs

if eval_callbacks(callbacks, result):
return result

# Bayesian optimization loop
# Optimize
for n in range(n_calls):
next_x = optimizer.ask()

next_y = func(next_x)
result = optimizer.tell(next_x, next_y)
result.specs = specs

if eval_callbacks(callbacks, result):
break

Expand Down
105 changes: 60 additions & 45 deletions skopt/optimizer/optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,15 +140,21 @@ def __init__(self, dimensions, base_estimator="gp",
acq_optimizer="auto",
random_state=None, acq_func_kwargs=None,
acq_optimizer_kwargs=None):
# Arguments that are just stored not checked
self.acq_func = acq_func

self.rng = check_random_state(random_state)

# Configure acquisition function

# Store and create acquisition function set
self.acq_func = acq_func
self.acq_func_kwargs = acq_func_kwargs

allowed_acq_funcs = ["gp_hedge", "EI", "LCB", "PI", "EIps", "PIps"]
if self.acq_func not in allowed_acq_funcs:
raise ValueError("expected acq_func to be in %s, got %s" %
(",".join(allowed_acq_funcs), self.acq_func))

# treat hedging method separately
if self.acq_func == "gp_hedge":
self.cand_acq_funcs_ = ["EI", "LCB", "PI"]
self.gains_ = np.zeros(3)
Expand All @@ -159,73 +165,44 @@ def __init__(self, dimensions, base_estimator="gp",
acq_func_kwargs = dict()
self.eta = acq_func_kwargs.get("eta", 1.0)

if acq_optimizer_kwargs is None:
acq_optimizer_kwargs = dict()

self.n_points = acq_optimizer_kwargs.get("n_points", 10000)
self.n_restarts_optimizer = acq_optimizer_kwargs.get(
"n_restarts_optimizer", 5)
n_jobs = acq_optimizer_kwargs.get("n_jobs", 1)
self.acq_optimizer_kwargs = acq_optimizer_kwargs
# Configure counters of points

# Check `n_random_starts` deprecation first
if n_random_starts is not None:
warnings.warn(("n_random_starts will be removed in favour of "
"n_initial_points."),
DeprecationWarning)
n_initial_points = n_random_starts

self._check_arguments(base_estimator, n_initial_points, acq_optimizer,
dimensions)

if isinstance(self.base_estimator_, GaussianProcessRegressor):
dimensions = normalize_dimensions(dimensions)

self.space = Space(dimensions)
self.models = []
self.Xi = []
self.yi = []

self._cat_inds = []
self._non_cat_inds = []
for ind, dim in enumerate(self.space.dimensions):
if isinstance(dim, Categorical):
self._cat_inds.append(ind)
else:
self._non_cat_inds.append(ind)

self.n_jobs = n_jobs

# The cache of responses of `ask` method for n_points not None.
# This ensures that multiple calls to `ask` with n_points set
# return same sets of points.
# The cache is reset to {} at every call to `tell`.
self.cache_ = {}
if n_initial_points < 0:
raise ValueError(
"Expected `n_initial_points` >= 0, got %d" % n_initial_points)
self._n_initial_points = n_initial_points
self.n_initial_points_ = n_initial_points

def _check_arguments(self, base_estimator, n_initial_points,
acq_optimizer, dimensions):
"""Check arguments for sanity."""
# Configure estimator

# build base_estimator if doesn't exist
if isinstance(base_estimator, str):
base_estimator = cook_estimator(
base_estimator, space=dimensions,
random_state=self.rng.randint(0, np.iinfo(np.int32).max))

# check if regressor
if not is_regressor(base_estimator) and base_estimator is not None:
raise ValueError(
"%s has to be a regressor." % base_estimator)

# treat per-second acquisition function specially
is_multi_regressor = isinstance(base_estimator, MultiOutputRegressor)
if "ps" in self.acq_func and not is_multi_regressor:
self.base_estimator_ = MultiOutputRegressor(base_estimator)
else:
self.base_estimator_ = base_estimator

if n_initial_points < 0:
raise ValueError(
"Expected `n_initial_points` >= 0, got %d" % n_initial_points)
self._n_initial_points = n_initial_points
self.n_initial_points_ = n_initial_points
# Configure optimizer

# decide optimizer based on gradient information
if acq_optimizer == "auto":
if has_gradients(self.base_estimator_):
acq_optimizer = "lbfgs"
Expand All @@ -241,9 +218,47 @@ def _check_arguments(self, base_estimator, n_initial_points,
raise ValueError("The regressor {0} should run with "
"acq_optimizer"
"='sampling'.".format(type(base_estimator)))

self.acq_optimizer = acq_optimizer

# record other arguments
if acq_optimizer_kwargs is None:
acq_optimizer_kwargs = dict()

self.n_points = acq_optimizer_kwargs.get("n_points", 10000)
self.n_restarts_optimizer = acq_optimizer_kwargs.get(
"n_restarts_optimizer", 5)
n_jobs = acq_optimizer_kwargs.get("n_jobs", 1)
self.n_jobs = n_jobs
self.acq_optimizer_kwargs = acq_optimizer_kwargs

# Configure search space

# normalize space if GP regressor
if isinstance(self.base_estimator_, GaussianProcessRegressor):
dimensions = normalize_dimensions(dimensions)
self.space = Space(dimensions)

# record categorical and non-categorical indices
self._cat_inds = []
self._non_cat_inds = []
for ind, dim in enumerate(self.space.dimensions):
if isinstance(dim, Categorical):
self._cat_inds.append(ind)
else:
self._non_cat_inds.append(ind)

# Initialize storage for optimization

self.models = []
self.Xi = []
self.yi = []

# Initialize cache for `ask` method responses

# This ensures that multiple calls to `ask` with n_points set
# return same sets of points. Reset to {} at every call to `tell`.
self.cache_ = {}

def copy(self, random_state=None):
"""Create a shallow copy of an instance of the optimizer.
Expand Down

0 comments on commit 9dd7a03

Please sign in to comment.