Hebo with rd (huawei-noah#59)

* Added RD kernel to GPyTorch model * Removed redundant print statement * Removed redundant code in the tests * Added GPy RD kernel * Added GPy RD kernel * Moved get_random_graph to common utils
qualtric · Sep 25, 2023 · 633e8a8 · 633e8a8
1 parent 460873a
commit 633e8a8
Show file tree

Hide file tree

Showing 6 changed files with 170 additions and 14 deletions.
diff --git a/HEBO/hebo/models/gp/gp.py b/HEBO/hebo/models/gp/gp.py
@@ -28,7 +28,7 @@
 from ..scalers import TorchMinMaxScaler, TorchStandardScaler
 from ..nn.sgld import pSGLD
 
-from .gp_util import DummyFeatureExtractor, default_kern
+from .gp_util import DummyFeatureExtractor, default_kern, default_kern_rd
 
 class GP(BaseModel):
     support_grad = True
@@ -153,7 +153,10 @@ def __init__(self,
         super().__init__((x, xe), y.squeeze(), lik)
         self.fe   = deepcopy(conf.get('fe',   DummyFeatureExtractor(x.shape[1], xe.shape[1], conf.get('num_uniqs'), conf.get('emb_sizes'))))
         self.mean = deepcopy(conf.get('mean', ConstantMean()))
-        self.cov  = deepcopy(conf.get('kern', default_kern(x, xe, y, self.fe.total_dim, conf.get('ard_kernel', True), conf.get('fe'))))
+        if conf.get("rd", False):
+            self.cov  = deepcopy(conf.get('kern', default_kern_rd(x, xe, y, self.fe.total_dim, conf.get('ard_kernel', True), conf.get('fe'), E=conf.get("E", 0.2))))
+        else:
+            self.cov  = deepcopy(conf.get('kern', default_kern(x, xe, y, self.fe.total_dim, conf.get('ard_kernel', True), conf.get('fe'))))
 
     def forward(self, x, xe):
         x_all = self.fe(x, xe)

diff --git a/HEBO/hebo/models/gp/gp_util.py b/HEBO/hebo/models/gp/gp_util.py
@@ -10,13 +10,14 @@
 import numpy as np
 import torch
 import torch.nn as nn
+from gpytorch.kernels import (AdditiveKernel, MaternKernel, ProductKernel,
+                              ScaleKernel)
+from gpytorch.priors import GammaPrior
 from torch import FloatTensor, LongTensor
 
-
-from gpytorch.kernels import MaternKernel, ScaleKernel, ProductKernel
-from gpytorch.priors  import GammaPrior
-
 from ..layers import EmbTransform
+from ..util import get_random_graph
+
 
 class DummyFeatureExtractor(nn.Module):
     def __init__(self, num_cont, num_enum, num_uniqs = None, emb_sizes = None):
@@ -63,3 +64,42 @@ def default_kern(x, xe, y, total_dim = None, ard_kernel = True, fe = None, max_x
             kernel = ScaleKernel(MaternKernel(nu = 1.5))
         kernel.outputscale = y[torch.isfinite(y)].var()
         return kernel
+
+def default_kern_rd(x, xe, y, total_dim = None, ard_kernel = True, fe = None, max_x = 1000, E=0.2):
+    '''
+    Get a default kernel with random decompositions.
+
+    A random graph over ``total_dim`` nodes is drawn with ``get_random_graph``; each
+    clique it returns becomes one scaled Matern-3/2 component acting only on that
+    clique's dimensions. The components are summed in an ``AdditiveKernel`` and
+    wrapped in an outer ``ScaleKernel`` whose outputscale is initialised to the
+    variance of the finite entries of ``y``.
+
+    Parameters
+    ----------
+    x          : numeric inputs; dimensions ``< x.shape[1]`` are treated as numeric
+    xe         : enum inputs (not used here; kept for signature parity with ``default_kern``)
+    y          : targets, only used to initialise the outer outputscale
+    total_dim  : number of graph nodes / feature dimensions; must be given
+                 (the ``None`` default is not handled)
+    ard_kernel : use one lengthscale per active numeric dimension
+    fe         : if not None, every clique gets a single Matern kernel over its dimensions
+    max_x      : maximum number of rows of ``x`` sampled for lengthscale initialisation
+    E          : 0 <= E <= 1 specifies random tree connectivity
+    '''
+    kernels = []
+    # number of rows used for the median-distance heuristic below
+    num_rows = min(x.shape[0], max_x)
+    for clique in get_random_graph(total_dim, E):
+        if fe is None:
+            num_dims  = tuple(dim for dim in clique if dim < x.shape[1])
+            enum_dims = tuple(dim for dim in clique if x.shape[1] <= dim < total_dim)
+            clique_kernels = []
+            if len(num_dims) > 0:
+                ard_num_dims = len(num_dims) if ard_kernel else None
+                num_kernel   = MaternKernel(nu = 1.5, ard_num_dims = ard_num_dims, active_dims = num_dims)
+                # pairwise distances need at least two rows; otherwise keep the default lengthscale
+                if ard_kernel and num_rows > 1:
+                    lscales = num_kernel.lengthscale.detach().clone().view(1, -1)
+                    for dim_no, dim_name in enumerate(num_dims):
+                        # sample ROWS of x (not dimension indices) and use the median
+                        # pairwise distance along this dimension as initial lengthscale
+                        idx = np.random.choice(x.shape[0], num_rows, replace = False)
+                        lscales[0, dim_no] = torch.pdist(x[idx, dim_name].view(-1, 1)).median().clamp(min = 0.02)
+                    num_kernel.lengthscale = lscales
+                clique_kernels.append(num_kernel)
+            if len(enum_dims) > 0:
+                enum_kernel = MaternKernel(nu = 1.5, active_dims = enum_dims)
+                clique_kernels.append(enum_kernel)
+
+            kernel = ScaleKernel(ProductKernel(*clique_kernels), outputscale_prior = GammaPrior(0.5, 0.5))
+        else:
+            active_dims = tuple(clique)
+            # the kernel only sees the active columns, so the number of ARD
+            # lengthscales must be the clique size rather than total_dim
+            ard_num_dims = len(active_dims) if ard_kernel else None
+            kernel = ScaleKernel(MaternKernel(nu = 1.5, ard_num_dims = ard_num_dims, active_dims = active_dims))
+
+        kernels.append(kernel)
+
+    final_kern = ScaleKernel(AdditiveKernel(*kernels), outputscale_prior = GammaPrior(0.5, 0.5))
+    final_kern.outputscale = y[torch.isfinite(y)].var()
+    return final_kern
diff --git a/HEBO/hebo/models/gp/gpy_wgp.py b/HEBO/hebo/models/gp/gpy_wgp.py
@@ -13,7 +13,7 @@
 from ..base_model import BaseModel
 from ..layers import EmbTransform, OneHotTransform
 from ..scalers import TorchMinMaxScaler, TorchStandardScaler
-from ..util import filter_nan
+from ..util import filter_nan, get_random_graph
 
 import GPy
 import torch
@@ -44,6 +44,8 @@ def __init__(self, num_cont, num_enum, num_out, **conf):
         self.warp         = self.conf.get('warp', True)
         self.space        = self.conf.get('space') # DesignSpace
         self.num_restarts = self.conf.get('num_restarts', 10)
+        self.rd           = self.conf.get('rd', False)
+        self.E            = self.conf.get('E', 0.2)
         if self.space is None and self.warp:
             warnings.warn('Space not provided, set warp to False')
             self.warp = False
@@ -84,12 +86,37 @@ def fit(self, Xc : FloatTensor, Xe : LongTensor, y : LongTensor):
         self.fit_scaler(Xc, y)
         X, y = self.trans(Xc, Xe, y)
 
-        k1  = GPy.kern.Linear(X.shape[1],   ARD = False)
-        k2  = GPy.kern.Matern32(X.shape[1], ARD = True)
-        k2.lengthscale = np.std(X, axis = 0).clip(min = 0.02)
-        k2.variance    = 0.5
-        k2.variance.set_prior(GPy.priors.Gamma(0.5, 1), warning = False)
-        kern = k1 + k2
+        if self.rd:
+            cliques = get_random_graph(X.shape[1], self.E)
+
+            # process first clique
+            pair = cliques[0]
+            k1  = GPy.kern.Linear(len(pair), active_dims=pair,   ARD = False)
+            k2  = GPy.kern.Matern32(len(pair), active_dims=pair, ARD = True)
+            k2.lengthscale = np.std(X, axis = 0)[pair]
+            k2.variance    = 0.5
+            k2.variance.set_prior(GPy.priors.Gamma(0.5, 1))
+            kern = k1 + k2
+
+            # process remaining cliques
+            for pair in cliques[1:]:
+                k1  = GPy.kern.Linear(len(pair), active_dims=pair,   ARD = False)
+                k2  = GPy.kern.Matern32(len(pair), active_dims=pair, ARD = True)
+                geo_mean = 1
+                for d in pair:
+                    geo_mean *= np.std(X, axis = 0)[d]
+                k2.lengthscale = geo_mean**(1/len(pair))
+                k2.variance    = 0.5
+                k2.variance.set_prior(GPy.priors.Gamma(0.5, 1))
+                kern += k1 + k2
+        else:
+            k1  = GPy.kern.Linear(X.shape[1],   ARD = False)
+            k2  = GPy.kern.Matern32(X.shape[1], ARD = True)
+            k2.lengthscale = np.std(X, axis = 0).clip(min = 0.02)
+            k2.variance    = 0.5
+            k2.variance.set_prior(GPy.priors.Gamma(0.5, 1), warning = False)
+            kern = k1 + k2
+
         if not self.warp:
             self.gp = GPy.models.GPRegression(X, y, kern)
         else:

diff --git a/HEBO/hebo/models/util.py b/HEBO/hebo/models/util.py
@@ -7,8 +7,13 @@
 # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
 # PARTICULAR PURPOSE. See the MIT License for more details.
 
+import random
+
+import networkx as nx
 import torch
-from torch import nn, FloatTensor, LongTensor
+from disjoint_set import DisjointSet
+from torch import FloatTensor, LongTensor, nn
+
 
 def filter_nan(x : FloatTensor, xe : LongTensor, y : FloatTensor, keep_rule = 'any') -> (FloatTensor, LongTensor, FloatTensor):
     assert x  is None or torch.isfinite(x).all()
@@ -30,3 +35,20 @@ def construct_hidden(dim, num_layers, num_hiddens, act = nn.ReLU()) -> nn.Module
         layers.append(nn.Linear(num_hiddens, num_hiddens))
         layers.append(act)
     return nn.Sequential(*layers)
+
+def get_random_graph(size, E):
+    '''
+    Sample a random acyclic graph over ``size`` nodes and return its maximal cliques.
+
+    Node pairs are drawn uniformly at random; self-loops and pairs that are
+    already connected are rejected, so no cycle is ever closed. Sampling stops
+    once ``min(size - 1, max(int(E * size), 1))`` edges have been added.
+
+    Parameters
+    ----------
+    size : number of nodes in the graph
+    E    : connectivity factor; the requested edge count is ``int(E * size)``,
+           raised to at least 1 and capped at ``size - 1``
+
+    Returns
+    -------
+    list of cliques as produced by ``nx.find_cliques`` on the sampled graph
+    '''
+    graph = nx.empty_graph(size)
+    disjoint_set = DisjointSet()
+    # the target does not change while sampling, so compute it once
+    num_edges = min(size - 1, max(int(E * size), 1))
+    connections_made = 0
+    while connections_made < num_edges:
+        edge_in = random.randint(0, size - 1)
+        edge_out = random.randint(0, size - 1)
+
+        # reject self-loops and pairs already in the same component
+        if edge_in == edge_out or disjoint_set.connected(edge_out, edge_in):
+            continue
+
+        connections_made += 1
+        graph.add_edge(edge_in, edge_out)
+        disjoint_set.union(edge_in, edge_out)
+
+    # must sit outside the loop: returning from inside it stopped after the
+    # first accepted edge and returned None when no edge was requested
+    return list(nx.find_cliques(graph))
diff --git a/HEBO/requirements.txt b/HEBO/requirements.txt
@@ -8,3 +8,4 @@ GPy>=1.9.9
 catboost>=0.24.4
 xgboost
 lightgbm
+disjoint-set
diff --git a/HEBO/test/test_rd.py b/HEBO/test/test_rd.py
@@ -0,0 +1,63 @@
+import pytest
+import numpy as np
+import pandas as pd
+from hebo.optimizers.hebo import HEBO
+from hebo.design_space.design_space import DesignSpace
+
+def obj(x : pd.DataFrame) -> np.ndarray:
+    """Return the sum of squares of columns ``x0``..``x3`` as an (n, 1) array."""
+    total = 0
+    for i in range(4):
+        column = x[f'x{i}'].values.astype(float).reshape(-1, 1)
+        total = total + column ** 2
+    return total
+
+def obj_mixed(x : pd.DataFrame) -> np.ndarray:
+    """Return ``x0**2 + x2**2`` minus 1 where ``x1 == "a"`` plus 1 where ``x3 == "b"``, as an (n, 1) array."""
+    def as_column(values):
+        # float column vector view of a 1-d value array
+        return values.astype(float).reshape(-1, 1)
+
+    quadratic = as_column(x['x0'].values) ** 2 + as_column(x['x2'].values) ** 2
+    reward    = as_column((x['x1'] == "a").values)
+    penalty   = as_column((x['x3'] == "b").values)
+    return quadratic - reward + penalty
+
+
+@pytest.mark.parametrize('model_name', ['gp'])
+@pytest.mark.parametrize('opt_cls', [HEBO], ids = ['hebo'])
+def test_opt_cont(model_name, opt_cls):
+    """Smoke test: run 9 suggest/observe rounds with ``rd`` enabled on an all-numeric space."""
+    space = DesignSpace().parse([
+        {'name' : 'x0', 'type' : 'num', 'lb' : -3, 'ub' : 7},
+        {'name' : 'x1', 'type' : 'num', 'lb' : -3, 'ub' : 7},
+        {'name' : 'x2', 'type' : 'num', 'lb' : -3, 'ub' : 7},
+        {'name' : 'x3', 'type' : 'num', 'lb' : -3, 'ub' : 7}
+        ])
+    model_config = {
+        "rd": True,
+        "E": 0.2
+    }
+    # NOTE(review): rand_sample = 8 with 9 rounds presumably makes the last round
+    # model-driven rather than random -- confirm against HEBO.suggest
+    opt = opt_cls(space, rand_sample = 8, model_name = model_name, model_config=model_config)
+    num_suggest = 1
+    for i in range(9):
+        rec = opt.suggest(n_suggestions = num_suggest)
+        y   = obj(rec)
+        # if a round returns several points, replace the largest objective with inf
+        if y.shape[0] > 1 and i > 0:
+            y[np.argmax(y.reshape(-1))] = np.inf
+        opt.observe(rec, y)
+
+@pytest.mark.parametrize('model_name', ['gp'])
+@pytest.mark.parametrize('opt_cls', [HEBO], ids = ['hebo'])
+def test_opt_mixed(model_name, opt_cls):
+    """Smoke test: run 9 suggest/observe rounds with ``rd`` enabled on a mixed numeric/categorical space."""
+    space = DesignSpace().parse([
+        {'name' : 'x0', 'type' : 'num', 'lb' : -3, 'ub' : 7},
+        {'name' : 'x1', 'type' : 'cat', 'categories' : ['a', 'b', 'c']},
+        {'name' : 'x2', 'type' : 'num', 'lb' : -3, 'ub' : 7},
+        {'name' : 'x3', 'type' : 'cat', 'categories' : ['a', 'b', 'c', 'd']}
+        ])
+    model_config = {
+        "rd": True,
+        "E": 0.2
+    }
+    # NOTE(review): rand_sample = 8 with 9 rounds presumably makes the last round
+    # model-driven rather than random -- confirm against HEBO.suggest
+    opt = opt_cls(space, rand_sample = 8, model_name = model_name, model_config=model_config)
+    num_suggest = 1
+    for i in range(9):
+        rec = opt.suggest(n_suggestions = num_suggest)
+        y   = obj_mixed(rec)
+        # if a round returns several points, replace the largest objective with inf
+        if y.shape[0] > 1 and i > 0:
+            y[np.argmax(y.reshape(-1))] = np.inf
+        opt.observe(rec, y)