[UPDATE]. Clean Tr.
Antoine Grosnit committed Oct 20, 2022
1 parent 462e94e commit 9e8299a
Showing 54 changed files with 672 additions and 456 deletions.
5 changes: 3 additions & 2 deletions combopt/README.md
@@ -16,7 +16,8 @@ Download all submodules

> git submodule update --init --recursive
[Optional] If you plan to use the Antibody design task, install AbsolutNoLib by following the instructions from https://github.com/csi-greifflab/Absolut
[Optional] If you plan to use the Antibody design task, install AbsolutNoLib by following the instructions
from https://github.com/csi-greifflab/Absolut

Create a virtual environment and activate it

@@ -42,7 +43,7 @@ Install the package itself
- Random TSP

### Real-world
- Antibody Design (Needs to download `Absolut!` ~ 39GB) #TODO: indicate how to install absolute
- Antibody Design (Needs to download `Absolut!`)
- RNA Inverse Folding
- EDA Sequence Optimisation (AIG sequence optimisation)
- EDA Sequence and Parameter Optimisation (AIG sequence and parameter optimisation)
2 changes: 1 addition & 1 deletion combopt/comb_opt/acq_optimizers/acq_optimizer_base.py
@@ -43,7 +43,7 @@ def optimize(self,
**kwargs
) -> torch.Tensor:
"""
Function used to optimise the acquisition function. Should return a 2D tensor with shape
Function used to optimize the acquisition function. Should return a 2D tensor with shape
(n_suggestions, n_dims), where n_dims is the dimensionality of x.
If an optimizer does not support return batches of data, this can be handled by imposing with "assert
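
A minimal sketch of the return contract documented in this docstring, a 2D tensor of shape (n_suggestions, n_dims). The class below is illustrative only; the name and the random-search body are assumptions, not the repository's actual optimizer:

import torch

class RandomAcqOptimizer:
    def __init__(self, n_dims: int):
        self.n_dims = n_dims

    def optimize(self, x: torch.Tensor, n_suggestions: int, **kwargs) -> torch.Tensor:
        # A real acquisition optimizer would maximise the acquisition function;
        # here we simply sample random candidates to show the expected output shape.
        out = torch.rand(n_suggestions, self.n_dims, dtype=x.dtype)
        assert out.shape == (n_suggestions, self.n_dims)
        return out
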
@@ -142,7 +142,7 @@ def __init__(self,
self.ga_allow_repeating_x = ga_allow_repeating_x

assert self.search_space.num_nominal + self.search_space.num_ordinal == self.search_space.num_dims, \
'The Categorical GA acq optimiser currently only supports nominal and ordinal variables'
'The Categorical GA acq optimizer currently only supports nominal and ordinal variables'

def optimize(self, x: torch.Tensor,
n_suggestions: int,
@@ -59,7 +59,7 @@ def __init__(self,
self.inverse_mapping = [(self.numeric_dims + self.search_space.nominal_dims).index(i) for i in
range(self.search_space.num_dims)]

# Determine the learning rate used to optimise numeric variables if needed
# Determine the learning rate used to optimize numeric variables if needed
if len(self.numeric_dims) > 0:
if num_lr is None:
if self.search_space.num_disc > 0:
@@ -194,6 +194,8 @@ def _optimize(self,
is_valid = True
else:
tol_ -= 1
if tol_ < 0:
break
if tol_ < 0:
break

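
As an aside, the tolerance counter visible above follows a common resample-until-valid pattern; a hedged, generic sketch (names are illustrative, not the repository's code):

def sample_until_valid(sample, is_valid, tol: int = 100):
    # Keep drawing candidates until one is valid (e.g. unseen) or the budget runs out.
    while tol >= 0:
        x = sample()
        if is_valid(x):
            return x
        tol -= 1
    return None  # tolerance exhausted
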
4 changes: 0 additions & 4 deletions combopt/comb_opt/acq_optimizers/local_search_acq_optimizer.py
@@ -117,8 +117,6 @@ def _optimize(self,
search_space=self.search_space,
tr_manager=tr_manager,
n_points=self.n_random_vertices,
is_numeric=self.is_numeric,
is_mixed=self.is_mixed,
numeric_dims=self.numeric_dims,
discrete_choices=self.discrete_choices,
max_n_perturb_num=self.max_n_perturb_num,
@@ -184,8 +182,6 @@ def _optimize(self,
search_space=self.search_space,
tr_manager=tr_manager,
n_points=1,
is_numeric=self.is_numeric,
is_mixed=self.is_mixed,
numeric_dims=self.numeric_dims,
discrete_choices=self.discrete_choices,
max_n_perturb_num=self.max_n_perturb_num,
11 changes: 8 additions & 3 deletions combopt/comb_opt/factory.py
@@ -208,9 +208,14 @@ def task_factory(task_name: str, dtype: torch.dtype = torch.float32, **kwargs) -

elif task_name == 'antibody_design':
if 'antigen' not in kwargs:
print('Target antigen not specified. Using antigen 1ADQ_A.')
task = CDRH3Design(antigen=kwargs.get('antigen', '1ADQ_A'), cdrh3_length=kwargs.get('cdrh3_length', 11),
num_cpus=kwargs.get('num_cpus', 1), first_cpu=kwargs.get('first_cpu', 0))
print('Target antigen not specified. Using antigen 2DD8_S.')
task = CDRH3Design(
antigen=kwargs.get('antigen', '2DD8_S'),
cdrh3_length=kwargs.get('cdrh3_length', 11),
num_cpus=kwargs.get('num_cpus', 1),
first_cpu=kwargs.get('first_cpu', 0),
absolut_dir=kwargs.get('absolut_dir', None)
)
search_space = search_space_factory('antibody_design', dtype, cdrh3_length=kwargs.get('cdrh3_length', 11))

elif task_name == "rna_inverse_fold":
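
A hedged usage sketch of the updated factory branch above. It assumes comb_opt is importable, that task_factory returns the task together with its search space (the return annotation is truncated above), and that Absolut! lives at the hypothetical path shown:

import torch
from comb_opt.factory import task_factory

task, search_space = task_factory(
    'antibody_design',
    dtype=torch.float32,
    antigen='2DD8_S',                     # default antigen when none is specified
    cdrh3_length=11,
    num_cpus=1,
    first_cpu=0,
    absolut_dir='/path/to/AbsolutNoLib',  # hypothetical Absolut! install location
)
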
2 changes: 1 addition & 1 deletion combopt/comb_opt/models/gp/combo_gp.py
@@ -461,7 +461,7 @@ def cholesky_update(self, constmean: float, log_amp: float, log_beta: torch.Floa
self.gram_mat_update(constmean, log_amp, log_beta, log_likelihood_noise)

eye_mat = torch.diag(self.gram_mat.new_ones(self.gram_mat.size(0)))
for jitter_const in [0, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3]:
for jitter_const in [0, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2]:
chol_jitter = torch.trace(self.gram_mat).item() * jitter_const
try:
# cholesky is lower triangular matrix
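
The loop above is the usual jittered-Cholesky fallback, extended here with one extra rung (1e-2). A generic, self-contained sketch of the pattern (not the repository's code):

import torch

def robust_cholesky(gram: torch.Tensor) -> torch.Tensor:
    eye = torch.eye(gram.size(0), dtype=gram.dtype)
    for jitter_const in [0, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2]:
        # Scale the jitter by the trace so it is relative to the kernel's magnitude.
        jitter = torch.trace(gram).item() * jitter_const
        try:
            return torch.linalg.cholesky(gram + jitter * eye)
        except RuntimeError:
            continue  # not positive definite yet, try a larger jitter
    raise RuntimeError('Cholesky failed even with the largest jitter')
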
2 changes: 1 addition & 1 deletion combopt/comb_opt/optimizers/__init__.py
@@ -14,7 +14,7 @@
from comb_opt.optimizers.casmopolitan import Casmopolitan
from comb_opt.optimizers.cocabo import CoCaBO
from comb_opt.optimizers.combo import COMBO
from comb_opt.optimizers.genetic_algorithm import PymooGeneticAlgorithm
from comb_opt.optimizers.genetic_algorithm import PymooGeneticAlgorithm, GeneticAlgorithm
from comb_opt.optimizers.local_search import LocalSearch
from comb_opt.optimizers.multi_armed_bandit import MultiArmedBandit
from comb_opt.optimizers.optimizer_base import OptimizerBase
6 changes: 4 additions & 2 deletions combopt/comb_opt/optimizers/bocs.py
@@ -27,7 +27,9 @@ class BOCS(BoBase):
@property
def name(self) -> str:
if self.use_tr:
name = f'LR ({self.model_estimator}) - Tr-Based LS acq optim'
name = f'LR ({self.model_estimator}) - Tr-Based SA acq optim'
elif self.model_estimator == "sparse_horseshoe": # Standard BOCS
name = "BOCS"
else:
name = f'BOCS ({self.model_estimator})'
return name
@@ -70,7 +72,7 @@ def __init__(self,
warning_message = 'This is the general form implementation of BOCS (see Appendix A of ' + \
'https://arxiv.org/abs/1806.08838), which differs from the standard implementation ' + \
'for purely binary problems. The differences are: (1) binary variables are ' + \
'represented by their one hot encoding, and (2) SA is used to optimise the acquisition' + \
'represented by their one hot encoding, and (2) SA is used to optimize the acquisition' + \
'in place of SDP.'
warnings.warn(warning_message, category=UserWarning)

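
To illustrate point (1) of the warning above, a binary variable is passed to the model through its one-hot encoding rather than as a single 0/1 value (a sketch of the idea, not the repository's encoding code):

import torch
import torch.nn.functional as F

binary = torch.tensor([0, 1, 1, 0])
one_hot = F.one_hot(binary, num_classes=2)  # shape (4, 2): 0 -> [1, 0], 1 -> [0, 1]
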
4 changes: 3 additions & 1 deletion combopt/comb_opt/optimizers/cocabo.py
@@ -45,6 +45,8 @@ def name(self) -> str:
name = f'GP ({self.model_numeric_kernel_name}) - MAB acq optim'
elif self.is_nominal:
name = f'GP ({self.model_cat_kernel_name}) - MAB acq optim'
else:
raise ValueError()

return name

@@ -54,7 +56,7 @@ def __init__(self,
model_numeric_kernel_name: str = 'mat52',
model_num_kernel_ard: bool = True,
model_num_kernel_lengthscale_constr: Optional[Interval] = None,
model_cat_kernel_name='transformed_overlap',
model_cat_kernel_name='overlap',
model_cat_kernel_ard: bool = True,
model_cat_kernel_lengthscale_constr: Optional[Interval] = None,
model_noise_prior: Optional[Prior] = None,
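
For context on the default kernel swap above ('transformed_overlap' to 'overlap'), one common form of an ARD overlap kernel on categorical inputs is sketched below; the 'transformed' variant roughly exponentiates this similarity. This is an illustration of the idea only, not the library's implementation:

import torch

def overlap_kernel(x1: torch.Tensor, x2: torch.Tensor, lengthscales: torch.Tensor) -> torch.Tensor:
    # x1: (n, d) and x2: (m, d) integer category codes; lengthscales: (d,)
    matches = (x1.unsqueeze(1) == x2.unsqueeze(0)).to(lengthscales.dtype)  # (n, m, d)
    # Lengthscale-weighted fraction of dimensions on which the categories agree.
    return (matches * lengthscales).sum(-1) / lengthscales.sum()
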
2 changes: 1 addition & 1 deletion combopt/comb_opt/optimizers/combo.py
@@ -46,7 +46,7 @@ def __init__(self,
acq_optim_n_greedy_ascent_init: int = 20,
acq_optim_n_spray: int = 10,
acq_optim_max_n_ascent: float = float('inf'),
use_tr: bool = True,
use_tr: bool = False,
tr_restart_acq_name: str = 'lcb',
tr_restart_n_cand: Optional[int] = None,
tr_min_nominal_radius: Optional[Union[int, float]] = None,
10 changes: 7 additions & 3 deletions combopt/comb_opt/optimizers/genetic_algorithm.py
@@ -12,6 +12,10 @@
import numpy as np
import pandas as pd
import torch

from pymoo.config import Config
Config.warnings['not_compiled'] = False

from pymoo.algorithms.base.genetic import GeneticAlgorithm
from pymoo.algorithms.soo.nonconvex.ga import FitnessSurvival
from pymoo.algorithms.soo.nonconvex.ga import comp_by_cv_and_fitness
@@ -222,9 +226,9 @@ class CategoricalGeneticAlgorithm(OptimizerBase):
@property
def name(self) -> str:
if self.tr_manager is not None:
name = 'Tr-based Categorical Genetic Algorithm'
name = 'Tr-based Genetic Algorithm'
else:
name = 'Categorical Genetic Algorithm'
name = 'Genetic Algorithm'
return name

def __init__(self,
@@ -603,7 +607,7 @@ def _mutate(self, x: torch.Tensor) -> torch.Tensor:

class GeneticAlgorithm(OptimizerBase):
"""
A Genetic Algorithm (GA) optimiser that determines which exact GA algorithm to use based on the variable types in
A Genetic Algorithm (GA) optimizer that determines which exact GA algorithm to use based on the variable types in
the search space. If the search space contains only nominal variables, an elitist GA algorithm will be used. If the
search space contains any other variable type combinations, the Mixed Variable GA from pymoo will be used (see
https://pymoo.org/customization/mixed.html). On purely combinatorial problems, the elitist GA algorithm can
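
A hedged sketch of the dispatch rule this docstring describes (the function name and return values are illustrative, not the repository's exact API):

def pick_ga_flavour(search_space) -> str:
    # Purely combinatorial search space (nominal/ordinal only): elitist categorical GA.
    if search_space.num_nominal + search_space.num_ordinal == search_space.num_dims:
        return 'categorical_elitist_ga'
    # Any other mix of variable types: pymoo's mixed-variable GA.
    return 'pymoo_mixed_variable_ga'
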
8 changes: 6 additions & 2 deletions combopt/comb_opt/optimizers/mix_and_match/__init__.py
@@ -8,13 +8,17 @@
# PARTICULAR PURPOSE. See the MIT License for more details.

from comb_opt.optimizers.mix_and_match.gp_diff_ker_ga_acq_optim import GpDiffusionGaAcqOptim
from comb_opt.optimizers.mix_and_match.gp_diff_ker_is_acq_optim import GpDiffusionIsAcqOptim
from comb_opt.optimizers.mix_and_match.gp_diff_ker_sa_acq_optim import GpDiffusionSaAcqOptim
from comb_opt.optimizers.mix_and_match.gp_diff_ker_is_acq_optim import GpDiffusionTrLsAcqOptim
from comb_opt.optimizers.mix_and_match.gp_o_ker_ga_acq_optim import GpOGaAcqOptim
from comb_opt.optimizers.mix_and_match.gp_o_ker_is_acq_optim import GpOIsAcqOptim
from comb_opt.optimizers.mix_and_match.gp_o_ker_ls_acq_optim import GpOLsAcqOptim
from comb_opt.optimizers.mix_and_match.gp_o_ker_sa_acq_optim import GpOSaAcqOptim
from comb_opt.optimizers.mix_and_match.gp_ssk_ker_ls_acq_optim import GpSskLsAcqOptim
from comb_opt.optimizers.mix_and_match.gp_ssk_ker_sa_acq_optim import GpSskSaAcqOptim
from comb_opt.optimizers.mix_and_match.gp_to_ker_ls_acq_optim import GpToLsAcqOptim
from comb_opt.optimizers.mix_and_match.gp_to_ker_ga_acq_optim import GpToGaAcqOptim
from comb_opt.optimizers.mix_and_match.gp_to_ker_sa_acq_optim import GpToSaAcqOptim
from comb_opt.optimizers.mix_and_match.lr_ga_acq_optim import LrGaAcqOptim
from comb_opt.optimizers.mix_and_match.lr_ls_acq_optim import LrLsAcqOptim
from comb_opt.optimizers.mix_and_match.lr_is_acq_optim import LrIsAcqOptim
from comb_opt.optimizers.mix_and_match.lr_ls_acq_optim import LrLsAcqOptim
@@ -48,7 +48,7 @@ def __init__(self,
acq_optim_ga_num_elite: int = 10,
acq_optim_ga_store_x: bool = False,
acq_optim_ga_allow_repeating_x: bool = True,
use_tr: bool = True,
use_tr: bool = False,
tr_restart_acq_name: str = 'lcb',
tr_restart_n_cand: Optional[int] = None,
tr_min_nominal_radius: Optional[Union[int, float]] = None,
@@ -22,7 +22,7 @@
from comb_opt.utils.graph_utils import laplacian_eigen_decomposition


class GpDiffusionTrLsAcqOptim(BoBase):
class GpDiffusionIsAcqOptim(BoBase):

@property
def name(self) -> str:
@@ -48,7 +48,7 @@ def __init__(self,
acq_optim_num_optimizer: str = 'sgd',
acq_optim_num_lr: Optional[float] = 1e-3,
acq_optim_nominal_tol: int = 100,
use_tr: bool = True,
use_tr: bool = False,
tr_restart_acq_name: str = 'lcb',
tr_restart_n_cand: Optional[int] = None,
tr_min_nominal_radius: Optional[Union[int, float]] = None,
@@ -168,6 +168,6 @@ def __init__(self,

self.use_tr = use_tr

super(GpDiffusionTrLsAcqOptim, self).__init__(search_space, n_init, model, acq_func, acq_optim, tr_manager,
super(GpDiffusionIsAcqOptim, self).__init__(search_space, n_init, model, acq_func, acq_optim, tr_manager,
dtype,
device)
@@ -57,7 +57,7 @@ def __init__(self,
acq_optim_ga_num_elite: int = 10,
acq_optim_ga_store_x: bool = False,
acq_optim_ga_allow_repeating_x: bool = True,
use_tr: bool = True,
use_tr: bool = False,
tr_restart_acq_name: str = 'lcb',
tr_restart_n_cand: Optional[int] = None,
tr_min_num_radius: Optional[Union[int, float]] = None,
@@ -58,7 +58,7 @@ def __init__(self,
acq_optim_num_optimizer: str = 'sgd',
acq_optim_num_lr: Optional[float] = 1e-3,
acq_optim_nominal_tol: int = 100,
use_tr: bool = True,
use_tr: bool = False,
tr_restart_acq_name: str = 'lcb',
tr_restart_n_cand: Optional[int] = None,
tr_min_num_radius: Optional[Union[int, float]] = None,
@@ -142,7 +142,7 @@ def __init__(self,
numeric_kernel_name='mat52',
numeric_kernel_use_ard=model_num_kernel_ard,
numeric_lengthscale_constraint=model_num_kernel_lengthscale_constr,
nominal_kernel_name='transformed_overlap',
nominal_kernel_name='overlap',
nominal_kernel_use_ard=model_cat_kernel_ard,
nominal_lengthscale_constraint=model_cat_kernel_lengthscale_constr)

@@ -56,7 +56,7 @@ def __init__(self,
acq_optim_n_greedy_ascent_init: int = 20,
acq_optim_n_spray: int = 10,
acq_optim_max_n_ascent: float = float('inf'),
use_tr: bool = True,
use_tr: bool = False,
tr_restart_acq_name: str = 'lcb',
tr_restart_n_cand: Optional[int] = None,
tr_min_num_radius: Optional[Union[int, float]] = None,
@@ -54,7 +54,7 @@ def __init__(self,
acq_optim_num_iter: int = 200,
acq_optim_init_temp: int = 1,
acq_optim_tolerance: int = 100,
use_tr: bool = True,
use_tr: bool = False,
tr_restart_acq_name: str = 'lcb',
tr_restart_n_cand: Optional[int] = None,
tr_min_num_radius: Optional[Union[int, float]] = None,
@@ -57,7 +57,7 @@ def __init__(self,
acq_optim_ga_num_elite: int = 10,
acq_optim_ga_store_x: bool = False,
acq_optim_ga_allow_repeating_x: bool = True,
use_tr: bool = True,
use_tr: bool = False,
tr_restart_acq_name: str = 'lcb',
tr_restart_n_cand: Optional[int] = None,
tr_min_num_radius: Optional[Union[int, float]] = None,
@@ -56,7 +56,7 @@ def __init__(self,
acq_optim_n_greedy_ascent_init: int = 20,
acq_optim_n_spray: int = 10,
acq_optim_max_n_ascent: float = float('inf'),
use_tr: bool = True,
use_tr: bool = False,
tr_restart_acq_name: str = 'lcb',
tr_restart_n_cand: Optional[int] = None,
tr_min_num_radius: Optional[Union[int, float]] = None,
@@ -54,7 +54,7 @@ def __init__(self,
acq_optim_num_iter: int = 200,
acq_optim_init_temp: int = 1,
acq_optim_tolerance: int = 100,
use_tr: bool = True,
use_tr: bool = False,
tr_restart_acq_name: str = 'lcb',
tr_restart_n_cand: Optional[int] = None,
tr_min_num_radius: Optional[Union[int, float]] = None,
11 changes: 11 additions & 0 deletions combopt/comb_opt/optimizers/simulated_annealing.py
@@ -286,3 +286,14 @@ def sample_unseen_nominal_neighbour(self, x_nominal: torch.Tensor):
x_nominal_neighbour = x_nominal_neighbour.view(-1)

return x_nominal_neighbour

def fill_field_after_pkl_load(self, search_space: SearchSpace, **kwargs):
""" As some elements are not pickled, need to reinstantiate them """
self.search_space = search_space

def __getstate__(self):
d = dict(self.__dict__)
to_remove = ["search_space"] # fields to remove when pickling this object
for attr in to_remove:
del d[attr]
return d
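
The two methods added above follow a standard pickle-exclusion pattern: drop a field in __getstate__ and re-attach it after loading. A self-contained toy illustration (not the repository's class):

import pickle

class Annealer:
    def __init__(self, search_space):
        self.search_space = search_space   # assumed expensive or impossible to pickle
        self.temperature = 1.0

    def __getstate__(self):
        d = dict(self.__dict__)
        del d['search_space']              # stripped when pickling
        return d

    def fill_field_after_pkl_load(self, search_space, **kwargs):
        self.search_space = search_space   # re-attach after unpickling

space = {'dims': 3}                        # placeholder search space
restored = pickle.loads(pickle.dumps(Annealer(space)))
restored.fill_field_after_pkl_load(space)
assert restored.temperature == 1.0 and restored.search_space == space
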
12 changes: 9 additions & 3 deletions combopt/comb_opt/tasks/antibody_design/cdrh3_design.py
@@ -9,6 +9,7 @@

import os
import subprocess
from typing import Optional

import numpy as np
import pandas as pd
@@ -24,7 +25,8 @@ class CDRH3Design(TaskBase):
def name(self) -> str:
return f'{self.antigen} Antibody Design'

def __init__(self, antigen: str = '1ADQ_A', cdrh3_length: int = 11, num_cpus: int = 10, first_cpu: int = 0):
def __init__(self, antigen: str = '1ADQ_A', cdrh3_length: int = 11, num_cpus: int = 10, first_cpu: int = 0,
absolut_dir: Optional[str] = None):
super(CDRH3Design, self).__init__()
self.num_cpus = num_cpus
self.first_cpu = first_cpu
@@ -35,15 +37,19 @@ def __init__(self, antigen: str = '1ADQ_A', cdrh3_length: int = 11, num_cpus: in
self.amino_acid_to_idx = {aa: i for i, aa in enumerate(self.amino_acids)}
self.idx_to_amino_acid = {value: key for key, value in self.amino_acid_to_idx.items()}

self.AbsolutNoLib_dir = get_AbsolutNoLib_dir()
self.AbsolutNoLib_dir = get_AbsolutNoLib_dir(absolut_dir)
self.valid_antigens = get_valid_antigens(self.AbsolutNoLib_dir)
self.need_to_check_precomputed_antigen_structure = True
assert antigen in self.valid_antigens, f'Specified antigen is not valid. Please choose of from: \n\n {self.valid_antigens}'
download_precomputed_antigen_structure(self.AbsolutNoLib_dir, self.antigen)

def evaluate(self, x: pd.DataFrame) -> np.ndarray:

assert os.path.exists(os.path.join(self.AbsolutNoLib_dir, 'antigen_data', f'{self.antigen}'))

if self.need_to_check_precomputed_antigen_structure:
download_precomputed_antigen_structure(self.AbsolutNoLib_dir, self.antigen)
self.need_to_check_precomputed_antigen_structure = False

# Change working directory
current_dir = os.getcwd()
os.chdir(os.path.join(self.AbsolutNoLib_dir, 'antigen_data', f'{self.antigen}'))
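
A hedged usage sketch of the updated constructor (import path inferred from the file location; the argument values and the Absolut! path are illustrative):

from comb_opt.tasks.antibody_design.cdrh3_design import CDRH3Design

task = CDRH3Design(
    antigen='1ADQ_A',
    cdrh3_length=11,
    num_cpus=4,
    first_cpu=0,
    absolut_dir='/path/to/AbsolutNoLib',   # where AbsolutNoLib is installed
)
# The precomputed antigen structure is now fetched lazily, on the first call to
# task.evaluate(...), instead of eagerly in __init__.
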