forked from huawei-noah/HEBO
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain-chem.sh
94 lines (77 loc) · 3.91 KB
/
train-chem.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#!/bin/bash
# for this task we use the BO script of the molecule design task to get the training done
# Meta flags
gpu="--gpu" # change to "" if no GPU is to be used
train_data_path="weighted_retraining/data/chem/zinc/orig_model/tensors_train"
val_data_path="weighted_retraining/data/chem/zinc/orig_model/tensors_val"
vocab_file_path="weighted_retraining/data/chem/zinc/orig_model/vocab.txt"
property_file_path="weighted_retraining/data/chem/zinc/orig_model/pen_logP_all.pkl"
# Weighted retraining parameters ---
k="1e-3"
weight_type="rank"
#-- Choose dimension of the latent space --#
latent_dim=56
#-- Choose whether to use target prediction --#
predict_target=0
beta_target_pred_loss=10
target_predictor_hdims='[128,128]'
if ((predict_target == 0)); then predict_target=''; else predict_target='--predict_target'; fi
#-- Choose the metric loss you want to use -- (default contrastive: 2, default triplet: 3) --#
metric_loss_ind=2
metric_losses=('' 'contrastive' 'contrastive' 'triplet' 'triplet' 'triplet' 'log_ratio')
metric_loss_kws=("" "{'threshold':.05,'hard':True}" "{'threshold':.1}" "{'threshold':.1,'soft':True}"
"{'threshold':.2,'soft':True}" "{'threshold':.1,'soft':True,'eta':0.05}" "{}")
save_model_ids=('vanilla' 'contrastive_hard_05' 'contrastive' 'triplet' 'triplet_02' 'triplet_eta_005' 'log_ratio')
if ((metric_loss_ind == 0)); then metric_loss=''; else metric_loss="--metric_loss ${metric_losses[$metric_loss_ind]}"; fi
if ((metric_loss_ind == 0)); then metric_loss_kw=''; else metric_loss_kw="--metric_loss_kw ${metric_loss_kws[$metric_loss_ind]}"; fi
batch_size_s=(32 256 256 1024 1024 1024 128)
batch_size=${batch_size_s[$metric_loss_ind]}
save_model_id=${save_model_ids[$metric_loss_ind]}
save_model_path="./weighted_retraining/assets/pretrained_models/chem_$save_model_id/chem.ckpt"
beta_metric_loss=1
#-- Start from pretrained vanilla JTVAE
pretrained_model_id='vanilla'
pretrained_model_file="./weighted_retraining/assets/pretrained_models/chem_$pretrained_model_id/chem.ckpt"
#-- Choose number of training epochs
n_init_retrain_epochs=.1
# KL coef in ELBO
beta_final=0.001
#-- Run on the following seeds (repeat so that it restarts - not from scratch - after a potential crash)
seed_array=(0)
#-- Choose on which GPU to run --#
export CUDA_VISIBLE_DEVICES=0
expt_index=0 # Track experiments
for seed in "${seed_array[@]}"; do
# Increment experiment index
expt_index=$((expt_index + 1))
# Break loop if using slurm and it's not the right task
if [[ -n "${SLURM_ARRAY_TASK_ID}" ]] && [[ "${SLURM_ARRAY_TASK_ID}" != "$expt_index" ]]; then
continue
fi
# Echo info of task to be executed
echo "r=${r} k=${k} seed=${seed}"
# Run command
cmd="python ./weighted_retraining/weighted_retraining/robust_opt_scripts/robust_opt_chem.py \
--seed=$seed $gpu \
--query_budget=1 \
--retraining_frequency=50 \
--pretrained_model_file=$pretrained_model_file \
--pretrained_model_id $pretrained_model_id \
--batch_size $batch_size \
--lso_strategy=opt \
--train_path=$train_data_path \
--val_path=$val_data_path \
--vocab_file=$vocab_file_path \
--property_file=$property_file_path \
--n_retrain_epochs=.1 \
--latent_dim $latent_dim \
$predict_target --beta_target_pred_loss $beta_target_pred_loss \
--target_predictor_hdims $target_predictor_hdims $metric_loss $metric_loss_kw \
--beta_metric_loss $beta_metric_loss --beta_final $beta_final \
--n_init_retrain_epochs="$n_init_retrain_epochs" \
--n_best_points=2000 --n_rand_points=8000 \
--n_inducing_points=500 --save_model_path $save_model_path --train-only \
--weight_type=$weight_type --rank_weight_k=$k --use_pretrained"
$cmd
echo $cmd
done