# run_ml-20m.sh (executable file) — forked from FeiSun/BERT4Rec
# Experiment settings for the ml-20m run. The values below are used to build
# the command-line flags of gen_data_fin.py and run.py later in this script.

# Checkpoint root; run.py receives "${CKPT_DIR}/${dataset_name}" as
# --checkpointDir.
# NOTE(review): "/path/BERT4Rec" looks like a placeholder — confirm/adjust
# before running.
CKPT_DIR='/path/BERT4Rec'
dataset_name='ml-20m'

# Forwarded to both gen_data_fin.py and run.py.
max_seq_length=200
max_predictions_per_seq=20

# Forwarded to gen_data_fin.py only.
masked_lm_prob=0.2
mask_prob=1.0
prop_sliding_window=0.5
dupe_factor=10
pool_size=10

# Forwarded to run.py only. `dim` is also part of the BERT config file name
# (bert_config_<dataset_name>_<dim>.json) and is appended to the --signature
# value given to run.py.
dim=64
batch_size=256
num_train_steps=400000

# Tag encoding the data-generation settings. It is passed as --signature and
# is part of the ./data/<dataset_name><signature>.* file names read by run.py.
# Assembled piece by piece; the final string is
#   -mp<mask_prob>-sw<prop_sliding_window>-mlp<masked_lm_prob>-df<dupe_factor>-mpps<max_predictions_per_seq>-msl<max_seq_length>
signature="-mp${mask_prob}"
signature="${signature}-sw${prop_sliding_window}"
signature="${signature}-mlp${masked_lm_prob}"
signature="${signature}-df${dupe_factor}"
signature="${signature}-mpps${max_predictions_per_seq}"
signature="${signature}-msl${max_seq_length}"
# Step 1 — run gen_data_fin.py with the data-generation settings.
# NOTE(review): presumably this produces the ./data/${dataset_name}${signature}.*
# files that run.py reads in step 2 — confirm against gen_data_fin.py.
#
# The last flag must not end with a line-continuation backslash: a trailing
# "\" there splices the run.py command line onto this one, so run.py never
# runs on its own and its entire command is handed to gen_data_fin.py as
# extra arguments.
# If this step fails, stop with its exit status instead of starting step 2.
python -u gen_data_fin.py \
  --dataset_name="${dataset_name}" \
  --max_seq_length="${max_seq_length}" \
  --max_predictions_per_seq="${max_predictions_per_seq}" \
  --mask_prob="${mask_prob}" \
  --dupe_factor="${dupe_factor}" \
  --masked_lm_prob="${masked_lm_prob}" \
  --prop_sliding_window="${prop_sliding_window}" \
  --signature="${signature}" \
  --pool_size="${pool_size}" || exit "$?"

# Step 2 — run run.py with both --do_train and --do_eval set to True.
# Shared prefix of the four input files passed to run.py.
data_prefix="./data/${dataset_name}${signature}"

# NOTE(review): CUDA_VISIBLE_DEVICES=4 hard-codes which CUDA device this
# process may see — adjust for the machine the script runs on.
CUDA_VISIBLE_DEVICES=4 python -u run.py \
  --train_input_file="${data_prefix}.train.tfrecord" \
  --test_input_file="${data_prefix}.test.tfrecord" \
  --vocab_filename="${data_prefix}.vocab" \
  --user_history_filename="${data_prefix}.his" \
  --checkpointDir="${CKPT_DIR}/${dataset_name}" \
  --signature="${signature}-${dim}" \
  --do_train=True \
  --do_eval=True \
  --bert_config_file="./bert_train/bert_config_${dataset_name}_${dim}.json" \
  --batch_size="${batch_size}" \
  --max_seq_length="${max_seq_length}" \
  --max_predictions_per_seq="${max_predictions_per_seq}" \
  --num_train_steps="${num_train_steps}" \
  --num_warmup_steps=100 \
  --learning_rate=1e-4