Skip to content

Commit

Permalink
applied LoRA to DeBERTa and RoBERTa
Browse files Browse the repository at this point in the history
  • Loading branch information
edwardjhu committed Sep 20, 2021
1 parent ff46e01 commit b80d0c4
Show file tree
Hide file tree
Showing 53 changed files with 1,705 additions and 375 deletions.
331 changes: 76 additions & 255 deletions NLU/README.md

Large diffs are not rendered by default.

31 changes: 31 additions & 0 deletions NLU/deberta_v2_xxlarge_cola.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env bash
# Fine-tune DeBERTa-v2 XXLarge on GLUE CoLA with LoRA (r=16, alpha=32).
# Run from the NLU/ directory; expects 8 GPUs on one node.
set -euo pipefail

export num_gpus=8
# Fixed-size cuBLAS workspace for reproducible results; see
# https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility
export CUBLAS_WORKSPACE_CONFIG=":16:8"
export PYTHONHASHSEED=0
export output_dir="./cola"
python -m torch.distributed.launch --nproc_per_node="$num_gpus" \
examples/text-classification/run_glue.py \
--model_name_or_path microsoft/deberta-v2-xxlarge \
--task_name cola \
--do_train \
--do_eval \
--max_seq_length 64 \
--per_device_train_batch_size 4 \
--learning_rate 1.3e-4 \
--num_train_epochs 10 \
--output_dir "$output_dir/model" \
--overwrite_output_dir \
--logging_steps 10 \
--logging_dir "$output_dir/log" \
--fp16 \
--evaluation_strategy steps \
--eval_steps 20 \
--save_strategy steps \
--save_steps 20 \
--warmup_steps 100 \
--cls_dropout 0.1 \
--apply_lora \
--lora_r 16 \
--lora_alpha 32 \
--seed 0 \
--weight_decay 0 \
--use_deterministic_algorithms
31 changes: 31 additions & 0 deletions NLU/deberta_v2_xxlarge_mnli.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env bash
# Fine-tune DeBERTa-v2 XXLarge on GLUE MNLI with LoRA (r=16, alpha=32).
# The resulting LoRA checkpoint is reused as initialization by the
# MRPC/RTE/STS-B scripts (via their --lora_path flag).
# Run from the NLU/ directory; expects 8 GPUs on one node.
set -euo pipefail

export num_gpus=8
# Fixed-size cuBLAS workspace for reproducible results; see
# https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility
export CUBLAS_WORKSPACE_CONFIG=":16:8"
export PYTHONHASHSEED=0
export output_dir="./mnli"
python -m torch.distributed.launch --nproc_per_node="$num_gpus" \
examples/text-classification/run_glue.py \
--model_name_or_path microsoft/deberta-v2-xxlarge \
--task_name mnli \
--do_train \
--do_eval \
--max_seq_length 256 \
--per_device_train_batch_size 8 \
--learning_rate 1e-4 \
--num_train_epochs 5 \
--output_dir "$output_dir/model" \
--overwrite_output_dir \
--logging_steps 10 \
--logging_dir "$output_dir/log" \
--fp16 \
--evaluation_strategy steps \
--eval_steps 500 \
--save_strategy steps \
--save_steps 500 \
--warmup_steps 1000 \
--cls_dropout 0.15 \
--apply_lora \
--lora_r 16 \
--lora_alpha 32 \
--seed 0 \
--weight_decay 0 \
--use_deterministic_algorithms
32 changes: 32 additions & 0 deletions NLU/deberta_v2_xxlarge_mrpc.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/usr/bin/env bash
# Fine-tune DeBERTa-v2 XXLarge on GLUE MRPC with LoRA (r=16, alpha=32),
# warm-starting the LoRA weights from an MNLI run (--lora_path below),
# so run the MNLI script first.
# Run from the NLU/ directory; expects 8 GPUs on one node.
set -euo pipefail

export num_gpus=8
# Fixed-size cuBLAS workspace for reproducible results; see
# https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility
export CUBLAS_WORKSPACE_CONFIG=":16:8"
export PYTHONHASHSEED=0
export output_dir="./mrpc"
python -m torch.distributed.launch --nproc_per_node="$num_gpus" \
examples/text-classification/run_glue.py \
--model_name_or_path microsoft/deberta-v2-xxlarge \
--lora_path mnli/pytorch_model_lora.bin \
--task_name mrpc \
--do_train \
--do_eval \
--max_seq_length 128 \
--per_device_train_batch_size 32 \
--learning_rate 2e-4 \
--num_train_epochs 30 \
--output_dir "$output_dir/model" \
--overwrite_output_dir \
--logging_steps 10 \
--logging_dir "$output_dir/log" \
--fp16 \
--evaluation_strategy steps \
--eval_steps 10 \
--save_strategy steps \
--save_steps 10 \
--warmup_ratio 0.1 \
--cls_dropout 0 \
--apply_lora \
--lora_r 16 \
--lora_alpha 32 \
--seed 0 \
--weight_decay 0.01 \
--use_deterministic_algorithms
31 changes: 31 additions & 0 deletions NLU/deberta_v2_xxlarge_qnli.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env bash
# Fine-tune DeBERTa-v2 XXLarge on GLUE QNLI with LoRA (r=16, alpha=32).
# Run from the NLU/ directory; expects 8 GPUs on one node.
set -euo pipefail

export num_gpus=8
# Fixed-size cuBLAS workspace for reproducible results; see
# https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility
export CUBLAS_WORKSPACE_CONFIG=":16:8"
export PYTHONHASHSEED=0
export output_dir="./qnli"
python -m torch.distributed.launch --nproc_per_node="$num_gpus" \
examples/text-classification/run_glue.py \
--model_name_or_path microsoft/deberta-v2-xxlarge \
--task_name qnli \
--do_train \
--do_eval \
--max_seq_length 512 \
--per_device_train_batch_size 6 \
--learning_rate 1e-4 \
--num_train_epochs 8 \
--output_dir "$output_dir/model" \
--overwrite_output_dir \
--logging_steps 10 \
--logging_dir "$output_dir/log" \
--fp16 \
--evaluation_strategy steps \
--eval_steps 500 \
--save_strategy steps \
--save_steps 500 \
--warmup_steps 500 \
--cls_dropout 0.1 \
--apply_lora \
--lora_r 16 \
--lora_alpha 32 \
--seed 0 \
--weight_decay 0.01 \
--use_deterministic_algorithms
31 changes: 31 additions & 0 deletions NLU/deberta_v2_xxlarge_qqp.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env bash
# Fine-tune DeBERTa-v2 XXLarge on GLUE QQP with LoRA (r=16, alpha=32).
# Run from the NLU/ directory; expects 8 GPUs on one node.
set -euo pipefail

export num_gpus=8
# Fixed-size cuBLAS workspace for reproducible results; see
# https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility
export CUBLAS_WORKSPACE_CONFIG=":16:8"
export PYTHONHASHSEED=0
export output_dir="./qqp"
python -m torch.distributed.launch --nproc_per_node="$num_gpus" \
examples/text-classification/run_glue.py \
--model_name_or_path microsoft/deberta-v2-xxlarge \
--task_name qqp \
--do_train \
--do_eval \
--max_seq_length 320 \
--per_device_train_batch_size 8 \
--learning_rate 1e-4 \
--num_train_epochs 11 \
--output_dir "$output_dir/model" \
--overwrite_output_dir \
--logging_steps 10 \
--logging_dir "$output_dir/log" \
--fp16 \
--evaluation_strategy steps \
--eval_steps 500 \
--save_strategy steps \
--save_steps 500 \
--warmup_steps 10000 \
--cls_dropout 0.2 \
--apply_lora \
--lora_r 16 \
--lora_alpha 32 \
--seed 0 \
--weight_decay 0.01 \
--use_deterministic_algorithms
32 changes: 32 additions & 0 deletions NLU/deberta_v2_xxlarge_rte.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/usr/bin/env bash
# Fine-tune DeBERTa-v2 XXLarge on GLUE RTE with LoRA (r=16, alpha=32),
# warm-starting the LoRA weights from an MNLI run (--lora_path below),
# so run the MNLI script first.
# Run from the NLU/ directory; expects 8 GPUs on one node.
set -euo pipefail

export num_gpus=8
# Fixed-size cuBLAS workspace for reproducible results; see
# https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility
export CUBLAS_WORKSPACE_CONFIG=":16:8"
export PYTHONHASHSEED=0
export output_dir="./rte"
python -m torch.distributed.launch --nproc_per_node="$num_gpus" \
examples/text-classification/run_glue.py \
--model_name_or_path microsoft/deberta-v2-xxlarge \
--lora_path mnli/pytorch_model_lora.bin \
--task_name rte \
--do_train \
--do_eval \
--max_seq_length 320 \
--per_device_train_batch_size 4 \
--learning_rate 2.6e-4 \
--num_train_epochs 11 \
--output_dir "$output_dir/model" \
--overwrite_output_dir \
--logging_steps 10 \
--logging_dir "$output_dir/log" \
--fp16 \
--evaluation_strategy steps \
--eval_steps 20 \
--save_strategy steps \
--save_steps 20 \
--warmup_steps 50 \
--cls_dropout 0.2 \
--apply_lora \
--lora_r 16 \
--lora_alpha 32 \
--seed 0 \
--weight_decay 0.01 \
--use_deterministic_algorithms
31 changes: 31 additions & 0 deletions NLU/deberta_v2_xxlarge_sst2.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env bash
# Fine-tune DeBERTa-v2 XXLarge on GLUE SST-2 with LoRA (r=16, alpha=32).
# Run from the NLU/ directory; expects 8 GPUs on one node.
set -euo pipefail

export num_gpus=8
# Fixed-size cuBLAS workspace for reproducible results; see
# https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility
export CUBLAS_WORKSPACE_CONFIG=":16:8"
export PYTHONHASHSEED=0
export output_dir="./sst2"
python -m torch.distributed.launch --nproc_per_node="$num_gpus" \
examples/text-classification/run_glue.py \
--model_name_or_path microsoft/deberta-v2-xxlarge \
--task_name sst2 \
--do_train \
--do_eval \
--max_seq_length 128 \
--per_device_train_batch_size 8 \
--learning_rate 6e-5 \
--num_train_epochs 16 \
--output_dir "$output_dir/model" \
--overwrite_output_dir \
--logging_steps 10 \
--logging_dir "$output_dir/log" \
--fp16 \
--evaluation_strategy steps \
--eval_steps 100 \
--save_strategy steps \
--save_steps 100 \
--warmup_steps 1000 \
--cls_dropout 0 \
--apply_lora \
--lora_r 16 \
--lora_alpha 32 \
--seed 0 \
--weight_decay 0.01 \
--use_deterministic_algorithms
32 changes: 32 additions & 0 deletions NLU/deberta_v2_xxlarge_stsb.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/usr/bin/env bash
# Fine-tune DeBERTa-v2 XXLarge on GLUE STS-B with LoRA (r=16, alpha=32),
# warm-starting the LoRA weights from an MNLI run (--lora_path below),
# so run the MNLI script first.
# Run from the NLU/ directory; expects 8 GPUs on one node.
set -euo pipefail

export num_gpus=8
# Fixed-size cuBLAS workspace for reproducible results; see
# https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility
export CUBLAS_WORKSPACE_CONFIG=":16:8"
export PYTHONHASHSEED=0
export output_dir="./stsb"
python -m torch.distributed.launch --nproc_per_node="$num_gpus" \
examples/text-classification/run_glue.py \
--model_name_or_path microsoft/deberta-v2-xxlarge \
--lora_path mnli/pytorch_model_lora.bin \
--task_name stsb \
--do_train \
--do_eval \
--max_seq_length 128 \
--per_device_train_batch_size 4 \
--learning_rate 2e-4 \
--num_train_epochs 10 \
--output_dir "$output_dir/model" \
--overwrite_output_dir \
--logging_steps 10 \
--logging_dir "$output_dir/log" \
--fp16 \
--evaluation_strategy steps \
--eval_steps 50 \
--save_strategy steps \
--save_steps 50 \
--warmup_steps 100 \
--cls_dropout 0.2 \
--apply_lora \
--lora_r 16 \
--lora_alpha 32 \
--seed 0 \
--weight_decay 0.1 \
--use_deterministic_algorithms
25 changes: 25 additions & 0 deletions NLU/ds_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
  "fp16": {
    "enabled": true,
    "initial_scale_power": 12
  },
  "zero_optimization": {
    "stage": 2,
    "reduce_bucket_size": 5e7,
    "allgather_bucket_size": 1.25e9,
    "overlap_comm": true,
    "contiguous_gradients": true
  },
  "zero_allow_untested_optimizer": true
}

106 changes: 106 additions & 0 deletions NLU/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
name: NLU
channels:
- pytorch
- nvidia
- defaults
dependencies:
- _libgcc_mutex=0.1=main
- _openmp_mutex=4.5=1_gnu
- blas=1.0=mkl
- bzip2=1.0.8=h7b6447c_0
- ca-certificates=2021.7.5=h06a4308_1
- certifi=2021.5.30=py37h06a4308_0
- cudatoolkit=11.1.74=h6bb024c_0
- ffmpeg=4.3=hf484d3e_0
- freetype=2.10.4=h5ab3b9f_0
- gmp=6.2.1=h2531618_2
- gnutls=3.6.15=he1e5248_0
- intel-openmp=2021.2.0=h06a4308_610
- jpeg=9b=h024ee3a_2
- lame=3.100=h7b6447c_0
- lcms2=2.12=h3be6417_0
- ld_impl_linux-64=2.35.1=h7274673_9
- libffi=3.3=he6710b0_2
- libgcc-ng=9.3.0=h5101ec6_17
- libgomp=9.3.0=h5101ec6_17
- libiconv=1.15=h63c8f33_5
- libidn2=2.3.1=h27cfd23_0
- libpng=1.6.37=hbc83047_0
- libstdcxx-ng=9.3.0=hd4cf53a_17
- libtasn1=4.16.0=h27cfd23_0
- libtiff=4.2.0=h85742a9_0
- libunistring=0.9.10=h27cfd23_0
- libuv=1.40.0=h7b6447c_0
- libwebp-base=1.2.0=h27cfd23_0
- lz4-c=1.9.3=h2531618_0
- mkl=2021.2.0=h06a4308_296
- mkl-service=2.4.0=py37h7f8727e_0
- mkl_fft=1.3.0=py37h42c9631_2
- mkl_random=1.2.1=py37ha9443f7_2
- ncurses=6.2=he6710b0_1
- nettle=3.7.3=hbbd107a_1
- numpy=1.20.2=py37h2d18471_0
- numpy-base=1.20.2=py37hfae3a4d_0
- olefile=0.46=py37_0
- openh264=2.1.0=hd408876_0
- openjpeg=2.3.0=h05c96fa_1
- openssl=1.1.1k=h27cfd23_0
- pillow=8.3.1=py37h2c7a002_0
- pip=21.1.3=py37h06a4308_0
- python=3.7.10=h12debd9_4
- pytorch=1.9.0=py3.7_cuda11.1_cudnn8.0.5_0
- readline=8.1=h27cfd23_0
- setuptools=52.0.0=py37h06a4308_0
- six=1.16.0=pyhd3eb1b0_0
- sqlite=3.36.0=hc218d9a_0
- tk=8.6.10=hbc83047_0
- torchaudio=0.9.0=py37
- torchvision=0.10.0=py37_cu111
- typing_extensions=3.10.0.0=pyh06a4308_0
- wheel=0.36.2=pyhd3eb1b0_0
- xz=5.2.5=h7b6447c_0
- zlib=1.2.11=h7b6447c_3
- zstd=1.4.9=haebb681_0
- pip:
- accelerate==0.3.0
- charset-normalizer==2.0.1
- click==8.0.1
- datasets==1.9.0
- deepspeed==0.5.0
- dill==0.3.4
- filelock==3.0.12
- fsspec==2021.7.0
- huggingface-hub==0.0.13
- idna==3.2
- importlib-metadata==4.6.1
- joblib==1.0.1
- multiprocess==0.70.12.2
- ninja==1.10.0.post2
- packaging==21.0
- pandas==1.3.0
- protobuf==3.17.3
- psutil==5.8.0
- pyaml==20.4.0
- pyarrow==4.0.1
- pyparsing==2.4.7
- python-dateutil==2.8.1
- pytz==2021.1
- pyyaml==5.4.1
- regex==2021.7.6
- requests==2.26.0
- sacremoses==0.0.45
- scikit-learn==0.24.2
- scipy==1.7.0
- sentencepiece==0.1.96
- sklearn==0.0
- tensorboardx==1.8
- threadpoolctl==2.2.0
- tokenizers==0.10.3
- tqdm==4.61.2
- triton==0.4.2
- urllib3==1.26.5
- xxhash==2.0.2
- zipp==3.5.0
- wrapt==1.12.1
- azureml-core==1.32.0
prefix: /opt/conda/envs/transformers
Loading

0 comments on commit b80d0c4

Please sign in to comment.