diff --git a/sbatch/D4_bits_sweep.sh b/sbatch/D4_bits_sweep.sh deleted file mode 100644 index d6e6a56..0000000 --- a/sbatch/D4_bits_sweep.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash - -CKPT=/mnt/jerry_data/checkpoints -HF=/mnt/jerry_data/hfized -HESS=/mnt/jerry_data/hessians -LOG=/mnt/jerry_data/logs -L1=/mnt/jerry_data/meta_llama1 - - -python quantize_llama.py --save_path $CKPT/2_70b_d4_2bit_nolr --codebook D4 --lora_rank 0 --scale_override 1.1 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 >> $LOG/2_70b_d4_2bit_nolr 2>&1 -python quantize_llama.py --save_path $CKPT/2_70b_d4_221bit_nolr --codebook D4221B --lora_rank 0 --scale_override 1.2 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 >> $LOG/2_70b_d4_221bit_nolr 2>&1 -python quantize_llama.py --save_path $CKPT/2_70b_d4_234bit_nolr --codebook D4234B --lora_rank 0 --scale_override 1.4 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 >> $LOG/2_70b_d4_234bit_nolr 2>&1 -python quantize_llama.py --save_path $CKPT/2_70b_d4_248bit_nolr --codebook D4248B --lora_rank 0 --scale_override 1.4 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 >> $LOG/2_70b_d4_248bit_nolr 2>&1 -python quantize_llama.py --save_path $CKPT/2_70b_d4_274bit_nolr --codebook D4274B --lora_rank 0 --scale_override 1.6 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 >> $LOG/2_70b_d4_274bit_nolr 2>&1 -python quantize_llama.py --save_path $CKPT/2_70b_d4_299bit_nolr --codebook D4299B --lora_rank 0 --scale_override 1.9 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 >> $LOG/2_70b_d4_299bit_nolr 2>&1 - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_d4_2bit_nolr --hf_output_path $HF/2_70b_d4_2bit_nolr & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_70b_d4_221bit_nolr --hf_output_path $HF/2_70b_d4_221bit_nolr & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_70b_d4_234bit_nolr --hf_output_path $HF/2_70b_d4_234bit_nolr & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_70b_d4_248bit_nolr --hf_output_path $HF/2_70b_d4_248bit_nolr & -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_70b_d4_274bit_nolr --hf_output_path $HF/2_70b_d4_274bit_nolr & -CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/2_70b_d4_299bit_nolr --hf_output_path $HF/2_70b_d4_299bit_nolr & - -wait - -# perplexity -CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --seqlen 4096 --hf_path $HF/2_70b_d4_2bit_nolr >> $LOG/2_70b_d4_2bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --seqlen 4096 --hf_path $HF/2_70b_d4_221bit_nolr >> $LOG/2_70b_d4_221bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --seqlen 4096 --hf_path $HF/2_70b_d4_234bit_nolr >> $LOG/2_70b_d4_234bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --seqlen 4096 --hf_path $HF/2_70b_d4_248bit_nolr >> $LOG/2_70b_d4_248bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=4 python ppl_llama.py --seqlen 4096 --hf_path $HF/2_70b_d4_274bit_nolr >> $LOG/2_70b_d4_274bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=5 python ppl_llama.py --seqlen 4096 --hf_path $HF/2_70b_d4_299bit_nolr >> $LOG/2_70b_d4_299bit_nolr 2>&1 & - -wait - -# zero shot -CUDA_VISIBLE_DEVICES=0 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_d4_2bit_nolr >> $LOG/2_70b_d4_2bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_d4_221bit_nolr >> $LOG/2_70b_d4_221bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_d4_234bit_nolr >> $LOG/2_70b_d4_234bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_d4_248bit_nolr >> $LOG/2_70b_d4_248bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_d4_274bit_nolr >> $LOG/2_70b_d4_274bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_d4_299bit_nolr >> $LOG/2_70b_d4_299bit_nolr 2>&1 & - -wait diff --git a/sbatch/chat_4bit_packed.sh b/sbatch/chat_4bit_packed.sh deleted file mode 100644 index 2a015c7..0000000 --- a/sbatch/chat_4bit_packed.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash - -CKPT=/mnt/desa_data/checkpoints -HF=/mnt/desa_data/hfized -HESS=/mnt/desa_data/hessians -LOG=/mnt/desa_data/logs - - -CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6 python quantize_llama.py --save_path $CKPT/2_70b_chat_hi_4bit_packed --codebook HI4B1C --scale_override 2.7 --base_model meta-llama/Llama-2-70b-chat-hf --hessian_path $HESS/llama2_70b_chat_6144 >> $LOG/2_70b_chat_hi_4bit_packed 2>&1 -CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6 python quantize_llama.py --save_path $CKPT/2_13b_chat_hi_4bit_packed --codebook HI4B1C --scale_override 2.7 --base_model meta-llama/Llama-2-13b-chat-hf --hessian_path $HESS/llama2_13b_chat_6144 >> $LOG/2_13b_chat_hi_4bit_packed 2>&1 -CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6 python quantize_llama.py --save_path $CKPT/2_7b_chat_hi_4bit_packed --codebook HI4B1C --scale_override 2.7 --base_model meta-llama/Llama-2-7b-chat-hf --hessian_path $HESS/llama2_7b_chat_6144 >> $LOG/2_7b_chat_hi_4bit_packed 2>&1 - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_chat_hi_4bit_packed --hf_output_path $HF/2_70b_chat_hi_4bit_packed >> $LOG/2_70b_chat_hi_4bit_packed 2>&1 & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_13b_chat_hi_4bit_packed --hf_output_path $HF/2_13b_chat_hi_4bit_packed >> $LOG/2_13b_chat_hi_4bit_packed 2>&1 & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_7b_chat_hi_4bit_packed --hf_output_path $HF/2_7b_chat_hi_4bit_packed >> $LOG/2_7b_chat_hi_4bit_packed 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --hf_path $HF/2_70b_chat_hi_4bit_packed >> $LOG/2_70b_chat_hi_4bit_packed 2>&1 & -CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --hf_path $HF/2_13b_chat_hi_4bit_packed >> $LOG/2_13b_chat_hi_4bit_packed 2>&1 & -CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --hf_path $HF/2_7b_chat_hi_4bit_packed >> $LOG/2_7b_chat_hi_4bit_packed 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_chat_hi_4bit_packed >> $LOG/2_70b_chat_hi_4bit_packed 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_chat_hi_4bit_packed >> $LOG/2_13b_chat_hi_4bit_packed 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_chat_hi_4bit_packed >> $LOG/2_7b_chat_hi_4bit_packed 2>&1 & - -wait diff --git a/sbatch/e8237b.sh b/sbatch/e8237b.sh deleted file mode 100644 index 67de8e1..0000000 --- a/sbatch/e8237b.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash - -CKPT=/mnt/jerry_data/checkpoints -HF=/mnt/jerry_data/hfized -HESS=/mnt/jerry_data/hessians -LOG=/mnt/jerry_data/logs - -NAME=2_70b_e8_237bit_nolr - -CUDA_VISIBLE_DEVICES=4,5,6,7 python quantize_llama.py --save_path $CKPT/$NAME --codebook E8237B --scale_override 1.13 --lora_rank 0 --hessian_path $HESS/llama2_70b_6144 >> $LOG/$NAME 2>&1 - -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/$NAME --hf_output_path $HF/$NAME - -CUDA_VISIBLE_DEVICES=4 python ppl_llama.py --hf_path $HF/$NAME >> $LOG/$NAME 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/$NAME >> $LOG/$NAME 2>&1 & - -wait - diff --git a/sbatch/e8p_requant.sh b/sbatch/e8p_requant.sh deleted file mode 100644 index 2a4ab09..0000000 --- a/sbatch/e8p_requant.sh +++ /dev/null @@ -1,79 +0,0 @@ -#!/bin/bash - -CKPT=/mnt/desa_data/checkpoints/new_e8p -HF=/mnt/desa_data/hfized/new_e8p -LOG=/mnt/desa_data/logs/new_e8p -HESS=/mnt/desa_data/hessians -L1=/mnt/desa_data/meta_llama1 - -mkdir $CKPT -mkdir $HF -mkdir $LOG - -''' -# llama 2 -python quantize_llama.py --save_path $CKPT/2_70b_e8p_2bit --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 >> $LOG/2_70b_e8p_2bit 2>&1 -python quantize_llama.py --save_path $CKPT/2_13b_e8p_2bit --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144 >> $LOG/2_13b_e8p_2bit 2>&1 -python quantize_llama.py --save_path $CKPT/2_7b_e8p_2bit --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144 >> $LOG/2_7b_e8p_2bit 2>&1 -# llama 2 chat -python quantize_llama.py --save_path $CKPT/2_70b_chat_e8p_2bit --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-70b-chat-hf --hessian_path $HESS/llama2_70b_chat_6144 >> $LOG/2_70b_chat_e8p_2bit 2>&1 -python quantize_llama.py --save_path $CKPT/2_13b_chat_e8p_2bit --codebook E8P12 --scale_override 1.0 --base_model meta-llama/Llama-2-13b-chat-hf --hessian_path $HESS/llama2_13b_chat_6144 >> $LOG/2_13b_chat_e8p_2bit 2>&1 -python quantize_llama.py --save_path $CKPT/2_7b_chat_e8p_2bit --codebook E8P12 --scale_override 1.0 --base_model meta-llama/Llama-2-7b-chat-hf --hessian_path $HESS/llama2_7b_chat_6144 >> $LOG/2_7b_chat_e8p_2bit 2>&1 -# llama 1 -python quantize_llama.py --save_path $CKPT/1_65b_e8p_2bit --codebook E8P12 --scale_override 0.9 --base_model relaxml/Llama-1-65b-hf --hessian_path $HESS/llama1_65b_6144 >> $LOG/1_65b_e8p_2bit 2>&1 -python quantize_llama.py --save_path $CKPT/1_30b_e8p_2bit --codebook E8P12 --scale_override 0.9 --base_model relaxml/Llama-1-30b-hf --hessian_path $HESS/llama1_30b_6144 >> $LOG/1_30b_e8p_2bit 2>&1 -python quantize_llama.py --save_path $CKPT/1_13b_e8p_2bit --codebook E8P12 --scale_override 0.9 --base_model relaxml/Llama-1-13b-hf --hessian_path $HESS/llama1_13b_6144 >> $LOG/1_13b_e8p_2bit 2>&1 -python quantize_llama.py --save_path $CKPT/1_7b_e8p_2bit --codebook E8P12 --scale_override 0.9 --base_model relaxml/Llama-1-7b-hf --hessian_path $HESS/llama1_7b_6144 >> $LOG/1_7b_e8p_2bit 2>&1 -# mistral hermes -python quantize_llama.py --save_path $CKPT/mistral_7b_e8p_2bit --codebook E8P12 --scale_override 0.9 --base_model mistralai/Mistral-7B-v0.1 --hessian_path $HESS/mistral_7b_4096 >> $LOG/mistral_7b_e8p_2bit 2>&1 -python quantize_llama.py --save_path $CKPT/openhermes_7b_e8p_2bit --codebook E8P12 --scale_override 0.9 --base_model teknium/OpenHermes-2.5-Mistral-7B --hessian_path $HESS/openhermes_7b_4096 >> $LOG/openhermes_7b_e8p_2bit 2>&1 - - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_e8p_2bit --hf_output_path $HF/2_70b_e8p_2bit >> $LOG/2_70b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_13b_e8p_2bit --hf_output_path $HF/2_13b_e8p_2bit >> $LOG/2_13b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_7b_e8p_2bit --hf_output_path $HF/2_7b_e8p_2bit >> $LOG/2_7b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_70b_chat_e8p_2bit --hf_output_path $HF/2_70b_chat_e8p_2bit >> $LOG/2_70b_chat_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_13b_chat_e8p_2bit --hf_output_path $HF/2_13b_chat_e8p_2bit >> $LOG/2_13b_chat_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/2_7b_chat_e8p_2bit --hf_output_path $HF/2_7b_chat_e8p_2bit >> $LOG/2_7b_chat_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/1_65b_e8p_2bit --hf_output_path $HF/1_65b_e8p_2bit >> $LOG/1_65b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/1_30b_e8p_2bit --hf_output_path $HF/1_30b_e8p_2bit >> $LOG/1_30b_e8p_2bit 2>&1 & - -wait -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/1_13b_e8p_2bit --hf_output_path $HF/1_13b_e8p_2bit >> $LOG/1_13b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/1_7b_e8p_2bit --hf_output_path $HF/1_7b_e8p_2bit >> $LOG/1_7b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/mistral_7b_e8p_2bit --hf_output_path $HF/mistral_7b_e8p_2bit >> $LOG/mistral_7b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/openhermes_7b_e8p_2bit --hf_output_path $HF/openhermes_7b_e8p_2bit >> $LOG/openhermes_7b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/2_70b_e8p_2bit >> $LOG/2_70b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --hf_path $HF/2_13b_e8p_2bit >> $LOG/2_13b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --hf_path $HF/2_7b_e8p_2bit >> $LOG/2_7b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --hf_path $HF/2_70b_chat_e8p_2bit >> $LOG/2_70b_chat_e8p_2bit 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/mistral_7b_e8p_2bit >> $LOG/mistral_7b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path $HF/openhermes_7b_e8p_2bit >> $LOG/openhermes_7b_e8p_2bit 2>&1 & - -wait -''' - -CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_65b_e8p_2bit >> $LOG/1_65b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_30b_e8p_2bit >> $LOG/1_30b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_13b_e8p_2bit >> $LOG/1_13b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_7b_e8p_2bit >> $LOG/1_7b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_chat_e8p_2bit >> $LOG/2_7b_chat_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_e8p_2bit >> $LOG/1_7b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/mistral_7b_e8p_2bit >> $LOG/mistral_7b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/openhermes_7b_e8p_2bit >> $LOG/openhermes_7b_e8p_2bit 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit >> $LOG/2_70b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_e8p_2bit >> $LOG/2_13b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_e8p_2bit >> $LOG/2_7b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_chat_e8p_2bit >> $LOG/2_70b_chat_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_chat_e8p_2bit >> $LOG/2_13b_chat_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_e8p_2bit >> $LOG/1_13b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_e8p_2bit >> $LOG/1_65b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_e8p_2bit >> $LOG/1_30b_e8p_2bit 2>&1 & - -wait diff --git a/sbatch/finetune.sh b/sbatch/finetune.sh deleted file mode 100644 index 8b11848..0000000 --- a/sbatch/finetune.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash - -CKPT=/mnt/desa_data/checkpoints/finetune_albert -HF=/mnt/desa_data/hfized/finetune_albert -HESS=/mnt/desa_data/hessians -LOG=/mnt/desa_data/logs/finetune_albert - -mkdir $CKPT -mkdir $HF -mkdir $LOG - -CUDA_VISIBLE_DEVICES=0,1 python quantize_llama_finetune.py --save_path $CKPT/2_70b_2bit --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 --devset_size 768 --ddp_port 12345 >> $LOG/2_70b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=2,3 python quantize_llama_finetune.py --save_path $CKPT/2_70b_3bit --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 --devset_size 768 --ddp_port 12346 >> $LOG/2_70b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=4,5 python quantize_llama_finetune.py --save_path $CKPT/1_65b_2bit --codebook E8P12 --scale_override 0.9 --base_model relaxml/Llama-1-65b-hf --hessian_path $HESS/llama1_65b_6144 --devset_size 768 --ddp_port 12347 >> $LOG/1_65b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=6,7 python quantize_llama_finetune.py --save_path $CKPT/1_65b_3bit --codebook E8P12RVQ3B --base_model relaxml/Llama-1-65b-hf --hessian_path $HESS/llama1_65b_6144 --devset_size 768 --ddp_port 12348 >> $LOG/1_65b_3bit 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python quantize_llama_finetune.py --save_path $CKPT/2_13b_2bit --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144 --devset_size 768 --ddp_port 12345 >> $LOG/2_13b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python quantize_llama_finetune.py --save_path $CKPT/2_13b_3bit --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144 --devset_size 768 --ddp_port 12346 >> $LOG/2_13b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python quantize_llama_finetune.py --save_path $CKPT/1_13b_2bit --codebook E8P12 --scale_override 0.9 --base_model relaxml/Llama-1-13b-hf --hessian_path $HESS/llama1_13b_6144 --devset_size 768 --ddp_port 12347 >> $LOG/1_13b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python quantize_llama_finetune.py --save_path $CKPT/1_13b_3bit --codebook E8P12RVQ3B --base_model relaxml/Llama-1-13b-hf --hessian_path $HESS/llama1_13b_6144 --devset_size 768 --ddp_port 12348 >> $LOG/1_13b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=4,5 python quantize_llama_finetune.py --save_path $CKPT/1_30b_2bit --codebook E8P12 --scale_override 0.9 --base_model relaxml/Llama-1-30b-hf --hessian_path $HESS/llama1_30b_6144 --devset_size 768 --ddp_port 12349 >> $LOG/1_30b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=6,7 python quantize_llama_finetune.py --save_path $CKPT/1_30b_3bit --codebook E8P12RVQ3B --base_model relaxml/Llama-1-30b-hf --hessian_path $HESS/llama1_30b_6144 --devset_size 768 --ddp_port 12350 >> $LOG/1_30b_3bit 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python quantize_llama_finetune.py --save_path $CKPT/2_7b_2bit --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144 --devset_size 768 --ddp_port 12345 >> $LOG/2_7b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python quantize_llama_finetune.py --save_path $CKPT/2_7b_3bit --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144 --devset_size 768 --ddp_port 12346 >> $LOG/2_7b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python quantize_llama_finetune.py --save_path $CKPT/1_7b_2bit --codebook E8P12 --scale_override 0.9 --base_model relaxml/Llama-1-7b-hf --hessian_path $HESS/llama1_7b_6144 --devset_size 768 --ddp_port 12347 >> $LOG/1_7b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python quantize_llama_finetune.py --save_path $CKPT/1_7b_3bit --codebook E8P12RVQ3B --base_model relaxml/Llama-1-7b-hf --hessian_path $HESS/llama1_7b_6144 --devset_size 768 --ddp_port 12348 >> $LOG/1_7b_3bit 2>&1 & - -CUDA_VISIBLE_DEVICES=4 python quantize_llama_finetune.py --save_path $CKPT/2_7b_4bit --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144 --devset_size 768 --ddp_port 12349 >> $LOG/2_7b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python quantize_llama_finetune.py --save_path $CKPT/1_7b_4bit --codebook E8P12RVQ4B --base_model relaxml/Llama-1-7b-hf --hessian_path $HESS/llama1_7b_6144 --devset_size 768 --ddp_port 12350 >> $LOG/1_7b_4bit 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0,1 python quantize_llama_finetune.py --save_path $CKPT/1_30b_4bit --codebook E8P12RVQ4B --base_model relaxml/Llama-1-30b-hf --hessian_path $HESS/llama1_30b_6144 --devset_size 768 --ddp_port 12345 >> $LOG/1_30b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=2,3 python quantize_llama_finetune.py --save_path $CKPT/2_70b_4bit --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 --devset_size 768 --ddp_port 12346 >> $LOG/2_70b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=4,5 python quantize_llama_finetune.py --save_path $CKPT/1_65b_4bit --codebook E8P12RVQ4B --base_model relaxml/Llama-1-65b-hf --hessian_path $HESS/llama1_65b_6144 --devset_size 768 --ddp_port 12347 >> $LOG/1_65b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python quantize_llama_finetune.py --save_path $CKPT/2_13b_4bit --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144 --devset_size 768 --ddp_port 12351 >> $LOG/2_13b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python quantize_llama_finetune.py --save_path $CKPT/1_13b_4bit --codebook E8P12RVQ4B --base_model relaxml/Llama-1-13b-hf --hessian_path $HESS/llama1_13b_6144 --devset_size 768 --ddp_port 12352 >> $LOG/1_13b_4bit 2>&1 & -wait diff --git a/sbatch/finetune_4bit.sh b/sbatch/finetune_4bit.sh deleted file mode 100644 index ee5ef75..0000000 --- a/sbatch/finetune_4bit.sh +++ /dev/null @@ -1,123 +0,0 @@ -CKPT=/mnt/desa_data/checkpoints/finetune_llama -HF=/mnt/desa_data/hfized/finetune_llama -LOG=/mnt/desa_data/logs/finetune_llama -HESS=/mnt/desa_data/hessians -''' -# llama 2 4 bit_scale - -python finetune_susv_adam.py --save_path $CKPT/2_70b_4bit_scale --scale_override 0.9 --resid_scale_override 3.6 --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 --ft_bs 4 --ft_update_freq 2 >> $LOG/2_70b_4bit_scale 2>&1 - -python finetune_susv_adam.py --save_path $CKPT/2_13b_4bit_scale --scale_override 0.9 --resid_scale_override 3.45 --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 >> $LOG/2_13b_4bit_scale 2>&1 - -python finetune_susv_adam.py --save_path $CKPT/2_7b_4bit_scale --scale_override 0.9 --resid_scale_override 3.6 --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 >> $LOG/2_7b_4bit_scale 2>&1 - -# llama 1 4 bit_scale - -python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_65b_4bit_scale --scale_override 0.9 --resid_scale_override 3.45 --codebook E8P12RVQ4B --base_model relaxml/Llama-1-65b-hf --hessian_path $HESS/llama1_65b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 --ft_bs 4 --ft_update_freq 2 >> $LOG/1_65b_4bit_scale 2>&1 - -python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_30b_4bit_scale --scale_override 0.9 --resid_scale_override 3.45 --codebook E8P12RVQ4B --base_model relaxml/Llama-1-30b-hf --hessian_path $HESS/llama1_30b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 --ft_bs 4 --ft_update_freq 2 >> $LOG/1_30b_4bit_scale 2>&1 - -python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_13b_4bit_scale --scale_override 0.9 --resid_scale_override 3.6 --codebook E8P12RVQ4B --base_model relaxml/Llama-1-13b-hf --hessian_path $HESS/llama1_13b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 >> $LOG/1_13b_4bit_scale 2>&1 - -python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_7b_4bit_scale --scale_override 0.85 --resid_scale_override 3.45 --codebook E8P12RVQ4B --base_model relaxml/Llama-1-7b-hf --hessian_path $HESS/llama1_7b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 >> $LOG/1_7b_4bit_scale 2>&1 - -# llama 1 3 bit_scale - -python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_65b_3bit_scale --scale_override 0.93 --resid_scale_override 1.99 --codebook E8P12RVQ3B --base_model relaxml/Llama-1-65b-hf --hessian_path $HESS/llama1_65b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 --ft_bs 4 --ft_update_freq 2 >> $LOG/1_65b_3bit_scale 2>&1 - -python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_30b_3bit_scale --scale_override 0.93 --resid_scale_override 2.04 --codebook E8P12RVQ3B --base_model relaxml/Llama-1-30b-hf --hessian_path $HESS/llama1_30b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 --ft_bs 4 --ft_update_freq 2 >> $LOG/1_30b_3bit_scale 2>&1 - -python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_13b_3bit_scale --scale_override 0.98 --resid_scale_override 2.09 --codebook E8P12RVQ3B --base_model relaxml/Llama-1-13b-hf --hessian_path $HESS/llama1_13b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 >> $LOG/1_13b_3bit_scale 2>&1 - -python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_7b_3bit_scale --scale_override 0.93 --resid_scale_override 2.09 --codebook E8P12RVQ3B --base_model relaxml/Llama-1-7b-hf --hessian_path $HESS/llama1_7b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 >> $LOG/1_7b_3bit_scale 2>&1 - - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_4bit_scale --hf_output_path $HF/2_70b_4bit_scale >> $LOG/2_70b_4bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_13b_4bit_scale --hf_output_path $HF/2_13b_4bit_scale >> $LOG/2_13b_4bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_7b_4bit_scale --hf_output_path $HF/2_7b_4bit_scale >> $LOG/2_7b_4bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/1_65b_4bit_scale --hf_output_path $HF/1_65b_4bit_scale >> $LOG/1_65b_4bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/1_30b_4bit_scale --hf_output_path $HF/1_30b_4bit_scale >> $LOG/1_30b_4bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/1_13b_4bit_scale --hf_output_path $HF/1_13b_4bit_scale >> $LOG/1_13b_4bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/1_7b_4bit_scale --hf_output_path $HF/1_7b_4bit_scale >> $LOG/1_7b_4bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/1_65b_3bit_scale --hf_output_path $HF/1_65b_3bit_scale >> $LOG/1_65b_3bit_scale 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/1_30b_3bit_scale --hf_output_path $HF/1_30b_3bit_scale >> $LOG/1_30b_3bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/1_13b_3bit_scale --hf_output_path $HF/1_13b_3bit_scale >> $LOG/1_13b_3bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/1_7b_3bit_scale --hf_output_path $HF/1_7b_3bit_scale >> $LOG/1_7b_3bit_scale 2>&1 & - -wait - -# tune llama 2 4 bit_scale -python tune_susv_lmhead.py --base_model meta-llama/Llama-2-70b-hf --hf_path $HF/2_70b_4bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 3072 --ft_update_freq 2 --ckpt_path $CKPT/2_70b_4bit_scale >> $LOG/2_70b_4bit_scale 2>&1 -#CUDA_VISIBLE_DEVICES=4,5,6,7 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-13b-hf --hf_path $HF/2_13b_4bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_13b_4bit_scale >> $LOG/2_13b_4bit_scale 2>&1 & -#CUDA_VISIBLE_DEVICES=0,1,2,3 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-7b-hf --hf_path $HF/2_7b_4bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_7b_4bit_scale >> $LOG/2_7b_4bit_scale 2>&1 & -wait - - -python tune_susv_lmhead.py --base_model relaxml/Llama-1-65b-hf --hf_path $HF/1_65b_3bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_65b_3bit_scale >> $LOG/1_65b_3bit_scale 2>&1 -python tune_susv_lmhead.py --base_model relaxml/Llama-1-65b-hf --hf_path $HF/1_65b_4bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_65b_4bit_scale >> $LOG/1_65b_4bit_scale 2>&1 -python tune_susv_lmhead.py --base_model relaxml/Llama-1-30b-hf --hf_path $HF/1_30b_3bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_30b_3bit_scale >> $LOG/1_30b_3bit_scale 2>&1 -#python tune_susv_lmhead.py --base_model relaxml/Llama-1-30b-hf --hf_path $HF/1_30b_4bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_30b_4bit_scale >> $LOG/1_30b_4bit_scale 2>&1 -#CUDA_VISIBLE_DEVICES=0,1,2,3 python tune_susv_lmhead.py --base_model relaxml/Llama-1-13b-hf --hf_path $HF/1_13b_3bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_13b_3bit_scale >> $LOG/1_13b_3bit_scale 2>&1 & -#CUDA_VISIBLE_DEVICES=4,5,6,7 python tune_susv_lmhead.py --base_model relaxml/Llama-1-13b-hf --hf_path $HF/1_13b_4bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_13b_4bit_scale >> $LOG/1_13b_4bit_scale 2>&1 & -wait -#CUDA_VISIBLE_DEVICES=4,5,6,7 python tune_susv_lmhead.py --base_model relaxml/Llama-1-7b-hf --hf_path $HF/1_7b_3bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_7b_3bit_scale >> $LOG/1_7b_3bit_scale 2>&1 & -#CUDA_VISIBLE_DEVICES=0,1,2,3 python tune_susv_lmhead.py --base_model relaxml/Llama-1-7b-hf --hf_path $HF/1_7b_4bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_7b_4bit_scale >> $LOG/1_7b_4bit_scale 2>&1 & -wait - - - - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_4bit_scale --hf_output_path $HF/2_70b_4bit_scale >> $LOG/2_70b_4bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_13b_4bit_scale --hf_output_path $HF/2_13b_4bit_scale >> $LOG/2_13b_4bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_7b_4bit_scale --hf_output_path $HF/2_7b_4bit_scale >> $LOG/2_7b_4bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/1_65b_4bit_scale --hf_output_path $HF/1_65b_4bit_scale >> $LOG/1_65b_4bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/1_30b_4bit_scale --hf_output_path $HF/1_30b_4bit_scale >> $LOG/1_30b_4bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/1_13b_4bit_scale --hf_output_path $HF/1_13b_4bit_scale >> $LOG/1_13b_4bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/1_7b_4bit_scale --hf_output_path $HF/1_7b_4bit_scale >> $LOG/1_7b_4bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/1_65b_3bit_scale --hf_output_path $HF/1_65b_3bit_scale >> $LOG/1_65b_3bit_scale 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/1_30b_3bit_scale --hf_output_path $HF/1_30b_3bit_scale >> $LOG/1_30b_3bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/1_13b_3bit_scale --hf_output_path $HF/1_13b_3bit_scale >> $LOG/1_13b_3bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/1_7b_3bit_scale --hf_output_path $HF/1_7b_3bit_scale >> $LOG/1_7b_3bit_scale 2>&1 & - -wait - -#CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/2_70b_4bit_scale >> $LOG/2_70b_4bit_scale 2>&1 & -#CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path $HF/2_13b_4bit_scale >> $LOG/2_13b_4bit_scale 2>&1 & -#CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path $HF/2_7b_4bit_scale >> $LOG/2_7b_4bit_scale 2>&1 & -wait -''' -CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --hf_path $HF/1_65b_4bit_scale --seqlen 2048 >> $LOG/1_65b_4bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/1_30b_4bit_scale --seqlen 2048 >> $LOG/1_30b_4bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --hf_path $HF/1_13b_4bit_scale --seqlen 2048 >> $LOG/1_13b_4bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --hf_path $HF/1_7b_4bit_scale --seqlen 2048 >> $LOG/1_7b_4bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --hf_path $HF/1_65b_3bit_scale --seqlen 2048 >> $LOG/1_65b_3bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/1_30b_3bit_scale --seqlen 2048 >> $LOG/1_30b_3bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path $HF/1_13b_3bit_scale --seqlen 2048 >> $LOG/1_13b_3bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path $HF/1_7b_3bit_scale --seqlen 2048 >> $LOG/1_7b_3bit_scale 2>&1 & - -wait -''' -CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_4bit_scale >> $LOG/2_70b_4bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_4bit_scale >> $LOG/2_13b_4bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_4bit_scale >> $LOG/2_7b_4bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_4bit_scale >> $LOG/1_65b_4bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_4bit_scale >> $LOG/1_30b_4bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_4bit_scale >> $LOG/1_13b_4bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_4bit_scale >> $LOG/1_7b_4bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_3bit_scale >> $LOG/1_65b_3bit_scale 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_3bit_scale >> $LOG/1_30b_3bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_3bit_scale >> $LOG/1_13b_3bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_3bit_scale >> $LOG/1_7b_3bit_scale 2>&1 & -wait - - -''' diff --git a/sbatch/finetune_4bit_cr15.sh b/sbatch/finetune_4bit_cr15.sh deleted file mode 100644 index cb39b4d..0000000 --- a/sbatch/finetune_4bit_cr15.sh +++ /dev/null @@ -1,22 +0,0 @@ -CKPT=/mnt/desa_data/checkpoints/finetune_llama -HF=/mnt/desa_data/hfized/finetune_llama -LOG=/mnt/desa_data/logs/finetune_llama -HESS=/mnt/desa_data/hessians - - - -CUDA_VISIBLE_DEVICES=4,5,6,7 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-13b-hf --hf_path $HF/2_13b_4bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_13b_4bit_scale >> $LOG/2_13b_4bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=2,3 python tune_susv_lmhead.py --base_model relaxml/Llama-1-7b-hf --hf_path $HF/1_7b_4bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_7b_4bit_scale >> $LOG/1_7b_4bit_scale 2>&1 & -wait - -CUDA_VISIBLE_DEVICES=2,3,4,5,6,7 python tune_susv_lmhead.py --base_model relaxml/Llama-1-30b-hf --hf_path $HF/1_30b_4bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_30b_4bit_scale >> $LOG/1_30b_4bit_scale 2>&1 - - -CUDA_VISIBLE_DEVICES=2,3,4 python tune_susv_lmhead.py --base_model relaxml/Llama-1-13b-hf --hf_path $HF/1_13b_3bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_13b_3bit_scale >> $LOG/1_13b_3bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=5,6,7 python tune_susv_lmhead.py --base_model relaxml/Llama-1-13b-hf --hf_path $HF/1_13b_4bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_13b_4bit_scale >> $LOG/1_13b_4bit_scale 2>&1 & -wait - -CUDA_VISIBLE_DEVICES=5,6,7 python tune_susv_lmhead.py --base_model relaxml/Llama-1-7b-hf --hf_path $HF/1_7b_3bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_7b_3bit_scale >> $LOG/1_7b_3bit_scale 2>&1 & -CUDA_VISIBLE_DEVICES=2,3,4 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-7b-hf --hf_path $HF/2_7b_4bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_7b_4bit_scale >> $LOG/2_7b_4bit_scale 2>&1 & -wait - diff --git a/sbatch/finetune_adam.sh b/sbatch/finetune_adam.sh deleted file mode 100644 index 16e53bf..0000000 --- a/sbatch/finetune_adam.sh +++ /dev/null @@ -1,20 +0,0 @@ -CKPT=/mnt/desa_data/checkpoints/finetune_albert -HF=/mnt/desa_data/hfized/finetune_albert -LOG=/mnt/desa_data/logs/finetune_albert -HESS=/mnt/desa_data/hessians - -CUDA_VISIBLE_DEVICES=2,3,4,5,6,7 python finetune_susv_adam.py --save_path $CKPT/2_70b_susv_adam2 --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 --ft_bs 4 --ft_update_freq 2 >> $LOG/2_70b_susv_adam2 2>&1 -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_70b_susv_adam2 --hf_output_path $HF/2_70b_susv_adam2 >> $LOG/2_70b_susv_adam2 2>&1 -CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/2_70b_susv_adam2 >> $LOG/2_70b_susv_adam2 2>&1 - -CUDA_VISIBLE_DEVICES=2,3,4,5,6,7 python finetune_susv_adam.py --save_path $CKPT/2_13b_susv_adam2 --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 >> $LOG/2_13b_susv_adam2 2>&1 -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_13b_susv_adam2 --hf_output_path $HF/2_13b_susv_adam2 >> $LOG/2_13b_susv_adam2 2>&1 -CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/2_13b_susv_adam2 >> $LOG/2_13b_susv_adam2 2>&1 - -CUDA_VISIBLE_DEVICES=2,3,4,5,6,7 python finetune_susv_adam.py --save_path $CKPT/2_7b_susv_adam2 --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 >> $LOG/2_7b_susv_adam2 2>&1 -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_7b_susv_adam2 --hf_output_path $HF/2_7b_susv_adam2 >> $LOG/2_7b_susv_adam2 2>&1 -CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/2_7b_susv_adam2 >> $LOG/2_7b_susv_adam2 2>&1 - - - - diff --git a/sbatch/finetune_adam_early.sh b/sbatch/finetune_adam_early.sh deleted file mode 100644 index 2a0c19e..0000000 --- a/sbatch/finetune_adam_early.sh +++ /dev/null @@ -1,20 +0,0 @@ -CKPT=/mnt/desa_data/checkpoints/finetune_albert -HF=/mnt/desa_data/hfized/finetune_albert -LOG=/mnt/desa_data/logs/finetune_albert -HESS=/mnt/desa_data/hessians - -CUDA_VISIBLE_DEVICES=2,3,4,5,6,7 python finetune_susv_adam_early.py --save_path $CKPT/2_70b_susv_adam_early --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144/ --devset_size 640 --ft_valid_size 128 --ft_epochs 20 --ft_lr 0.00003 --ft_bs 4 --ft_update_freq 2 >> $LOG/2_70b_susv_adam_early 2>&1 -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_70b_susv_adam_early --hf_output_path $HF/2_70b_susv_adam_early >> $LOG/2_70b_susv_adam_early 2>&1 -CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/2_70b_susv_adam_early >> $LOG/2_70b_susv_adam_early 2>&1 - -CUDA_VISIBLE_DEVICES=2,3,4,5,6,7 python finetune_susv_adam_early.py --save_path $CKPT/2_13b_susv_adam_early --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144/ --devset_size 640 --ft_valid_size 128 --ft_epochs 20 --ft_lr 0.00003 >> $LOG/2_13b_susv_adam_early 2>&1 -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_13b_susv_adam_early --hf_output_path $HF/2_13b_susv_adam_early >> $LOG/2_13b_susv_adam_early 2>&1 -CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/2_13b_susv_adam_early >> $LOG/2_13b_susv_adam_early 2>&1 - -CUDA_VISIBLE_DEVICES=2,3,4,5,6,7 python finetune_susv_adam_early.py --save_path $CKPT/2_7b_susv_adam_early --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144/ --devset_size 640 --ft_valid_size 128 --ft_epochs 20 --ft_lr 0.00003 >> $LOG/2_7b_susv_adam_early 2>&1 -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_7b_susv_adam_early --hf_output_path $HF/2_7b_susv_adam_early >> $LOG/2_7b_susv_adam_early 2>&1 -CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/2_7b_susv_adam_early >> $LOG/2_7b_susv_adam_early 2>&1 - - - - diff --git a/sbatch/finetune_adamw.sh b/sbatch/finetune_adamw.sh deleted file mode 100644 index a64c045..0000000 --- a/sbatch/finetune_adamw.sh +++ /dev/null @@ -1,57 +0,0 @@ -CKPT=/mnt/desa_data/checkpoints/finetune_llama_adamw -HF=/mnt/desa_data/hfized/finetune_llama_adamw -LOG=/mnt/desa_data/logs/finetune_llama_adamw -HESS=/mnt/desa_data/hessians - -python finetune_susv_adam.py --save_path $CKPT/2_70b_2bit --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 8 --ft_lr 0.000025 --ft_opt adamw --ft_bs 4 >> $LOG/2_70b_2bit 2>&1 - -python finetune_susv_adam.py --save_path $CKPT/2_70b_3bit --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 8 --ft_lr 0.000025 --ft_opt adamw --ft_bs 4 >> $LOG/2_70b_3bit 2>&1 - -python finetune_susv_adam.py --save_path $CKPT/2_70b_4bit --codebook E8P12RVQ4B --scale_override 0.9 --resid_scale_override 3.6 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 8 --ft_lr 0.000025 --ft_opt adamw --ft_bs 4 >> $LOG/2_70b_4bit 2>&1 - - -python finetune_susv_adam.py --save_path $CKPT/2_13b_4bit --codebook E8P12RVQ4B --scale_override 0.9 --resid_scale_override 3.45 --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 8 --ft_lr 0.000025 --ft_opt adamw --ft_bs 4 >> $LOG/2_13b_4bit 2>&1 - -python finetune_susv_adam.py --save_path $CKPT/2_7b_4bit --codebook E8P12RVQ4B --scale_override 0.9 --resid_scale_override 3.6 --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 8 --ft_lr 0.000025 --ft_opt adamw --ft_bs 4 >> $LOG/2_7b_4bit 2>&1 - - - -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_70b_3bit --hf_output_path $HF/2_70b_3bit >> $LOG/2_70b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_70b_2bit --hf_output_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_70b_4bit --hf_output_path $HF/2_70b_4bit >> $LOG/2_70b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/2_13b_4bit --hf_output_path $HF/2_13b_3bit >> $LOG/2_13b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/2_7b_4bit --hf_output_path $HF/2_13b_4bit >> $LOG/2_7b_4bit 2>&1 & - -wait - -python tune_susv_lmhead.py --base_model meta-llama/Llama-2-70b-hf --hf_path $HF/2_70b_3bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.000001 --ft_bs 1 --ctx_size 3072 --ft_opt adamw --ckpt_path $CKPT/2_70b_3bit >> $LOG/2_70b_3bit 2>&1 -python tune_susv_lmhead.py --base_model meta-llama/Llama-2-70b-hf --hf_path $HF/2_70b_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.000001 --ft_bs 1 --ctx_size 3072 --ft_opt adamw --ckpt_path $CKPT/2_70b_4bit >> $LOG/2_70b_4bit 2>&1 -python tune_susv_lmhead.py --base_model meta-llama/Llama-2-70b-hf --hf_path $HF/2_70b_2bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.000001 --ft_bs 1 --ctx_size 3072 --ft_opt adamw --ckpt_path $CKPT/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 - -CUDA_VISIBLE_DEVICES=0,1,2,3 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-7b-hf --hf_path $HF/2_7b_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.000001 --ft_opt adamw --ft_bs 1 --ctx_size 4096 --ckpt_path $CKPT/2_7b_4bit >> $LOG/2_7b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=4,5,6,7 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-13b-hf --hf_path $HF/2_13b_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.000001 --ft_opt adamw --ft_bs 1 --ctx_size 4096 --ckpt_path $CKPT/2_13b_4bit >> $LOG/2_13b_4bit 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_70b_3bit --hf_output_path $HF/2_70b_3bit >> $LOG/2_70b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_70b_2bit --hf_output_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_70b_4bit --hf_output_path $HF/2_70b_4bit >> $LOG/2_70b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/2_13b_4bit --hf_output_path $HF/2_13b_4bit >> $LOG/2_13b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/2_7b_4bit --hf_output_path $HF/2_7b_4bit >> $LOG/2_7b_4bit 2>&1 & -wait - -CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path $HF/2_70b_3bit >> $LOG/2_70b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --hf_path $HF/2_70b_4bit >> $LOG/2_70b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --hf_path $HF/2_13b_4bit >> $LOG/2_13b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --hf_path $HF/2_7b_4bit >> $LOG/2_7b_4bit 2>&1 & -wait - -CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_3bit >> $LOG/2_70b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_4bit >> $LOG/2_70b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_4bit >> $LOG/2_13b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_4bit >> $LOG/2_7b_4bit 2>&1 & -wait - - diff --git a/sbatch/finetune_llama1.sh b/sbatch/finetune_llama1.sh deleted file mode 100644 index f7091c2..0000000 --- a/sbatch/finetune_llama1.sh +++ /dev/null @@ -1,121 +0,0 @@ -CKPT=/mnt/desa_data/checkpoints/icml_llama -HF=/mnt/desa_data/hfized/icml_llama -LOG=/mnt/desa_data/logs/icml_llama -HESS=/mnt/desa_data/hessians - -python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_65b_3bit --codebook E8P12RVQ3B --base_model relaxml/Llama-1-65b-hf --hessian_path $HESS/llama1_65b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_bs 4 --ft_update_freq 2 >> $LOG/1_65b_3bit 2>&1 - -python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_30b_3bit --codebook E8P12RVQ3B --base_model relaxml/Llama-1-30b-hf --hessian_path $HESS/llama1_30b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_bs 4 --ft_update_freq 2 >> $LOG/1_30b_3bit 2>&1 - -python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_13b_3bit --codebook E8P12RVQ3B --base_model relaxml/Llama-1-13b-hf --hessian_path $HESS/llama1_13b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 >> $LOG/1_13b_3bit 2>&1 - -python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_7b_3bit --codebook E8P12RVQ3B --base_model relaxml/Llama-1-7b-hf --hessian_path $HESS/llama1_7b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 >> $LOG/1_7b_3bit 2>&1 - - -python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_65b_4bit --codebook E8P12RVQ4B --base_model relaxml/Llama-1-65b-hf --hessian_path $HESS/llama1_65b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_bs 4 --ft_update_freq 2 >> $LOG/1_65b_4bit 2>&1 - -python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_30b_4bit --codebook E8P12RVQ4B --base_model relaxml/Llama-1-30b-hf --hessian_path $HESS/llama1_30b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_bs 4 --ft_update_freq 2 >> $LOG/1_30b_4bit 2>&1 - -python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_13b_4bit --codebook E8P12RVQ4B --base_model relaxml/Llama-1-13b-hf --hessian_path $HESS/llama1_13b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 >> $LOG/1_13b_4bit 2>&1 - -python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_7b_4bit --codebook E8P12RVQ4B --base_model relaxml/Llama-1-7b-hf --hessian_path $HESS/llama1_7b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 >> $LOG/1_7b_4bit 2>&1 - - -python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_65b_2bit --codebook E8P12 --scale_override 0.9 --base_model relaxml/Llama-1-65b-hf --hessian_path $HESS/llama1_65b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_bs 4 --ft_update_freq 2 >> $LOG/1_65b_2bit 2>&1 - -python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_30b_2bit --codebook E8P12 --scale_override 0.9 --base_model relaxml/Llama-1-30b-hf --hessian_path $HESS/llama1_30b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_bs 4 --ft_update_freq 2 >> $LOG/1_30b_2bit 2>&1 - -python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_13b_2bit --codebook E8P12 --scale_override 0.9 --base_model relaxml/Llama-1-13b-hf --hessian_path $HESS/llama1_13b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 >> $LOG/1_13b_2bit 2>&1 - -python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_7b_2bit --codebook E8P12 --scale_override 0.9 --base_model relaxml/Llama-1-7b-hf --hessian_path $HESS/llama1_7b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 >> $LOG/1_7b_2bit 2>&1 - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/1_65b_3bit --hf_output_path $HF/1_65b_3bit >> $LOG/1_65b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/1_65b_2bit --hf_output_path $HF/1_65b_2bit >> $LOG/1_65b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/1_65b_4bit --hf_output_path $HF/1_65b_4bit >> $LOG/1_65b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/1_30b_3bit --hf_output_path $HF/1_30b_3bit >> $LOG/1_30b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/1_30b_2bit --hf_output_path $HF/1_30b_2bit >> $LOG/1_30b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/1_30b_4bit --hf_output_path $HF/1_30b_4bit >> $LOG/1_30b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/1_13b_3bit --hf_output_path $HF/1_13b_3bit >> $LOG/1_13b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/1_13b_4bit --hf_output_path $HF/1_13b_4bit >> $LOG/1_13b_4bit 2>&1 & -wait - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/1_13b_2bit --hf_output_path $HF/1_13b_2bit >> $LOG/1_13b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/1_7b_3bit --hf_output_path $HF/1_7b_3bit >> $LOG/1_7b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/1_7b_4bit --hf_output_path $HF/1_7b_4bit >> $LOG/1_7b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/1_7b_2bit --hf_output_path $HF/1_7b_2bit >> $LOG/1_7b_2bit 2>&1 & - -wait - - -python tune_susv_lmhead.py --base_model relaxml/Llama-1-65b-hf --hf_path $HF/1_65b_3bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_65b_3bit >> $LOG/1_65b_3bit 2>&1 -python tune_susv_lmhead.py --base_model relaxml/Llama-1-65b-hf --hf_path $HF/1_65b_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_65b_4bit >> $LOG/1_65b_4bit 2>&1 -python tune_susv_lmhead.py --base_model relaxml/Llama-1-65b-hf --hf_path $HF/1_65b_2bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_65b_2bit >> $LOG/1_65b_2bit 2>&1 - -python tune_susv_lmhead.py --base_model relaxml/Llama-1-30b-hf --hf_path $HF/1_30b_3bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_30b_3bit >> $LOG/1_30b_3bit 2>&1 -python tune_susv_lmhead.py --base_model relaxml/Llama-1-30b-hf --hf_path $HF/1_30b_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_30b_4bit >> $LOG/1_30b_4bit 2>&1 -python tune_susv_lmhead.py --base_model relaxml/Llama-1-30b-hf --hf_path $HF/1_30b_2bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_30b_2bit >> $LOG/1_30b_2bit 2>&1 - - -CUDA_VISIBLE_DEVICES=0,1,2,3 python tune_susv_lmhead.py --base_model relaxml/Llama-1-13b-hf --hf_path $HF/1_13b_3bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_13b_3bit >> $LOG/1_13b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=4,5,6,7 python tune_susv_lmhead.py --base_model relaxml/Llama-1-13b-hf --hf_path $HF/1_13b_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_13b_4bit >> $LOG/1_13b_4bit 2>&1 & -wait - -CUDA_VISIBLE_DEVICES=0,1,2,3 python tune_susv_lmhead.py --base_model relaxml/Llama-1-13b-hf --hf_path $HF/1_13b_2bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_13b_2bit >> $LOG/1_13b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=4,5,6,7 python tune_susv_lmhead.py --base_model relaxml/Llama-1-7b-hf --hf_path $HF/1_7b_3bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_7b_3bit >> $LOG/1_7b_3bit 2>&1 & -wait - -CUDA_VISIBLE_DEVICES=0,1,2,3 python tune_susv_lmhead.py --base_model relaxml/Llama-1-7b-hf --hf_path $HF/1_7b_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_7b_4bit >> $LOG/1_7b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=4,5,6,7 python tune_susv_lmhead.py --base_model relaxml/Llama-1-7b-hf --hf_path $HF/1_7b_2bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_7b_2bit >> $LOG/1_7b_2bit 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/1_65b_3bit --hf_output_path $HF/1_65b_3bit >> $LOG/1_65b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/1_65b_2bit --hf_output_path $HF/1_65b_2bit >> $LOG/1_65b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/1_65b_4bit --hf_output_path $HF/1_65b_4bit >> $LOG/1_65b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/1_30b_3bit --hf_output_path $HF/1_30b_3bit >> $LOG/1_30b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/1_30b_2bit --hf_output_path $HF/1_30b_2bit >> $LOG/1_30b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/1_30b_4bit --hf_output_path $HF/1_30b_4bit >> $LOG/1_30b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/1_13b_3bit --hf_output_path $HF/1_13b_3bit >> $LOG/1_13b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/1_13b_4bit --hf_output_path $HF/1_13b_4bit >> $LOG/1_13b_4bit 2>&1 & -wait - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/1_13b_2bit --hf_output_path $HF/1_13b_2bit >> $LOG/1_13b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/1_7b_3bit --hf_output_path $HF/1_7b_3bit >> $LOG/1_7b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/1_7b_4bit --hf_output_path $HF/1_7b_4bit >> $LOG/1_7b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/1_7b_2bit --hf_output_path $HF/1_7b_2bit >> $LOG/1_7b_2bit 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_65b_3bit >> $LOG/1_65b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_65b_4bit >> $LOG/1_65b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_65b_2bit >> $LOG/1_65b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_30b_3bit >> $LOG/1_30b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_30b_4bit >> $LOG/1_30b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_30b_2bit >> $LOG/1_30b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_13b_3bit >> $LOG/1_13b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_13b_4bit >> $LOG/1_13b_4bit 2>&1 & -wait - -CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_13b_2bit >> $LOG/1_13b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_7b_3bit >> $LOG/1_7b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_7b_4bit >> $LOG/1_7b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_7b_2bit >> $LOG/1_7b_2bit 2>&1 & -wait - -CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_3bit >> $LOG/1_65b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_4bit >> $LOG/1_65b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_2bit >> $LOG/1_65b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_3bit >> $LOG/1_30b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_4bit >> $LOG/1_30b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_2bit >> $LOG/1_30b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_3bit >> $LOG/1_13b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_4bit >> $LOG/1_13b_4bit 2>&1 & -wait - -CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_2bit >> $LOG/1_13b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_3bit >> $LOG/1_7b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_4bit >> $LOG/1_7b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_2bit >> $LOG/1_7b_2bit 2>&1 & -wait - - diff --git a/sbatch/finetune_llama2.sh b/sbatch/finetune_llama2.sh deleted file mode 100644 index 87e88fa..0000000 --- a/sbatch/finetune_llama2.sh +++ /dev/null @@ -1,172 +0,0 @@ -CKPT=/mnt/desa_data/checkpoints/icml_llama -HF=/mnt/desa_data/hfized/icml_llama -LOG=/mnt/desa_data/logs/icml_llama -HESS=/mnt/desa_data/hessians - - -python finetune_susv_adam.py --save_path $CKPT/2_70b_4bit --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_bs 4 --ft_update_freq 2 >> $LOG/2_70b_4bit 2>&1 - -python finetune_susv_adam.py --save_path $CKPT/2_13b_4bit --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 >> $LOG/2_13b_4bit 2>&1 - -python finetune_susv_adam.py --save_path $CKPT/2_7b_4bit --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 >> $LOG/2_7b_4bit 2>&1 - - -python finetune_susv_adam.py --save_path $CKPT/2_70b_3bit --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_bs 4 --ft_update_freq 2 >> $LOG/2_70b_3bit 2>&1 - -python finetune_susv_adam.py --save_path $CKPT/2_13b_3bit --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 >> $LOG/2_13b_3bit 2>&1 - -python finetune_susv_adam.py --save_path $CKPT/2_7b_3bit --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 >> $LOG/2_7b_3bit 2>&1 - - - -python finetune_susv_adam.py --save_path $CKPT/2_70b_2bit --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_bs 4 --ft_update_freq 2 >> $LOG/2_70b_2bit 2>&1 - -python finetune_susv_adam.py --save_path $CKPT/2_13b_2bit --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 >> $LOG/2_13b_2bit 2>&1 - -python finetune_susv_adam.py --save_path $CKPT/2_7b_2bit --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 >> $LOG/2_7b_2bit 2>&1 - - -python finetune_susv_adam.py --save_path $CKPT/2_70b_chat_4bit --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-70b-chat-hf --hessian_path $HESS/llama2_70b_chat_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_bs 4 --ft_update_freq 2 >> $LOG/2_70b_chat_4bit 2>&1 - -python finetune_susv_adam.py --save_path $CKPT/2_13b_chat_4bit --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-13b-chat-hf --hessian_path $HESS/llama2_13b_chat_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 >> $LOG/2_13b_chat_4bit 2>&1 - -python finetune_susv_adam.py --save_path $CKPT/2_7b_chat_4bit --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-7b-chat-hf --hessian_path $HESS/llama2_7b_chat_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 >> $LOG/2_7b_chat_4bit 2>&1 - - -python finetune_susv_adam.py --save_path $CKPT/2_70b_chat_3bit --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-70b-chat-hf --hessian_path $HESS/llama2_70b_chat_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_bs 4 --ft_update_freq 2 >> $LOG/2_70b_chat_3bit 2>&1 - -python finetune_susv_adam.py --save_path $CKPT/2_13b_chat_3bit --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-13b-chat-hf --hessian_path $HESS/llama2_13b_chat_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 >> $LOG/2_13b_chat_3bit 2>&1 - -python finetune_susv_adam.py --save_path $CKPT/2_7b_chat_3bit --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-7b-chat-hf --hessian_path $HESS/llama2_7b_chat_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 >> $LOG/2_7b_chat_3bit 2>&1 - - -python finetune_susv_adam.py --save_path $CKPT/2_70b_chat_2bit --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-70b-chat-hf --hessian_path $HESS/llama2_70b_chat_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_bs 4 --ft_update_freq 2 >> $LOG/2_70b_chat_2bit 2>&1 - -python finetune_susv_adam.py --save_path $CKPT/2_13b_chat_2bit --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-13b-chat-hf --hessian_path $HESS/llama2_13b_chat_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 >> $LOG/2_13b_chat_2bit 2>&1 - -python finetune_susv_adam.py --save_path $CKPT/2_7b_chat_2bit --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-7b-chat-hf --hessian_path $HESS/llama2_7b_chat_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 >> $LOG/2_7b_chat_2bit 2>&1 - - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_4bit --hf_output_path $HF/2_70b_4bit >> $LOG/2_70b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_70b_3bit --hf_output_path $HF/2_70b_3bit >> $LOG/2_70b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_70b_2bit --hf_output_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_13b_4bit --hf_output_path $HF/2_13b_4bit >> $LOG/2_13b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_13b_3bit --hf_output_path $HF/2_13b_3bit >> $LOG/2_13b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/2_13b_2bit --hf_output_path $HF/2_13b_2bit >> $LOG/2_13b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/2_7b_4bit --hf_output_path $HF/2_7b_4bit >> $LOG/2_7b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/2_7b_3bit --hf_output_path $HF/2_7b_3bit >> $LOG/2_7b_3bit 2>&1 & -wait -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_7b_2bit --hf_output_path $HF/2_7b_2bit >> $LOG/2_7b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_70b_chat_4bit --hf_output_path $HF/2_70b_chat_4bit >> $LOG/2_70b_chat_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_70b_chat_3bit --hf_output_path $HF/2_70b_chat_3bit >> $LOG/2_70b_chat_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_70b_chat_2bit --hf_output_path $HF/2_70b_chat_2bit >> $LOG/2_70b_chat_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_13b_chat_4bit --hf_output_path $HF/2_13b_chat_4bit >> $LOG/2_13b_chat_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/2_13b_chat_3bit --hf_output_path $HF/2_13b_chat_3bit >> $LOG/2_13b_chat_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/2_13b_chat_2bit --hf_output_path $HF/2_13b_chat_2bit >> $LOG/2_13b_chat_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/2_7b_chat_4bit --hf_output_path $HF/2_7b_chat_4bit >> $LOG/2_7b_chat_4bit 2>&1 & -wait -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_7b_chat_3bit --hf_output_path $HF/2_7b_chat_3bit >> $LOG/2_7b_chat_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_7b_chat_2bit --hf_output_path $HF/2_7b_chat_2bit >> $LOG/2_7b_chat_2bit 2>&1 & -wait - -python tune_susv_lmhead.py --base_model meta-llama/Llama-2-70b-hf --hf_path $HF/2_70b_3bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_bs 1 --ctx_size 3072 --ft_update_freq 2 --ckpt_path $CKPT/2_70b_3bit >> $LOG/2_70b_3bit 2>&1 -python tune_susv_lmhead.py --base_model meta-llama/Llama-2-70b-hf --hf_path $HF/2_70b_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_bs 1 --ctx_size 3072 --ft_update_freq 2 --ckpt_path $CKPT/2_70b_4bit >> $LOG/2_70b_4bit 2>&1 -python tune_susv_lmhead.py --base_model meta-llama/Llama-2-70b-hf --hf_path $HF/2_70b_2bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_bs 1 --ctx_size 3072 --ft_update_freq 2 --ckpt_path $CKPT/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 - -CUDA_VISIBLE_DEVICES=0,1,2,3 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-13b-hf --hf_path $HF/2_13b_3bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_13b_3bit >> $LOG/2_13b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=4,5,6,7 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-13b-hf --hf_path $HF/2_13b_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_13b_4bit >> $LOG/2_13b_4bit 2>&1 & -wait - -CUDA_VISIBLE_DEVICES=0,1,2,3 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-13b-hf --hf_path $HF/2_13b_2bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_13b_2bit >> $LOG/2_13b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=4,5,6,7 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-7b-hf --hf_path $HF/2_7b_3bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_7b_3bit >> $LOG/2_7b_3bit 2>&1 & -wait - -CUDA_VISIBLE_DEVICES=0,1,2,3 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-7b-hf --hf_path $HF/2_7b_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_7b_4bit >> $LOG/2_7b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=4,5,6,7 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-7b-hf --hf_path $HF/2_7b_2bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_7b_2bit >> $LOG/2_7b_2bit 2>&1 & -wait - -python tune_susv_lmhead.py --base_model meta-llama/Llama-2-70b-chat-hf --hf_path $HF/2_70b_chat_3bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_bs 1 --ctx_size 3072 --ft_update_freq 2 --ckpt_path $CKPT/2_70b_chat_3bit >> $LOG/2_70b_chat_3bit 2>&1 -python tune_susv_lmhead.py --base_model meta-llama/Llama-2-70b-chat-hf --hf_path $HF/2_70b_chat_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_bs 1 --ctx_size 3072 --ft_update_freq 2 --ckpt_path $CKPT/2_70b_chat_4bit >> $LOG/2_70b_chat_4bit 2>&1 -python tune_susv_lmhead.py --base_model meta-llama/Llama-2-70b-chat-hf --hf_path $HF/2_70b_chat_2bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_bs 1 --ctx_size 3072 --ft_update_freq 2 --ckpt_path $CKPT/2_70b_chat_2bit >> $LOG/2_70b_chat_2bit 2>&1 - -CUDA_VISIBLE_DEVICES=0,1,2,3 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-13b-chat-hf --hf_path $HF/2_13b_chat_3bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_13b_chat_3bit >> $LOG/2_13b_chat_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=4,5,6,7 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-13b-chat-hf --hf_path $HF/2_13b_chat_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_13b_chat_4bit >> $LOG/2_13b_chat_4bit 2>&1 & -wait - -CUDA_VISIBLE_DEVICES=0,1,2,3 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-13b-chat-hf --hf_path $HF/2_13b_chat_2bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_13b_chat_2bit >> $LOG/2_13b_chat_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=4,5,6,7 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-7b-chat-hf --hf_path $HF/2_7b_chat_3bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_7b_chat_3bit >> $LOG/2_7b_chat_3bit 2>&1 & -wait - -CUDA_VISIBLE_DEVICES=0,1,2,3 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-7b-chat-hf --hf_path $HF/2_7b_chat_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_7b_chat_4bit >> $LOG/2_7b_chat_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=4,5,6,7 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-7b-chat-hf --hf_path $HF/2_7b_chat_2bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_7b_chat_2bit >> $LOG/2_7b_chat_2bit 2>&1 & -wait - - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_4bit --hf_output_path $HF/2_70b_4bit >> $LOG/2_70b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_70b_3bit --hf_output_path $HF/2_70b_3bit >> $LOG/2_70b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_70b_2bit --hf_output_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_13b_4bit --hf_output_path $HF/2_13b_4bit >> $LOG/2_13b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_13b_3bit --hf_output_path $HF/2_13b_3bit >> $LOG/2_13b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/2_13b_2bit --hf_output_path $HF/2_13b_2bit >> $LOG/2_13b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/2_7b_4bit --hf_output_path $HF/2_7b_4bit >> $LOG/2_7b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/2_7b_3bit --hf_output_path $HF/2_7b_3bit >> $LOG/2_7b_3bit 2>&1 & -wait -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_7b_2bit --hf_output_path $HF/2_7b_2bit >> $LOG/2_7b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_70b_chat_4bit --hf_output_path $HF/2_70b_chat_4bit >> $LOG/2_70b_chat_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_70b_chat_3bit --hf_output_path $HF/2_70b_chat_3bit >> $LOG/2_70b_chat_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_70b_chat_2bit --hf_output_path $HF/2_70b_chat_2bit >> $LOG/2_70b_chat_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_13b_chat_4bit --hf_output_path $HF/2_13b_chat_4bit >> $LOG/2_13b_chat_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/2_13b_chat_3bit --hf_output_path $HF/2_13b_chat_3bit >> $LOG/2_13b_chat_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/2_13b_chat_2bit --hf_output_path $HF/2_13b_chat_2bit >> $LOG/2_13b_chat_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/2_7b_chat_4bit --hf_output_path $HF/2_7b_chat_4bit >> $LOG/2_7b_chat_4bit 2>&1 & -wait -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_7b_chat_3bit --hf_output_path $HF/2_7b_chat_3bit >> $LOG/2_7b_chat_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_7b_chat_2bit --hf_output_path $HF/2_7b_chat_2bit >> $LOG/2_7b_chat_2bit 2>&1 & -wait - -CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/2_70b_3bit >> $LOG/2_70b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path $HF/2_70b_4bit >> $LOG/2_70b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --hf_path $HF/2_13b_3bit >> $LOG/2_13b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/2_13b_4bit >> $LOG/2_13b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --hf_path $HF/2_13b_2bit >> $LOG/2_13b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --hf_path $HF/2_7b_3bit >> $LOG/2_7b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --hf_path $HF/2_7b_4bit >> $LOG/2_7b_4bit 2>&1 & -wait -CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/2_7b_2bit >> $LOG/2_7b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path $HF/2_70b_chat_3bit >> $LOG/2_70b_chat_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path $HF/2_70b_chat_4bit >> $LOG/2_70b_chat_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --hf_path $HF/2_70b_chat_2bit >> $LOG/2_70b_chat_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/2_13b_chat_3bit >> $LOG/2_13b_chat_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --hf_path $HF/2_13b_chat_4bit >> $LOG/2_13b_chat_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --hf_path $HF/2_13b_chat_2bit >> $LOG/2_13b_chat_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --hf_path $HF/2_7b_chat_3bit >> $LOG/2_7b_chat_3bit 2>&1 & -wait -CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/2_7b_chat_4bit >> $LOG/2_7b_chat_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path $HF/2_7b_chat_2bit >> $LOG/2_7b_chat_2bit 2>&1 & -wait - -CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_3bit >> $LOG/2_70b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_4bit >> $LOG/2_70b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_3bit >> $LOG/2_13b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_4bit >> $LOG/2_13b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_2bit >> $LOG/2_13b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_3bit >> $LOG/2_7b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_4bit >> $LOG/2_7b_4bit 2>&1 & -wait -CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_2bit >> $LOG/2_7b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_chat_3bit >> $LOG/2_70b_chat_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_chat_4bit >> $LOG/2_70b_chat_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_chat_2bit >> $LOG/2_70b_chat_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_chat_3bit >> $LOG/2_13b_chat_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_chat_4bit >> $LOG/2_13b_chat_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_chat_2bit >> $LOG/2_13b_chat_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_chat_3bit >> $LOG/2_7b_chat_3bit 2>&1 & -wait -CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_chat_4bit >> $LOG/2_7b_chat_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_chat_2bit >> $LOG/2_7b_chat_2bit 2>&1 & -wait - - - diff --git a/sbatch/finetune_orig.sh b/sbatch/finetune_orig.sh deleted file mode 100644 index ed8a440..0000000 --- a/sbatch/finetune_orig.sh +++ /dev/null @@ -1,20 +0,0 @@ -CKPT=/mnt/desa_data/checkpoints/finetune_albert -HF=/mnt/desa_data/hfized/finetune_albert -LOG=/mnt/desa_data/logs/finetune_albert -HESS=/mnt/desa_data/hessians - -python finetune_susv.py --save_path $CKPT/2_70b_susv --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144/ --devset_size 640 --ft_valid_size 128 --ft_epochs 5 --ft_bs 4 --ft_update_freq 2 >> $LOG/2_70b_susv 2>&1 -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_susv --hf_output_path $HF/2_70b_susv >> $LOG/2_70b_susv 2>&1 -CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/2_70b_susv >> $LOG/2_70b_susv 2>&1 - -python finetune_susv.py --save_path $CKPT/2_13b_susv --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144/ --devset_size 640 --ft_valid_size 128 --ft_epochs 5 >> $LOG/2_13b_susv 2>&1 -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_13b_susv --hf_output_path $HF/2_13b_susv >> $LOG/2_13b_susv 2>&1 -CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/2_13b_susv >> $LOG/2_13b_susv 2>&1 - -python finetune_susv.py --save_path $CKPT/2_7b_susv --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144/ --devset_size 640 --ft_valid_size 128 --ft_epochs 5 >> $LOG/2_7b_susv 2>&1 -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_7b_susv --hf_output_path $HF/2_7b_susv >> $LOG/2_7b_susv 2>&1 -CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/2_7b_susv >> $LOG/2_7b_susv 2>&1 - - - - diff --git a/sbatch/finetune_test.sh b/sbatch/finetune_test.sh deleted file mode 100644 index 71b7446..0000000 --- a/sbatch/finetune_test.sh +++ /dev/null @@ -1,24 +0,0 @@ -CKPT=/mnt/desa_data/checkpoints/fttest -HF=/mnt/desa_data/hfized/fttest -LOG=/mnt/desa_data/logs/fttest -HESS=/mnt/desa_data/hessians - -mkdir $CKPT -mkdir $HF -mkdir $LOG - - -#python quantize_finetune_llama.py --save_path $CKPT/2_70b_2bit --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144/ --devset_size 384 --ft_valid_size 128 >> $LOG/2_70b_2bit 2>&1 - -#CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_70b_2bit --hf_output_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 - -python finetune_e2e_llama.py --base_model meta-llama/Llama-2-70b-hf --hf_path $HF/2_70b_2bit --devset_size 384 --ft_valid_size 128 --ft_epochs 8 --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_70b_2bit --ft_grad_ckpt --ft_train_mode >> $LOG/2_70b_2bit 2>&1 - -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_70b_2bit --hf_output_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 - -CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 & -wait - - - diff --git a/sbatch/greedy_sweep.sh b/sbatch/greedy_sweep.sh deleted file mode 100644 index 0827b9a..0000000 --- a/sbatch/greedy_sweep.sh +++ /dev/null @@ -1,99 +0,0 @@ -#!/bin/bash - -CKPT=/mnt/desa_data/checkpoints -HF=/mnt/desa_data/hfized -HESS=/mnt/desa_data/hessians -LOG=/mnt/desa_data/logs - -''' -python quantize_llama.py --save_path $CKPT/2_70b_e8p_2bit_gr0 --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 --quip_tune_iters 0 >> $LOG/2_70b_e8p_2bit_gr0 2>&1 -python quantize_llama.py --save_path $CKPT/2_70b_e8p_2bit_gr5 --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 --quip_tune_iters 5 >> $LOG/2_70b_e8p_2bit_gr5 2>&1 -python quantize_llama.py --save_path $CKPT/2_70b_e8p_2bit_gr10 --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 --quip_tune_iters 10 >> $LOG/2_70b_e8p_2bit_gr10 2>&1 -python quantize_llama.py --save_path $CKPT/2_70b_e8p_2bit_gr15 --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 --quip_tune_iters 15 >> $LOG/2_70b_e8p_2bit_gr15 2>&1 -python quantize_llama.py --save_path $CKPT/2_70b_e8p_2bit_gr20 --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 --quip_tune_iters 20 >> $LOG/2_70b_e8p_2bit_gr20 2>&1 - - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_e8p_2bit_gr0 --hf_output_path $HF/2_70b_e8p_2bit_gr0 >> $LOG/2_70b_e8p_2bit_gr0 2>&1 & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_70b_e8p_2bit_gr5 --hf_output_path $HF/2_70b_e8p_2bit_gr5 >> $LOG/2_70b_e8p_2bit_gr5 2>&1 & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_70b_e8p_2bit_gr10 --hf_output_path $HF/2_70b_e8p_2bit_gr10 >> $LOG/2_70b_e8p_2bit_gr10 2>&1 & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_70b_e8p_2bit_gr15 --hf_output_path $HF/2_70b_e8p_2bit_gr15 >> $LOG/2_70b_e8p_2bit_gr15 2>&1 & -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_70b_e8p_2bit_gr20 --hf_output_path $HF/2_70b_e8p_2bit_gr20 >> $LOG/2_70b_e8p_2bit_gr20 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --hf_path $HF/2_70b_e8p_2bit_gr0 >> $LOG/2_70b_e8p_2bit_gr0 2>&1 & -CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --hf_path $HF/2_70b_e8p_2bit_gr5 >> $LOG/2_70b_e8p_2bit_gr5 2>&1 & -CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --hf_path $HF/2_70b_e8p_2bit_gr10 >> $LOG/2_70b_e8p_2bit_gr10 2>&1 & -CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --hf_path $HF/2_70b_e8p_2bit_gr15 >> $LOG/2_70b_e8p_2bit_gr15 2>&1 & -CUDA_VISIBLE_DEVICES=4 python ppl_llama.py --hf_path $HF/2_70b_e8p_2bit_gr20 >> $LOG/2_70b_e8p_2bit_gr20 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_gr0 >> $LOG/2_70b_e8p_2bit_gr0 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_gr5 >> $LOG/2_70b_e8p_2bit_gr5 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_gr10 >> $LOG/2_70b_e8p_2bit_gr10 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_gr15 >> $LOG/2_70b_e8p_2bit_gr15 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_gr20 >> $LOG/2_70b_e8p_2bit_gr20 2>&1 & -wait -''' - -python quantize_llama.py --save_path $CKPT/2_13b_e8p_2bit_gr0 --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144 --quip_tune_iters 0 >> $LOG/2_13b_e8p_2bit_gr0 2>&1 -python quantize_llama.py --save_path $CKPT/2_13b_e8p_2bit_gr5 --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144 --quip_tune_iters 5 >> $LOG/2_13b_e8p_2bit_gr5 2>&1 -python quantize_llama.py --save_path $CKPT/2_13b_e8p_2bit_gr10 --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144 --quip_tune_iters 10 >> $LOG/2_13b_e8p_2bit_gr10 2>&1 -python quantize_llama.py --save_path $CKPT/2_13b_e8p_2bit_gr15 --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144 --quip_tune_iters 15 >> $LOG/2_13b_e8p_2bit_gr15 2>&1 -python quantize_llama.py --save_path $CKPT/2_13b_e8p_2bit_gr20 --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144 --quip_tune_iters 20 >> $LOG/2_13b_e8p_2bit_gr20 2>&1 - - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_13b_e8p_2bit_gr0 --hf_output_path $HF/2_13b_e8p_2bit_gr0 >> $LOG/2_13b_e8p_2bit_gr0 2>&1 & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_13b_e8p_2bit_gr5 --hf_output_path $HF/2_13b_e8p_2bit_gr5 >> $LOG/2_13b_e8p_2bit_gr5 2>&1 & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_13b_e8p_2bit_gr10 --hf_output_path $HF/2_13b_e8p_2bit_gr10 >> $LOG/2_13b_e8p_2bit_gr10 2>&1 & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_13b_e8p_2bit_gr15 --hf_output_path $HF/2_13b_e8p_2bit_gr15 >> $LOG/2_13b_e8p_2bit_gr15 2>&1 & -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_13b_e8p_2bit_gr20 --hf_output_path $HF/2_13b_e8p_2bit_gr20 >> $LOG/2_13b_e8p_2bit_gr20 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --hf_path $HF/2_13b_e8p_2bit_gr0 >> $LOG/2_13b_e8p_2bit_gr0 2>&1 & -CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --hf_path $HF/2_13b_e8p_2bit_gr5 >> $LOG/2_13b_e8p_2bit_gr5 2>&1 & -CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --hf_path $HF/2_13b_e8p_2bit_gr10 >> $LOG/2_13b_e8p_2bit_gr10 2>&1 & -CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --hf_path $HF/2_13b_e8p_2bit_gr15 >> $LOG/2_13b_e8p_2bit_gr15 2>&1 & -CUDA_VISIBLE_DEVICES=4 python ppl_llama.py --hf_path $HF/2_13b_e8p_2bit_gr20 >> $LOG/2_13b_e8p_2bit_gr20 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_e8p_2bit_gr0 >> $LOG/2_13b_e8p_2bit_gr0 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_e8p_2bit_gr5 >> $LOG/2_13b_e8p_2bit_gr5 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_e8p_2bit_gr10 >> $LOG/2_13b_e8p_2bit_gr10 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_e8p_2bit_gr15 >> $LOG/2_13b_e8p_2bit_gr15 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_e8p_2bit_gr20 >> $LOG/2_13b_e8p_2bit_gr20 2>&1 & -wait - - -python quantize_llama.py --save_path $CKPT/2_7b_e8p_2bit_gr0 --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144 --quip_tune_iters 0 >> $LOG/2_7b_e8p_2bit_gr0 2>&1 -python quantize_llama.py --save_path $CKPT/2_7b_e8p_2bit_gr5 --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144 --quip_tune_iters 5 >> $LOG/2_7b_e8p_2bit_gr5 2>&1 -python quantize_llama.py --save_path $CKPT/2_7b_e8p_2bit_gr10 --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144 --quip_tune_iters 10 >> $LOG/2_7b_e8p_2bit_gr10 2>&1 -python quantize_llama.py --save_path $CKPT/2_7b_e8p_2bit_gr15 --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144 --quip_tune_iters 15 >> $LOG/2_7b_e8p_2bit_gr15 2>&1 -python quantize_llama.py --save_path $CKPT/2_7b_e8p_2bit_gr20 --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144 --quip_tune_iters 20 >> $LOG/2_7b_e8p_2bit_gr20 2>&1 - - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_7b_e8p_2bit_gr0 --hf_output_path $HF/2_7b_e8p_2bit_gr0 >> $LOG/2_7b_e8p_2bit_gr0 2>&1 & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_7b_e8p_2bit_gr5 --hf_output_path $HF/2_7b_e8p_2bit_gr5 >> $LOG/2_7b_e8p_2bit_gr5 2>&1 & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_7b_e8p_2bit_gr10 --hf_output_path $HF/2_7b_e8p_2bit_gr10 >> $LOG/2_7b_e8p_2bit_gr10 2>&1 & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_7b_e8p_2bit_gr15 --hf_output_path $HF/2_7b_e8p_2bit_gr15 >> $LOG/2_7b_e8p_2bit_gr15 2>&1 & -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_7b_e8p_2bit_gr20 --hf_output_path $HF/2_7b_e8p_2bit_gr20 >> $LOG/2_7b_e8p_2bit_gr20 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --hf_path $HF/2_7b_e8p_2bit_gr0 >> $LOG/2_7b_e8p_2bit_gr0 2>&1 & -CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --hf_path $HF/2_7b_e8p_2bit_gr5 >> $LOG/2_7b_e8p_2bit_gr5 2>&1 & -CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --hf_path $HF/2_7b_e8p_2bit_gr10 >> $LOG/2_7b_e8p_2bit_gr10 2>&1 & -CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --hf_path $HF/2_7b_e8p_2bit_gr15 >> $LOG/2_7b_e8p_2bit_gr15 2>&1 & -CUDA_VISIBLE_DEVICES=4 python ppl_llama.py --hf_path $HF/2_7b_e8p_2bit_gr20 >> $LOG/2_7b_e8p_2bit_gr20 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_e8p_2bit_gr0 >> $LOG/2_7b_e8p_2bit_gr0 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_e8p_2bit_gr5 >> $LOG/2_7b_e8p_2bit_gr5 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_e8p_2bit_gr10 >> $LOG/2_7b_e8p_2bit_gr10 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_e8p_2bit_gr15 >> $LOG/2_7b_e8p_2bit_gr15 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_e8p_2bit_gr20 >> $LOG/2_7b_e8p_2bit_gr20 2>&1 & -wait diff --git a/sbatch/hilr_susv_test.sh b/sbatch/hilr_susv_test.sh deleted file mode 100644 index 76250d5..0000000 --- a/sbatch/hilr_susv_test.sh +++ /dev/null @@ -1,49 +0,0 @@ -CKPT=/mnt/desa_data/checkpoints/finetune_llama_ehilr -HF=/mnt/desa_data/hfized/finetune_llama_ehilr -LOG=/mnt/desa_data/logs/finetune_llama_ehilr -HESS=/mnt/desa_data/hessians - -mkdir $CKPT -mkdir $HF -mkdir $LOG - -python finetune_susv_adam.py --save_path $CKPT/2_7b_2bit --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 --ft_bs 4 --ft_update_freq 2 >> $LOG/2_7b_2bit 2>&1 - -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_7b_2bit --hf_output_path $HF/2_7b_2bit >> $LOG/2_7b_2bit 2>&1 - -python tune_susv_lmhead.py --base_model meta-llama/Llama-2-7b-hf --hf_path $HF/2_7b_2bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 3072 --ft_update_freq 2 --ckpt_path $CKPT/2_7b_2bit >> $LOG/2_7b_2bit 2>&1 - - -python finetune_susv_adam.py --save_path $CKPT/2_13b_2bit --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 --ft_bs 4 --ft_update_freq 2 >> $LOG/2_13b_2bit 2>&1 - -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_13b_2bit --hf_output_path $HF/2_13b_2bit >> $LOG/2_13b_2bit 2>&1 - -python tune_susv_lmhead.py --base_model meta-llama/Llama-2-13b-hf --hf_path $HF/2_13b_2bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 3072 --ft_update_freq 2 --ckpt_path $CKPT/2_13b_2bit >> $LOG/2_13b_2bit 2>&1 - - -python finetune_susv_adam.py --save_path $CKPT/2_70b_2bit --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 --ft_bs 4 --ft_update_freq 2 >> $LOG/2_70b_2bit 2>&1 - -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_70b_2bit --hf_output_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 - -python tune_susv_lmhead.py --base_model meta-llama/Llama-2-70b-hf --hf_path $HF/2_70b_2bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 3072 --ft_update_freq 2 --ckpt_path $CKPT/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 - -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_70b_2bit --hf_output_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 & - -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_13b_2bit --hf_output_path $HF/2_13b_2bit >> $LOG/2_13b_2bit 2>&1 & - -CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/2_7b_2bit --hf_output_path $HF/2_7b_2bit >> $LOG/2_7b_2bit 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/2_13b_2bit >> $LOG/2_13b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_2bit >> $LOG/2_13b_2bit 2>&1 & - -CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path $HF/2_7b_2bit >> $LOG/2_7b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_2bit >> $LOG/2_7b_2bit 2>&1 & - -CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --hf_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 & -wait - - - diff --git a/sbatch/icml_llama_eval.sh b/sbatch/icml_llama_eval.sh deleted file mode 100644 index f059b36..0000000 --- a/sbatch/icml_llama_eval.sh +++ /dev/null @@ -1,80 +0,0 @@ -HF=/mnt/desa_data/hfized/icml_llama -LOG=/mnt/desa_data/logs/icml_llama_eval - -CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_65b_3bit >> $LOG/1_65b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_65b_4bit >> $LOG/1_65b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_65b_2bit >> $LOG/1_65b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --hf_path $HF/2_70b_3bit >> $LOG/2_70b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/2_70b_4bit >> $LOG/2_70b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --hf_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --hf_path $HF/2_70b_chat_3bit >> $LOG/2_70b_chat_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --hf_path $HF/2_70b_chat_4bit >> $LOG/2_70b_chat_4bit 2>&1 & -wait - -CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/2_70b_chat_2bit >> $LOG/2_70b_chat_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_30b_3bit >> $LOG/1_30b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_30b_4bit >> $LOG/1_30b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_30b_2bit >> $LOG/1_30b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_13b_3bit >> $LOG/1_13b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_13b_4bit >> $LOG/1_13b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_13b_2bit >> $LOG/1_13b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --hf_path $HF/2_13b_3bit >> $LOG/2_13b_3bit 2>&1 & -wait - -CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/2_13b_4bit >> $LOG/2_13b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path $HF/2_13b_2bit >> $LOG/2_13b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path $HF/2_13b_chat_3bit >> $LOG/2_13b_chat_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --hf_path $HF/2_13b_chat_4bit >> $LOG/2_13b_chat_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/2_13b_chat_2bit >> $LOG/2_13b_chat_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_7b_3bit >> $LOG/1_7b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_7b_4bit >> $LOG/1_7b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_7b_2bit >> $LOG/1_7b_2bit 2>&1 & -wait - -CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/2_7b_3bit >> $LOG/2_7b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path $HF/2_7b_4bit >> $LOG/2_7b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path $HF/2_7b_2bit >> $LOG/2_7b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --hf_path $HF/2_7b_chat_3bit >> $LOG/2_7b_chat_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/2_7b_chat_4bit >> $LOG/2_7b_chat_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --hf_path $HF/2_7b_chat_2bit >> $LOG/2_7b_chat_2bit 2>&1 & -wait - -CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_3bit >> $LOG/1_65b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_4bit >> $LOG/1_65b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_2bit >> $LOG/1_65b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_3bit >> $LOG/2_70b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_4bit >> $LOG/2_70b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_chat_3bit >> $LOG/2_70b_chat_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_chat_4bit >> $LOG/2_70b_chat_4bit 2>&1 & - -CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_chat_2bit >> $LOG/2_70b_chat_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_3bit >> $LOG/1_30b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_4bit >> $LOG/1_30b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_2bit >> $LOG/1_30b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_3bit >> $LOG/1_13b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_4bit >> $LOG/1_13b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_2bit >> $LOG/1_13b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_3bit >> $LOG/2_13b_3bit 2>&1 & -wait - -CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_4bit >> $LOG/2_13b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_2bit >> $LOG/2_13b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_chat_3bit >> $LOG/2_13b_chat_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_chat_4bit >> $LOG/2_13b_chat_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_chat_2bit >> $LOG/2_13b_chat_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_3bit >> $LOG/1_7b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_4bit >> $LOG/1_7b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_2bit >> $LOG/1_7b_2bit 2>&1 & -wait - -CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_3bit >> $LOG/2_7b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_4bit >> $LOG/2_7b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_2bit >> $LOG/2_7b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_chat_3bit >> $LOG/2_7b_chat_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_chat_4bit >> $LOG/2_7b_chat_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_chat_2bit >> $LOG/2_7b_chat_2bit 2>&1 & -wait - - - diff --git a/sbatch/icml_ppl.sh b/sbatch/icml_ppl.sh deleted file mode 100644 index b34d25c..0000000 --- a/sbatch/icml_ppl.sh +++ /dev/null @@ -1,49 +0,0 @@ -HF=/mnt/desa_data/hfized -LOG=/mnt/desa_data/logs/icml_ppl - -CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/new_e8p/2_70b_e8p_2bit --seqlen 2048 >> $LOG/2_70b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path $HF/rvq/2_70b_e8prvq_3bit --seqlen 2048 >> $LOG/2_70b_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path $HF/rvq/2_70b_e8prvq_4bit --seqlen 2048 >> $LOG/2_70b_e8prvq_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --hf_path $HF/new_e8p/2_7b_e8p_2bit --seqlen 2048 >> $LOG/2_7b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/new_e8p/2_13b_e8p_2bit --seqlen 2048 >> $LOG/2_13b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --hf_path $HF/rvq/2_13b_e8prvq_3bit --seqlen 2048 >> $LOG/2_13b_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --hf_path $HF/rvq/2_13b_e8prvq_4bit --seqlen 2048 >> $LOG/2_13b_e8prvq_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --hf_path meta-llama/Llama-2-13b-hf --seqlen 2048 >> $LOG/2_13b_fp16 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path $HF/rvq/2_7b_e8prvq_3bit --seqlen 2048 >> $LOG/2_7b_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path $HF/rvq/2_7b_e8prvq_4bit --seqlen 2048 >> $LOG/2_7b_e8prvq_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --hf_path meta-llama/Llama-2-7b-hf --seqlen 2048 >> $LOG/2_7b_fp16 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/new_e8p/1_65b_e8p_2bit --seqlen 2048 >> $LOG/1_65b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --hf_path $HF/rvq/1_65b_e8prvq_3bit --seqlen 2048 >> $LOG/1_65b_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/rvq/1_65b_e8prvq_4bit --seqlen 2048 >> $LOG/1_65b_e8prvq_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=6,7 python eval_ppl.py --hf_path meta-llama/Llama-2-70b-hf --seqlen 2048 >> $LOG/2_70b_fp16 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/new_e8p/1_30b_e8p_2bit --seqlen 2048 >> $LOG/1_30b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path $HF/rvq/1_30b_e8prvq_3bit --seqlen 2048 >> $LOG/1_30b_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path $HF/rvq/1_30b_e8prvq_4bit --seqlen 2048 >> $LOG/1_30b_e8prvq_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --hf_path relaxml/Llama-1-30b-hf --seqlen 2048 >> $LOG/1_30b_fp16 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/new_e8p/1_13b_e8p_2bit --seqlen 2048 >> $LOG/1_13b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --hf_path $HF/rvq/1_13b_e8prvq_3bit --seqlen 2048 >> $LOG/1_13b_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --hf_path $HF/rvq/1_13b_e8prvq_4bit --seqlen 2048 >> $LOG/1_13b_e8prvq_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --hf_path relaxml/Llama-1-13b-hf --seqlen 2048 >> $LOG/1_13b_fp16 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/new_e8p/1_7b_e8p_2bit --seqlen 2048 >> $LOG/1_7b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path $HF/rvq/1_7b_e8prvq_3bit --seqlen 2048 >> $LOG/1_7b_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path $HF/rvq/1_7b_e8prvq_4bit --seqlen 2048 >> $LOG/1_7b_e8prvq_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --hf_path relaxml/Llama-1-7b-hf --seqlen 2048 >> $LOG/1_7b_fp16 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/falcon_180b_e8p_2bit --seqlen 2048 --no_use_cuda_graph >> $LOG/falcon_180b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --hf_path $HF/falcon_180b_e8prvq_3bit --seqlen 2048 --no_use_cuda_graph >> $LOG/falcon_180b_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=6,7 python eval_ppl.py --hf_path $HF/falcon_180b_e8prvq_4bit --seqlen 2048 --no_use_cuda_graph >> $LOG/falcon_180b_e8prvq_4bit 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0,1,2,3,4 python eval_ppl.py --hf_path tiiuae/falcon-180B --seqlen 2048 --no_use_cuda_graph >> $LOG/falcon_180b_fp16 2>&1 & -CUDA_VISIBLE_DEVICES=5,6 python eval_ppl.py --hf_path relaxml/Llama-1-65b-hf --seqlen 2048 >> $LOG/1_65b_fp16 2>&1 & - -wait diff --git a/sbatch/llama1_hessian.sh b/sbatch/llama1_hessian.sh deleted file mode 100644 index b85350d..0000000 --- a/sbatch/llama1_hessian.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -python hessian_offline.py --batch_size 4 --devset_size 6144 --ctx_size 2048 --base_model /work/desa_data/meta_llama1/huggingface_65B --save_path /work/desa_data/hessians/llama1_65b_6144 - -python hessian_offline.py --batch_size 4 --devset_size 6144 --ctx_size 2048 --base_model /work/desa_data/meta_llama1/huggingface_30B --save_path /work/desa_data/hessians/llama1_30b_6144 - -python hessian_offline.py --batch_size 4 --devset_size 6144 --ctx_size 2048 --base_model /work/desa_data/meta_llama1/huggingface_13B --save_path /work/desa_data/hessians/llama1_13b_6144 - -python hessian_offline.py --batch_size 4 --devset_size 6144 --ctx_size 2048 --base_model /work/desa_data/meta_llama1/huggingface_7B --save_path /work/desa_data/hessians/llama1_7b_6144 diff --git a/sbatch/llama1_nolr_test.sh b/sbatch/llama1_nolr_test.sh deleted file mode 100644 index 3fb1bbe..0000000 --- a/sbatch/llama1_nolr_test.sh +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/bash - -CKPT=/mnt/jerry_data/checkpoints -HF=/mnt/jerry_data/hfized -HESS=/mnt/jerry_data/hessians -LOG=/mnt/jerry_data/logs -L1=/mnt/jerry_data/meta_llama1 - - -CUDA_VISIBLE_DEVICES=0 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 2 --hf_path $HF/1_65b_hi_4bit_nolr >> $LOG/1_65b_hi_4bit_nolr 2>&1 - - -''' -python quantize_llama.py --save_path $CKPT/1_65b_e8p_2bit_nolr --codebook E8P12 --lora_rank 0 --scale_override 0.9 --base_model $L1/huggingface_65B --hessian_path $HESS/llama1_65b_6144 >> $LOG/1_65b_e8p_2bit_nolr 2>&1 -python quantize_llama.py --save_path $CKPT/1_30b_e8p_2bit_nolr --codebook E8P12 --lora_rank 0 --scale_override 0.9 --base_model $L1/huggingface_30B --hessian_path $HESS/llama1_30b_6144 >> $LOG/1_30b_e8p_2bit_nolr 2>&1 -python quantize_llama.py --save_path $CKPT/1_13b_e8p_2bit_nolr --codebook E8P12 --lora_rank 0 --scale_override 0.9 --base_model $L1/huggingface_13B --hessian_path $HESS/llama1_13b_6144 >> $LOG/1_13b_e8p_2bit_nolr 2>&1 -python quantize_llama.py --save_path $CKPT/1_7b_e8p_2bit_nolr --codebook E8P12 --lora_rank 0 --scale_override 0.9 --base_model $L1/huggingface_7B --hessian_path $HESS/llama1_7b_6144 >> $LOG/1_7b_e8p_2bit_nolr 2>&1 -python quantize_llama.py --save_path $CKPT/1_65b_hi_4bit_nolr --codebook HI4B1C --lora_rank 0 --scale_override 2.7 --base_model $L1/huggingface_65B --hessian_path $HESS/llama1_65b_6144 >> $LOG/1_65b_hi_4bit_nolr 2>&1 -python quantize_llama.py --save_path $CKPT/1_30b_hi_4bit_nolr --codebook HI4B1C --lora_rank 0 --scale_override 2.7 --base_model $L1/huggingface_30B --hessian_path $HESS/llama1_30b_6144 >> $LOG/1_30b_hi_4bit_nolr 2>&1 -python quantize_llama.py --save_path $CKPT/1_13b_hi_4bit_nolr --codebook HI4B1C --lora_rank 0 --scale_override 2.7 --base_model $L1/huggingface_13B --hessian_path $HESS/llama1_13b_6144 >> $LOG/1_13b_hi_4bit_nolr 2>&1 -python quantize_llama.py --save_path $CKPT/1_7b_hi_4bit_nolr --codebook HI4B1C --lora_rank 0 --scale_override 2.7 --base_model $L1/huggingface_7B --hessian_path $HESS/llama1_7b_6144 >> $LOG/1_7b_hi_4bit_nolr 2>&1 - - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/1_65b_e8p_2bit_nolr --hf_output_path $HF/1_65b_e8p_2bit_nolr & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/1_30b_e8p_2bit_nolr --hf_output_path $HF/1_30b_e8p_2bit_nolr & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/1_13b_e8p_2bit_nolr --hf_output_path $HF/1_13b_e8p_2bit_nolr & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/1_7b_e8p_2bit_nolr --hf_output_path $HF/1_7b_e8p_2bit_nolr & -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/1_65b_hi_4bit_nolr --hf_output_path $HF/1_65b_hi_4bit_nolr & -CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/1_30b_hi_4bit_nolr --hf_output_path $HF/1_30b_hi_4bit_nolr & -CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/1_13b_hi_4bit_nolr --hf_output_path $HF/1_13b_hi_4bit_nolr & -CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/1_7b_hi_4bit_nolr --hf_output_path $HF/1_7b_hi_4bit_nolr & - -wait - -# perplexity -CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_65b_e8p_2bit_nolr >> $LOG/1_65b_e8p_2bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_30b_e8p_2bit_nolr >> $LOG/1_30b_e8p_2bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_13b_e8p_2bit_nolr >> $LOG/1_13b_e8p_2bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_7b_e8p_2bit_nolr >> $LOG/1_7b_e8p_2bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=4 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_65b_hi_4bit_nolr >> $LOG/1_65b_hi_4bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=5 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_30b_hi_4bit_nolr >> $LOG/1_30b_hi_4bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=6 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_13b_hi_4bit_nolr >> $LOG/1_13b_hi_4bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=7 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_7b_hi_4bit_nolr >> $LOG/1_7b_hi_4bit_nolr 2>&1 & - -wait - -# zero shot - -CUDA_VISIBLE_DEVICES=0 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_e8p_2bit_nolr >> $LOG/1_65b_e8p_2bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_e8p_2bit_nolr >> $LOG/1_30b_e8p_2bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_e8p_2bit_nolr >> $LOG/1_13b_e8p_2bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_e8p_2bit_nolr >> $LOG/1_7b_e8p_2bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_hi_4bit_nolr >> $LOG/1_65b_hi_4bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_hi_4bit_nolr >> $LOG/1_30b_hi_4bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_hi_4bit_nolr >> $LOG/1_13b_hi_4bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_hi_4bit_nolr >> $LOG/1_7b_hi_4bit_nolr 2>&1 & - -wait -''' diff --git a/sbatch/llama2_chat_hessian.sh b/sbatch/llama2_chat_hessian.sh deleted file mode 100644 index 13417bc..0000000 --- a/sbatch/llama2_chat_hessian.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -python hessian_offline.py --batch_size 4 --devset_size 6144 --ctx_size 4096 --base_model meta-llama/Llama-2-70b-chat-hf --save_path /work/desa_data/hessians/llama2_70b_chat_6144 - -python hessian_offline.py --batch_size 4 --devset_size 6144 --ctx_size 4096 --base_model meta-llama/Llama-2-13b-chat-hf --save_path /work/desa_data/hessians/llama2_13b_chat_6144 - -python hessian_offline.py --batch_size 4 --devset_size 6144 --ctx_size 4096 --base_model meta-llama/Llama-2-7b-chat-hf --save_path /work/desa_data/hessians/llama2_7b_chat_6144 - diff --git a/sbatch/llama2_chat_quantize.sh b/sbatch/llama2_chat_quantize.sh deleted file mode 100644 index 0602ab6..0000000 --- a/sbatch/llama2_chat_quantize.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - -CKPT=/mnt/desa_data/checkpoints -HF=/mnt/desa_data/hfized -HESS=/mnt/desa_data/hessians -LOG=/mnt/desa_data/logs - - -python quantize_llama.py --save_path $CKPT/2_70b_chat_e8p_2bit --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-70b-chat-hf --hessian_path $HESS/llama2_70b_chat_6144 >> $LOG/2_70b_chat_e8p_2bit 2>&1 -python quantize_llama.py --save_path $CKPT/2_13b_chat_e8p_2bit --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-13b-chat-hf --hessian_path $HESS/llama2_13b_chat_6144 >> $LOG/2_13b_chat_e8p_2bit 2>&1 -python quantize_llama.py --save_path $CKPT/2_7b_chat_e8p_2bit --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-7b-chat-hf --hessian_path $HESS/llama2_7b_chat_6144 >> $LOG/2_7b_chat_e8p_2bit 2>&1 - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_chat_e8p_2bit --hf_output_path $HF/2_70b_chat_e8p_2bit >> $LOG/2_70b_chat_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_13b_chat_e8p_2bit --hf_output_path $HF/2_13b_chat_e8p_2bit >> $LOG/2_13b_chat_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_7b_chat_e8p_2bit --hf_output_path $HF/2_7b_chat_e8p_2bit >> $LOG/2_7b_chat_e8p_2bit 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --hf_path $HF/2_70b_chat_e8p_2bit >> $LOG/2_70b_chat_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --hf_path $HF/2_13b_chat_e8p_2bit >> $LOG/2_13b_chat_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --hf_path $HF/2_7b_chat_e8p_2bit >> $LOG/2_7b_chat_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=3,4 python ppl_llama.py --hf_path meta-llama/Llama-2-70b-chat-hf --no_use_cuda_graph >> $LOG/2_70b_chat_fp16 2>&1 & -CUDA_VISIBLE_DEVICES=5 python ppl_llama.py --hf_path meta-llama/Llama-2-13b-chat-hf >> $LOG/2_13b_chat_fp16 2>&1 & -CUDA_VISIBLE_DEVICES=6 python ppl_llama.py --hf_path meta-llama/Llama-2-7b-chat-hf >> $LOG/2_7b_chat_fp16 2>&1 & -wait - -CUDA_VISIBLE_DEVICES=0 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_chat_e8p_2bit >> $LOG/2_70b_chat_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_chat_e8p_2bit >> $LOG/2_13b_chat_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_chat_e8p_2bit >> $LOG/2_7b_chat_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=3,4 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 2 --hf_path meta-llama/Llama-2-70b-chat-hf >> $LOG/2_70b_chat_fp16 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path meta-llama/Llama-2-13b-chat-hf >> $LOG/2_13b_chat_fp16 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path meta-llama/Llama-2-7b-chat-hf >> $LOG/2_7b_chat_fp16 2>&1 & -wait diff --git a/sbatch/llama2_nolr_test.sh b/sbatch/llama2_nolr_test.sh deleted file mode 100644 index 8ea750b..0000000 --- a/sbatch/llama2_nolr_test.sh +++ /dev/null @@ -1,51 +0,0 @@ -#!/bin/bash - -CKPT=/mnt/jerry_data/checkpoints -HF=/mnt/jerry_data/hfized -HESS=/mnt/jerry_data/hessians -LOG=/mnt/jerry_data/logs - -CUDA_VISIBLE_DEVICES=1,2,3,4,5,6,7 python quantize_llama.py --save_path $CKPT/2_70b_hi_4bit_nolr --codebook HI4B1C --lora_rank 0 --scale_override 2.7 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 >> $LOG/2_70b_hi_4bit_nolr 2>&1 - -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_70b_hi_4bit_nolr --hf_output_path $HF/2_70b_hi_4bit_nolr - -CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --hf_path $HF/2_70b_hi_4bit_nolr >> $LOG/2_70b_hi_4bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --hf_path $HF/2_13b_e8p_2bit_nolr >> $LOG/2_13b_e8p_2bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --hf_path $HF/2_7b_e8p_2bit_nolr >> $LOG/2_7b_e8p_2bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=4 python ppl_llama.py --hf_path $HF/2_13b_hi_4bit_nolr >> $LOG/2_13b_hi_4bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=5 python ppl_llama.py --hf_path $HF/2_7b_hi_4bit_nolr >> $LOG/2_7b_hi_4bit_nolr 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_hi_4bit_nolr >> $LOG/2_70b_hi_4bit_nolr 2>&1 - - -''' -CUDA_VISIBLE_DEVICES=2,3,4,5,6,7 python quantize_llama.py --save_path $CKPT/2_13b_e8p_2bit_nolr --codebook E8P12 --lora_rank 0 --scale_override 0.9 --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144 >> $LOG/2_13b_e8p_2bit_nolr 2>&1 -CUDA_VISIBLE_DEVICES=2,3,4,5,6,7 python quantize_llama.py --save_path $CKPT/2_7b_e8p_2bit_nolr --codebook E8P12 --lora_rank 0 --scale_override 0.9 --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144 >> $LOG/2_7b_e8p_2bit_nolr 2>&1 -CUDA_VISIBLE_DEVICES=2,3,4,5,6,7 python quantize_llama.py --save_path $CKPT/2_13b_hi_4bit_nolr --codebook HI4B1C --lora_rank 0 --scale_override 2.7 --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144 >> $LOG/2_13b_hi_4bit_nolr 2>&1 -CUDA_VISIBLE_DEVICES=2,3,4,5,6,7 python quantize_llama.py --save_path $CKPT/2_7b_hi_4bit_nolr --codebook HI4B1C --lora_rank 0 --scale_override 2.7 --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144 >> $LOG/2_7b_hi_4bit_nolr 2>&1 - - -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_13b_e8p_2bit_nolr --hf_output_path $HF/2_13b_e8p_2bit_nolr & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_7b_e8p_2bit_nolr --hf_output_path $HF/2_7b_e8p_2bit_nolr & -CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/2_13b_hi_4bit_nolr --hf_output_path $HF/2_13b_hi_4bit_nolr & -CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/2_7b_hi_4bit_nolr --hf_output_path $HF/2_7b_hi_4bit_nolr & - -wait - -CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --seqlen 2048 --hf_path $HF/2_13b_e8p_2bit_nolr >> $LOG/2_13b_e8p_2bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --seqlen 2048 --hf_path $HF/2_7b_e8p_2bit_nolr >> $LOG/2_7b_e8p_2bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=6 python ppl_llama.py --seqlen 2048 --hf_path $HF/2_13b_hi_4bit_nolr >> $LOG/2_13b_hi_4bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=7 python ppl_llama.py --seqlen 2048 --hf_path $HF/2_7b_hi_4bit_nolr >> $LOG/2_7b_hi_4bit_nolr 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=2 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_e8p_2bit_nolr >> $LOG/2_13b_e8p_2bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_e8p_2bit_nolr >> $LOG/2_7b_e8p_2bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_hi_4bit_nolr >> $LOG/2_13b_hi_4bit_nolr 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_hi_4bit_nolr >> $LOG/2_7b_hi_4bit_nolr 2>&1 & - -wait - -''' diff --git a/sbatch/lr_sweep_1.sh b/sbatch/lr_sweep_1.sh deleted file mode 100644 index b4d0511..0000000 --- a/sbatch/lr_sweep_1.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash - -CKPT=/mnt/jerry_data/checkpoints -HF=/mnt/jerry_data/hfized -HESS=/mnt/jerry_data/hessians -LOG=/mnt/jerry_data/logs - - -''' -python quantize_llama.py --save_path $CKPT/2_70b_e8p_2bit_fulllr64 --codebook E8P12 --scale_override 0.9 --lora_rank 64 --hessian_path $HESS/llama2_70b_6144 --full_svd >> $LOG/2_70b_e8p_2bit_fulllr64 2>&1 -python quantize_llama.py --save_path $CKPT/2_70b_e8p_2bit_fulllr32 --codebook E8P12 --scale_override 0.9 --lora_rank 32 --hessian_path $HESS/llama2_70b_6144 --full_svd >> $LOG/2_70b_e8p_2bit_fulllr32 2>&1 -python quantize_llama.py --save_path $CKPT/2_70b_e8p_2bit_fulllr16 --codebook E8P12 --scale_override 0.9 --lora_rank 16 --hessian_path $HESS/llama2_70b_6144 --full_svd >> $LOG/2_70b_e8p_2bit_fulllr16 2>&1 -python quantize_llama.py --save_path $CKPT/2_70b_e8p_2bit_fulllr8 --codebook E8P12 --scale_override 0.9 --lora_rank 8 --hessian_path $HESS/llama2_70b_6144 --full_svd >> $LOG/2_70b_e8p_2bit_fulllr8 2>&1 - -wait - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_e8p_2bit_fulllr64 --hf_output_path $HF/2_70b_e8p_2bit_fulllr64 & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_70b_e8p_2bit_fulllr32 --hf_output_path $HF/2_70b_e8p_2bit_fulllr32 & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_70b_e8p_2bit_fulllr16 --hf_output_path $HF/2_70b_e8p_2bit_fulllr16 & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_70b_e8p_2bit_fulllr8 --hf_output_path $HF/2_70b_e8p_2bit_fulllr8 & - -wait - -CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --seqlen 2048 --hf_path $HF/2_70b_e8p_2bit_fulllr64 >> $LOG/2_70b_e8p_2bit_fulllr64 2>&1 & -CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --seqlen 2048 --hf_path $HF/2_70b_e8p_2bit_fulllr32 >> $LOG/2_70b_e8p_2bit_fulllr32 2>&1 & -CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --seqlen 2048 --hf_path $HF/2_70b_e8p_2bit_fulllr16 >> $LOG/2_70b_e8p_2bit_fulllr16 2>&1 & -CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --seqlen 2048 --hf_path $HF/2_70b_e8p_2bit_fulllr8 >> $LOG/2_70b_e8p_2bit_fulllr8 2>&1 & - -wait -''' - - -CUDA_VISIBLE_DEVICES=0 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_fulllr64 >> $LOG/2_70b_e8p_2bit_fulllr64 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_fulllr32 >> $LOG/2_70b_e8p_2bit_fulllr32 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_fulllr16 >> $LOG/2_70b_e8p_2bit_fulllr16 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_fulllr8 >> $LOG/2_70b_e8p_2bit_fulllr8 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_fulllr4 >> $LOG/2_70b_e8p_2bit_fulllr4 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_fulllr2 >> $LOG/2_70b_e8p_2bit_fulllr2 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_fulllr1 >> $LOG/2_70b_e8p_2bit_fulllr1 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_fulllr0 >> $LOG/2_70b_e8p_2bit_fulllr0 2>&1 & - - -wait diff --git a/sbatch/lr_sweep_2.sh b/sbatch/lr_sweep_2.sh deleted file mode 100644 index 1d61403..0000000 --- a/sbatch/lr_sweep_2.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash - -CKPT=/mnt/jerry_data/checkpoints -HF=/mnt/jerry_data/hfized -HESS=/mnt/jerry_data/hessians -LOG=/mnt/jerry_data/logs - - -python quantize_llama.py --save_path $CKPT/2_70b_e8p_2bit_fulllr4 --codebook E8P12 --scale_override 0.9 --lora_rank 4 --hessian_path $HESS/llama2_70b_6144 --full_svd >> $LOG/2_70b_e8p_2bit_fulllr4 2>&1 -python quantize_llama.py --save_path $CKPT/2_70b_e8p_2bit_fulllr2 --codebook E8P12 --scale_override 0.9 --lora_rank 2 --hessian_path $HESS/llama2_70b_6144 --full_svd >> $LOG/2_70b_e8p_2bit_fulllr2 2>&1 -python quantize_llama.py --save_path $CKPT/2_70b_e8p_2bit_fulllr1 --codebook E8P12 --scale_override 0.9 --lora_rank 1 --hessian_path $HESS/llama2_70b_6144 --full_svd >> $LOG/2_70b_e8p_2bit_fulllr1 2>&1 -python quantize_llama.py --save_path $CKPT/2_70b_e8p_2bit_nolr --codebook E8P12 --scale_override 0.9 --lora_rank 0 --hessian_path $HESS/llama2_70b_6144 --full_svd >> $LOG/2_70b_e8p_2bit_nolr 2>&1 - -wait - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_e8p_2bit_fulllr4 --hf_output_path $HF/2_70b_e8p_2bit_fulllr4 & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_70b_e8p_2bit_fulllr2 --hf_output_path $HF/2_70b_e8p_2bit_fulllr2 & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_70b_e8p_2bit_fulllr1 --hf_output_path $HF/2_70b_e8p_2bit_fulllr1 & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_70b_e8p_2bit_nolr --hf_output_path $HF/2_70b_e8p_2bit_nolr & - -wait - -CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --hf_path $HF/2_70b_e8p_2bit_fulllr4 >> $LOG/2_70b_e8p_2bit_fulllr4 2>&1 & -CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --hf_path $HF/2_70b_e8p_2bit_fulllr2 >> $LOG/2_70b_e8p_2bit_fulllr2 2>&1 & -CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --hf_path $HF/2_70b_e8p_2bit_fulllr1 >> $LOG/2_70b_e8p_2bit_fulllr1 2>&1 & -CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --hf_path $HF/2_70b_e8p_2bit_nolr >> $LOG/2_70b_e8p_2bit_nolr 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_fulllr4 >> $LOG/2_70b_e8p_2bit_fulllr4 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_fulllr2 >> $LOG/2_70b_e8p_2bit_fulllr2 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_fulllr1 >> $LOG/2_70b_e8p_2bit_fulllr1 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_nolr >> $LOG/2_70b_e8p_2bit_nolr 2>&1 & - -wait diff --git a/sbatch/mistral_hermes.sh b/sbatch/mistral_hermes.sh deleted file mode 100644 index 94b28d6..0000000 --- a/sbatch/mistral_hermes.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash - -CKPT=/mnt/desa_data/checkpoints -HF=/mnt/desa_data/hfized -HESS=/mnt/desa_data/hessians -LOG=/mnt/desa_data/logs -L1=/mnt/desa_data/meta_llama1 - -''' -python quantize_llama.py --save_path $CKPT/mistral_7b_hi_4bit_packed --codebook HI4B1C --scale_override 2.7 --base_model mistralai/Mistral-7B-v0.1 --hessian_path $HESS/mistral_7b_4096 >> $LOG/mistral_7b_hi_4bit_packed 2>&1 -python quantize_llama.py --save_path $CKPT/openhermes_7b_hi_4bit_packed --codebook HI4B1C --scale_override 2.7 --base_model teknium/OpenHermes-2.5-Mistral-7B --hessian_path $HESS/openhermes_7b_4096 >> $LOG/openhermes_7b_hi_4bit_packed 2>&1 -python quantize_llama.py --save_path $CKPT/mistral_7b_e8p_2bit --codebook E8P12 --scale_override 0.9 --base_model mistralai/Mistral-7B-v0.1 --hessian_path $HESS/mistral_7b_4096 >> $LOG/mistral_7b_e8p_2bit 2>&1 -python quantize_llama.py --save_path $CKPT/openhermes_7b_e8p_2bit --codebook E8P12 --scale_override 0.9 --base_model teknium/OpenHermes-2.5-Mistral-7B --hessian_path $HESS/openhermes_7b_4096 >> $LOG/openhermes_7b_e8p_2bit 2>&1 - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/mistral_7b_hi_4bit_packed --hf_output_path $HF/mistral_7b_hi_4bit_packed & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/openhermes_7b_hi_4bit_packed --hf_output_path $HF/openhermes_7b_hi_4bit_packed & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/mistral_7b_e8p_2bit --hf_output_path $HF/mistral_7b_e8p_2bit & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/openhermes_7b_e8p_2bit --hf_output_path $HF/openhermes_7b_e8p_2bit & - -wait -''' -# perplexity -CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --no_use_cuda_graph --seqlen 8192 --hf_path $HF/mistral_7b_hi_4bit_packed >> $LOG/mistral_7b_hi_4bit_packed 2>&1 & -CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --no_use_cuda_graph --seqlen 8192 --hf_path $HF/openhermes_7b_hi_4bit_packed >> $LOG/openhermes_7b_hi_4bit_packed 2>&1 & -CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --no_use_cuda_graph --seqlen 8192 --hf_path $HF/mistral_7b_e8p_2bit >> $LOG/mistral_7b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --no_use_cuda_graph --seqlen 8192 --hf_path $HF/openhermes_7b_e8p_2bit >> $LOG/openhermes_7b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python ppl_llama.py --no_use_cuda_graph --seqlen 8192 --hf_path mistralai/Mistral-7B-v0.1 >> $LOG/mistral_7b_fp16 2>&1 & -CUDA_VISIBLE_DEVICES=5 python ppl_llama.py --no_use_cuda_graph --seqlen 8192 --hf_path teknium/OpenHermes-2.5-Mistral-7B >> $LOG/openhermes_7b_fp16 2>&1 & - -wait -''' -CUDA_VISIBLE_DEVICES=0 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/mistral_7b_hi_4bit_packed >> $LOG/mistral_7b_hi_4bit_packed 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/openhermes_7b_hi_4bit_packed >> $LOG/openhermes_7b_hi_4bit_packed 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/mistral_7b_e8p_2bit >> $LOG/mistral_7b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/openhermes_7b_e8p_2bit >> $LOG/openhermes_7b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path mistralai/Mistral-7B-v0.1 >> $LOG/mistral_7b_fp16 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path teknium/OpenHermes-2.5-Mistral-7B >> $LOG/openhermes_7b_fp16 2>&1 & - -wait -''' diff --git a/sbatch/mistral_hermes_hessian.sh b/sbatch/mistral_hermes_hessian.sh deleted file mode 100644 index d89ed9a..0000000 --- a/sbatch/mistral_hermes_hessian.sh +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash - -python hessian_offline.py --batch_size 2 --devset_size 4096 --ctx_size 8192 --base_model mistralai/Mistral-7B-v0.1 --save_path /work/desa_data/hessians/mistral_7b_4096 - -python hessian_offline.py --batch_size 2 --devset_size 4096 --ctx_size 8192 --base_model teknium/OpenHermes-2.5-Mistral-7B --save_path /work/desa_data/hessians/openhermes_7b_4096 - - - diff --git a/sbatch/mixtral.sh b/sbatch/mixtral.sh deleted file mode 100644 index aa329c9..0000000 --- a/sbatch/mixtral.sh +++ /dev/null @@ -1,115 +0,0 @@ -#!/bin/bash - -CKPT=/mnt/desa_data/checkpoints -HF=/mnt/desa_data/hfized/jerry -HESS=/mnt/desa_data/hessians -LOG=/mnt/desa_data/logs -L1=/mnt/desa_data/meta_llama1 - -source ~/miniconda3/bin/activate quipv2_mixtral - -## mixtral 8192ctx -# python quantize_mixtral.py --save_path $CKPT/mixtral_8x7b_e8p_2bit_RPv1_4096dev8192ctx \ -# --codebook E8P12 --scale_override 0.9 --base_model mistralai/Mixtral-8x7B-v0.1 \ -# --hessian_path $HESS/mixtral_8x7b_RPv1_4096dev8192ctx >> $LOG/mixtral_8x7b_e8p_2bit_RPv1_4096dev8192ctx 2>&1 -# python quantize_mixtral.py --save_path $CKPT/mixtral_8x7b_hi_4bit_RPv1_4096dev8192ctx \ -# --codebook HI4B1C --scale_override 2.7 --base_model mistralai/Mixtral-8x7B-v0.1 \ -# --hessian_path $HESS/mixtral_8x7b_RPv1_4096dev8192ctx >> $LOG/mixtral_8x7b_hi_4bit_RPv1_4096dev8192ctx 2>&1 - -## mixtral 12288ctx -# python quantize_mixtral.py --save_path $CKPT/mixtral_8x7b_e8p_2bit_RPv1_4096dev12288ctx \ -# --codebook E8P12 --scale_override 0.9 --base_model mistralai/Mixtral-8x7B-v0.1 \ -# --hessian_path $HESS/mixtral_8x7b_RPv1_4096dev12288ctx >> $LOG/mixtral_8x7b_e8p_2bit_RPv1_4096dev12288ctx 2>&1 -# python quantize_mixtral.py --save_path $CKPT/mixtral_8x7b_hi_4bit_RPv1_4096dev12288ctx \ -# --codebook HI4B1C --scale_override 2.7 --base_model mistralai/Mixtral-8x7B-v0.1 \ -# --hessian_path $HESS/mixtral_8x7b_RPv1_4096dev12288ctx >> $LOG/mixtral_8x7b_hi_4bit_RPv1_4096dev12288ctx 2>&1 - -## mixtral-instruct 8192ctx -# python quantize_mixtral.py --save_path $CKPT/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev8192ctx \ -# --codebook E8P12 --scale_override 0.9 --base_model mistralai/Mixtral-8x7b-Instruct-v0.1 \ -# --hessian_path $HESS/mixtral_8x7b_instruct_RPv1_4096dev8192ctx >> $LOG/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev8192ctx 2>&1 -# python quantize_mixtral.py --save_path $CKPT/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev8192ctx \ -# --codebook HI4B1C --scale_override 2.7 --base_model mistralai/Mixtral-8x7b-Instruct-v0.1 \ -# --hessian_path $HESS/mixtral_8x7b_instruct_RPv1_4096dev8192ctx >> $LOG/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev8192ctx 2>&1 -# -## mixtral-instruct 12288ctx -python quantize_mixtral.py --save_path $CKPT/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev12288ctx \ - --codebook E8P12 --scale_override 0.9 --base_model mistralai/Mixtral-8x7b-Instruct-v0.1 \ - --hessian_path $HESS/mixtral_8x7b_instruct_RPv1_4096dev12288ctx >> $LOG/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev12288ctx 2>&1 -python quantize_mixtral.py --save_path $CKPT/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev12288ctx \ - --codebook HI4B1C --scale_override 2.7 --base_model mistralai/Mixtral-8x7b-Instruct-v0.1 \ - --hessian_path $HESS/mixtral_8x7b_instruct_RPv1_4096dev12288ctx >> $LOG/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev12288ctx 2>&1 - -# CUDA_VISIBLE_DEVICES=0 python hfize_mixtral.py --quantized_path $CKPT/mixtral_8x7b_e8p_2bit_RPv1_4096dev8192ctx \ -# --hf_output_path $HF/mixtral_8x7b_e8p_2bit_RPv1_4096dev8192ctx & -# wait - -## hfize -## mixtral 8192ctx -# CUDA_VISIBLE_DEVICES=0 python hfize_mixtral.py --quantized_path $CKPT/mixtral_8x7b_e8p_2bit_RPv1_4096dev8192ctx \ -# --hf_output_path $HF/mixtral_8x7b_e8p_2bit_RPv1_4096dev8192ctx & -# CUDA_VISIBLE_DEVICES=0 python hfize_mixtral.py --quantized_path $CKPT/mixtral_8x7b_hi_4bit_RPv1_4096dev8192ctx \ -# --hf_output_path $HF/mixtral_8x7b_hi_4bit_RPv1_4096dev8192ctx & -# ## mixtral 12288ctx -# CUDA_VISIBLE_DEVICES=1 python hfize_mixtral.py --quantized_path $CKPT/mixtral_8x7b_e8p_2bit_RPv1_4096dev12288ctx \ -# --hf_output_path $HF/mixtral_8x7b_e8p_2bit_RPv1_4096dev12288ctx & -# CUDA_VISIBLE_DEVICES=2 python hfize_mixtral.py --quantized_path $CKPT/mixtral_8x7b_hi_4bit_RPv1_4096dev12288ctx \ -# --hf_output_path $HF/mixtral_8x7b_hi_4bit_RPv1_4096dev12288ctx & -# ## mixtral-instruct 8192ctx -# CUDA_VISIBLE_DEVICES=0 python hfize_mixtral.py --quantized_path $CKPT/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev8192ctx \ -# --hf_output_path $HF/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev8192ctx & -# CUDA_VISIBLE_DEVICES=1 python hfize_mixtral.py --quantized_path $CKPT/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev8192ctx \ -# --hf_output_path $HF/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev8192ctx & -## mixtral-instruct 12288ctx -CUDA_VISIBLE_DEVICES=0 python hfize_mixtral.py --quantized_path $CKPT/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev12288ctx \ - --hf_output_path $HF/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev12288ctx & -CUDA_VISIBLE_DEVICES=1 python hfize_mixtral.py --quantized_path $CKPT/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev12288ctx \ - --hf_output_path $HF/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev12288ctx & -wait - -## perplexity -## mixtral 8192ctx -# CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --no_use_cuda_graph --seqlen 8192 \ -# --hf_path $HF/mixtral_8x7b_e8p_2bit_RPv1_4096dev8192ctx >> $LOG/mixtral_8x7b_e8p_2bit_RPv1_4096dev8192ctx 2>&1 & -# CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --no_use_cuda_graph --seqlen 8192 \ -# --hf_path $HF/mixtral_8x7b_hi_4bit_RPv1_4096dev8192ctx >> $LOG/mixtral_8x7b_hi_4bit_RPv1_4096dev8192ctx 2>&1 & -# ## mixtral 12288ctx -# CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --no_use_cuda_graph --seqlen 8192 \ -# --hf_path $HF/mixtral_8x7b_e8p_2bit_RPv1_4096dev12288ctx >> $LOG/mixtral_8x7b_e8p_2bit_RPv1_4096dev12288ctx 2>&1 & -# CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --no_use_cuda_graph --seqlen 8192 \ -# --hf_path $HF/mixtral_8x7b_hi_4bit_RPv1_4096dev12288ctx >> $LOG/mixtral_8x7b_hi_4bit_RPv1_4096dev12288ctx 2>&1 & -# wait -# ## mixtral-instruct 8192ctx -# CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --no_use_cuda_graph --seqlen 8192 \ -# --hf_path $HF/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev8192ctx >> $LOG/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev8192ctx 2>&1 & -# CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --no_use_cuda_graph --seqlen 8192 \ -# --hf_path $HF/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev8192ctx >> $LOG/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev8192ctx 2>&1 & -## mixtral-instruct 12288ctx -CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --no_use_cuda_graph --seqlen 8192 \ - --hf_path $HF/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev12288ctx >> $LOG/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev12288ctx 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --no_use_cuda_graph --seqlen 8192 \ - --hf_path $HF/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev12288ctx >> $LOG/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev12288ctx 2>&1 & -wait - -## zeroshot -## mixtral 8192ctx -# CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande \ -# --batch_size 4 --hf_path $HF/mixtral_8x7b_e8p_2bit_RPv1_4096dev8192ctx >> $LOG/mixtral_8x7b_e8p_2bit_RPv1_4096dev8192ctx 2>&1 & -# CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande \ -# --batch_size 4 --hf_path $HF/mixtral_8x7b_hi_4bit_RPv1_4096dev8192ctx >> $LOG/mixtral_8x7b_hi_4bit_RPv1_4096dev8192ctx 2>&1 & -# ## mixtral 12288ctx -# CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande \ -# --batch_size 4 --hf_path $HF/mixtral_8x7b_e8p_2bit_RPv1_4096dev12288ctx >> $LOG/mixtral_8x7b_e8p_2bit_RPv1_4096dev12288ctx 2>&1 & -# CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande \ -# --batch_size 4 --hf_path $HF/mixtral_8x7b_hi_4bit_RPv1_4096dev12288ctx >> $LOG/mixtral_8x7b_hi_4bit_RPv1_4096dev12288ctx 2>&1 & -# ## mixtral-instruct 8192ctx -# CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande \ -# --batch_size 4 --hf_path $HF/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev8192ctx >> $LOG/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev8192ctx 2>&1 & -# CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande \ -# --batch_size 4 --hf_path $HF/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev8192ctx >> $LOG/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev8192ctx 2>&1 & -## mixtral-instruct 12288ctx -CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande \ - --batch_size 4 --hf_path $HF/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev12288ctx >> $LOG/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev12288ctx 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande \ - --batch_size 4 --hf_path $HF/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev12288ctx >> $LOG/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev12288ctx 2>&1 & -wait \ No newline at end of file diff --git a/sbatch/mixtral_fp16.sh b/sbatch/mixtral_fp16.sh deleted file mode 100644 index 56e2ca3..0000000 --- a/sbatch/mixtral_fp16.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - -LOG=/mnt/desa_data/logs - -source ~/miniconda3/bin/activate quipv2_mixtral - -# python eval_ppl.py --no_use_cuda_graph --seqlen 8192 --hf_path mistralai/Mixtral-8x7B-v0.1 >> $LOG/mixtral_8x7b_fp16 2>&1 -python eval_ppl.py --no_use_cuda_graph --seqlen 8192 --hf_path mistralai/Mixtral-8x7B-Instruct-v0.1 >> $LOG/mixtral_8x7b_instruct_fp16 2>&1 - -# python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande \ -# --batch_size 2 --hf_path mistralai/Mixtral-8x7B-v0.1 >> $LOG/mixtral_8x7b_fp16 2>&1 -python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande \ - --batch_size 2 --hf_path mistralai/Mixtral-8x7B-Instruct-v0.1 >> $LOG/mixtral_8x7b_instruct_fp16 2>&1 \ No newline at end of file diff --git a/sbatch/mixtral_hessian.sh b/sbatch/mixtral_hessian.sh deleted file mode 100644 index 68b03da..0000000 --- a/sbatch/mixtral_hessian.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash - -source ~/miniconda3/bin/activate quipv2_mixtral - -# TOKENIZERS_PARALLELISM=false python hessian_offline_mixtral.py \ -# --batch_size 2 --devset_size 64 --ctx_size 2048 --base_model mistralai/Mixtral-8x7B-v0.1 \ -# --save_path /mnt/desa_data/hessians/mixtral_8x7b_RPv1_64dev2048ctx - -# TOKENIZERS_PARALLELISM=false python hessian_offline_mixtral.py \ -# --batch_size 2 --devset_size 4096 --ctx_size 8192 --base_model mistralai/Mixtral-8x7B-v0.1 \ -# --save_path /mnt/desa_data/hessians/mixtral_8x7b_RPv1_4096dev8192ctx - -# TOKENIZERS_PARALLELISM=false python hessian_offline_mixtral.py \ -# --batch_size 1 --sample_proc 12 --devset_size 4096 --ctx_size 12288 \ -# --save_activations --base_model mistralai/Mixtral-8x7B-v0.1 \ -# --save_path /mnt/desa_data/hessians/mixtral_8x7b_RPv1_4096dev12288ctx - -# TOKENIZERS_PARALLELISM=false python hessian_offline_mixtral.py \ -# --batch_size 1 --sample_proc 12 --devset_size 4096 --ctx_size 8192 \ -# --save_activations --base_model mistralai/Mixtral-8x7B-Instruct-v0.1 \ -# --save_path /mnt/desa_data/hessians/mixtral_8x7b_instruct_RPv1_4096dev8192ctx - -# TOKENIZERS_PARALLELISM=false python hessian_offline_mixtral.py \ -# --batch_size 1 --sample_proc 12 --devset_size 4096 --ctx_size 12288 \ -# --save_activations --base_model mistralai/Mixtral-8x7B-Instruct-v0.1 \ -# --save_path /mnt/desa_data/hessians/mixtral_8x7b_instruct_RPv1_4096dev12288ctx - -TOKENIZERS_PARALLELISM=false python hessian_offline_mixtral.py \ - --batch_size 1 --sample_proc 12 --devset_size 4096 --ctx_size 12288 \ - --base_model mistralai/Mixtral-8x7B-v0.1 --dataset "togethercomputer/RedPajama-Data-V2" \ - --save_path /mnt/desa_data/hessians/mixtral_8x7b_RPv2_4096dev12288ctx \ No newline at end of file diff --git a/sbatch/nofuse_test.sh b/sbatch/nofuse_test.sh deleted file mode 100644 index af78130..0000000 --- a/sbatch/nofuse_test.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/bash - -CKPT=/mnt/desa_data/checkpoints -HF=/mnt/desa_data/hfized -HESS=/mnt/desa_data/hessians -LOG=/mnt/desa_data/logs -L1=/mnt/desa_data/meta_llama1 - -''' -python quantize_llama_nofuse.py --save_path $CKPT/2_70b_e8p_2bit_nofuse --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 >> $LOG/2_70b_e8p_2bit_nofuse 2>&1 -python quantize_llama_nofuse.py --save_path $CKPT/2_13b_e8p_2bit_nofuse --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144 >> $LOG/2_13b_e8p_2bit_nofuse 2>&1 -python quantize_llama_nofuse.py --save_path $CKPT/2_7b_e8p_2bit_nofuse --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144 >> $LOG/2_7b_e8p_2bit_nofuse 2>&1 -python quantize_llama_nofuse.py --save_path $CKPT/1_65b_e8p_2bit_nofuse --codebook E8P12 --scale_override 0.9 --base_model $L1/huggingface_65B --hessian_path $HESS/llama1_65b_6144 >> $LOG/1_65b_e8p_2bit_nofuse 2>&1 -python quantize_llama_nofuse.py --save_path $CKPT/1_30b_e8p_2bit_nofuse --codebook E8P12 --scale_override 0.9 --base_model $L1/huggingface_30B --hessian_path $HESS/llama1_30b_6144 >> $LOG/1_30b_e8p_2bit_nofuse 2>&1 -python quantize_llama_nofuse.py --save_path $CKPT/1_13b_e8p_2bit_nofuse --codebook E8P12 --scale_override 0.9 --base_model $L1/huggingface_13B --hessian_path $HESS/llama1_13b_6144 >> $LOG/1_13b_e8p_2bit_nofuse 2>&1 -python quantize_llama_nofuse.py --save_path $CKPT/1_7b_e8p_2bit_nofuse --codebook E8P12 --scale_override 0.9 --base_model $L1/huggingface_7B --hessian_path $HESS/llama1_7b_6144 >> $LOG/1_7b_e8p_2bit_nofuse 2>&1 - - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_e8p_2bit_nofuse --hf_output_path $HF/2_70b_e8p_2bit_nofuse >> $LOG/2_70b_e8p_2bit_nofuse 2>&1 & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_13b_e8p_2bit_nofuse --hf_output_path $HF/2_13b_e8p_2bit_nofuse >> $LOG/2_13b_e8p_2bit_nofuse 2>&1 & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_7b_e8p_2bit_nofuse --hf_output_path $HF/2_7b_e8p_2bit_nofuse >> $LOG/2_7b_e8p_2bit_nofuse 2>&1 & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/1_65b_e8p_2bit_nofuse --hf_output_path $HF/1_65b_e8p_2bit_nofuse >> $LOG/1_65b_e8p_2bit_nofuse 2>&1 & -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/1_30b_e8p_2bit_nofuse --hf_output_path $HF/1_30b_e8p_2bit_nofuse >> $LOG/1_30b_e8p_2bit_nofuse 2>&1 & -CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/1_13b_e8p_2bit_nofuse --hf_output_path $HF/1_13b_e8p_2bit_nofuse >> $LOG/1_13b_e8p_2bit_nofuse 2>&1 & -CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/1_7b_e8p_2bit_nofuse --hf_output_path $HF/1_7b_e8p_2bit_nofuse >> $LOG/1_7b_e8p_2bit_nofuse 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --hf_path $HF/2_70b_e8p_2bit_nofuse >> $LOG/2_70b_e8p_2bit_nofuse 2>&1 & -CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --hf_path $HF/2_13b_e8p_2bit_nofuse >> $LOG/2_13b_e8p_2bit_nofuse 2>&1 & -CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --hf_path $HF/2_7b_e8p_2bit_nofuse >> $LOG/2_7b_e8p_2bit_nofuse 2>&1 & -CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_65b_e8p_2bit_nofuse >> $LOG/1_65b_e8p_2bit_nofuse 2>&1 & -CUDA_VISIBLE_DEVICES=4 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_30b_e8p_2bit_nofuse >> $LOG/1_30b_e8p_2bit_nofuse 2>&1 & -CUDA_VISIBLE_DEVICES=5 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_13b_e8p_2bit_nofuse >> $LOG/1_13b_e8p_2bit_nofuse 2>&1 & -CUDA_VISIBLE_DEVICES=6 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_7b_e8p_2bit_nofuse >> $LOG/1_7b_e8p_2bit_nofuse 2>&1 & - -wait -''' -CUDA_VISIBLE_DEVICES=0 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_nofuse >> $LOG/2_70b_e8p_2bit_nofuse 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_e8p_2bit_nofuse >> $LOG/2_13b_e8p_2bit_nofuse 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_e8p_2bit_nofuse >> $LOG/2_7b_e8p_2bit_nofuse 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_e8p_2bit_nofuse >> $LOG/1_65b_e8p_2bit_nofuse 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_e8p_2bit_nofuse >> $LOG/1_30b_e8p_2bit_nofuse 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_e8p_2bit_nofuse >> $LOG/1_13b_e8p_2bit_nofuse 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_e8p_2bit_nofuse >> $LOG/1_7b_e8p_2bit_nofuse 2>&1 & - -wait diff --git a/sbatch/old/2_13b_e8p_2bit.sbatch b/sbatch/old/2_13b_e8p_2bit.sbatch deleted file mode 100644 index 75ddcce..0000000 --- a/sbatch/old/2_13b_e8p_2bit.sbatch +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu -#SBATCH --job-name=2_13b_e8p_2bit -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=64G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:1 -#SBATCH --constraint='gpu-high' -#SBATCH --time=48:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -''' -CKPT=checkpoints - -python quantize_llama.py \ - --save_path $CKPT/2_13b_e8p_2bit \ - --codebook E8P12 \ - --sigma_reg2 1e-2 \ - --scale_override 0.9 \ - --base_model meta-llama/Llama-2-13b-hf \ - --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_13b_6144 - -python hfize_llama.py --quantized_path $CKPT/2_13b_e8p_2bit --hf_output_path hfized/2_13b_e8p_2bit - -''' - -python ppl_llama.py --hf_path hfized/2_13b_e8p_2bit -python eval_llama.py --hf_path hfized/2_13b_e8p_2bit --batch_size 4 --tasks arc_challenge,arc_easy,boolq,piqa,winogrande diff --git a/sbatch/old/2_70b_e8p_2bit.sh b/sbatch/old/2_70b_e8p_2bit.sh deleted file mode 100644 index 3041282..0000000 --- a/sbatch/old/2_70b_e8p_2bit.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -CKPT=/mnt/jerry_data/checkpoints -HF=/mnt/jerry_data/hfized -HESS=/mnt/jerry_data/hessians -LOG=/mnt/jerry_data/logs -NAME=2_70b_e8p_2bit - -python quantize_llama.py --save_path $CKPT/$NAME --codebook E8P12 --sigma_reg2 1e-2 --scale 0.90 --hessian_path $HESS/llama2_70b_6144 >> $LOG/$NAME 2>&1 -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/$NAME --hf_output_path $HF/$NAME >> $LOG/$NAME 2>&1 -CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --hf_path $HF/$NAME >> $LOG/$NAME 2>&1 -CUDA_VISIBLE_DEVICES=0 python eval_llama.py --hf_path $HF/$NAME --batch_size 4 --tasks arc_challenge,arc_easy,boolq,piqa,winogrande >> $LOG/$NAME 2>&1 diff --git a/sbatch/old/2_7b_e8p_2bit.sbatch b/sbatch/old/2_7b_e8p_2bit.sbatch deleted file mode 100644 index b68b2f7..0000000 --- a/sbatch/old/2_7b_e8p_2bit.sbatch +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu -#SBATCH --job-name=2_7b_e8p_2bit -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=64G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:1 -#SBATCH --constraint='gpu-high' -#SBATCH --time=48:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -''' -CKPT=checkpoints - -python quantize_llama.py \ - --save_path $CKPT/2_7b_e8p_2bit \ - --codebook E8P12 \ - --sigma_reg2 1e-2 \ - --scale_override 0.9 \ - --base_model meta-llama/Llama-2-7b-hf \ - --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_7b_6144 - -python hfize_llama.py --quantized_path $CKPT/2_7b_e8p_2bit --hf_output_path hfized/2_7b_e8p_2bit - -''' - -python ppl_llama.py --hf_path hfized/2_7b_e8p_2bit -python eval_llama.py --hf_path hfized/2_7b_e8p_2bit --batch_size 4 --tasks arc_challenge,arc_easy,boolq,piqa,winogrande - diff --git a/sbatch/old/d4.sbatch b/sbatch/old/d4.sbatch deleted file mode 100644 index 33f3099..0000000 --- a/sbatch/old/d4.sbatch +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu,ellis -#SBATCH --job-name=d4_fast2 -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=96G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:1 -#SBATCH --constraint='gpu-high' -#SBATCH --time=24:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -python quantize_llama.py --save_path checkpoints/d4_fast2 --codebook D4 -python hfize_llama.py --quantized_path checkpoints/d4_fast2 --hf_output_path hfized/d4_fast2 -python ppl_llama.py --hf_path hfized/d4_fast2 --dataset c4 diff --git a/sbatch/old/d44b_13b.sbatch b/sbatch/old/d44b_13b.sbatch deleted file mode 100644 index 3df4b34..0000000 --- a/sbatch/old/d44b_13b.sbatch +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu -#SBATCH --job-name=d44b_13b -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=64G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:1 -#SBATCH --constraint='gpu-high' -#SBATCH --time=48:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -''' -CKPT=checkpoints - -python quantize_llama.py --save_path $CKPT/d44b_13b --codebook D44B --sigma_reg2 1e-2 --scale 3.4 --base_model meta-llama/Llama-2-13b-hf --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_13b_6144 -python hfize_llama.py --quantized_path $CKPT/d44b_13b --hf_output_path hfized/d44b_13b -''' - -python ppl_llama.py --hf_path hfized/d44b_13b -python eval_llama.py --hf_path hfized/d44b_13b --batch_size 4 --tasks arc_challenge,arc_easy,boolq,piqa,winogrande - diff --git a/sbatch/old/d44b_7b.sbatch b/sbatch/old/d44b_7b.sbatch deleted file mode 100644 index 4170634..0000000 --- a/sbatch/old/d44b_7b.sbatch +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu -#SBATCH --job-name=d44b_7b -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=96G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:1 -#SBATCH --constraint='gpu-high' -#SBATCH --time=48:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -''' -CKPT=checkpoints - -python quantize_llama.py --save_path $CKPT/d44b_7b --codebook D44B --sigma_reg2 1e-2 --scale 3.4 --base_model meta-llama/Llama-2-7b-hf --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_7b_6144 -python hfize_llama.py --quantized_path $CKPT/d44b_7b --hf_output_path hfized/d44b_7b -''' - -python ppl_llama.py --hf_path hfized/d44b_7b -python eval_llama.py --hf_path hfized/d44b_7b --batch_size 4 --tasks arc_challenge,arc_easy,boolq,piqa,winogrande diff --git a/sbatch/old/d4_70b.sbatch b/sbatch/old/d4_70b.sbatch deleted file mode 100644 index 8c2c5b1..0000000 --- a/sbatch/old/d4_70b.sbatch +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu -#SBATCH --job-name=d4_70b -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=96G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:2 -#SBATCH --constraint='gpu-high' -#SBATCH --time=48:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -CKPT=checkpoints - -python quantize_llama.py --save_path $CKPT/d4_70b --codebook D44B --sigma_reg2 1e-2 --scale 3.4 --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_70b_6144 -python hfize_llama.py --quantized_path $CKPT/d4_70b --hf_output_path hfized/d4_70b -python ppl_llama.py --hf_path hfized/d4_70b diff --git a/sbatch/old/d4_7b.sbatch b/sbatch/old/d4_7b.sbatch deleted file mode 100644 index 21133ca..0000000 --- a/sbatch/old/d4_7b.sbatch +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu -#SBATCH --job-name=d4_7b -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=96G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:1 -#SBATCH --constraint='gpu-high' -#SBATCH --time=48:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -CKPT=checkpoints - -#python quantize_llama.py \ -# --save_path $CKPT/d4_7b \ -# --codebook D4 \ -# --base_model meta-llama/Llama-2-7b-hf \ -# --hessian_path /share/kuleshov/jc3464/quip/hessians/7b-chat-512dev-4096ctx \ -# --use_fp64 - -python hfize_llama.py --quantized_path $CKPT/d4_7b --hf_output_path hfized/d4_7b -python ppl_llama.py --hf_path hfized/d4_7b --dataset c4 diff --git a/sbatch/old/e8.sbatch b/sbatch/old/e8.sbatch deleted file mode 100644 index 5928752..0000000 --- a/sbatch/old/e8.sbatch +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu -#SBATCH --job-name=e8 -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=96G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:2 -#SBATCH --constraint='gpu-high' -#SBATCH --exclude='coecis-compute-03' -#SBATCH --time=48:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -#python quantize_llama.py --save_path checkpoints/e8 --codebook E8 -#python hfize_llama.py --quantized_path checkpoints/e8 --hf_output_path hfized/e8 -python ppl_llama.py --hf_path hfized/e8_fast --dataset c4 diff --git a/sbatch/old/e81b.sbatch b/sbatch/old/e81b.sbatch deleted file mode 100644 index 065cdd4..0000000 --- a/sbatch/old/e81b.sbatch +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu -#SBATCH --job-name=e81b -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=96G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:2 -#SBATCH --constraint='gpu-high' -#SBATCH --time=48:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -CKPT=/share/desa/nfs02/quip_llama2/checkpoints/ - -python quantize_llama.py --save_path $CKPT/e81b --codebook E81B -python hfize_llama.py --quantized_path $CKPT/e81b --hf_output_path hfized/e81b -python ppl_llama.py --hf_path hfized/e81b --dataset c4 diff --git a/sbatch/old/e8237b.sh b/sbatch/old/e8237b.sh deleted file mode 100644 index 5ce97dc..0000000 --- a/sbatch/old/e8237b.sh +++ /dev/null @@ -1,66 +0,0 @@ -#!/bin/bash - -CKPT=/mnt/jerry_data/checkpoints -HF=/mnt/jerry_data/hfized -HESS=/mnt/jerry_data/hessians -LOG=/mnt/jerry_data/logs -L1=/mnt/jerry_data/meta_llama1 - - -python quantize_llama.py --save_path $CKPT/1_65b_e8p_2bit --codebook E8P12 --sigma_reg2 1e-2 --scale 0.9 --base_model $L1/huggingface_65B --hessian_path $HESS/llama1_65b_6144 >> $LOG/1_65b_e8p_2bit 2>&1 -python quantize_llama.py --save_path $CKPT/1_30b_e8p_2bit --codebook E8P12 --sigma_reg2 1e-2 --scale 0.9 --base_model $L1/huggingface_30B --hessian_path $HESS/llama1_30b_6144 >> $LOG/1_30b_e8p_2bit 2>&1 -python quantize_llama.py --save_path $CKPT/1_13b_e8p_2bit --codebook E8P12 --sigma_reg2 1e-2 --scale 0.9 --base_model $L1/huggingface_13B --hessian_path $HESS/llama1_13b_6144 >> $LOG/1_13b_e8p_2bit 2>&1 -python quantize_llama.py --save_path $CKPT/1_7b_e8p_2bit --codebook E8P12 --sigma_reg2 1e-2 --scale 0.9 --base_model $L1/huggingface_7B --hessian_path $HESS/llama1_7b_6144 >> $LOG/1_7b_e8p_2bit 2>&1 -python quantize_llama.py --save_path $CKPT/1_65b_d4_4bit --codebook D44B --sigma_reg2 1e-2 --scale 3.4 --base_model $L1/huggingface_65B --hessian_path $HESS/llama1_65b_6144 >> $LOG/1_65b_d4_4bit 2>&1 -python quantize_llama.py --save_path $CKPT/1_30b_d4_4bit --codebook D44B --sigma_reg2 1e-2 --scale 3.4 --base_model $L1/huggingface_30B --hessian_path $HESS/llama1_30b_6144 >> $LOG/1_30b_d4_4bit 2>&1 -python quantize_llama.py --save_path $CKPT/1_13b_d4_4bit --codebook D44B --sigma_reg2 1e-2 --scale 3.4 --base_model $L1/huggingface_13B --hessian_path $HESS/llama1_13b_6144 >> $LOG/1_13b_d4_4bit 2>&1 -python quantize_llama.py --save_path $CKPT/1_7b_d4_4bit --codebook D44B --sigma_reg2 1e-2 --scale 3.4 --base_model $L1/huggingface_7B --hessian_path $HESS/llama1_7b_6144 >> $LOG/1_7b_d4_4bit 2>&1 - - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/1_65b_e8p_2bit --hf_output_path $HF/1_65b_e8p_2bit & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/1_30b_e8p_2bit --hf_output_path $HF/1_30b_e8p_2bit & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/1_13b_e8p_2bit --hf_output_path $HF/1_13b_e8p_2bit & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/1_7b_e8p_2bit --hf_output_path $HF/1_7b_e8p_2bit & -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/1_65b_d4_4bit --hf_output_path $HF/1_65b_d4_4bit & -CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/1_30b_d4_4bit --hf_output_path $HF/1_30b_d4_4bit & -CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/1_13b_d4_4bit --hf_output_path $HF/1_13b_d4_4bit & -CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/1_7b_d4_4bit --hf_output_path $HF/1_7b_d4_4bit & - -wait - - -# fp16 zero shot for llama1 and 2 -CUDA_VISIBLE_DEVICES=0,1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $L1/huggingface_65B >> $LOG/1_65b_fp16 2>&1 & -CUDA_VISIBLE_DEVICES=2,3 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path meta-llama/Llama-2-70b-hf >> $LOG/2_70b_fp16 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $L1/huggingface_30B >> $LOG/1_30b_fp16 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $L1/huggingface_13B >> $LOG/1_13b_fp16 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path meta-llama/Llama-2-13b-hf >> $LOG/2_13b_fp16 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $L1/huggingface_7B >> $LOG/1_7b_fp16 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path meta-llama/Llama-2-7b-hf >> $LOG/2_7b_fp16 2>&1 & - -wait - -# perplexity -CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_65b_e8p_2bit >> $LOG/1_65b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_30b_e8p_2bit >> $LOG/1_30b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_13b_e8p_2bit >> $LOG/1_13b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_7b_e8p_2bit >> $LOG/1_7b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_65b_d4_4bit >> $LOG/1_65b_d4_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_30b_d4_4bit >> $LOG/1_30b_d4_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_13b_d4_4bit >> $LOG/1_13b_d4_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_7b_d4_4bit >> $LOG/1_7b_d4_4bit 2>&1 & - -wait - -# zero shot - -CUDA_VISIBLE_DEVICES=0 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_e8p_2bit >> $LOG/1_65b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_e8p_2bit >> $LOG/1_30b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_e8p_2bit >> $LOG/1_13b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_e8p_2bit >> $LOG/1_7b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_d4_4bit >> $LOG/1_65b_d4_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_d4_4bit >> $LOG/1_30b_d4_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_d4_4bit >> $LOG/1_13b_d4_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_d4_4bit >> $LOG/1_7b_d4_4bit 2>&1 & - -wait diff --git a/sbatch/old/e8_ocs.sbatch b/sbatch/old/e8_ocs.sbatch deleted file mode 100644 index ba699ee..0000000 --- a/sbatch/old/e8_ocs.sbatch +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu -#SBATCH --job-name=e8_ocs -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=48G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:2 -#SBATCH --constraint='gpu-high' -#SBATCH --exclude='coecis-compute-03' -#SBATCH --time=48:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -python quantize_llama.py --save_path checkpoints/e8_ocs --codebook E8 --outlier_channel_split -python hfize_llama.py --quantized_path checkpoints/e8_ocs --hf_output_path hfized/e8_ocs -python ppl_llama.py --hf_path hfized/e8_ocs_fast --dataset c4 diff --git a/sbatch/old/e8p12.sbatch b/sbatch/old/e8p12.sbatch deleted file mode 100644 index 99057ce..0000000 --- a/sbatch/old/e8p12.sbatch +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu,ellis -#SBATCH --job-name=e8p12_3 -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=96G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:2 -#SBATCH --constraint='gpu-high' -#SBATCH --time=48:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -CKPT=checkpoints - -#python quantize_llama.py --save_path $CKPT/e8p12_3 --codebook E8P12 -python hfize_llama.py --quantized_path $CKPT/e8p12_3 --hf_output_path hfized/e8p12_3 -python ppl_llama.py --hf_path hfized/e8p12_3 --dataset c4 diff --git a/sbatch/old/e8p_7b.sbatch b/sbatch/old/e8p_7b.sbatch deleted file mode 100644 index a62516e..0000000 --- a/sbatch/old/e8p_7b.sbatch +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu -#SBATCH --job-name=e8p_7b -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=64G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:1 -#SBATCH --constraint='gpu-high' -#SBATCH --time=48:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -CKPT=checkpoints - -#python quantize_llama.py \ -# --save_path $CKPT/e8p_7b \ -# --codebook E8P12 \ -# --sigma_reg2 5e-3 \ -# --scale_override 0.9 \ -# --base_model meta-llama/Llama-2-7b-hf \ -# --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_7b \ -# --use_fp64 -python hfize_llama.py --quantized_path $CKPT/e8p_7b --hf_output_path hfized/e8p_7b -python hfize_llama.py --quantized_path $CKPT/e8p_7b --hf_output_path hfized/e8p_7b -python ppl_llama.py --hf_path hfized/e8p_7b --dataset c4 diff --git a/sbatch/old/e8p_nofuse.sbatch b/sbatch/old/e8p_nofuse.sbatch deleted file mode 100644 index f4f90a6..0000000 --- a/sbatch/old/e8p_nofuse.sbatch +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu -#SBATCH --job-name=e8p_nofuse_ch -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=96G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:2 -#SBATCH --constraint='gpu-high' -#SBATCH --time=48:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -CKPT=checkpoints - -python quantize_llama_nofuse.py --save_path $CKPT/e8p_nofuse_ch --codebook E8P12 --sigma_reg2 1e-2 --scale 0.9 --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_70b -python hfize_llama_nofuse.py --quantized_path $CKPT/e8p_nofuse_ch --hf_output_path hfized/e8p_nofuse_ch -python ppl_llama_nofuse.py --hf_path hfized/e8p_nofuse_ch --dataset c4 diff --git a/sbatch/old/e8s.sbatch b/sbatch/old/e8s.sbatch deleted file mode 100644 index 38dd291..0000000 --- a/sbatch/old/e8s.sbatch +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu -#SBATCH --job-name=e8s -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=48G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:1 -#SBATCH --constraint='gpu-high' -#SBATCH --time=48:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -CKPT=/share/desa/nfs02/quip_llama2/checkpoints/ - -#python quantize_llama.py --save_path $CKPT/e8s --codebook E8S -#python hfize_llama.py --quantized_path $CKPT/e8s --hf_output_path hfized/e8s -python ppl_llama.py --hf_path hfized/e8s --dataset c4 diff --git a/sbatch/old/eval_70b.sbatch b/sbatch/old/eval_70b.sbatch deleted file mode 100644 index 6ff14ad..0000000 --- a/sbatch/old/eval_70b.sbatch +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu -#SBATCH --job-name=eval_70b -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=64G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:1 -#SBATCH --constraint='gpu-high' -#SBATCH --time=48:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -''' -CKPT=checkpoints - -python quantize_llama.py \ - --save_path $CKPT/2_7b_e8p_2bit \ - --codebook E8P12 \ - --sigma_reg2 1e-2 \ - --scale_override 0.9 \ - --base_model meta-llama/Llama-2-7b-hf \ - --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_7b_6144 - -python hfize_llama.py --quantized_path $CKPT/2_7b_e8p_2bit --hf_output_path hfized/2_7b_e8p_2bit - -''' - -python ppl_llama.py --hf_path hfized/e8p_090 -python eval_llama.py --hf_path hfized/e8p_090 --batch_size 4 --tasks arc_challenge,arc_easy,boolq,piqa,winogrande - diff --git a/sbatch/old/fp16_ppl.sbatch b/sbatch/old/fp16_ppl.sbatch deleted file mode 100644 index 9679f45..0000000 --- a/sbatch/old/fp16_ppl.sbatch +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu -#SBATCH --job-name=fp16_ppl -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=256G -#SBATCH --cpus-per-task=8 -#SBATCH --gres=gpu:4 -#SBATCH --constraint='gpu-high' -#SBATCH --time=48:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -echo 'seqlen 2048' - -python ppl_llama.py --hf_path meta-llama/Llama-2-70b-hf --seqlen 2048 -python ppl_llama.py --hf_path meta-llama/Llama-2-13b-hf --seqlen 2048 -python ppl_llama.py --hf_path meta-llama/Llama-2-7b-hf --seqlen 2048 - - -echo 'seqlen 4096' - -python ppl_llama.py --hf_path meta-llama/Llama-2-70b-hf --seqlen 4096 -python ppl_llama.py --hf_path meta-llama/Llama-2-13b-hf --seqlen 4096 -python ppl_llama.py --hf_path meta-llama/Llama-2-7b-hf --seqlen 4096 - - diff --git a/sbatch/old/half_int.sbatch b/sbatch/old/half_int.sbatch deleted file mode 100644 index 2133ef5..0000000 --- a/sbatch/old/half_int.sbatch +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu -#SBATCH --job-name=half_int -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=96G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:2 -#SBATCH --constraint='gpu-high' -#SBATCH --time=24:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -python quantize_llama.py --save_path checkpoints/half_int --codebook half_integer_2bit -python hfize_llama.py --quantized_path checkpoints/half_int --hf_output_path hfized/half_int -python ppl_llama.py --hf_path hfized/half_int --dataset c4 diff --git a/sbatch/old/half_int_4bit_1col.sh b/sbatch/old/half_int_4bit_1col.sh deleted file mode 100644 index 16818bc..0000000 --- a/sbatch/old/half_int_4bit_1col.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash - -CKPT=/mnt/jerry_data/checkpoints -HF=/mnt/jerry_data/hfized -HESS=/mnt/jerry_data/hessians -LOG=/mnt/jerry_data/logs -L1=/mnt/jerry_data/meta_llama1 - -''' -CUDA_VISIBLE_DEVICES=1,3,4,5,6,7 python quantize_llama.py --save_path $CKPT/2_70b_hi_4bit --codebook HI4B1C --sigma_reg2 1e-2 --scale 2.7 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 >> $LOG/2_70b_hi_4bit 2>&1 -CUDA_VISIBLE_DEVICES=1,3,4,5,6,7 python quantize_llama.py --save_path $CKPT/2_13b_hi_4bit --codebook HI4B1C --sigma_reg2 1e-2 --scale 2.7 --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144 >> $LOG/2_13b_hi_4bit 2>&1 -python quantize_llama.py --save_path $CKPT/2_7b_hi_4bit --codebook HI4B1C --sigma_reg2 1e-2 --scale 2.7 --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144 >> $LOG/2_7b_hi_4bit 2>&1 -python quantize_llama.py --save_path $CKPT/1_65b_hi_4bit --codebook HI4B1C --sigma_reg2 1e-2 --scale 2.7 --base_model $L1/huggingface_65B --hessian_path $HESS/llama1_65b_6144 >> $LOG/1_65b_hi_4bit 2>&1 -python quantize_llama.py --save_path $CKPT/1_30b_hi_4bit --codebook HI4B1C --sigma_reg2 1e-2 --scale 2.7 --base_model $L1/huggingface_30B --hessian_path $HESS/llama1_30b_6144 >> $LOG/1_30b_hi_4bit 2>&1 -python quantize_llama.py --save_path $CKPT/1_13b_hi_4bit --codebook HI4B1C --sigma_reg2 1e-2 --scale 2.7 --base_model $L1/huggingface_13B --hessian_path $HESS/llama1_13b_6144 >> $LOG/1_13b_hi_4bit 2>&1 -python quantize_llama.py --save_path $CKPT/1_7b_hi_4bit --codebook HI4B1C --sigma_reg2 1e-2 --scale 2.7 --base_model $L1/huggingface_7B --hessian_path $HESS/llama1_7b_6144 >> $LOG/1_7b_hi_4bit 2>&1 - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_hi_4bit --hf_output_path $HF/2_70b_hi_4bit & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_13b_hi_4bit --hf_output_path $HF/2_13b_hi_4bit & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_7b_hi_4bit --hf_output_path $HF/2_7b_hi_4bit & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/1_65b_hi_4bit --hf_output_path $HF/1_65b_hi_4bit & -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/1_30b_hi_4bit --hf_output_path $HF/1_30b_hi_4bit & -CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/1_13b_hi_4bit --hf_output_path $HF/1_13b_hi_4bit & -CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/1_7b_hi_4bit --hf_output_path $HF/1_7b_hi_4bit & - -wait - -CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --seqlen 2048 --hf_path $HF/2_70b_hi_4bit >> $LOG/2_70b_hi_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --seqlen 2048 --hf_path $HF/2_13b_hi_4bit >> $LOG/2_13b_hi_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --seqlen 2048 --hf_path $HF/2_7b_hi_4bit >> $LOG/2_7b_hi_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_65b_hi_4bit >> $LOG/1_65b_hi_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_30b_hi_4bit >> $LOG/1_30b_hi_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_13b_hi_4bit >> $LOG/1_13b_hi_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_7b_hi_4bit >> $LOG/1_7b_hi_4bit 2>&1 & - -wait -''' -CUDA_VISIBLE_DEVICES=0 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 1 --hf_path $HF/2_70b_hi_4bit >> $LOG/2_70b_hi_4bit 2>&1 & -#CUDA_VISIBLE_DEVICES=1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_hi_4bit >> $LOG/2_13b_hi_4bit 2>&1 & -#CUDA_VISIBLE_DEVICES=2 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_hi_4bit >> $LOG/2_7b_hi_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 1 --hf_path $HF/1_65b_hi_4bit >> $LOG/1_65b_hi_4bit 2>&1 & -#CUDA_VISIBLE_DEVICES=4 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_hi_4bit >> $LOG/1_30b_hi_4bit 2>&1 & -#CUDA_VISIBLE_DEVICES=5 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_hi_4bit >> $LOG/1_13b_hi_4bit 2>&1 & -#CUDA_VISIBLE_DEVICES=6 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_hi_4bit >> $LOG/1_7b_hi_4bit 2>&1 & - -wait diff --git a/sbatch/old/half_int_8col.sbatch b/sbatch/old/half_int_8col.sbatch deleted file mode 100644 index 67b8936..0000000 --- a/sbatch/old/half_int_8col.sbatch +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu -#SBATCH --job-name=half_int_8col -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=48G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:1 -#SBATCH --constraint='gpu-high' -#SBATCH --time=24:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -CKPT=/share/desa/nfs02/quip_llama2/checkpoints/ - -python quantize_llama.py --save_path $CKPT/half_int_8col --codebook half_integer_2bit_8col -python hfize_llama.py --quantized_path $CKPT/half_int_8col --hf_output_path hfized/half_int_8col -python ppl_llama.py --hf_path hfized/half_int_8col --dataset c4 diff --git a/sbatch/old/hessian.sbatch b/sbatch/old/hessian.sbatch deleted file mode 100644 index 8c759e4..0000000 --- a/sbatch/old/hessian.sbatch +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu -#SBATCH --job-name=hessian-70b -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=750G -#SBATCH --cpus-per-task=12 -#SBATCH --gres=gpu:8 -#SBATCH --constraint='v100' -#SBATCH --time=48:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -python hessian_offline2.py --batch_size 2 --devset_size 4096 --ctx_size 4096 --save_path /home/at676/two_bit_quant/hessians/llama2_70b diff --git a/sbatch/old/hessian_7b.sbatch b/sbatch/old/hessian_7b.sbatch deleted file mode 100644 index 462c64e..0000000 --- a/sbatch/old/hessian_7b.sbatch +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu -#SBATCH --job-name=hessian-7b_13b -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=500G -#SBATCH --cpus-per-task=8 -#SBATCH --gres=gpu:3 -#SBATCH --constraint='v100' -#SBATCH --time=48:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -rm /scratch/*.pt - -python hessian_offline.py --batch_size 4 --devset_size 6144 --ctx_size 4096 --save_path /share/desa/nfs01/quip_llama2/hessians_llama2_7b_6144 --scratch_path /scratch --base_model meta-llama/Llama-2-7b-hf - -rm /scratch/*.pt - -python hessian_offline.py --batch_size 2 --devset_size 6144 --ctx_size 4096 --save_path /share/desa/nfs01/quip_llama2/hessians_llama2_13b_6144 --scratch_path /scratch --base_model meta-llama/Llama-2-13b-hf - diff --git a/sbatch/old/hessian_together.sbatch b/sbatch/old/hessian_together.sbatch deleted file mode 100644 index 913a69b..0000000 --- a/sbatch/old/hessian_together.sbatch +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=hessian_llama_70b -#SBATCH --nodes=1 -#SBATCH --time=24:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err - -cd /work/albert - -python hessian_offline.py --batch_size 4 --devset_size 4096 --ctx_size 4096 --save_path /work/albert/two_bit_quant/hessians/llama2_70b --scratch_path /dev/shm diff --git a/sbatch/old/kick_eval.sh b/sbatch/old/kick_eval.sh deleted file mode 100644 index d86cd7f..0000000 --- a/sbatch/old/kick_eval.sh +++ /dev/null @@ -1,71 +0,0 @@ - -#!/bin/bash - -# output directory -# LEAVE COMMENTED OUT SO DON'T ACCIDENTALLY OVERWRITE -logs_dirname="slurm_out/eval_ablate1" -mkdir --parents $logs_dirname - -MEM="32G" -CONST="gpu-mid" -TIME="64" -CPU="8" -GPU="1" -BS="4" - -MODELS=( - # "meta-llama/Llama-2-7b-chat-hf" #\ - "hfized/7b-chat_all" \ - "hfized/7b-chat_baseline_hada"\ - "hfized/7b-chat_lora"\ - "hfized/7b-chat_ocs"\ - "hfized/7b-chat_rescaleWH"\ - "hfized/7b-chat_rescaleWH_lora" -) -TASKS=("piqa" "winogrande" "arc_easy" "arc_challenge" "boolq") - -# main loop -for mo_dir in "${MODELS[@]}" -do -for ta in "${TASKS[@]}" -do -# save file -# mo_name=$(basename "$mo_dir") -mo_head=$(echo "$mo_dir" | cut -d / -f 1) -mo_name=$(echo "$mo_dir" | cut -d / -f 2) -jobname="${mo_head}_${mo_name}_${ta}" -echo $jobname -# slurm helper -slurm_helper=" -#!/bin/bash -\n#SBATCH --job-name=${jobname} -\n#SBATCH -N 1 -\n#SBATCH -c ${CPU} -\n#SBATCH --mail-type=FAIL -\n#SBATCH --mail-user=jc3464@cornell.edu -\n#SBATCH --partition=gpu -\n#SBATCH --gres=gpu:${GPU} -\n#SBATCH --mem=${MEM} -\n#SBATCH --constraint=${CONST} -\n#SBATCH -t ${TIME}:00:00 -\n#SBATCH -o ${logs_dirname}/${jobname}_%j.out -\n#SBATCH -e ${logs_dirname}/${jobname}_%j.err -\n\n -\nsource ~/.bashrc -\nsource ~/anaconda3/etc/profile.d/conda.sh -\nconda activate smoothquant -\n -\necho jobname: $jobname -\n\n -\necho '-------------------------------------' -\npython eval_llama.py --hf_path ${mo_dir} --tasks ${ta} --batch_size ${BS} --output_path ${logs_dirname}/${jobname}.json -" -# add slurm header to helper.sh -temp_file=$(mktemp) -echo -en $slurm_helper > $temp_file -echo $temp_file -# run on slurm -sbatch --requeue $temp_file - -done -done \ No newline at end of file diff --git a/sbatch/old/kick_l1.sh b/sbatch/old/kick_l1.sh deleted file mode 100644 index ec5895a..0000000 --- a/sbatch/old/kick_l1.sh +++ /dev/null @@ -1,98 +0,0 @@ - -#!/bin/bash - -# output directory -# LEAVE COMMENTED OUT SO DON'T ACCIDENTALLY OVERWRITE -dirname="checkpoints/llama1" -logs_dirname="slurm_out/llama1" -mkdir --parents $dirname -mkdir --parents $logs_dirname - -MODELS=( - # "7b"\ - "13b"\ - "30b"\ - "65b" - ) -MEMS=( - # "32G"\ - "64G"\ - "160G"\ - "160G") -CONSTS=( - # "gpu-mid"\ - "gpu-mid"\ - "gpu-mid"\ - "gpu-high" - ) -TIME="64" -CPU="8" -GPUS=( - # "2"\ - "2"\ - "2"\ - "1" - ) - -HESSIAN_PATHS=( - # "hessians/llama1-7b-2048dev-2048ctx"\ - "hessians/llama1-13b-2048dev-2048ctx"\ - "hessians/llama1-30b-2048dev-2048ctx"\ - "hessians/llama1-65b-2048dev-2048ctx" -) -EXTRA_ARGS=( - # "--lora_rank 128 --rescale_WH --outlier_channel_split --ocs_down_size 16384"\ - "--lora_rank 128 --rescale_WH --outlier_channel_split --ocs_down_size 16384"\ - "--lora_rank 128 --rescale_WH --outlier_channel_split --ocs_down_size 32768"\ - "--lora_rank 128 --rescale_WH --outlier_channel_split --ocs_down_size 32768" - ) -NAMES=( - # "lora128_rescaleWH_ocs2-14"\ - "lora128_rescaleWH_ocs2-14"\ - "lora128_rescaleWH_ocs2-15"\ - "lora128_rescaleWH_ocs2-15" - ) - - -# main loop -for idx in "${!MODELS[@]}" -do -# save files -jobname="${MODELS[$idx]}_${NAMES[$idx]}" -# slurm helper -slurm_helper=" -#!/bin/bash -\n#SBATCH --job-name=${jobname} -\n#SBATCH -N 1 -\n#SBATCH -c ${CPU} -\n#SBATCH --mail-type=FAIL -\n#SBATCH --mail-user=jc3464@cornell.edu -\n#SBATCH --partition=gpu -\n#SBATCH --gres=gpu:${GPUS[$idx]} -\n#SBATCH --mem=${MEMS[$idx]} -\n#SBATCH --constraint=${CONSTS[$idx]} -\n#SBATCH -t ${TIME}:00:00 -\n#SBATCH -o ${logs_dirname}/${jobname}_%j.out -\n#SBATCH -e ${logs_dirname}/${jobname}_%j.err -\n\n -\nsource ~/.bashrc -\nsource ~/anaconda3/etc/profile.d/conda.sh -\nconda activate smoothquant -\n -\necho jobname: $jobname -\necho extra args: ${EXTRA_ARGS[$idx]} -\n\n -\necho '-------------------------------------' -\npython quantize_llama.py --base_model decapoda-research/llama-${MODELS[$idx]}-hf -${EXTRA_ARGS[$idx]} \ ---save_path ${dirname}/${jobname} \ ---hessian_path ${HESSIAN_PATHS[$idx]} -" -# add slurm header to helper.sh -temp_file=$(mktemp) -echo -en $slurm_helper > $temp_file -echo $temp_file -# run on slurm -# sbatch --requeue $temp_file - -done \ No newline at end of file diff --git a/sbatch/old/kick_l2.sh b/sbatch/old/kick_l2.sh deleted file mode 100644 index d636c95..0000000 --- a/sbatch/old/kick_l2.sh +++ /dev/null @@ -1,66 +0,0 @@ - -#!/bin/bash - -# output directory -# LEAVE COMMENTED OUT SO DON'T ACCIDENTALLY OVERWRITE -dirname="checkpoints/llama1" -logs_dirname="slurm_out/llama1" -mkdir --parents $dirname -mkdir --parents $logs_dirname - -# MODEL="7b-chat" -# HESSIAN_PATH="hessians/7b-chat-512dev-4096ctx" -# MODEL="13b-chat" -# HESSIAN_PATH="hessians/13b-chat-512dev-4096ctx" -MEM="32G" -CONST="gpu-mid" -TIME="64" -CPU="8" -GPU="2" - -EXTRA_ARGS=("--lora_rank -1 --rescale_WH") -NAMES=("rescaleWHv2") - - -# main loop -for idx in "${!EXTRA_ARGS[@]}" -do -# save files -jobname="${MODEL}_${NAMES[$idx]}" -# slurm helper -slurm_helper=" -#!/bin/bash -\n#SBATCH --job-name=${jobname} -\n#SBATCH -N 1 -\n#SBATCH -c ${CPU} -\n#SBATCH --mail-type=FAIL -\n#SBATCH --mail-user=jc3464@cornell.edu -\n#SBATCH --partition=gpu -\n#SBATCH --gres=gpu:${GPU} -\n#SBATCH --mem=${MEM} -\n#SBATCH --constraint=${CONST} -\n#SBATCH -t ${TIME}:00:00 -\n#SBATCH -o ${logs_dirname}/${jobname}_%j.out -\n#SBATCH -e ${logs_dirname}/${jobname}_%j.err -\n\n -\nsource ~/.bashrc -\nsource ~/anaconda3/etc/profile.d/conda.sh -\nconda activate smoothquant -\n -\necho jobname: $jobname -\necho extra args: ${EXTRA_ARGS[$idx]} -\n\n -\necho '-------------------------------------' -\npython quantize_llama.py --base_model meta-llama/Llama-2-${MODEL}-hf -${EXTRA_ARGS[$idx]} \ ---save_path ${dirname}/${jobname} \ ---hessian_path $HESSIAN_PATH -" -# add slurm header to helper.sh -temp_file=$(mktemp) -echo -en $slurm_helper > $temp_file -echo $temp_file -# run on slurm -sbatch --requeue $temp_file - -done \ No newline at end of file diff --git a/sbatch/old/kick_off.sh b/sbatch/old/kick_off.sh deleted file mode 100644 index d556a15..0000000 --- a/sbatch/old/kick_off.sh +++ /dev/null @@ -1,79 +0,0 @@ - -#!/bin/bash - -# output directory -# LEAVE COMMENTED OUT SO DON'T ACCIDENTALLY OVERWRITE -# dirname="hessians" -# logs_dirname="slurm_out/hessians" -# mkdir --parents $dirname -# mkdir --parents $logs_dirname - -TIME="240" -CPU="8" -GPU="1" -# GPU="v100:1|a100:1" -GPUCONST="v100|a100" - - -TITLES=( - "llama1-7b" "llama1-13b"\ - "llama1-30b" "llama1-65b" - ) -MODELS=( - 'decapoda-research/llama-7b-hf' 'decapoda-research/llama-13b-hf' \ - 'decapoda-research/llama-30b-hf' 'decapoda-research/llama-65b-hf' - ) -# GPUCONSTS=("gpu-mid" "gpu-mid" "gpu-high" "gpu-high") -MEMS=( - "64G" "100G"\ - "160G" "200G" - ) -BSS=( - "4" "4"\ - "4" "4" - ) -DEV="2048" -CTX="2048" - - -# main loop -for idx in "${!MODELS[@]}" -do -# save files -jobname="$Hessian_${TITLES[$idx]}" -echo $jobname -# slurm helper -slurm_helper=" -#!/bin/bash -\n#SBATCH --job-name=${jobname} -\n#SBATCH -N 1 -\n#SBATCH -c ${CPU} -\n#SBATCH --mail-type=FAIL -\n#SBATCH --mail-user=jc3464@cornell.edu -\n#SBATCH --partition=gpu -\n#SBATCH --gres=gpu:${GPU} -\n#SBATCH --constraint=\"${GPUCONST}\" -\n#SBATCH --mem=${MEMS[$idx]} -\n#SBATCH -t ${TIME}:00:00 -\n#SBATCH -o ${logs_dirname}/${jobname}_%j.out -\n#SBATCH -e ${logs_dirname}/${jobname}_%j.err -\n\n -\nsource ~/.bashrc -\nsource ~/anaconda3/etc/profile.d/conda.sh -\nconda activate smoothquant -\n -\necho jobname: $jobname -\n\n -\necho '-------------------------------------' -\npython hessian_offline.py --devset_size ${DEV} --ctx_size ${CTX} --batch_size ${BSS[$idx]} ---base_model ${MODELS[$idx]} --save_path ${dirname}/${TITLES[idx]}-${DEV}dev-${CTX}ctx -" -# add slurm header to helper.sh -temp_file=$(mktemp) -echo -en $slurm_helper > $temp_file -echo $temp_file -# run on slurm -sbatch --requeue $temp_file - -done -# \n#SBATCH --constraint=${GPUCONSTS[$idx]} \ No newline at end of file diff --git a/sbatch/old/kmeans_8col.sbatch b/sbatch/old/kmeans_8col.sbatch deleted file mode 100644 index 68587de..0000000 --- a/sbatch/old/kmeans_8col.sbatch +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu -#SBATCH --job-name=kmeans_8col -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=48G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:1 -#SBATCH --constraint='gpu-high' -#SBATCH --time=24:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -CKPT=/share/desa/nfs02/quip_llama2/checkpoints - -#python quantize_llama.py --save_path $CKPT/kmeans_8col --codebook kmeans_8col -#python hfize_llama.py --quantized_path $CKPT/kmeans_8col --hf_output_path hfized/kmeans_8col -python ppl_llama.py --hf_path hfized/kmeans_8col --dataset c4 diff --git a/sbatch/old/kmeans_np_8col.sbatch b/sbatch/old/kmeans_np_8col.sbatch deleted file mode 100644 index 41866b0..0000000 --- a/sbatch/old/kmeans_np_8col.sbatch +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu -#SBATCH --job-name=kmeans_np_8col -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=48G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:1 -#SBATCH --constraint='gpu-high' -#SBATCH --time=24:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -CKPT=/share/desa/nfs02/quip_llama2/checkpoints - -python quantize_llama.py --save_path $CKPT/kmeans_np_8col --codebook kmeans_8col -python hfize_llama.py --quantized_path $CKPT/kmeans_np_8col --hf_output_path hfized/kmeans_np_8col -python ppl_llama.py --hf_path hfized/kmeans_np_8col --dataset c4 diff --git a/sbatch/old/kmedoid_8col.sbatch b/sbatch/old/kmedoid_8col.sbatch deleted file mode 100644 index ded606b..0000000 --- a/sbatch/old/kmedoid_8col.sbatch +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu -#SBATCH --job-name=kmedoid_8col -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=48G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:1 -#SBATCH --constraint='gpu-high' -#SBATCH --time=24:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -CKPT=/share/desa/nfs02/quip_llama2/checkpoints - -python quantize_llama.py --save_path $CKPT/kmedoid_8col --codebook kmedoid_8col -python hfize_llama.py --quantized_path $CKPT/kmedoid_8col --hf_output_path hfized/kmedoid_8col -python ppl_llama.py --hf_path hfized/kmedoid_8col --dataset c4 diff --git a/sbatch/old/llama1.sh b/sbatch/old/llama1.sh deleted file mode 100644 index 5ce97dc..0000000 --- a/sbatch/old/llama1.sh +++ /dev/null @@ -1,66 +0,0 @@ -#!/bin/bash - -CKPT=/mnt/jerry_data/checkpoints -HF=/mnt/jerry_data/hfized -HESS=/mnt/jerry_data/hessians -LOG=/mnt/jerry_data/logs -L1=/mnt/jerry_data/meta_llama1 - - -python quantize_llama.py --save_path $CKPT/1_65b_e8p_2bit --codebook E8P12 --sigma_reg2 1e-2 --scale 0.9 --base_model $L1/huggingface_65B --hessian_path $HESS/llama1_65b_6144 >> $LOG/1_65b_e8p_2bit 2>&1 -python quantize_llama.py --save_path $CKPT/1_30b_e8p_2bit --codebook E8P12 --sigma_reg2 1e-2 --scale 0.9 --base_model $L1/huggingface_30B --hessian_path $HESS/llama1_30b_6144 >> $LOG/1_30b_e8p_2bit 2>&1 -python quantize_llama.py --save_path $CKPT/1_13b_e8p_2bit --codebook E8P12 --sigma_reg2 1e-2 --scale 0.9 --base_model $L1/huggingface_13B --hessian_path $HESS/llama1_13b_6144 >> $LOG/1_13b_e8p_2bit 2>&1 -python quantize_llama.py --save_path $CKPT/1_7b_e8p_2bit --codebook E8P12 --sigma_reg2 1e-2 --scale 0.9 --base_model $L1/huggingface_7B --hessian_path $HESS/llama1_7b_6144 >> $LOG/1_7b_e8p_2bit 2>&1 -python quantize_llama.py --save_path $CKPT/1_65b_d4_4bit --codebook D44B --sigma_reg2 1e-2 --scale 3.4 --base_model $L1/huggingface_65B --hessian_path $HESS/llama1_65b_6144 >> $LOG/1_65b_d4_4bit 2>&1 -python quantize_llama.py --save_path $CKPT/1_30b_d4_4bit --codebook D44B --sigma_reg2 1e-2 --scale 3.4 --base_model $L1/huggingface_30B --hessian_path $HESS/llama1_30b_6144 >> $LOG/1_30b_d4_4bit 2>&1 -python quantize_llama.py --save_path $CKPT/1_13b_d4_4bit --codebook D44B --sigma_reg2 1e-2 --scale 3.4 --base_model $L1/huggingface_13B --hessian_path $HESS/llama1_13b_6144 >> $LOG/1_13b_d4_4bit 2>&1 -python quantize_llama.py --save_path $CKPT/1_7b_d4_4bit --codebook D44B --sigma_reg2 1e-2 --scale 3.4 --base_model $L1/huggingface_7B --hessian_path $HESS/llama1_7b_6144 >> $LOG/1_7b_d4_4bit 2>&1 - - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/1_65b_e8p_2bit --hf_output_path $HF/1_65b_e8p_2bit & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/1_30b_e8p_2bit --hf_output_path $HF/1_30b_e8p_2bit & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/1_13b_e8p_2bit --hf_output_path $HF/1_13b_e8p_2bit & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/1_7b_e8p_2bit --hf_output_path $HF/1_7b_e8p_2bit & -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/1_65b_d4_4bit --hf_output_path $HF/1_65b_d4_4bit & -CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/1_30b_d4_4bit --hf_output_path $HF/1_30b_d4_4bit & -CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/1_13b_d4_4bit --hf_output_path $HF/1_13b_d4_4bit & -CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/1_7b_d4_4bit --hf_output_path $HF/1_7b_d4_4bit & - -wait - - -# fp16 zero shot for llama1 and 2 -CUDA_VISIBLE_DEVICES=0,1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $L1/huggingface_65B >> $LOG/1_65b_fp16 2>&1 & -CUDA_VISIBLE_DEVICES=2,3 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path meta-llama/Llama-2-70b-hf >> $LOG/2_70b_fp16 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $L1/huggingface_30B >> $LOG/1_30b_fp16 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $L1/huggingface_13B >> $LOG/1_13b_fp16 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path meta-llama/Llama-2-13b-hf >> $LOG/2_13b_fp16 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $L1/huggingface_7B >> $LOG/1_7b_fp16 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path meta-llama/Llama-2-7b-hf >> $LOG/2_7b_fp16 2>&1 & - -wait - -# perplexity -CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_65b_e8p_2bit >> $LOG/1_65b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_30b_e8p_2bit >> $LOG/1_30b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_13b_e8p_2bit >> $LOG/1_13b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_7b_e8p_2bit >> $LOG/1_7b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_65b_d4_4bit >> $LOG/1_65b_d4_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_30b_d4_4bit >> $LOG/1_30b_d4_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_13b_d4_4bit >> $LOG/1_13b_d4_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_7b_d4_4bit >> $LOG/1_7b_d4_4bit 2>&1 & - -wait - -# zero shot - -CUDA_VISIBLE_DEVICES=0 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_e8p_2bit >> $LOG/1_65b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_e8p_2bit >> $LOG/1_30b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_e8p_2bit >> $LOG/1_13b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_e8p_2bit >> $LOG/1_7b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_d4_4bit >> $LOG/1_65b_d4_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_d4_4bit >> $LOG/1_30b_d4_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_d4_4bit >> $LOG/1_13b_d4_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_d4_4bit >> $LOG/1_7b_d4_4bit 2>&1 & - -wait diff --git a/sbatch/old/main_check.sbatch b/sbatch/old/main_check.sbatch deleted file mode 100644 index bd280f9..0000000 --- a/sbatch/old/main_check.sbatch +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash -#SBATCH --partition=ellis,gpu -#SBATCH --job-name=main_check -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=96G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:2 -#SBATCH --constraint='gpu-high' -#SBATCH --time=72:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -#python quantize_llama.py --save_path checkpoints/main_check -python hfize_llama.py --quantized_path checkpoints/main_check --hf_output_path hfized/main_check -python ppl_llama.py --hf_path hfized/main_check --dataset c4 diff --git a/sbatch/old/ocs.sbatch b/sbatch/old/ocs.sbatch deleted file mode 100644 index 11a1c4d..0000000 --- a/sbatch/old/ocs.sbatch +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu,ellis -#SBATCH --job-name=ocs -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=64G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:1 -#SBATCH --constraint='gpu-high|gpu-mid' -#SBATCH --time=72:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -#python quantize_llama.py --save_path checkpoints/ocs --outlier_channel_split -#python hfize_llama.py --quantized_path checkpoints/ocs --hf_output_path hfized/ocs -python c4_llama.py --hf_path hfized/ocs diff --git a/sbatch/old/perp.sbatch b/sbatch/old/perp.sbatch deleted file mode 100644 index 9ccd48d..0000000 --- a/sbatch/old/perp.sbatch +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=llama2_perp -#SBATCH -N 1 -#SBATCH -n 8 -#SBATCH --mem=254G -#SBATCH --partition=gpu -#SBATCH --gres=gpu:1 -#SBATCH --constraint="gpu-high" -#SBATCH --exclude=nikola-compute-[15-16] -#SBATCH -t 72:00:00 -#SBATCH -o log_perp_noq.out # Name of stdout output log file (%j expands to jobID) -#SBATCH -e log_perp_noq.err # Name of stderr output log file (%j expands to jobID) -python3 perp.py | tee -a tee_perp_noq.out diff --git a/sbatch/old/plot.sbatch b/sbatch/old/plot.sbatch deleted file mode 100644 index c5610ca..0000000 --- a/sbatch/old/plot.sbatch +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash -#SBATCH --partition=ellis -#SBATCH --job-name=plot -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=256G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:1 -#SBATCH --constraint='gpu-high' -#SBATCH --time=48:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -cd /home/at676/two_bit_quant/scripts -python plot_lattice.py - diff --git a/sbatch/old/quant_sink.sbatch b/sbatch/old/quant_sink.sbatch deleted file mode 100644 index bbe9447..0000000 --- a/sbatch/old/quant_sink.sbatch +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=llama2_sinkhorn -#SBATCH -N 1 -#SBATCH -n 8 -#SBATCH --mem=254G -#SBATCH --partition=gpu -#SBATCH --gres=gpu:1 -#SBATCH --constraint="gpu-high" -#SBATCH --exclude=nikola-compute-[15-18],badfellow,ellis-compute-[01-02] -#SBATCH -t 72:00:00 -#SBATCH -o log_quant_sinkscale.out # Name of stdout output log file (%j expands to jobID) -#SBATCH -e log_quant_sinkscale.err # Name of stderr output log file (%j expands to jobID) -python3 quant_sinkscale.py | tee -a tee_quant_sinkscale.out diff --git a/sbatch/old/run.sbatch b/sbatch/old/run.sbatch deleted file mode 100644 index 34e2666..0000000 --- a/sbatch/old/run.sbatch +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash -#SBATCH --job-name=llama2 -#SBATCH -N 1 -#SBATCH -n 8 -#SBATCH --mem=254G -#SBATCH --partition=gpu -#SBATCH --gres=gpu:1 -#SBATCH --constraint="gpu-high" -#SBATCH --exclude=nikola-compute-[15-18],badfellow,ellis-compute-[01-02] -#SBATCH -t 72:00:00 -#SBATCH -o log_hess_70b.out # Name of stdout output log file (%j expands to jobID) -#SBATCH -e log_hess_70b.err # Name of stderr output log file (%j expands to jobID) -python3 hessian.py | tee -a hess_70b.out diff --git a/sbatch/old/scale_tests/e8p_s075.sbatch b/sbatch/old/scale_tests/e8p_s075.sbatch deleted file mode 100644 index d37b9c5..0000000 --- a/sbatch/old/scale_tests/e8p_s075.sbatch +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu -#SBATCH --job-name=e8p_s075 -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=64G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:1 -#SBATCH --constraint='gpu-high' -#SBATCH --time=48:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -CKPT=checkpoints - -python quantize_llama.py --save_path $CKPT/e8p_s075 --codebook E8P12 --sigma_reg2 1e-2 --scale 0.75 --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_70b_6144 -python hfize_llama.py --quantized_path $CKPT/e8p_s075 --hf_output_path hfized/e8p_s075 -python ppl_llama.py --hf_path hfized/e8p_s075 --dataset c4 diff --git a/sbatch/old/scale_tests/e8p_s080.sbatch b/sbatch/old/scale_tests/e8p_s080.sbatch deleted file mode 100644 index c5eea4f..0000000 --- a/sbatch/old/scale_tests/e8p_s080.sbatch +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu -#SBATCH --job-name=e8p_s080 -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=64G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:1 -#SBATCH --constraint='gpu-high' -#SBATCH --time=48:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -CKPT=checkpoints - -python quantize_llama.py --save_path $CKPT/e8p_s080 --codebook E8P12 --sigma_reg2 1e-2 --scale 0.80 --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_70b_6144 -python hfize_llama.py --quantized_path $CKPT/e8p_s080 --hf_output_path hfized/e8p_s080 -python ppl_llama.py --hf_path hfized/e8p_s080 --dataset c4 diff --git a/sbatch/old/scale_tests/e8p_s085.sbatch b/sbatch/old/scale_tests/e8p_s085.sbatch deleted file mode 100644 index aaf97a9..0000000 --- a/sbatch/old/scale_tests/e8p_s085.sbatch +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu -#SBATCH --job-name=e8p_s085 -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=64G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:1 -#SBATCH --constraint='gpu-high' -#SBATCH --time=48:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -CKPT=checkpoints - -python quantize_llama.py --save_path $CKPT/e8p_s085 --codebook E8P12 --sigma_reg2 1e-2 --scale 0.85 --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_70b_6144 -python hfize_llama.py --quantized_path $CKPT/e8p_s085 --hf_output_path hfized/e8p_s085 -python ppl_llama.py --hf_path hfized/e8p_s085 --dataset c4 diff --git a/sbatch/old/scale_tests/e8p_s090.sbatch b/sbatch/old/scale_tests/e8p_s090.sbatch deleted file mode 100644 index 11a0139..0000000 --- a/sbatch/old/scale_tests/e8p_s090.sbatch +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu -#SBATCH --job-name=e8p_s090 -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=64G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:1 -#SBATCH --constraint='gpu-high' -#SBATCH --time=48:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -CKPT=checkpoints - -python quantize_llama.py --save_path $CKPT/e8p_s090 --codebook E8P12 --sigma_reg2 1e-2 --scale 0.90 --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_70b_6144 -python hfize_llama.py --quantized_path $CKPT/e8p_s090 --hf_output_path hfized/e8p_s090 -python ppl_llama.py --hf_path hfized/e8p_s090 --dataset c4 diff --git a/sbatch/old/scale_tests/e8p_s095.sbatch b/sbatch/old/scale_tests/e8p_s095.sbatch deleted file mode 100644 index 4cb78a3..0000000 --- a/sbatch/old/scale_tests/e8p_s095.sbatch +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu -#SBATCH --job-name=e8p_s095 -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=64G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:1 -#SBATCH --constraint='gpu-high' -#SBATCH --time=48:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -CKPT=checkpoints - -python quantize_llama.py --save_path $CKPT/e8p_s095 --codebook E8P12 --sigma_reg2 1e-2 --scale 0.95 --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_70b_6144 -python hfize_llama.py --quantized_path $CKPT/e8p_s095 --hf_output_path hfized/e8p_s095 -python ppl_llama.py --hf_path hfized/e8p_s095 --dataset c4 diff --git a/sbatch/old/scale_tests/e8p_s100.sbatch b/sbatch/old/scale_tests/e8p_s100.sbatch deleted file mode 100644 index f0ad2b8..0000000 --- a/sbatch/old/scale_tests/e8p_s100.sbatch +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu -#SBATCH --job-name=e8p_s100 -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=64G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:1 -#SBATCH --constraint='gpu-high' -#SBATCH --time=48:00:00 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -CKPT=checkpoints - -python quantize_llama.py --save_path $CKPT/e8p_s100 --codebook E8P12 --sigma_reg2 1e-2 --scale 1.00 --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_70b_6144 -python hfize_llama.py --quantized_path $CKPT/e8p_s100 --hf_output_path hfized/e8p_s100 -python ppl_llama.py --hf_path hfized/e8p_s100 --dataset c4 diff --git a/sbatch/old/scale_tests/e8p_s105.sbatch b/sbatch/old/scale_tests/e8p_s105.sbatch deleted file mode 100644 index fc636e6..0000000 --- a/sbatch/old/scale_tests/e8p_s105.sbatch +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu -#SBATCH --job-name=e8p_s105 -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=64G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:1 -#SBATCH --constraint='gpu-high' -#SBATCH --time=48:05:05 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -CKPT=checkpoints - -python quantize_llama.py --save_path $CKPT/e8p_s105 --codebook E8P12 --sigma_reg2 1e-2 --scale 1.05 --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_70b_6144 -python hfize_llama.py --quantized_path $CKPT/e8p_s105 --hf_output_path hfized/e8p_s105 -python ppl_llama.py --hf_path hfized/e8p_s105 --dataset c4 diff --git a/sbatch/old/scale_tests/e8p_s110.sbatch b/sbatch/old/scale_tests/e8p_s110.sbatch deleted file mode 100644 index c3722be..0000000 --- a/sbatch/old/scale_tests/e8p_s110.sbatch +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -#SBATCH --partition=gpu -#SBATCH --job-name=e8p_s110 -#SBATCH --mail-type=FAIL -#SBATCH --mail-user=at676@cornell.edu -#SBATCH --ntasks=1 -#SBATCH --mem=64G -#SBATCH --cpus-per-task=4 -#SBATCH --gres=gpu:1 -#SBATCH --constraint='gpu-high' -#SBATCH --time=48:10:10 -#SBATCH --output=slurm_out/%x_%j.out -#SBATCH --err=slurm_out/%x_%j.err -#SBATCH --requeue -#SBATCH --open-mode=append - -CKPT=checkpoints - -python quantize_llama.py --save_path $CKPT/e8p_s110 --codebook E8P12 --sigma_reg2 1e-2 --scale 1.10 --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_70b_6144 -python hfize_llama.py --quantized_path $CKPT/e8p_s110 --hf_output_path hfized/e8p_s110 -python ppl_llama.py --hf_path hfized/e8p_s110 --dataset c4 diff --git a/sbatch/old/scale_tests/run_all_d4.sh b/sbatch/old/scale_tests/run_all_d4.sh deleted file mode 100644 index e3c5c9f..0000000 --- a/sbatch/old/scale_tests/run_all_d4.sh +++ /dev/null @@ -1,31 +0,0 @@ -#!/bin/bash - -CKPT=checkpoints -HF=hfized -''' -CUDA_VISIBLE_DEVICES=0 python quantize_llama.py --save_path $CKPT/d4_s110 --codebook D4 --sigma_reg2 1e-2 --scale 1.10 --hessian_path hessians/llama2_70b_6144 > slurm_out/d4_s110.log 2>&1 & -CUDA_VISIBLE_DEVICES=1 python quantize_llama.py --save_path $CKPT/d4_s115 --codebook D4 --sigma_reg2 1e-2 --scale 1.15 --hessian_path hessians/llama2_70b_6144 > slurm_out/d4_s115.log 2>&1 & -CUDA_VISIBLE_DEVICES=2 python quantize_llama.py --save_path $CKPT/d4_s121 --codebook D4 --sigma_reg2 1e-2 --scale 1.21 --hessian_path hessians/llama2_70b_6144 > slurm_out/d4_s121.log 2>&1 & -CUDA_VISIBLE_DEVICES=3 python quantize_llama.py --save_path $CKPT/d4_s120 --codebook D4 --sigma_reg2 1e-2 --scale 1.20 --hessian_path hessians/llama2_70b_6144 > slurm_out/d4_s120.log 2>&1 & -CUDA_VISIBLE_DEVICES=4 python quantize_llama.py --save_path $CKPT/d4_s125 --codebook D4 --sigma_reg2 1e-2 --scale 1.25 --hessian_path hessians/llama2_70b_6144 > slurm_out/d4_s125.log 2>&1 & -CUDA_VISIBLE_DEVICES=5 python quantize_llama.py --save_path $CKPT/d4_s130 --codebook D4 --sigma_reg2 1e-2 --scale 1.30 --hessian_path hessians/llama2_70b_6144 > slurm_out/d4_s130.log 2>&1 & - -wait -''' -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/d4_s110 --hf_output_path $HF/d4_s110 >> slurm_out/d4_s110.log 2>&1 & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/d4_s115 --hf_output_path $HF/d4_s115 >> slurm_out/d4_s115.log 2>&1 & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/d4_s121 --hf_output_path $HF/d4_s121 >> slurm_out/d4_s121.log 2>&1 & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/d4_s120 --hf_output_path $HF/d4_s120 >> slurm_out/d4_s120.log 2>&1 & -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/d4_s125 --hf_output_path $HF/d4_s125 >> slurm_out/d4_s125.log 2>&1 & -CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/d4_s130 --hf_output_path $HF/d4_s130 >> slurm_out/d4_s130.log 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --hf_path $HF/d4_s110 >> slurm_out/d4_s110.log 2>&1 & -CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --hf_path $HF/d4_s115 >> slurm_out/d4_s115.log 2>&1 & -CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --hf_path $HF/d4_s121 >> slurm_out/d4_s121.log 2>&1 & -CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --hf_path $HF/d4_s120 >> slurm_out/d4_s120.log 2>&1 & -CUDA_VISIBLE_DEVICES=4 python ppl_llama.py --hf_path $HF/d4_s125 >> slurm_out/d4_s125.log 2>&1 & -CUDA_VISIBLE_DEVICES=5 python ppl_llama.py --hf_path $HF/d4_s130 >> slurm_out/d4_s130.log 2>&1 & - -wait diff --git a/sbatch/old/scale_tests/run_all_e8.sh b/sbatch/old/scale_tests/run_all_e8.sh deleted file mode 100644 index 812003c..0000000 --- a/sbatch/old/scale_tests/run_all_e8.sh +++ /dev/null @@ -1,37 +0,0 @@ -#!/bin/bash - -CKPT=checkpoints -HF=hfized -''' -CUDA_VISIBLE_DEVICES=0 python quantize_llama.py --save_path $CKPT/e8p_s075 --codebook E8P12 --sigma_reg2 1e-2 --scale 0.75 --hessian_path hessians/llama2_70b_6144 > slurm_out/e8p_s075.log 2>&1 & -CUDA_VISIBLE_DEVICES=1 python quantize_llama.py --save_path $CKPT/e8p_s080 --codebook E8P12 --sigma_reg2 1e-2 --scale 0.80 --hessian_path hessians/llama2_70b_6144 > slurm_out/e8p_s080.log 2>&1 & -CUDA_VISIBLE_DEVICES=2 python quantize_llama.py --save_path $CKPT/e8p_s085 --codebook E8P12 --sigma_reg2 1e-2 --scale 0.85 --hessian_path hessians/llama2_70b_6144 > slurm_out/e8p_s085.log 2>&1 & -CUDA_VISIBLE_DEVICES=3 python quantize_llama.py --save_path $CKPT/e8p_s090 --codebook E8P12 --sigma_reg2 1e-2 --scale 0.90 --hessian_path hessians/llama2_70b_6144 > slurm_out/e8p_s090.log 2>&1 & -CUDA_VISIBLE_DEVICES=4 python quantize_llama.py --save_path $CKPT/e8p_s095 --codebook E8P12 --sigma_reg2 1e-2 --scale 0.95 --hessian_path hessians/llama2_70b_6144 > slurm_out/e8p_s095.log 2>&1 & -CUDA_VISIBLE_DEVICES=5 python quantize_llama.py --save_path $CKPT/e8p_s0100 --codebook E8P12 --sigma_reg2 1e-2 --scale 1.00 --hessian_path hessians/llama2_70b_6144 > slurm_out/e8p_s100.log 2>&1 & -CUDA_VISIBLE_DEVICES=6 python quantize_llama.py --save_path $CKPT/e8p_s0105 --codebook E8P12 --sigma_reg2 1e-2 --scale 1.05 --hessian_path hessians/llama2_70b_6144 > slurm_out/e8p_s105.log 2>&1 & -CUDA_VISIBLE_DEVICES=7 python quantize_llama.py --save_path $CKPT/e8p_s0103 --codebook E8P12 --sigma_reg2 1e-2 --scale 1.03 --hessian_path hessians/llama2_70b_6144 > slurm_out/e8p_s103.log 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/e8p_s075 --hf_output_path $HF/e8p_s075 >> slurm_out/e8p_s075.log 2>&1 & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/e8p_s080 --hf_output_path $HF/e8p_s080 >> slurm_out/e8p_s080.log 2>&1 & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/e8p_s085 --hf_output_path $HF/e8p_s085 >> slurm_out/e8p_s085.log 2>&1 & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/e8p_s090 --hf_output_path $HF/e8p_s090 >> slurm_out/e8p_s090.log 2>&1 & -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/e8p_s095 --hf_output_path $HF/e8p_s095 >> slurm_out/e8p_s095.log 2>&1 & -CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/e8p_s0100 --hf_output_path $HF/e8p_s0100 >> slurm_out/e8p_s100.log 2>&1 & -CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/e8p_s0105 --hf_output_path $HF/e8p_s0105 >> slurm_out/e8p_s105.log 2>&1 & -CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/e8p_s0103 --hf_output_path $HF/e8p_s0103 >> slurm_out/e8p_s103.log 2>&1 & - -wait -''' -CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --hf_path $HF/e8p_s075 >> slurm_out/e8p_s075.log 2>&1 & -CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --hf_path $HF/e8p_s080 >> slurm_out/e8p_s080.log 2>&1 & -CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --hf_path $HF/e8p_s085 >> slurm_out/e8p_s085.log 2>&1 & -CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --hf_path $HF/e8p_s090 >> slurm_out/e8p_s090.log 2>&1 & -CUDA_VISIBLE_DEVICES=4 python ppl_llama.py --hf_path $HF/e8p_s095 >> slurm_out/e8p_s095.log 2>&1 & -CUDA_VISIBLE_DEVICES=5 python ppl_llama.py --hf_path $HF/e8p_s0100 >> slurm_out/e8p_s100.log 2>&1 & -CUDA_VISIBLE_DEVICES=6 python ppl_llama.py --hf_path $HF/e8p_s0105 >> slurm_out/e8p_s105.log 2>&1 & -CUDA_VISIBLE_DEVICES=7 python ppl_llama.py --hf_path $HF/e8p_s0103 >> slurm_out/e8p_s103.log 2>&1 & - -wait diff --git a/sbatch/packed_4bit.sh b/sbatch/packed_4bit.sh deleted file mode 100644 index 0e28dc8..0000000 --- a/sbatch/packed_4bit.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -CKPT=/mnt/desa_data/checkpoints -HF=/mnt/desa_data/hfized -HESS=/mnt/desa_data/hessians -LOG=/mnt/desa_data/logs -L1=/mnt/desa_data/meta_llama1 - - -python quantize_llama.py --save_path $CKPT/2_70b_hi_4bit_packed --codebook HI4B1C --scale_override 2.7 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 >> $LOG/2_70b_hi_4bit_packed 2>&1 -python quantize_llama.py --save_path $CKPT/2_13b_hi_4bit_packed --codebook HI4B1C --scale_override 2.7 --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144 >> $LOG/2_13b_hi_4bit_packed 2>&1 -python quantize_llama.py --save_path $CKPT/2_7b_hi_4bit_packed --codebook HI4B1C --scale_override 2.7 --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144 >> $LOG/2_7b_hi_4bit_packed 2>&1 -python quantize_llama.py --save_path $CKPT/1_65b_hi_4bit_packed --codebook HI4B1C --scale_override 2.7 --base_model $L1/huggingface_65B --hessian_path $HESS/llama1_65b_6144 >> $LOG/1_65b_hi_4bit_packed 2>&1 -python quantize_llama.py --save_path $CKPT/1_30b_hi_4bit_packed --codebook HI4B1C --scale_override 2.7 --base_model $L1/huggingface_30B --hessian_path $HESS/llama1_30b_6144 >> $LOG/1_30b_hi_4bit_packed 2>&1 -python quantize_llama.py --save_path $CKPT/1_13b_hi_4bit_packed --codebook HI4B1C --scale_override 2.7 --base_model $L1/huggingface_13B --hessian_path $HESS/llama1_13b_6144 >> $LOG/1_13b_hi_4bit_packed 2>&1 -python quantize_llama.py --save_path $CKPT/1_7b_hi_4bit_packed --codebook HI4B1C --scale_override 2.7 --base_model $L1/huggingface_7B --hessian_path $HESS/llama1_7b_6144 >> $LOG/1_7b_hi_4bit_packed 2>&1 - - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_hi_4bit_packed --hf_output_path $HF/2_70b_hi_4bit_packed & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_13b_hi_4bit_packed --hf_output_path $HF/2_13b_hi_4bit_packed & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_7b_hi_4bit_packed --hf_output_path $HF/2_7b_hi_4bit_packed & -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/1_65b_hi_4bit_packed --hf_output_path $HF/1_65b_hi_4bit_packed & -CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/1_30b_hi_4bit_packed --hf_output_path $HF/1_30b_hi_4bit_packed & -CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/1_13b_hi_4bit_packed --hf_output_path $HF/1_13b_hi_4bit_packed & -CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/1_7b_hi_4bit_packed --hf_output_path $HF/1_7b_hi_4bit_packed & - -wait - -# perplexity -CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --seqlen 4096 --hf_path $HF/2_70b_hi_4bit_packed >> $LOG/2_70b_hi_4bit_packed 2>&1 & -CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --seqlen 4096 --hf_path $HF/2_13b_hi_4bit_packed >> $LOG/2_13b_hi_4bit_packed 2>&1 & -CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --seqlen 4096 --hf_path $HF/2_7b_hi_4bit_packed >> $LOG/2_7b_hi_4bit_packed 2>&1 & -CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_65b_hi_4bit_packed >> $LOG/1_65b_hi_4bit_packed 2>&1 & -CUDA_VISIBLE_DEVICES=4 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_30b_hi_4bit_packed >> $LOG/1_30b_hi_4bit_packed 2>&1 & -CUDA_VISIBLE_DEVICES=5 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_13b_hi_4bit_packed >> $LOG/1_13b_hi_4bit_packed 2>&1 & -CUDA_VISIBLE_DEVICES=6 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_7b_hi_4bit_packed >> $LOG/1_7b_hi_4bit_packed 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_hi_4bit_packed >> $LOG/2_70b_hi_4bit_packed 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_hi_4bit_packed >> $LOG/2_13b_hi_4bit_packed 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_hi_4bit_packed >> $LOG/2_7b_hi_4bit_packed 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_hi_4bit_packed >> $LOG/1_65b_hi_4bit_packed 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_hi_4bit_packed >> $LOG/1_30b_hi_4bit_packed 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_hi_4bit_packed >> $LOG/1_13b_hi_4bit_packed 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_hi_4bit_packed >> $LOG/1_7b_hi_4bit_packed 2>&1 & - -wait - diff --git a/sbatch/remain.sh b/sbatch/remain.sh deleted file mode 100644 index 5510e63..0000000 --- a/sbatch/remain.sh +++ /dev/null @@ -1,65 +0,0 @@ -CKPT=/mnt/desa_data/checkpoints/finetune_llama_adamw -HF=/mnt/desa_data/hfized/finetune_llama_adamw -LOG=/mnt/desa_data/logs/finetune_llama_adamw -HESS=/mnt/desa_data/hessians -''' -CUDA_VISIBLE_DEVICES=0,1,2,3 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-7b-hf --hf_path $HF/2_7b_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.000003 --ft_opt adam --ft_bs 1 --ctx_size 4096 --ckpt_path $CKPT/2_7b_4bit >> $LOG/2_7b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=4,5,6,7 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-13b-hf --hf_path $HF/2_13b_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.000003 --ft_opt adam --ft_bs 1 --ctx_size 4096 --ckpt_path $CKPT/2_13b_4bit >> $LOG/2_13b_4bit 2>&1 & -wait - -python tune_susv_lmhead.py --base_model meta-llama/Llama-2-70b-hf --hf_path $HF/2_70b_3bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.000003 --ft_bs 1 --ctx_size 3072 --ft_opt adam --ckpt_path $CKPT/2_70b_3bit >> $LOG/2_70b_3bit 2>&1 -python tune_susv_lmhead.py --base_model meta-llama/Llama-2-70b-hf --hf_path $HF/2_70b_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.000003 --ft_bs 1 --ctx_size 3072 --ft_opt adam --ckpt_path $CKPT/2_70b_4bit >> $LOG/2_70b_4bit 2>&1 -python tune_susv_lmhead.py --base_model meta-llama/Llama-2-70b-hf --hf_path $HF/2_70b_2bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.000003 --ft_bs 1 --ctx_size 3072 --ft_opt adam --ckpt_path $CKPT/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 - -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_70b_3bit --hf_output_path $HF/2_70b_3bit >> $LOG/2_70b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_70b_2bit --hf_output_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_70b_4bit --hf_output_path $HF/2_70b_4bit >> $LOG/2_70b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/2_13b_4bit --hf_output_path $HF/2_13b_4bit >> $LOG/2_13b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/2_7b_4bit --hf_output_path $HF/2_7b_4bit >> $LOG/2_7b_4bit 2>&1 & -wait - -CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/2_70b_3bit >> $LOG/2_70b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path $HF/2_70b_4bit >> $LOG/2_70b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --hf_path $HF/2_13b_4bit >> $LOG/2_13b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/2_7b_4bit >> $LOG/2_7b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --hf_path $HF/2_70b_3bit --seqlen 2048 >> $LOG/2_70b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --hf_path $HF/2_70b_4bit --seqlen 2048 >> $LOG/2_70b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --hf_path $HF/2_70b_2bit --seqlen 2048 >> $LOG/2_70b_2bit 2>&1 & -wait - -CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/2_13b_4bit --seqlen 2048 >> $LOG/2_13b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path $HF/2_7b_4bit --seqlen 2048 >> $LOG/2_7b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path /mnt/desa_data/hfized/finetune_llama/2_7b_2bit --seqlen 2048 >> /mnt/desa_data/logs/finetune_llama/2_7b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --hf_path /mnt/desa_data/hfized/finetune_llama/2_70b_4bit --seqlen 2048 >> /mnt/desa_data/logs/finetune_llama/2_70b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path /mnt/desa_data/hfized/finetune_llama/2_70b_3bit --seqlen 2048 >> /mnt/desa_data/logs/finetune_llama/2_70b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --hf_path /mnt/desa_data/hfized/finetune_llama/2_70b_2bit --seqlen 2048 >> /mnt/desa_data/logs/finetune_llama/2_70b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --hf_path /mnt/desa_data/hfized/finetune_llama/2_13b_4bit --seqlen 2048 >> /mnt/desa_data/logs/finetune_llama/2_13b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --hf_path /mnt/desa_data/hfized/finetune_llama/2_13b_3bit --seqlen 2048 >> /mnt/desa_data/logs/finetune_llama/2_13b_3bit 2>&1 & -wait - -CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path /mnt/desa_data/hfized/finetune_llama/2_13b_2bit --seqlen 2048 >> /mnt/desa_data/logs/finetune_llama/2_13b_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path /mnt/desa_data/hfized/finetune_llama/2_7b_4bit --seqlen 2048 >> /mnt/desa_data/logs/finetune_llama/2_7b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path /mnt/desa_data/hfized/finetune_llama/2_7b_3bit --seqlen 2048 >> /mnt/desa_data/logs/finetune_llama/2_7b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --hf_path /mnt/desa_data/hfized/new_e8p/2_70b_e8p_2bit >> /mnt/desa_data/logs/new_e8p/2_70b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path /mnt/desa_data/hfized/new_e8p/2_13b_e8p_2bit >> /mnt/desa_data/logs/new_e8p/2_13b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --hf_path /mnt/desa_data/hfized/new_e8p/2_7b_e8p_2bit >> /mnt/desa_data/logs/new_e8p/2_7b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --hf_path /mnt/desa_data/hfized/rvq/2_70b_e8prvq_3bit >> /mnt/desa_data/logs/rvq/2_70b_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --hf_path /mnt/desa_data/hfized/rvq/2_13b_e8prvq_3bit >> /mnt/desa_data/logs/rvq/2_13b_e8prvq_3bit 2>&1 & -wait - -CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path /mnt/desa_data/hfized/rvq/2_7b_e8prvq_3bit >> /mnt/desa_data/logs/rvq/2_7b_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path /mnt/desa_data/hfized/rvq/2_70b_e8prvq_4bit >> /mnt/desa_data/logs/rvq/2_70b_e8prvq_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path /mnt/desa_data/hfized/rvq/2_13b_e8prvq_4bit >> /mnt/desa_data/logs/rvq/2_13b_e8prvq_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --hf_path /mnt/desa_data/hfized/rvq/2_7b_e8prvq_4bit >> /mnt/desa_data/logs/rvq/2_7b_e8prvq_4bit 2>&1 & -''' -CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_4bit >> $LOG/2_13b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_4bit >> $LOG/2_7b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path /mnt/desa_data/hfized/finetune_llama/2_7b_4bit >> /mnt/desa_data/logs/finetune_llama/2_7b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path /mnt/desa_data/hfized/finetune_llama/2_70b_4bit >> /mnt/desa_data/logs/finetune_llama/2_70b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_3bit >> $LOG/2_70b_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_4bit >> $LOG/2_70b_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 & -wait - - diff --git a/sbatch/rvq3b.sh b/sbatch/rvq3b.sh deleted file mode 100644 index 36531dd..0000000 --- a/sbatch/rvq3b.sh +++ /dev/null @@ -1,79 +0,0 @@ -#!/bin/bash - -CKPT=/mnt/desa_data/checkpoints/rvq -HF=/mnt/desa_data/hfized/rvq -HESS=/mnt/desa_data/hessians -LOG=/mnt/desa_data/logs/rvq - -mkdir $CKPT -mkdir $HF -mkdir $LOG - -python quantize_llama.py --no_eval --quip_tune_iters 0 --save_path $CKPT/2_70b_e8prvq_3bit --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 >> $LOG/2_70b_e8prvq_3bit 2>&1 -python quantize_llama.py --no_eval --quip_tune_iters 0 --save_path $CKPT/2_13b_e8prvq_3bit --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144 >> $LOG/2_13b_e8prvq_3bit 2>&1 -python quantize_llama.py --no_eval --quip_tune_iters 0 --save_path $CKPT/2_7b_e8prvq_3bit --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144 >> $LOG/2_7b_e8prvq_3bit 2>&1 -python quantize_llama.py --no_eval --quip_tune_iters 0 --save_path $CKPT/2_70b_chat_e8prvq_3bit --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-70b-chat-hf --hessian_path $HESS/llama2_70b_chat_6144 >> $LOG/2_70b_chat_e8prvq_3bit 2>&1 -python quantize_llama.py --no_eval --quip_tune_iters 0 --save_path $CKPT/2_13b_chat_e8prvq_3bit --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-13b-chat-hf --hessian_path $HESS/llama2_13b_chat_6144 >> $LOG/2_13b_chat_e8prvq_3bit 2>&1 -python quantize_llama.py --no_eval --quip_tune_iters 0 --save_path $CKPT/2_7b_chat_e8prvq_3bit --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-7b-chat-hf --hessian_path $HESS/llama2_7b_chat_6144 >> $LOG/2_7b_chat_e8prvq_3bit 2>&1 -python quantize_llama.py --no_eval --quip_tune_iters 0 --save_path $CKPT/1_65b_e8prvq_3bit --codebook E8P12RVQ3B --base_model relaxml/Llama-1-65b-hf --hessian_path $HESS/llama1_65b_6144 >> $LOG/1_65b_e8prvq_3bit 2>&1 -python quantize_llama.py --no_eval --quip_tune_iters 0 --save_path $CKPT/1_30b_e8prvq_3bit --codebook E8P12RVQ3B --base_model relaxml/Llama-1-30b-hf --hessian_path $HESS/llama1_30b_6144 >> $LOG/1_30b_e8prvq_3bit 2>&1 -python quantize_llama.py --no_eval --quip_tune_iters 0 --save_path $CKPT/1_13b_e8prvq_3bit --codebook E8P12RVQ3B --base_model relaxml/Llama-1-13b-hf --hessian_path $HESS/llama1_13b_6144 >> $LOG/1_13b_e8prvq_3bit 2>&1 -python quantize_llama.py --no_eval --quip_tune_iters 0 --save_path $CKPT/1_7b_e8prvq_3bit --codebook E8P12RVQ3B --base_model relaxml/Llama-1-7b-hf --hessian_path $HESS/llama1_7b_6144 >> $LOG/1_7b_e8prvq_3bit 2>&1 -python quantize_llama.py --no_eval --quip_tune_iters 0 --save_path $CKPT/mistral_7b_e8prvq_3bit --codebook E8P12RVQ3B --base_model mistralai/Mistral-7B-v0.1 --hessian_path $HESS/mistral_7b_4096 >> $LOG/mistral_7b_e8prvq_3bit 2>&1 -python quantize_llama.py --no_eval --quip_tune_iters 0 --save_path $CKPT/openhermes_7b_e8prvq_3bit --codebook E8P12RVQ3B --base_model teknium/OpenHermes-2.5-Mistral-7B --hessian_path $HESS/openhermes_7b_4096 >> $LOG/openhermes_7b_e8prvq_3bit 2>&1 - - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_e8prvq_3bit --hf_output_path $HF/2_70b_e8prvq_3bit & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_70b_chat_e8prvq_3bit --hf_output_path $HF/2_70b_chat_e8prvq_3bit & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/1_65b_e8prvq_3bit --hf_output_path $HF/1_65b_e8prvq_3bit & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/1_30b_e8prvq_3bit --hf_output_path $HF/1_30b_e8prvq_3bit & -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_13b_e8prvq_3bit --hf_output_path $HF/2_13b_e8prvq_3bit & -CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/2_13b_chat_e8prvq_3bit --hf_output_path $HF/2_13b_chat_e8prvq_3bit & -CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/1_13b_e8prvq_3bit --hf_output_path $HF/1_13b_e8prvq_3bit & -CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/2_7b_e8prvq_3bit --hf_output_path $HF/2_7b_e8prvq_3bit & -wait - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_7b_chat_e8prvq_3bit --hf_output_path $HF/2_7b_chat_e8prvq_3bit & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/1_7b_e8prvq_3bit --hf_output_path $HF/1_7b_e8prvq_3bit & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/mistral_7b_e8prvq_3bit --hf_output_path $HF/mistral_7b_e8prvq_3bit & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/openhermes_7b_e8prvq_3bit --hf_output_path $HF/openhermes_7b_e8prvq_3bit & - -wait - -# perplexity -CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --seqlen 4096 --hf_path $HF/2_70b_e8prvq_3bit >> $LOG/2_70b_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --seqlen 4096 --hf_path $HF/2_13b_e8prvq_3bit >> $LOG/2_13b_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --seqlen 4096 --hf_path $HF/2_7b_e8prvq_3bit >> $LOG/2_7b_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --seqlen 4096 --hf_path $HF/2_70b_chat_e8prvq_3bit >> $LOG/2_70b_chat_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --seqlen 4096 --hf_path $HF/2_13b_chat_e8prvq_3bit >> $LOG/2_13b_chat_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --seqlen 4096 --hf_path $HF/2_7b_chat_e8prvq_3bit >> $LOG/2_7b_chat_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_65b_e8prvq_3bit >> $LOG/1_65b_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_30b_e8prvq_3bit >> $LOG/1_30b_e8prvq_3bit 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_13b_e8prvq_3bit >> $LOG/1_13b_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_7b_e8prvq_3bit >> $LOG/1_7b_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --no_use_cuda_graph --seqlen 8192 --hf_path $HF/mistral_7b_e8prvq_3bit >> $LOG/mistral_7b_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --no_use_cuda_graph --seqlen 8192 --hf_path $HF/openhermes_7b_e8prvq_3bit >> $LOG/openhermes_7b_e8prvq_3bit 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8prvq_3bit >> $LOG/2_70b_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_e8prvq_3bit >> $LOG/2_13b_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_e8prvq_3bit >> $LOG/2_7b_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_chat_e8prvq_3bit >> $LOG/2_70b_chat_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_chat_e8prvq_3bit >> $LOG/2_13b_chat_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_chat_e8prvq_3bit >> $LOG/2_7b_chat_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_e8prvq_3bit >> $LOG/1_65b_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_e8prvq_3bit >> $LOG/1_30b_e8prvq_3bit 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_e8prvq_3bit >> $LOG/1_13b_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_e8prvq_3bit >> $LOG/1_7b_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/mistral_7b_e8prvq_3bit >> $LOG/mistral_7b_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/openhermes_7b_e8prvq_3bit >> $LOG/openhermes_7b_e8prvq_3bit 2>&1 & - -wait - diff --git a/sbatch/rvq4b.sh b/sbatch/rvq4b.sh deleted file mode 100644 index 9515152..0000000 --- a/sbatch/rvq4b.sh +++ /dev/null @@ -1,79 +0,0 @@ -#!/bin/bash - -CKPT=/mnt/desa_data/checkpoints/rvq -HF=/mnt/desa_data/hfized/rvq -HESS=/mnt/desa_data/hessians -LOG=/mnt/desa_data/logs/rvq - -mkdir $CKPT -mkdir $HF -mkdir $LOG - -python quantize_llama.py --quip_tune_iters 0 --scale_override 0.9 --resid_scale_override 3.6 --save_path $CKPT/2_70b_e8prvq_4bit --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 >> $LOG/2_70b_e8prvq_4bit 2>&1 -python quantize_llama.py --quip_tune_iters 0 --scale_override 0.9 --resid_scale_override 3.45 --save_path $CKPT/2_13b_e8prvq_4bit --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144 >> $LOG/2_13b_e8prvq_4bit 2>&1 -python quantize_llama.py --quip_tune_iters 0 --scale_override 0.9 --resid_scale_override 3.6 --save_path $CKPT/2_7b_e8prvq_4bit --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144 >> $LOG/2_7b_e8prvq_4bit 2>&1 -python quantize_llama.py --quip_tune_iters 0 --scale_override 0.9 --resid_scale_override 3.6 --save_path $CKPT/2_70b_chat_e8prvq_4bit --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-70b-chat-hf --hessian_path $HESS/llama2_70b_chat_6144 >> $LOG/2_70b_chat_e8prvq_4bit 2>&1 -python quantize_llama.py --quip_tune_iters 0 --scale_override 1.03 --resid_scale_override 3.3 --save_path $CKPT/2_13b_chat_e8prvq_4bit --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-13b-chat-hf --hessian_path $HESS/llama2_13b_chat_6144 >> $LOG/2_13b_chat_e8prvq_4bit 2>&1 -python quantize_llama.py --quip_tune_iters 0 --scale_override 1.03 --resid_scale_override 3.6 --save_path $CKPT/2_7b_chat_e8prvq_4bit --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-7b-chat-hf --hessian_path $HESS/llama2_7b_chat_6144 >> $LOG/2_7b_chat_e8prvq_4bit 2>&1 -python quantize_llama.py --quip_tune_iters 0 --scale_override 0.9 --resid_scale_override 3.45 --save_path $CKPT/1_65b_e8prvq_4bit --codebook E8P12RVQ4B --base_model relaxml/Llama-1-65b-hf --hessian_path $HESS/llama1_65b_6144 >> $LOG/1_65b_e8prvq_4bit 2>&1 -python quantize_llama.py --quip_tune_iters 0 --scale_override 0.9 --resid_scale_override 3.45 --save_path $CKPT/1_30b_e8prvq_4bit --codebook E8P12RVQ4B --base_model relaxml/Llama-1-30b-hf --hessian_path $HESS/llama1_30b_6144 >> $LOG/1_30b_e8prvq_4bit 2>&1 -python quantize_llama.py --quip_tune_iters 0 --scale_override 0.9 --resid_scale_override 3.6 --save_path $CKPT/1_13b_e8prvq_4bit --codebook E8P12RVQ4B --base_model relaxml/Llama-1-13b-hf --hessian_path $HESS/llama1_13b_6144 >> $LOG/1_13b_e8prvq_4bit 2>&1 -python quantize_llama.py --quip_tune_iters 0 --scale_override 0.85 --resid_scale_override 3.45 --save_path $CKPT/1_7b_e8prvq_4bit --codebook E8P12RVQ4B --base_model relaxml/Llama-1-7b-hf --hessian_path $HESS/llama1_7b_6144 >> $LOG/1_7b_e8prvq_4bit 2>&1 -python quantize_llama.py --quip_tune_iters 0 --scale_override 0.9 --resid_scale_override 3.3 --save_path $CKPT/mistral_7b_e8prvq_4bit --codebook E8P12RVQ4B --base_model mistralai/Mistral-7B-v0.1 --hessian_path $HESS/mistral_7b_4096 >> $LOG/mistral_7b_e8prvq_4bit 2>&1 -python quantize_llama.py --quip_tune_iters 0 --scale_override 1.03 --resid_scale_override 3.3 --save_path $CKPT/openhermes_7b_e8prvq_4bit --codebook E8P12RVQ4B --base_model teknium/OpenHermes-2.5-Mistral-7B --hessian_path $HESS/openhermes_7b_4096 >> $LOG/openhermes_7b_e8prvq_4bit 2>&1 - - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_e8prvq_4bit --hf_output_path $HF/2_70b_e8prvq_4bit & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_70b_chat_e8prvq_4bit --hf_output_path $HF/2_70b_chat_e8prvq_4bit & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/1_65b_e8prvq_4bit --hf_output_path $HF/1_65b_e8prvq_4bit & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/1_30b_e8prvq_4bit --hf_output_path $HF/1_30b_e8prvq_4bit & -CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_13b_e8prvq_4bit --hf_output_path $HF/2_13b_e8prvq_4bit & -CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/2_13b_chat_e8prvq_4bit --hf_output_path $HF/2_13b_chat_e8prvq_4bit & -CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/1_13b_e8prvq_4bit --hf_output_path $HF/1_13b_e8prvq_4bit & -CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/2_7b_e8prvq_4bit --hf_output_path $HF/2_7b_e8prvq_4bit & -wait - -CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_7b_chat_e8prvq_4bit --hf_output_path $HF/2_7b_chat_e8prvq_4bit & -CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/1_7b_e8prvq_4bit --hf_output_path $HF/1_7b_e8prvq_4bit & -CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/mistral_7b_e8prvq_4bit --hf_output_path $HF/mistral_7b_e8prvq_4bit & -CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/openhermes_7b_e8prvq_4bit --hf_output_path $HF/openhermes_7b_e8prvq_4bit & - -wait - -# perplexity -CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --seqlen 4096 --hf_path $HF/2_70b_e8prvq_4bit >> $LOG/2_70b_e8prvq_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --seqlen 4096 --hf_path $HF/2_13b_e8prvq_4bit >> $LOG/2_13b_e8prvq_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --seqlen 4096 --hf_path $HF/2_7b_e8prvq_4bit >> $LOG/2_7b_e8prvq_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --seqlen 4096 --hf_path $HF/2_70b_chat_e8prvq_4bit >> $LOG/2_70b_chat_e8prvq_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --seqlen 4096 --hf_path $HF/2_13b_chat_e8prvq_4bit >> $LOG/2_13b_chat_e8prvq_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --seqlen 4096 --hf_path $HF/2_7b_chat_e8prvq_4bit >> $LOG/2_7b_chat_e8prvq_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_65b_e8prvq_4bit >> $LOG/1_65b_e8prvq_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_30b_e8prvq_4bit >> $LOG/1_30b_e8prvq_4bit 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_13b_e8prvq_4bit >> $LOG/1_13b_e8prvq_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_7b_e8prvq_4bit >> $LOG/1_7b_e8prvq_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --no_use_cuda_graph --seqlen 8192 --hf_path $HF/mistral_7b_e8prvq_4bit >> $LOG/mistral_7b_e8prvq_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --no_use_cuda_graph --seqlen 8192 --hf_path $HF/openhermes_7b_e8prvq_4bit >> $LOG/openhermes_7b_e8prvq_4bit 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8prvq_4bit >> $LOG/2_70b_e8prvq_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_e8prvq_4bit >> $LOG/2_13b_e8prvq_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_e8prvq_4bit >> $LOG/2_7b_e8prvq_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_chat_e8prvq_4bit >> $LOG/2_70b_chat_e8prvq_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_chat_e8prvq_4bit >> $LOG/2_13b_chat_e8prvq_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_chat_e8prvq_4bit >> $LOG/2_7b_chat_e8prvq_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=6 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_e8prvq_4bit >> $LOG/1_65b_e8prvq_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=7 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_e8prvq_4bit >> $LOG/1_30b_e8prvq_4bit 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_e8prvq_4bit >> $LOG/1_13b_e8prvq_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_e8prvq_4bit >> $LOG/1_7b_e8prvq_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/mistral_7b_e8prvq_4bit >> $LOG/mistral_7b_e8prvq_4bit 2>&1 & -CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/openhermes_7b_e8prvq_4bit >> $LOG/openhermes_7b_e8prvq_4bit 2>&1 & - -wait - diff --git a/sbatch/sc_sweep.sh b/sbatch/sc_sweep.sh deleted file mode 100644 index 4f5232e..0000000 --- a/sbatch/sc_sweep.sh +++ /dev/null @@ -1,85 +0,0 @@ -#!/bin/bash - -CKPT=/mnt/desa_data/checkpoints -HF=/mnt/desa_data/hfized -HESS=/mnt/desa_data/hessians -LOG=/mnt/desa_data/logs -L1=/mnt/desa_data/meta_llama1 - -function sc_sweep { - # NPRE $1 - # BMO $2 - SC_LS=("0.80" "0.85" "0.90" "0.95" "1.00") - NPOST_LS=("080" "085" "090" "095" "100") - for idx in "${!SC_LS[@]}" - do - python quantize_llama.py --save_path $CKPT/${1}_e8p_2bit_sc${NPOST_LS[$idx]} --codebook E8P12 --scale_override ${SC_LS[$idx]} \ - --base_model meta-llama/$2 --hessian_path $HESS/llama${1}_6144 >> $LOG/${1}_e8p_2bit_sc${NPOST_LS[$idx]} 2>&1 - done - for idx in "${!SC_LS[@]}" - do - CUDA_VISIBLE_DEVICES=$idx python hfize_llama.py --quantized_path $CKPT/${1}_e8p_2bit_sc${NPOST_LS[$idx]} \ - --hf_output_path $HF/${1}_e8p_2bit_sc${NPOST_LS[$idx]} & - done - wait - # perplexity - for idx in "${!SC_LS[@]}" - do - CUDA_VISIBLE_DEVICES=$idx python ppl_llama.py --seqlen 4096 --hf_path $HF/${1}_e8p_2bit_sc${NPOST_LS[$idx]} \ - >> $LOG/${1}_e8p_2bit_sc${NPOST_LS[$idx]} 2>&1 & - done - wait - # zeroshot - for idx in "${!SC_LS[@]}" - do - CUDA_VISIBLE_DEVICES=$idx python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 \ - --hf_path $HF/${1}_e8p_2bit_sc${NPOST_LS[$idx]} >> $LOG/${1}_e8p_2bit_sc${NPOST_LS[$idx]} 2>&1 & - done -} - -# sc_sweep "2_70b" "Llama-2-70b-hf" -# sc_sweep "2_13b" "Llama-2-13b-hf" -# sc_sweep "2_7b" "Llama-2-7b-hf" -# -# sc_sweep "2_70b_chat" "Llama-2-70b-chat-hf" -# sc_sweep "2_13b_chat" "Llama-2-13b-chat-hf" -# sc_sweep "2_7b_chat" "Llama-2-7b-chat-hf" - -function sc_sweep_hi { - # NPRE $1 - # BMO $2 - SC_LS=("2.4" "2.55" "2.7" "2.85" "3") - NPOST_LS=("240" "255" "270" "285" "300") - for idx in "${!SC_LS[@]}" - do - python quantize_llama.py --save_path $CKPT/${1}_hi_4bit_sc${NPOST_LS[$idx]} --codebook HI4B1C --scale_override ${SC_LS[$idx]} \ - --base_model meta-llama/$2 --hessian_path $HESS/llama${1}_6144 >> $LOG/${1}_hi_4bit_sc${NPOST_LS[$idx]} 2>&1 - done - for idx in "${!SC_LS[@]}" - do - CUDA_VISIBLE_DEVICES=$idx python hfize_llama.py --quantized_path $CKPT/${1}_hi_4bit_sc${NPOST_LS[$idx]} \ - --hf_output_path $HF/${1}_hi_4bit_sc${NPOST_LS[$idx]} & - done - wait - # perplexity - for idx in "${!SC_LS[@]}" - do - CUDA_VISIBLE_DEVICES=$idx python ppl_llama.py --seqlen 4096 --hf_path $HF/${1}_hi_4bit_sc${NPOST_LS[$idx]} \ - >> $LOG/${1}_hi_4bit_sc${NPOST_LS[$idx]} 2>&1 & - done - wait - # zeroshot - for idx in "${!SC_LS[@]}" - do - CUDA_VISIBLE_DEVICES=$idx python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 \ - --hf_path $HF/${1}_hi_4bit_sc${NPOST_LS[$idx]} >> $LOG/${1}_hi_4bit_sc${NPOST_LS[$idx]} 2>&1 & - done -} - -sc_sweep_hi "2_70b" "Llama-2-70b-hf" -sc_sweep_hi "2_13b" "Llama-2-13b-hf" -sc_sweep_hi "2_7b" "Llama-2-7b-hf" - -sc_sweep_hi "2_70b_chat" "Llama-2-70b-chat-hf" -sc_sweep_hi "2_13b_chat" "Llama-2-13b-chat-hf" -sc_sweep_hi "2_7b_chat" "Llama-2-7b-chat-hf" \ No newline at end of file diff --git a/sbatch/zeroshot_falcon.sh b/sbatch/zeroshot_falcon.sh deleted file mode 100644 index 2c95409..0000000 --- a/sbatch/zeroshot_falcon.sh +++ /dev/null @@ -1,10 +0,0 @@ -HF=/mnt/desa_data/hfized -LOG=/mnt/desa_data/logs - -CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/falcon_180b_e8p_2bit/ >> $LOG/falcon_180b_e8p_2bit 2>&1 & -CUDA_VISIBLE_DEVICES=5 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/falcon_180b_e8prvq_3bit/ >> $LOG/falcon_180b_e8prvq_3bit 2>&1 & -CUDA_VISIBLE_DEVICES=6,7 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/falcon_180b_e8prvq_4bit/ >> $LOG/falcon_180b_e8prvq_4bit 2>&1 & - -wait - -CUDA_VISIBLE_DEVICES=2,3,4,5,6,7 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path tiiuae/falcon-180B >> $LOG/falcon_180b_fp16 2>&1