diff --git a/sbatch/D4_bits_sweep.sh b/sbatch/D4_bits_sweep.sh
deleted file mode 100644
index d6e6a56..0000000
--- a/sbatch/D4_bits_sweep.sh
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/bin/bash
-
-CKPT=/mnt/jerry_data/checkpoints
-HF=/mnt/jerry_data/hfized
-HESS=/mnt/jerry_data/hessians
-LOG=/mnt/jerry_data/logs
-L1=/mnt/jerry_data/meta_llama1
-
-
-python quantize_llama.py --save_path $CKPT/2_70b_d4_2bit_nolr --codebook D4 --lora_rank 0 --scale_override 1.1 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 >> $LOG/2_70b_d4_2bit_nolr 2>&1
-python quantize_llama.py --save_path $CKPT/2_70b_d4_221bit_nolr --codebook D4221B --lora_rank 0 --scale_override 1.2 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 >> $LOG/2_70b_d4_221bit_nolr 2>&1
-python quantize_llama.py --save_path $CKPT/2_70b_d4_234bit_nolr --codebook D4234B --lora_rank 0 --scale_override 1.4 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 >> $LOG/2_70b_d4_234bit_nolr 2>&1
-python quantize_llama.py --save_path $CKPT/2_70b_d4_248bit_nolr --codebook D4248B --lora_rank 0 --scale_override 1.4 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 >> $LOG/2_70b_d4_248bit_nolr 2>&1
-python quantize_llama.py --save_path $CKPT/2_70b_d4_274bit_nolr --codebook D4274B --lora_rank 0 --scale_override 1.6 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 >> $LOG/2_70b_d4_274bit_nolr 2>&1
-python quantize_llama.py --save_path $CKPT/2_70b_d4_299bit_nolr --codebook D4299B --lora_rank 0 --scale_override 1.9 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 >> $LOG/2_70b_d4_299bit_nolr 2>&1
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_d4_2bit_nolr --hf_output_path $HF/2_70b_d4_2bit_nolr & 
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_70b_d4_221bit_nolr --hf_output_path $HF/2_70b_d4_221bit_nolr & 
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_70b_d4_234bit_nolr --hf_output_path $HF/2_70b_d4_234bit_nolr & 
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_70b_d4_248bit_nolr --hf_output_path $HF/2_70b_d4_248bit_nolr & 
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_70b_d4_274bit_nolr --hf_output_path $HF/2_70b_d4_274bit_nolr & 
-CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/2_70b_d4_299bit_nolr --hf_output_path $HF/2_70b_d4_299bit_nolr & 
-
-wait
-
-# perplexity
-CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --seqlen 4096 --hf_path $HF/2_70b_d4_2bit_nolr >> $LOG/2_70b_d4_2bit_nolr 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --seqlen 4096 --hf_path $HF/2_70b_d4_221bit_nolr >> $LOG/2_70b_d4_221bit_nolr 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --seqlen 4096 --hf_path $HF/2_70b_d4_234bit_nolr >> $LOG/2_70b_d4_234bit_nolr 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --seqlen 4096 --hf_path $HF/2_70b_d4_248bit_nolr >> $LOG/2_70b_d4_248bit_nolr 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python ppl_llama.py --seqlen 4096 --hf_path $HF/2_70b_d4_274bit_nolr >> $LOG/2_70b_d4_274bit_nolr 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python ppl_llama.py --seqlen 4096 --hf_path $HF/2_70b_d4_299bit_nolr >> $LOG/2_70b_d4_299bit_nolr 2>&1 &
-
-wait
-
-# zero shot
-CUDA_VISIBLE_DEVICES=0 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_d4_2bit_nolr >> $LOG/2_70b_d4_2bit_nolr 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_d4_221bit_nolr >> $LOG/2_70b_d4_221bit_nolr 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_d4_234bit_nolr >> $LOG/2_70b_d4_234bit_nolr 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_d4_248bit_nolr >> $LOG/2_70b_d4_248bit_nolr 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_d4_274bit_nolr >> $LOG/2_70b_d4_274bit_nolr 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_d4_299bit_nolr >> $LOG/2_70b_d4_299bit_nolr 2>&1 &
-
-wait
diff --git a/sbatch/chat_4bit_packed.sh b/sbatch/chat_4bit_packed.sh
deleted file mode 100644
index 2a015c7..0000000
--- a/sbatch/chat_4bit_packed.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/bash
-
-CKPT=/mnt/desa_data/checkpoints
-HF=/mnt/desa_data/hfized
-HESS=/mnt/desa_data/hessians
-LOG=/mnt/desa_data/logs
-
-
-CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6 python quantize_llama.py --save_path $CKPT/2_70b_chat_hi_4bit_packed --codebook HI4B1C --scale_override 2.7 --base_model meta-llama/Llama-2-70b-chat-hf --hessian_path $HESS/llama2_70b_chat_6144 >> $LOG/2_70b_chat_hi_4bit_packed 2>&1
-CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6 python quantize_llama.py --save_path $CKPT/2_13b_chat_hi_4bit_packed --codebook HI4B1C --scale_override 2.7 --base_model meta-llama/Llama-2-13b-chat-hf --hessian_path $HESS/llama2_13b_chat_6144 >> $LOG/2_13b_chat_hi_4bit_packed 2>&1
-CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6 python quantize_llama.py --save_path $CKPT/2_7b_chat_hi_4bit_packed --codebook HI4B1C --scale_override 2.7 --base_model meta-llama/Llama-2-7b-chat-hf --hessian_path $HESS/llama2_7b_chat_6144 >> $LOG/2_7b_chat_hi_4bit_packed 2>&1
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_chat_hi_4bit_packed --hf_output_path $HF/2_70b_chat_hi_4bit_packed >> $LOG/2_70b_chat_hi_4bit_packed 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_13b_chat_hi_4bit_packed --hf_output_path $HF/2_13b_chat_hi_4bit_packed >> $LOG/2_13b_chat_hi_4bit_packed 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_7b_chat_hi_4bit_packed --hf_output_path $HF/2_7b_chat_hi_4bit_packed >> $LOG/2_7b_chat_hi_4bit_packed 2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --hf_path $HF/2_70b_chat_hi_4bit_packed >> $LOG/2_70b_chat_hi_4bit_packed 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --hf_path $HF/2_13b_chat_hi_4bit_packed >> $LOG/2_13b_chat_hi_4bit_packed 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --hf_path $HF/2_7b_chat_hi_4bit_packed >> $LOG/2_7b_chat_hi_4bit_packed 2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_llama.py  --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_chat_hi_4bit_packed >> $LOG/2_70b_chat_hi_4bit_packed 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_llama.py  --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_chat_hi_4bit_packed >> $LOG/2_13b_chat_hi_4bit_packed 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_llama.py  --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_chat_hi_4bit_packed >> $LOG/2_7b_chat_hi_4bit_packed 2>&1 &
-
-wait
diff --git a/sbatch/e8237b.sh b/sbatch/e8237b.sh
deleted file mode 100644
index 67de8e1..0000000
--- a/sbatch/e8237b.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/bin/bash
-
-CKPT=/mnt/jerry_data/checkpoints
-HF=/mnt/jerry_data/hfized
-HESS=/mnt/jerry_data/hessians
-LOG=/mnt/jerry_data/logs
-
-NAME=2_70b_e8_237bit_nolr
-
-CUDA_VISIBLE_DEVICES=4,5,6,7 python quantize_llama.py --save_path $CKPT/$NAME --codebook E8237B --scale_override 1.13 --lora_rank 0 --hessian_path $HESS/llama2_70b_6144 >> $LOG/$NAME 2>&1
-
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/$NAME --hf_output_path $HF/$NAME
-
-CUDA_VISIBLE_DEVICES=4 python ppl_llama.py --hf_path $HF/$NAME >> $LOG/$NAME 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/$NAME >> $LOG/$NAME 2>&1 &
-
-wait
-
diff --git a/sbatch/e8p_requant.sh b/sbatch/e8p_requant.sh
deleted file mode 100644
index 2a4ab09..0000000
--- a/sbatch/e8p_requant.sh
+++ /dev/null
@@ -1,79 +0,0 @@
-#!/bin/bash
-
-CKPT=/mnt/desa_data/checkpoints/new_e8p
-HF=/mnt/desa_data/hfized/new_e8p
-LOG=/mnt/desa_data/logs/new_e8p
-HESS=/mnt/desa_data/hessians
-L1=/mnt/desa_data/meta_llama1
-
-mkdir $CKPT
-mkdir $HF
-mkdir $LOG
-
-'''
-# llama 2
-python quantize_llama.py --save_path $CKPT/2_70b_e8p_2bit --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 >> $LOG/2_70b_e8p_2bit 2>&1
-python quantize_llama.py --save_path $CKPT/2_13b_e8p_2bit --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144 >> $LOG/2_13b_e8p_2bit 2>&1
-python quantize_llama.py --save_path $CKPT/2_7b_e8p_2bit --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144 >> $LOG/2_7b_e8p_2bit 2>&1
-# llama 2 chat
-python quantize_llama.py --save_path $CKPT/2_70b_chat_e8p_2bit --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-70b-chat-hf --hessian_path $HESS/llama2_70b_chat_6144 >> $LOG/2_70b_chat_e8p_2bit 2>&1
-python quantize_llama.py --save_path $CKPT/2_13b_chat_e8p_2bit --codebook E8P12 --scale_override 1.0 --base_model meta-llama/Llama-2-13b-chat-hf --hessian_path $HESS/llama2_13b_chat_6144 >> $LOG/2_13b_chat_e8p_2bit 2>&1
-python quantize_llama.py --save_path $CKPT/2_7b_chat_e8p_2bit --codebook E8P12 --scale_override 1.0 --base_model meta-llama/Llama-2-7b-chat-hf --hessian_path $HESS/llama2_7b_chat_6144 >> $LOG/2_7b_chat_e8p_2bit 2>&1
-# llama 1 
-python quantize_llama.py --save_path $CKPT/1_65b_e8p_2bit --codebook E8P12 --scale_override 0.9 --base_model relaxml/Llama-1-65b-hf --hessian_path $HESS/llama1_65b_6144 >> $LOG/1_65b_e8p_2bit 2>&1
-python quantize_llama.py --save_path $CKPT/1_30b_e8p_2bit --codebook E8P12 --scale_override 0.9 --base_model relaxml/Llama-1-30b-hf --hessian_path $HESS/llama1_30b_6144 >> $LOG/1_30b_e8p_2bit 2>&1
-python quantize_llama.py --save_path $CKPT/1_13b_e8p_2bit --codebook E8P12 --scale_override 0.9 --base_model relaxml/Llama-1-13b-hf --hessian_path $HESS/llama1_13b_6144 >> $LOG/1_13b_e8p_2bit 2>&1
-python quantize_llama.py --save_path $CKPT/1_7b_e8p_2bit --codebook E8P12 --scale_override 0.9 --base_model relaxml/Llama-1-7b-hf --hessian_path $HESS/llama1_7b_6144 >> $LOG/1_7b_e8p_2bit 2>&1
-# mistral hermes
-python quantize_llama.py --save_path $CKPT/mistral_7b_e8p_2bit --codebook E8P12 --scale_override 0.9 --base_model mistralai/Mistral-7B-v0.1 --hessian_path $HESS/mistral_7b_4096 >> $LOG/mistral_7b_e8p_2bit 2>&1
-python quantize_llama.py --save_path $CKPT/openhermes_7b_e8p_2bit --codebook E8P12 --scale_override 0.9 --base_model teknium/OpenHermes-2.5-Mistral-7B --hessian_path $HESS/openhermes_7b_4096 >> $LOG/openhermes_7b_e8p_2bit 2>&1
-
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_e8p_2bit --hf_output_path $HF/2_70b_e8p_2bit >> $LOG/2_70b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_13b_e8p_2bit --hf_output_path $HF/2_13b_e8p_2bit >> $LOG/2_13b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_7b_e8p_2bit --hf_output_path $HF/2_7b_e8p_2bit >> $LOG/2_7b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_70b_chat_e8p_2bit --hf_output_path $HF/2_70b_chat_e8p_2bit >> $LOG/2_70b_chat_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_13b_chat_e8p_2bit --hf_output_path $HF/2_13b_chat_e8p_2bit >> $LOG/2_13b_chat_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/2_7b_chat_e8p_2bit --hf_output_path $HF/2_7b_chat_e8p_2bit >> $LOG/2_7b_chat_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/1_65b_e8p_2bit --hf_output_path $HF/1_65b_e8p_2bit >> $LOG/1_65b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/1_30b_e8p_2bit --hf_output_path $HF/1_30b_e8p_2bit >> $LOG/1_30b_e8p_2bit 2>&1 &
-
-wait
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/1_13b_e8p_2bit --hf_output_path $HF/1_13b_e8p_2bit >> $LOG/1_13b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/1_7b_e8p_2bit --hf_output_path $HF/1_7b_e8p_2bit >> $LOG/1_7b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/mistral_7b_e8p_2bit --hf_output_path $HF/mistral_7b_e8p_2bit >> $LOG/mistral_7b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/openhermes_7b_e8p_2bit --hf_output_path $HF/openhermes_7b_e8p_2bit >> $LOG/openhermes_7b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/2_70b_e8p_2bit >> $LOG/2_70b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --hf_path $HF/2_13b_e8p_2bit >> $LOG/2_13b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --hf_path $HF/2_7b_e8p_2bit >> $LOG/2_7b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --hf_path $HF/2_70b_chat_e8p_2bit >> $LOG/2_70b_chat_e8p_2bit 2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/mistral_7b_e8p_2bit >> $LOG/mistral_7b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path $HF/openhermes_7b_e8p_2bit >> $LOG/openhermes_7b_e8p_2bit 2>&1 &
-
-wait
-'''
-
-CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_65b_e8p_2bit >> $LOG/1_65b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_30b_e8p_2bit >> $LOG/1_30b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_13b_e8p_2bit >> $LOG/1_13b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_7b_e8p_2bit >> $LOG/1_7b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_chat_e8p_2bit    >> $LOG/2_7b_chat_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_e8p_2bit    >> $LOG/1_7b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/mistral_7b_e8p_2bit    >> $LOG/mistral_7b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/openhermes_7b_e8p_2bit  >> $LOG/openhermes_7b_e8p_2bit 2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit    >> $LOG/2_70b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_e8p_2bit    >> $LOG/2_13b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_e8p_2bit    >> $LOG/2_7b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_chat_e8p_2bit    >> $LOG/2_70b_chat_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_chat_e8p_2bit    >> $LOG/2_13b_chat_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_e8p_2bit    >> $LOG/1_13b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_e8p_2bit    >> $LOG/1_65b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_e8p_2bit    >> $LOG/1_30b_e8p_2bit 2>&1 &
-
-wait
diff --git a/sbatch/finetune.sh b/sbatch/finetune.sh
deleted file mode 100644
index 8b11848..0000000
--- a/sbatch/finetune.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/bin/bash
-
-CKPT=/mnt/desa_data/checkpoints/finetune_albert
-HF=/mnt/desa_data/hfized/finetune_albert
-HESS=/mnt/desa_data/hessians
-LOG=/mnt/desa_data/logs/finetune_albert
-
-mkdir $CKPT
-mkdir $HF
-mkdir $LOG
-
-CUDA_VISIBLE_DEVICES=0,1 python quantize_llama_finetune.py --save_path $CKPT/2_70b_2bit  --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144  --devset_size 768 --ddp_port 12345 >> $LOG/2_70b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2,3 python quantize_llama_finetune.py --save_path $CKPT/2_70b_3bit  --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144  --devset_size 768 --ddp_port 12346 >> $LOG/2_70b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4,5 python quantize_llama_finetune.py --save_path $CKPT/1_65b_2bit  --codebook E8P12 --scale_override 0.9 --base_model relaxml/Llama-1-65b-hf --hessian_path $HESS/llama1_65b_6144  --devset_size 768 --ddp_port 12347 >> $LOG/1_65b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6,7 python quantize_llama_finetune.py --save_path $CKPT/1_65b_3bit  --codebook E8P12RVQ3B --base_model relaxml/Llama-1-65b-hf --hessian_path $HESS/llama1_65b_6144  --devset_size 768 --ddp_port 12348 >> $LOG/1_65b_3bit 2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python quantize_llama_finetune.py --save_path $CKPT/2_13b_2bit  --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144  --devset_size 768 --ddp_port 12345 >> $LOG/2_13b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python quantize_llama_finetune.py --save_path $CKPT/2_13b_3bit  --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144  --devset_size 768 --ddp_port 12346 >> $LOG/2_13b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python quantize_llama_finetune.py --save_path $CKPT/1_13b_2bit  --codebook E8P12 --scale_override 0.9 --base_model relaxml/Llama-1-13b-hf --hessian_path $HESS/llama1_13b_6144  --devset_size 768 --ddp_port 12347 >> $LOG/1_13b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python quantize_llama_finetune.py --save_path $CKPT/1_13b_3bit  --codebook E8P12RVQ3B --base_model relaxml/Llama-1-13b-hf --hessian_path $HESS/llama1_13b_6144  --devset_size 768 --ddp_port 12348 >> $LOG/1_13b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4,5 python quantize_llama_finetune.py --save_path $CKPT/1_30b_2bit  --codebook E8P12 --scale_override 0.9 --base_model relaxml/Llama-1-30b-hf --hessian_path $HESS/llama1_30b_6144  --devset_size 768 --ddp_port 12349 >> $LOG/1_30b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6,7 python quantize_llama_finetune.py --save_path $CKPT/1_30b_3bit  --codebook E8P12RVQ3B --base_model relaxml/Llama-1-30b-hf --hessian_path $HESS/llama1_30b_6144  --devset_size 768 --ddp_port 12350 >> $LOG/1_30b_3bit 2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python quantize_llama_finetune.py --save_path $CKPT/2_7b_2bit  --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144  --devset_size 768 --ddp_port 12345 >> $LOG/2_7b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python quantize_llama_finetune.py --save_path $CKPT/2_7b_3bit  --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144  --devset_size 768 --ddp_port 12346 >> $LOG/2_7b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python quantize_llama_finetune.py --save_path $CKPT/1_7b_2bit  --codebook E8P12 --scale_override 0.9 --base_model relaxml/Llama-1-7b-hf --hessian_path $HESS/llama1_7b_6144  --devset_size 768 --ddp_port 12347 >> $LOG/1_7b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python quantize_llama_finetune.py --save_path $CKPT/1_7b_3bit  --codebook E8P12RVQ3B --base_model relaxml/Llama-1-7b-hf --hessian_path $HESS/llama1_7b_6144  --devset_size 768 --ddp_port 12348 >> $LOG/1_7b_3bit 2>&1 &
-
-CUDA_VISIBLE_DEVICES=4 python quantize_llama_finetune.py --save_path $CKPT/2_7b_4bit  --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144  --devset_size 768 --ddp_port 12349 >> $LOG/2_7b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python quantize_llama_finetune.py --save_path $CKPT/1_7b_4bit  --codebook E8P12RVQ4B --base_model relaxml/Llama-1-7b-hf --hessian_path $HESS/llama1_7b_6144  --devset_size 768 --ddp_port 12350 >> $LOG/1_7b_4bit 2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0,1 python quantize_llama_finetune.py --save_path $CKPT/1_30b_4bit  --codebook E8P12RVQ4B --base_model relaxml/Llama-1-30b-hf --hessian_path $HESS/llama1_30b_6144  --devset_size 768 --ddp_port 12345 >> $LOG/1_30b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2,3 python quantize_llama_finetune.py --save_path $CKPT/2_70b_4bit  --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144  --devset_size 768 --ddp_port 12346 >> $LOG/2_70b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4,5 python quantize_llama_finetune.py --save_path $CKPT/1_65b_4bit  --codebook E8P12RVQ4B --base_model relaxml/Llama-1-65b-hf --hessian_path $HESS/llama1_65b_6144  --devset_size 768 --ddp_port 12347 >> $LOG/1_65b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python quantize_llama_finetune.py --save_path $CKPT/2_13b_4bit  --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144  --devset_size 768 --ddp_port 12351 >> $LOG/2_13b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python quantize_llama_finetune.py --save_path $CKPT/1_13b_4bit  --codebook E8P12RVQ4B --base_model relaxml/Llama-1-13b-hf --hessian_path $HESS/llama1_13b_6144  --devset_size 768 --ddp_port 12352 >> $LOG/1_13b_4bit 2>&1 &
-wait
diff --git a/sbatch/finetune_4bit.sh b/sbatch/finetune_4bit.sh
deleted file mode 100644
index ee5ef75..0000000
--- a/sbatch/finetune_4bit.sh
+++ /dev/null
@@ -1,123 +0,0 @@
-CKPT=/mnt/desa_data/checkpoints/finetune_llama
-HF=/mnt/desa_data/hfized/finetune_llama
-LOG=/mnt/desa_data/logs/finetune_llama
-HESS=/mnt/desa_data/hessians
-'''
-# llama 2 4 bit_scale
-
-python finetune_susv_adam.py --save_path $CKPT/2_70b_4bit_scale --scale_override 0.9 --resid_scale_override 3.6 --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-70b-hf  --hessian_path $HESS/llama2_70b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 --ft_bs 4 --ft_update_freq 2 >> $LOG/2_70b_4bit_scale 2>&1
-
-python finetune_susv_adam.py --save_path $CKPT/2_13b_4bit_scale --scale_override 0.9 --resid_scale_override 3.45 --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-13b-hf  --hessian_path $HESS/llama2_13b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 >> $LOG/2_13b_4bit_scale 2>&1
-
-python finetune_susv_adam.py --save_path $CKPT/2_7b_4bit_scale --scale_override 0.9 --resid_scale_override 3.6 --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-7b-hf  --hessian_path $HESS/llama2_7b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 >> $LOG/2_7b_4bit_scale 2>&1
-
-# llama 1 4 bit_scale
-
-python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_65b_4bit_scale --scale_override 0.9 --resid_scale_override 3.45 --codebook E8P12RVQ4B --base_model relaxml/Llama-1-65b-hf  --hessian_path $HESS/llama1_65b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 --ft_bs 4 --ft_update_freq 2 >> $LOG/1_65b_4bit_scale 2>&1
-
-python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_30b_4bit_scale --scale_override 0.9 --resid_scale_override 3.45 --codebook E8P12RVQ4B --base_model relaxml/Llama-1-30b-hf  --hessian_path $HESS/llama1_30b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 --ft_bs 4 --ft_update_freq 2 >> $LOG/1_30b_4bit_scale 2>&1
-
-python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_13b_4bit_scale --scale_override 0.9 --resid_scale_override 3.6 --codebook E8P12RVQ4B --base_model relaxml/Llama-1-13b-hf  --hessian_path $HESS/llama1_13b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 >> $LOG/1_13b_4bit_scale 2>&1
-
-python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_7b_4bit_scale --scale_override 0.85 --resid_scale_override 3.45 --codebook E8P12RVQ4B --base_model relaxml/Llama-1-7b-hf  --hessian_path $HESS/llama1_7b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 >> $LOG/1_7b_4bit_scale 2>&1
-
-# llama 1 3 bit_scale
-
-python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_65b_3bit_scale --scale_override 0.93 --resid_scale_override 1.99 --codebook E8P12RVQ3B --base_model relaxml/Llama-1-65b-hf  --hessian_path $HESS/llama1_65b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 --ft_bs 4 --ft_update_freq 2 >> $LOG/1_65b_3bit_scale 2>&1
-
-python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_30b_3bit_scale --scale_override 0.93 --resid_scale_override 2.04 --codebook E8P12RVQ3B --base_model relaxml/Llama-1-30b-hf  --hessian_path $HESS/llama1_30b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 --ft_bs 4 --ft_update_freq 2 >> $LOG/1_30b_3bit_scale 2>&1
-
-python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_13b_3bit_scale --scale_override 0.98 --resid_scale_override 2.09 --codebook E8P12RVQ3B --base_model relaxml/Llama-1-13b-hf  --hessian_path $HESS/llama1_13b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 >> $LOG/1_13b_3bit_scale 2>&1
-
-python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_7b_3bit_scale --scale_override 0.93 --resid_scale_override 2.09 --codebook E8P12RVQ3B --base_model relaxml/Llama-1-7b-hf  --hessian_path $HESS/llama1_7b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 >> $LOG/1_7b_3bit_scale 2>&1
-
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_4bit_scale --hf_output_path $HF/2_70b_4bit_scale >> $LOG/2_70b_4bit_scale 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_13b_4bit_scale --hf_output_path $HF/2_13b_4bit_scale >> $LOG/2_13b_4bit_scale 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_7b_4bit_scale  --hf_output_path $HF/2_7b_4bit_scale  >> $LOG/2_7b_4bit_scale  2>&1 &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/1_65b_4bit_scale --hf_output_path $HF/1_65b_4bit_scale >> $LOG/1_65b_4bit_scale 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/1_30b_4bit_scale --hf_output_path $HF/1_30b_4bit_scale >> $LOG/1_30b_4bit_scale 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/1_13b_4bit_scale --hf_output_path $HF/1_13b_4bit_scale >> $LOG/1_13b_4bit_scale 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/1_7b_4bit_scale  --hf_output_path $HF/1_7b_4bit_scale  >> $LOG/1_7b_4bit_scale  2>&1 &
-CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/1_65b_3bit_scale --hf_output_path $HF/1_65b_3bit_scale >> $LOG/1_65b_3bit_scale 2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/1_30b_3bit_scale --hf_output_path $HF/1_30b_3bit_scale >> $LOG/1_30b_3bit_scale 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/1_13b_3bit_scale --hf_output_path $HF/1_13b_3bit_scale >> $LOG/1_13b_3bit_scale 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/1_7b_3bit_scale  --hf_output_path $HF/1_7b_3bit_scale  >> $LOG/1_7b_3bit_scale  2>&1 &
-
-wait
-
-# tune llama 2 4 bit_scale
-python tune_susv_lmhead.py --base_model meta-llama/Llama-2-70b-hf --hf_path $HF/2_70b_4bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 3072 --ft_update_freq 2 --ckpt_path $CKPT/2_70b_4bit_scale >> $LOG/2_70b_4bit_scale 2>&1
-#CUDA_VISIBLE_DEVICES=4,5,6,7 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-13b-hf --hf_path $HF/2_13b_4bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_13b_4bit_scale >> $LOG/2_13b_4bit_scale 2>&1 &
-#CUDA_VISIBLE_DEVICES=0,1,2,3 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-7b-hf --hf_path $HF/2_7b_4bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_7b_4bit_scale >> $LOG/2_7b_4bit_scale 2>&1 &
-wait
-
-
-python tune_susv_lmhead.py --base_model relaxml/Llama-1-65b-hf --hf_path $HF/1_65b_3bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_65b_3bit_scale >> $LOG/1_65b_3bit_scale 2>&1
-python tune_susv_lmhead.py --base_model relaxml/Llama-1-65b-hf --hf_path $HF/1_65b_4bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_65b_4bit_scale >> $LOG/1_65b_4bit_scale 2>&1
-python tune_susv_lmhead.py --base_model relaxml/Llama-1-30b-hf --hf_path $HF/1_30b_3bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_30b_3bit_scale >> $LOG/1_30b_3bit_scale 2>&1
-#python tune_susv_lmhead.py --base_model relaxml/Llama-1-30b-hf --hf_path $HF/1_30b_4bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_30b_4bit_scale >> $LOG/1_30b_4bit_scale 2>&1
-#CUDA_VISIBLE_DEVICES=0,1,2,3 python tune_susv_lmhead.py --base_model relaxml/Llama-1-13b-hf --hf_path $HF/1_13b_3bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_13b_3bit_scale >> $LOG/1_13b_3bit_scale 2>&1 &
-#CUDA_VISIBLE_DEVICES=4,5,6,7 python tune_susv_lmhead.py --base_model relaxml/Llama-1-13b-hf --hf_path $HF/1_13b_4bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_13b_4bit_scale >> $LOG/1_13b_4bit_scale 2>&1 &
-wait
-#CUDA_VISIBLE_DEVICES=4,5,6,7 python tune_susv_lmhead.py --base_model relaxml/Llama-1-7b-hf --hf_path $HF/1_7b_3bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_7b_3bit_scale >> $LOG/1_7b_3bit_scale 2>&1 &
-#CUDA_VISIBLE_DEVICES=0,1,2,3 python tune_susv_lmhead.py --base_model relaxml/Llama-1-7b-hf --hf_path $HF/1_7b_4bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_7b_4bit_scale >> $LOG/1_7b_4bit_scale 2>&1 &
-wait
-
-
-
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_4bit_scale --hf_output_path $HF/2_70b_4bit_scale >> $LOG/2_70b_4bit_scale 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_13b_4bit_scale --hf_output_path $HF/2_13b_4bit_scale >> $LOG/2_13b_4bit_scale 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_7b_4bit_scale  --hf_output_path $HF/2_7b_4bit_scale  >> $LOG/2_7b_4bit_scale  2>&1 &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/1_65b_4bit_scale --hf_output_path $HF/1_65b_4bit_scale >> $LOG/1_65b_4bit_scale 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/1_30b_4bit_scale --hf_output_path $HF/1_30b_4bit_scale >> $LOG/1_30b_4bit_scale 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/1_13b_4bit_scale --hf_output_path $HF/1_13b_4bit_scale >> $LOG/1_13b_4bit_scale 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/1_7b_4bit_scale  --hf_output_path $HF/1_7b_4bit_scale  >> $LOG/1_7b_4bit_scale  2>&1 &
-CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/1_65b_3bit_scale --hf_output_path $HF/1_65b_3bit_scale >> $LOG/1_65b_3bit_scale 2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/1_30b_3bit_scale --hf_output_path $HF/1_30b_3bit_scale >> $LOG/1_30b_3bit_scale 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/1_13b_3bit_scale --hf_output_path $HF/1_13b_3bit_scale >> $LOG/1_13b_3bit_scale 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/1_7b_3bit_scale  --hf_output_path $HF/1_7b_3bit_scale  >> $LOG/1_7b_3bit_scale  2>&1 &
-
-wait
-
-#CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/2_70b_4bit_scale >> $LOG/2_70b_4bit_scale 2>&1 &
-#CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path $HF/2_13b_4bit_scale >> $LOG/2_13b_4bit_scale 2>&1 &
-#CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path $HF/2_7b_4bit_scale  >> $LOG/2_7b_4bit_scale  2>&1 &
-wait
-'''
-CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --hf_path $HF/1_65b_4bit_scale --seqlen 2048 >> $LOG/1_65b_4bit_scale 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/1_30b_4bit_scale --seqlen 2048 >> $LOG/1_30b_4bit_scale 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --hf_path $HF/1_13b_4bit_scale --seqlen 2048 >> $LOG/1_13b_4bit_scale 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --hf_path $HF/1_7b_4bit_scale  --seqlen 2048 >> $LOG/1_7b_4bit_scale  2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --hf_path $HF/1_65b_3bit_scale --seqlen 2048 >> $LOG/1_65b_3bit_scale 2>&1 &					                                                    
-CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/1_30b_3bit_scale --seqlen 2048 >> $LOG/1_30b_3bit_scale 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path $HF/1_13b_3bit_scale --seqlen 2048 >> $LOG/1_13b_3bit_scale 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path $HF/1_7b_3bit_scale  --seqlen 2048 >> $LOG/1_7b_3bit_scale  2>&1 &
-
-wait
-'''
-CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4  --hf_path $HF/2_70b_4bit_scale >> $LOG/2_70b_4bit_scale 2>&1 & 
-CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4  --hf_path $HF/2_13b_4bit_scale >> $LOG/2_13b_4bit_scale 2>&1 & 
-CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4  --hf_path $HF/2_7b_4bit_scale  >> $LOG/2_7b_4bit_scale  2>&1 & 
-CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4  --hf_path $HF/1_65b_4bit_scale >> $LOG/1_65b_4bit_scale 2>&1 & 
-CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4  --hf_path $HF/1_30b_4bit_scale >> $LOG/1_30b_4bit_scale 2>&1 & 
-CUDA_VISIBLE_DEVICES=5 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4  --hf_path $HF/1_13b_4bit_scale >> $LOG/1_13b_4bit_scale 2>&1 & 
-CUDA_VISIBLE_DEVICES=6 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4  --hf_path $HF/1_7b_4bit_scale  >> $LOG/1_7b_4bit_scale  2>&1 & 
-CUDA_VISIBLE_DEVICES=7 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4  --hf_path $HF/1_65b_3bit_scale >> $LOG/1_65b_3bit_scale 2>&1 & 
-														                                                       
-wait														                                                       
-														                                                       
-CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4  --hf_path $HF/1_30b_3bit_scale >> $LOG/1_30b_3bit_scale 2>&1 & 
-CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4  --hf_path $HF/1_13b_3bit_scale >> $LOG/1_13b_3bit_scale 2>&1 & 
-CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4  --hf_path $HF/1_7b_3bit_scale  >> $LOG/1_7b_3bit_scale  2>&1 & 
-wait
-
-
-'''
diff --git a/sbatch/finetune_4bit_cr15.sh b/sbatch/finetune_4bit_cr15.sh
deleted file mode 100644
index cb39b4d..0000000
--- a/sbatch/finetune_4bit_cr15.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-CKPT=/mnt/desa_data/checkpoints/finetune_llama
-HF=/mnt/desa_data/hfized/finetune_llama
-LOG=/mnt/desa_data/logs/finetune_llama
-HESS=/mnt/desa_data/hessians
-
-
-
-CUDA_VISIBLE_DEVICES=4,5,6,7 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-13b-hf --hf_path $HF/2_13b_4bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_13b_4bit_scale >> $LOG/2_13b_4bit_scale 2>&1 &
-CUDA_VISIBLE_DEVICES=2,3 python tune_susv_lmhead.py --base_model relaxml/Llama-1-7b-hf --hf_path $HF/1_7b_4bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_7b_4bit_scale >> $LOG/1_7b_4bit_scale 2>&1 &
-wait
-
-CUDA_VISIBLE_DEVICES=2,3,4,5,6,7 python tune_susv_lmhead.py --base_model relaxml/Llama-1-30b-hf --hf_path $HF/1_30b_4bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_30b_4bit_scale >> $LOG/1_30b_4bit_scale 2>&1
-
-
-CUDA_VISIBLE_DEVICES=2,3,4 python tune_susv_lmhead.py --base_model relaxml/Llama-1-13b-hf --hf_path $HF/1_13b_3bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_13b_3bit_scale >> $LOG/1_13b_3bit_scale 2>&1 &
-CUDA_VISIBLE_DEVICES=5,6,7 python tune_susv_lmhead.py --base_model relaxml/Llama-1-13b-hf --hf_path $HF/1_13b_4bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_13b_4bit_scale >> $LOG/1_13b_4bit_scale 2>&1 &
-wait
-
-CUDA_VISIBLE_DEVICES=5,6,7 python tune_susv_lmhead.py --base_model relaxml/Llama-1-7b-hf --hf_path $HF/1_7b_3bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_7b_3bit_scale >> $LOG/1_7b_3bit_scale 2>&1 &
-CUDA_VISIBLE_DEVICES=2,3,4 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-7b-hf --hf_path $HF/2_7b_4bit_scale --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_7b_4bit_scale >> $LOG/2_7b_4bit_scale 2>&1 &
-wait
-
diff --git a/sbatch/finetune_adam.sh b/sbatch/finetune_adam.sh
deleted file mode 100644
index 16e53bf..0000000
--- a/sbatch/finetune_adam.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-CKPT=/mnt/desa_data/checkpoints/finetune_albert
-HF=/mnt/desa_data/hfized/finetune_albert
-LOG=/mnt/desa_data/logs/finetune_albert
-HESS=/mnt/desa_data/hessians
-
-CUDA_VISIBLE_DEVICES=2,3,4,5,6,7 python finetune_susv_adam.py --save_path $CKPT/2_70b_susv_adam2 --codebook E8P12  --scale_override 0.9 --base_model meta-llama/Llama-2-70b-hf  --hessian_path $HESS/llama2_70b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 --ft_bs 4 --ft_update_freq 2 >> $LOG/2_70b_susv_adam2 2>&1
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_70b_susv_adam2 --hf_output_path $HF/2_70b_susv_adam2 >> $LOG/2_70b_susv_adam2 2>&1
-CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/2_70b_susv_adam2 >> $LOG/2_70b_susv_adam2 2>&1
-
-CUDA_VISIBLE_DEVICES=2,3,4,5,6,7 python finetune_susv_adam.py --save_path $CKPT/2_13b_susv_adam2 --codebook E8P12  --scale_override 0.9 --base_model meta-llama/Llama-2-13b-hf  --hessian_path $HESS/llama2_13b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 >> $LOG/2_13b_susv_adam2 2>&1
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_13b_susv_adam2 --hf_output_path $HF/2_13b_susv_adam2 >> $LOG/2_13b_susv_adam2 2>&1
-CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/2_13b_susv_adam2 >> $LOG/2_13b_susv_adam2 2>&1
-
-CUDA_VISIBLE_DEVICES=2,3,4,5,6,7 python finetune_susv_adam.py --save_path $CKPT/2_7b_susv_adam2 --codebook E8P12  --scale_override 0.9 --base_model meta-llama/Llama-2-7b-hf  --hessian_path $HESS/llama2_7b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 >> $LOG/2_7b_susv_adam2 2>&1
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_7b_susv_adam2 --hf_output_path $HF/2_7b_susv_adam2 >> $LOG/2_7b_susv_adam2 2>&1
-CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/2_7b_susv_adam2 >> $LOG/2_7b_susv_adam2 2>&1
-
-
-
-
diff --git a/sbatch/finetune_adam_early.sh b/sbatch/finetune_adam_early.sh
deleted file mode 100644
index 2a0c19e..0000000
--- a/sbatch/finetune_adam_early.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-CKPT=/mnt/desa_data/checkpoints/finetune_albert
-HF=/mnt/desa_data/hfized/finetune_albert
-LOG=/mnt/desa_data/logs/finetune_albert
-HESS=/mnt/desa_data/hessians
-
-CUDA_VISIBLE_DEVICES=2,3,4,5,6,7 python finetune_susv_adam_early.py --save_path $CKPT/2_70b_susv_adam_early --codebook E8P12  --scale_override 0.9 --base_model meta-llama/Llama-2-70b-hf  --hessian_path $HESS/llama2_70b_6144/ --devset_size 640 --ft_valid_size 128 --ft_epochs 20 --ft_lr 0.00003 --ft_bs 4 --ft_update_freq 2 >> $LOG/2_70b_susv_adam_early 2>&1
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_70b_susv_adam_early --hf_output_path $HF/2_70b_susv_adam_early >> $LOG/2_70b_susv_adam_early 2>&1
-CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/2_70b_susv_adam_early >> $LOG/2_70b_susv_adam_early 2>&1
-
-CUDA_VISIBLE_DEVICES=2,3,4,5,6,7 python finetune_susv_adam_early.py --save_path $CKPT/2_13b_susv_adam_early --codebook E8P12  --scale_override 0.9 --base_model meta-llama/Llama-2-13b-hf  --hessian_path $HESS/llama2_13b_6144/ --devset_size 640 --ft_valid_size 128 --ft_epochs 20 --ft_lr 0.00003 >> $LOG/2_13b_susv_adam_early 2>&1
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_13b_susv_adam_early --hf_output_path $HF/2_13b_susv_adam_early >> $LOG/2_13b_susv_adam_early 2>&1
-CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/2_13b_susv_adam_early >> $LOG/2_13b_susv_adam_early 2>&1
-
-CUDA_VISIBLE_DEVICES=2,3,4,5,6,7 python finetune_susv_adam_early.py --save_path $CKPT/2_7b_susv_adam_early --codebook E8P12  --scale_override 0.9 --base_model meta-llama/Llama-2-7b-hf  --hessian_path $HESS/llama2_7b_6144/ --devset_size 640 --ft_valid_size 128 --ft_epochs 20 --ft_lr 0.00003 >> $LOG/2_7b_susv_adam_early 2>&1
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_7b_susv_adam_early --hf_output_path $HF/2_7b_susv_adam_early >> $LOG/2_7b_susv_adam_early 2>&1
-CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/2_7b_susv_adam_early >> $LOG/2_7b_susv_adam_early 2>&1
-
-
-
-
diff --git a/sbatch/finetune_adamw.sh b/sbatch/finetune_adamw.sh
deleted file mode 100644
index a64c045..0000000
--- a/sbatch/finetune_adamw.sh
+++ /dev/null
@@ -1,57 +0,0 @@
-CKPT=/mnt/desa_data/checkpoints/finetune_llama_adamw
-HF=/mnt/desa_data/hfized/finetune_llama_adamw
-LOG=/mnt/desa_data/logs/finetune_llama_adamw
-HESS=/mnt/desa_data/hessians
-
-python finetune_susv_adam.py --save_path $CKPT/2_70b_2bit --codebook E8P12  --scale_override 0.9 --base_model meta-llama/Llama-2-70b-hf  --hessian_path $HESS/llama2_70b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 8 --ft_lr 0.000025 --ft_opt adamw --ft_bs 4 >> $LOG/2_70b_2bit 2>&1
-
-python finetune_susv_adam.py --save_path $CKPT/2_70b_3bit --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-70b-hf  --hessian_path $HESS/llama2_70b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 8 --ft_lr 0.000025 --ft_opt adamw --ft_bs 4 >> $LOG/2_70b_3bit 2>&1
-
-python finetune_susv_adam.py --save_path $CKPT/2_70b_4bit --codebook E8P12RVQ4B --scale_override 0.9 --resid_scale_override 3.6 --base_model meta-llama/Llama-2-70b-hf  --hessian_path $HESS/llama2_70b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 8 --ft_lr 0.000025 --ft_opt adamw --ft_bs 4 >> $LOG/2_70b_4bit 2>&1
-
-
-python finetune_susv_adam.py --save_path $CKPT/2_13b_4bit --codebook E8P12RVQ4B --scale_override 0.9 --resid_scale_override 3.45 --base_model meta-llama/Llama-2-13b-hf  --hessian_path $HESS/llama2_13b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 8 --ft_lr 0.000025 --ft_opt adamw --ft_bs 4 >> $LOG/2_13b_4bit 2>&1
-
-python finetune_susv_adam.py --save_path $CKPT/2_7b_4bit --codebook E8P12RVQ4B --scale_override 0.9 --resid_scale_override 3.6 --base_model meta-llama/Llama-2-7b-hf  --hessian_path $HESS/llama2_7b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 8 --ft_lr 0.000025 --ft_opt adamw  --ft_bs 4 >> $LOG/2_7b_4bit 2>&1
-
-
-
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_70b_3bit --hf_output_path $HF/2_70b_3bit >> $LOG/2_70b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_70b_2bit --hf_output_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_70b_4bit --hf_output_path $HF/2_70b_4bit >> $LOG/2_70b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/2_13b_4bit --hf_output_path $HF/2_13b_3bit >> $LOG/2_13b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/2_7b_4bit --hf_output_path $HF/2_13b_4bit >> $LOG/2_7b_4bit 2>&1 &
-
-wait
-
-python tune_susv_lmhead.py --base_model meta-llama/Llama-2-70b-hf --hf_path $HF/2_70b_3bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.000001 --ft_bs 1 --ctx_size 3072 --ft_opt adamw --ckpt_path $CKPT/2_70b_3bit >> $LOG/2_70b_3bit 2>&1
-python tune_susv_lmhead.py --base_model meta-llama/Llama-2-70b-hf --hf_path $HF/2_70b_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.000001 --ft_bs 1 --ctx_size 3072 --ft_opt adamw --ckpt_path $CKPT/2_70b_4bit >> $LOG/2_70b_4bit 2>&1
-python tune_susv_lmhead.py --base_model meta-llama/Llama-2-70b-hf --hf_path $HF/2_70b_2bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.000001 --ft_bs 1 --ctx_size 3072 --ft_opt adamw --ckpt_path $CKPT/2_70b_2bit >> $LOG/2_70b_2bit 2>&1
-
-CUDA_VISIBLE_DEVICES=0,1,2,3 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-7b-hf --hf_path $HF/2_7b_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.000001   --ft_opt adamw --ft_bs 1 --ctx_size 4096 --ckpt_path $CKPT/2_7b_4bit >> $LOG/2_7b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4,5,6,7 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-13b-hf --hf_path $HF/2_13b_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.000001 --ft_opt adamw --ft_bs 1 --ctx_size 4096  --ckpt_path $CKPT/2_13b_4bit >> $LOG/2_13b_4bit 2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_70b_3bit --hf_output_path $HF/2_70b_3bit >> $LOG/2_70b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_70b_2bit --hf_output_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_70b_4bit --hf_output_path $HF/2_70b_4bit >> $LOG/2_70b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/2_13b_4bit --hf_output_path $HF/2_13b_4bit >> $LOG/2_13b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/2_7b_4bit --hf_output_path $HF/2_7b_4bit >> $LOG/2_7b_4bit 2>&1 &
-wait
-
-CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path $HF/2_70b_3bit >> $LOG/2_70b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --hf_path $HF/2_70b_4bit >> $LOG/2_70b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --hf_path $HF/2_13b_4bit >> $LOG/2_13b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --hf_path $HF/2_7b_4bit >> $LOG/2_7b_4bit 2>&1 &
-wait
-
-CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_3bit >> $LOG/2_70b_3bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_4bit >> $LOG/2_70b_4bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=6 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_4bit >> $LOG/2_13b_4bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=7 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_4bit >> $LOG/2_7b_4bit 2>&1 &     
-wait
-
-
diff --git a/sbatch/finetune_llama1.sh b/sbatch/finetune_llama1.sh
deleted file mode 100644
index f7091c2..0000000
--- a/sbatch/finetune_llama1.sh
+++ /dev/null
@@ -1,121 +0,0 @@
-CKPT=/mnt/desa_data/checkpoints/icml_llama
-HF=/mnt/desa_data/hfized/icml_llama
-LOG=/mnt/desa_data/logs/icml_llama
-HESS=/mnt/desa_data/hessians
-
-python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_65b_3bit --codebook E8P12RVQ3B --base_model relaxml/Llama-1-65b-hf  --hessian_path $HESS/llama1_65b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5  --ft_bs 4 --ft_update_freq 2 >> $LOG/1_65b_3bit 2>&1
-
-python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_30b_3bit --codebook E8P12RVQ3B --base_model relaxml/Llama-1-30b-hf  --hessian_path $HESS/llama1_30b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5  --ft_bs 4 --ft_update_freq 2 >> $LOG/1_30b_3bit 2>&1
-
-python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_13b_3bit --codebook E8P12RVQ3B --base_model relaxml/Llama-1-13b-hf  --hessian_path $HESS/llama1_13b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5  >> $LOG/1_13b_3bit 2>&1
-
-python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_7b_3bit --codebook E8P12RVQ3B --base_model relaxml/Llama-1-7b-hf  --hessian_path $HESS/llama1_7b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5  >> $LOG/1_7b_3bit 2>&1
-
-
-python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_65b_4bit --codebook E8P12RVQ4B --base_model relaxml/Llama-1-65b-hf  --hessian_path $HESS/llama1_65b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5  --ft_bs 4 --ft_update_freq 2 >> $LOG/1_65b_4bit 2>&1
-
-python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_30b_4bit --codebook E8P12RVQ4B --base_model relaxml/Llama-1-30b-hf  --hessian_path $HESS/llama1_30b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5  --ft_bs 4 --ft_update_freq 2 >> $LOG/1_30b_4bit 2>&1
-
-python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_13b_4bit --codebook E8P12RVQ4B --base_model relaxml/Llama-1-13b-hf  --hessian_path $HESS/llama1_13b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5  >> $LOG/1_13b_4bit 2>&1
-
-python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_7b_4bit --codebook E8P12RVQ4B --base_model relaxml/Llama-1-7b-hf  --hessian_path $HESS/llama1_7b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5  >> $LOG/1_7b_4bit 2>&1
-
-
-python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_65b_2bit --codebook E8P12  --scale_override 0.9 --base_model relaxml/Llama-1-65b-hf  --hessian_path $HESS/llama1_65b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5  --ft_bs 4 --ft_update_freq 2 >> $LOG/1_65b_2bit 2>&1
-
-python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_30b_2bit --codebook E8P12  --scale_override 0.9 --base_model relaxml/Llama-1-30b-hf  --hessian_path $HESS/llama1_30b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5  --ft_bs 4 --ft_update_freq 2 >> $LOG/1_30b_2bit 2>&1
-
-python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_13b_2bit --codebook E8P12  --scale_override 0.9 --base_model relaxml/Llama-1-13b-hf  --hessian_path $HESS/llama1_13b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5  >> $LOG/1_13b_2bit 2>&1
-
-python finetune_susv_adam.py --ctx_size 2048 --save_path $CKPT/1_7b_2bit --codebook E8P12  --scale_override 0.9 --base_model relaxml/Llama-1-7b-hf  --hessian_path $HESS/llama1_7b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5  >> $LOG/1_7b_2bit 2>&1
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/1_65b_3bit --hf_output_path $HF/1_65b_3bit >> $LOG/1_65b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/1_65b_2bit --hf_output_path $HF/1_65b_2bit >> $LOG/1_65b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/1_65b_4bit --hf_output_path $HF/1_65b_4bit >> $LOG/1_65b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/1_30b_3bit --hf_output_path $HF/1_30b_3bit >> $LOG/1_30b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/1_30b_2bit --hf_output_path $HF/1_30b_2bit >> $LOG/1_30b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/1_30b_4bit --hf_output_path $HF/1_30b_4bit >> $LOG/1_30b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/1_13b_3bit --hf_output_path $HF/1_13b_3bit >> $LOG/1_13b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/1_13b_4bit --hf_output_path $HF/1_13b_4bit >> $LOG/1_13b_4bit 2>&1 &
-wait
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/1_13b_2bit --hf_output_path $HF/1_13b_2bit >> $LOG/1_13b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/1_7b_3bit --hf_output_path $HF/1_7b_3bit >> $LOG/1_7b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/1_7b_4bit --hf_output_path $HF/1_7b_4bit >> $LOG/1_7b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/1_7b_2bit --hf_output_path $HF/1_7b_2bit >> $LOG/1_7b_2bit 2>&1 &
-
-wait
-
-
-python tune_susv_lmhead.py --base_model relaxml/Llama-1-65b-hf --hf_path $HF/1_65b_3bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8  --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_65b_3bit >> $LOG/1_65b_3bit 2>&1
-python tune_susv_lmhead.py --base_model relaxml/Llama-1-65b-hf --hf_path $HF/1_65b_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8  --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_65b_4bit >> $LOG/1_65b_4bit 2>&1
-python tune_susv_lmhead.py --base_model relaxml/Llama-1-65b-hf --hf_path $HF/1_65b_2bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8  --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_65b_2bit >> $LOG/1_65b_2bit 2>&1
-
-python tune_susv_lmhead.py --base_model relaxml/Llama-1-30b-hf --hf_path $HF/1_30b_3bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8  --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_30b_3bit >> $LOG/1_30b_3bit 2>&1
-python tune_susv_lmhead.py --base_model relaxml/Llama-1-30b-hf --hf_path $HF/1_30b_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8  --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_30b_4bit >> $LOG/1_30b_4bit 2>&1
-python tune_susv_lmhead.py --base_model relaxml/Llama-1-30b-hf --hf_path $HF/1_30b_2bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8  --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_30b_2bit >> $LOG/1_30b_2bit 2>&1
-
-
-CUDA_VISIBLE_DEVICES=0,1,2,3 python tune_susv_lmhead.py --base_model relaxml/Llama-1-13b-hf --hf_path $HF/1_13b_3bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8  --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_13b_3bit >> $LOG/1_13b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4,5,6,7 python tune_susv_lmhead.py --base_model relaxml/Llama-1-13b-hf --hf_path $HF/1_13b_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8  --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_13b_4bit >> $LOG/1_13b_4bit 2>&1 &
-wait
-
-CUDA_VISIBLE_DEVICES=0,1,2,3 python tune_susv_lmhead.py --base_model relaxml/Llama-1-13b-hf --hf_path $HF/1_13b_2bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8  --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_13b_2bit >> $LOG/1_13b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4,5,6,7 python tune_susv_lmhead.py --base_model relaxml/Llama-1-7b-hf --hf_path $HF/1_7b_3bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8  --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_7b_3bit >> $LOG/1_7b_3bit 2>&1 &
-wait
-
-CUDA_VISIBLE_DEVICES=0,1,2,3 python tune_susv_lmhead.py --base_model relaxml/Llama-1-7b-hf --hf_path $HF/1_7b_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8  --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_7b_4bit >> $LOG/1_7b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4,5,6,7 python tune_susv_lmhead.py --base_model relaxml/Llama-1-7b-hf --hf_path $HF/1_7b_2bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8  --ft_bs 1 --ctx_size 2048 --ft_update_freq 2 --ckpt_path $CKPT/1_7b_2bit >> $LOG/1_7b_2bit 2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/1_65b_3bit --hf_output_path $HF/1_65b_3bit >> $LOG/1_65b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/1_65b_2bit --hf_output_path $HF/1_65b_2bit >> $LOG/1_65b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/1_65b_4bit --hf_output_path $HF/1_65b_4bit >> $LOG/1_65b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/1_30b_3bit --hf_output_path $HF/1_30b_3bit >> $LOG/1_30b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/1_30b_2bit --hf_output_path $HF/1_30b_2bit >> $LOG/1_30b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/1_30b_4bit --hf_output_path $HF/1_30b_4bit >> $LOG/1_30b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/1_13b_3bit --hf_output_path $HF/1_13b_3bit >> $LOG/1_13b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/1_13b_4bit --hf_output_path $HF/1_13b_4bit >> $LOG/1_13b_4bit 2>&1 &
-wait
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/1_13b_2bit --hf_output_path $HF/1_13b_2bit >> $LOG/1_13b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/1_7b_3bit --hf_output_path $HF/1_7b_3bit >> $LOG/1_7b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/1_7b_4bit --hf_output_path $HF/1_7b_4bit >> $LOG/1_7b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/1_7b_2bit --hf_output_path $HF/1_7b_2bit >> $LOG/1_7b_2bit 2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_65b_3bit >> $LOG/1_65b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_65b_4bit >> $LOG/1_65b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_65b_2bit >> $LOG/1_65b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_30b_3bit >> $LOG/1_30b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_30b_4bit >> $LOG/1_30b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_30b_2bit >> $LOG/1_30b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_13b_3bit >> $LOG/1_13b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_13b_4bit >> $LOG/1_13b_4bit 2>&1 &
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_13b_2bit >> $LOG/1_13b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_7b_3bit >> $LOG/1_7b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_7b_4bit >> $LOG/1_7b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_7b_2bit >> $LOG/1_7b_2bit 2>&1 &
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_3bit >> $LOG/1_65b_3bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_4bit >> $LOG/1_65b_4bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_2bit >> $LOG/1_65b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_3bit >> $LOG/1_30b_3bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_4bit >> $LOG/1_30b_4bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=5 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_2bit >> $LOG/1_30b_2bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=6 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_3bit >> $LOG/1_13b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_4bit >> $LOG/1_13b_4bit 2>&1 &
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_2bit >> $LOG/1_13b_2bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_3bit >> $LOG/1_7b_3bit 2>&1 &     
-CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_4bit >> $LOG/1_7b_4bit 2>&1 &     
-CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_2bit >> $LOG/1_7b_2bit 2>&1 &     
-wait
-
-
diff --git a/sbatch/finetune_llama2.sh b/sbatch/finetune_llama2.sh
deleted file mode 100644
index 87e88fa..0000000
--- a/sbatch/finetune_llama2.sh
+++ /dev/null
@@ -1,172 +0,0 @@
-CKPT=/mnt/desa_data/checkpoints/icml_llama
-HF=/mnt/desa_data/hfized/icml_llama
-LOG=/mnt/desa_data/logs/icml_llama
-HESS=/mnt/desa_data/hessians
-
-
-python finetune_susv_adam.py --save_path $CKPT/2_70b_4bit --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-70b-hf  --hessian_path $HESS/llama2_70b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5  --ft_bs 4 --ft_update_freq 2 >> $LOG/2_70b_4bit 2>&1
-
-python finetune_susv_adam.py --save_path $CKPT/2_13b_4bit --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-13b-hf  --hessian_path $HESS/llama2_13b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5  >> $LOG/2_13b_4bit 2>&1
-
-python finetune_susv_adam.py --save_path $CKPT/2_7b_4bit --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-7b-hf  --hessian_path $HESS/llama2_7b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5  >> $LOG/2_7b_4bit 2>&1
-
-
-python finetune_susv_adam.py --save_path $CKPT/2_70b_3bit --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-70b-hf  --hessian_path $HESS/llama2_70b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5  --ft_bs 4 --ft_update_freq 2 >> $LOG/2_70b_3bit 2>&1
-
-python finetune_susv_adam.py --save_path $CKPT/2_13b_3bit --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-13b-hf  --hessian_path $HESS/llama2_13b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5  >> $LOG/2_13b_3bit 2>&1
-
-python finetune_susv_adam.py --save_path $CKPT/2_7b_3bit --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-7b-hf  --hessian_path $HESS/llama2_7b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5  >> $LOG/2_7b_3bit 2>&1
-
-
-
-python finetune_susv_adam.py --save_path $CKPT/2_70b_2bit --codebook E8P12  --scale_override 0.9 --base_model meta-llama/Llama-2-70b-hf  --hessian_path $HESS/llama2_70b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5  --ft_bs 4 --ft_update_freq 2 >> $LOG/2_70b_2bit 2>&1
-
-python finetune_susv_adam.py --save_path $CKPT/2_13b_2bit --codebook E8P12  --scale_override 0.9 --base_model meta-llama/Llama-2-13b-hf  --hessian_path $HESS/llama2_13b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5  >> $LOG/2_13b_2bit 2>&1
-
-python finetune_susv_adam.py --save_path $CKPT/2_7b_2bit --codebook E8P12  --scale_override 0.9 --base_model meta-llama/Llama-2-7b-hf  --hessian_path $HESS/llama2_7b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5  >> $LOG/2_7b_2bit 2>&1
-
-
-python finetune_susv_adam.py --save_path $CKPT/2_70b_chat_4bit --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-70b-chat-hf  --hessian_path $HESS/llama2_70b_chat_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5  --ft_bs 4 --ft_update_freq 2 >> $LOG/2_70b_chat_4bit 2>&1
-
-python finetune_susv_adam.py --save_path $CKPT/2_13b_chat_4bit --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-13b-chat-hf  --hessian_path $HESS/llama2_13b_chat_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5  >> $LOG/2_13b_chat_4bit 2>&1
-
-python finetune_susv_adam.py --save_path $CKPT/2_7b_chat_4bit --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-7b-chat-hf  --hessian_path $HESS/llama2_7b_chat_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5  >> $LOG/2_7b_chat_4bit 2>&1
-
-
-python finetune_susv_adam.py --save_path $CKPT/2_70b_chat_3bit --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-70b-chat-hf  --hessian_path $HESS/llama2_70b_chat_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5  --ft_bs 4 --ft_update_freq 2 >> $LOG/2_70b_chat_3bit 2>&1
-
-python finetune_susv_adam.py --save_path $CKPT/2_13b_chat_3bit --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-13b-chat-hf  --hessian_path $HESS/llama2_13b_chat_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5  >> $LOG/2_13b_chat_3bit 2>&1
-
-python finetune_susv_adam.py --save_path $CKPT/2_7b_chat_3bit --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-7b-chat-hf  --hessian_path $HESS/llama2_7b_chat_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5  >> $LOG/2_7b_chat_3bit 2>&1
-
-
-python finetune_susv_adam.py --save_path $CKPT/2_70b_chat_2bit --codebook E8P12  --scale_override 0.9 --base_model meta-llama/Llama-2-70b-chat-hf  --hessian_path $HESS/llama2_70b_chat_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5  --ft_bs 4 --ft_update_freq 2 >> $LOG/2_70b_chat_2bit 2>&1
-
-python finetune_susv_adam.py --save_path $CKPT/2_13b_chat_2bit --codebook E8P12  --scale_override 0.9 --base_model meta-llama/Llama-2-13b-chat-hf  --hessian_path $HESS/llama2_13b_chat_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5  >> $LOG/2_13b_chat_2bit 2>&1
-
-python finetune_susv_adam.py --save_path $CKPT/2_7b_chat_2bit --codebook E8P12  --scale_override 0.9 --base_model meta-llama/Llama-2-7b-chat-hf  --hessian_path $HESS/llama2_7b_chat_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5  >> $LOG/2_7b_chat_2bit 2>&1
-
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_4bit --hf_output_path $HF/2_70b_4bit >> $LOG/2_70b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_70b_3bit --hf_output_path $HF/2_70b_3bit >> $LOG/2_70b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_70b_2bit --hf_output_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_13b_4bit --hf_output_path $HF/2_13b_4bit >> $LOG/2_13b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_13b_3bit --hf_output_path $HF/2_13b_3bit >> $LOG/2_13b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/2_13b_2bit --hf_output_path $HF/2_13b_2bit >> $LOG/2_13b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/2_7b_4bit --hf_output_path $HF/2_7b_4bit >> $LOG/2_7b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/2_7b_3bit --hf_output_path $HF/2_7b_3bit >> $LOG/2_7b_3bit 2>&1 &
-wait
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_7b_2bit --hf_output_path $HF/2_7b_2bit >> $LOG/2_7b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_70b_chat_4bit --hf_output_path $HF/2_70b_chat_4bit >> $LOG/2_70b_chat_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_70b_chat_3bit --hf_output_path $HF/2_70b_chat_3bit >> $LOG/2_70b_chat_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_70b_chat_2bit --hf_output_path $HF/2_70b_chat_2bit >> $LOG/2_70b_chat_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_13b_chat_4bit --hf_output_path $HF/2_13b_chat_4bit >> $LOG/2_13b_chat_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/2_13b_chat_3bit --hf_output_path $HF/2_13b_chat_3bit >> $LOG/2_13b_chat_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/2_13b_chat_2bit --hf_output_path $HF/2_13b_chat_2bit >> $LOG/2_13b_chat_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/2_7b_chat_4bit --hf_output_path $HF/2_7b_chat_4bit >> $LOG/2_7b_chat_4bit 2>&1 &
-wait
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_7b_chat_3bit --hf_output_path $HF/2_7b_chat_3bit >> $LOG/2_7b_chat_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_7b_chat_2bit --hf_output_path $HF/2_7b_chat_2bit >> $LOG/2_7b_chat_2bit 2>&1 &
-wait
-
-python tune_susv_lmhead.py --base_model meta-llama/Llama-2-70b-hf --hf_path $HF/2_70b_3bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8  --ft_bs 1 --ctx_size 3072 --ft_update_freq 2 --ckpt_path $CKPT/2_70b_3bit >> $LOG/2_70b_3bit 2>&1
-python tune_susv_lmhead.py --base_model meta-llama/Llama-2-70b-hf --hf_path $HF/2_70b_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8  --ft_bs 1 --ctx_size 3072 --ft_update_freq 2 --ckpt_path $CKPT/2_70b_4bit >> $LOG/2_70b_4bit 2>&1
-python tune_susv_lmhead.py --base_model meta-llama/Llama-2-70b-hf --hf_path $HF/2_70b_2bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8  --ft_bs 1 --ctx_size 3072 --ft_update_freq 2 --ckpt_path $CKPT/2_70b_2bit >> $LOG/2_70b_2bit 2>&1
-
-CUDA_VISIBLE_DEVICES=0,1,2,3 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-13b-hf --hf_path $HF/2_13b_3bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8  --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_13b_3bit >> $LOG/2_13b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4,5,6,7 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-13b-hf --hf_path $HF/2_13b_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8  --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_13b_4bit >> $LOG/2_13b_4bit 2>&1 &
-wait
-
-CUDA_VISIBLE_DEVICES=0,1,2,3 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-13b-hf --hf_path $HF/2_13b_2bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8  --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_13b_2bit >> $LOG/2_13b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4,5,6,7 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-7b-hf --hf_path $HF/2_7b_3bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8  --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_7b_3bit >> $LOG/2_7b_3bit 2>&1 &
-wait
-
-CUDA_VISIBLE_DEVICES=0,1,2,3 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-7b-hf --hf_path $HF/2_7b_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8  --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_7b_4bit >> $LOG/2_7b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4,5,6,7 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-7b-hf --hf_path $HF/2_7b_2bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8  --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_7b_2bit >> $LOG/2_7b_2bit 2>&1 &
-wait
-
-python tune_susv_lmhead.py --base_model meta-llama/Llama-2-70b-chat-hf --hf_path $HF/2_70b_chat_3bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8  --ft_bs 1 --ctx_size 3072 --ft_update_freq 2 --ckpt_path $CKPT/2_70b_chat_3bit >> $LOG/2_70b_chat_3bit 2>&1
-python tune_susv_lmhead.py --base_model meta-llama/Llama-2-70b-chat-hf --hf_path $HF/2_70b_chat_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8  --ft_bs 1 --ctx_size 3072 --ft_update_freq 2 --ckpt_path $CKPT/2_70b_chat_4bit >> $LOG/2_70b_chat_4bit 2>&1
-python tune_susv_lmhead.py --base_model meta-llama/Llama-2-70b-chat-hf --hf_path $HF/2_70b_chat_2bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8  --ft_bs 1 --ctx_size 3072 --ft_update_freq 2 --ckpt_path $CKPT/2_70b_chat_2bit >> $LOG/2_70b_chat_2bit 2>&1
-
-CUDA_VISIBLE_DEVICES=0,1,2,3 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-13b-chat-hf --hf_path $HF/2_13b_chat_3bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8  --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_13b_chat_3bit >> $LOG/2_13b_chat_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4,5,6,7 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-13b-chat-hf --hf_path $HF/2_13b_chat_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8  --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_13b_chat_4bit >> $LOG/2_13b_chat_4bit 2>&1 &
-wait
-
-CUDA_VISIBLE_DEVICES=0,1,2,3 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-13b-chat-hf --hf_path $HF/2_13b_chat_2bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8  --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_13b_chat_2bit >> $LOG/2_13b_chat_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4,5,6,7 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-7b-chat-hf --hf_path $HF/2_7b_chat_3bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8  --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_7b_chat_3bit >> $LOG/2_7b_chat_3bit 2>&1 &
-wait
-
-CUDA_VISIBLE_DEVICES=0,1,2,3 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-7b-chat-hf --hf_path $HF/2_7b_chat_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8  --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_7b_chat_4bit >> $LOG/2_7b_chat_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4,5,6,7 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-7b-chat-hf --hf_path $HF/2_7b_chat_2bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8  --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_7b_chat_2bit >> $LOG/2_7b_chat_2bit 2>&1 &
-wait
-
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_4bit --hf_output_path $HF/2_70b_4bit >> $LOG/2_70b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_70b_3bit --hf_output_path $HF/2_70b_3bit >> $LOG/2_70b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_70b_2bit --hf_output_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_13b_4bit --hf_output_path $HF/2_13b_4bit >> $LOG/2_13b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_13b_3bit --hf_output_path $HF/2_13b_3bit >> $LOG/2_13b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/2_13b_2bit --hf_output_path $HF/2_13b_2bit >> $LOG/2_13b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/2_7b_4bit --hf_output_path $HF/2_7b_4bit >> $LOG/2_7b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/2_7b_3bit --hf_output_path $HF/2_7b_3bit >> $LOG/2_7b_3bit 2>&1 &
-wait
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_7b_2bit --hf_output_path $HF/2_7b_2bit >> $LOG/2_7b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_70b_chat_4bit --hf_output_path $HF/2_70b_chat_4bit >> $LOG/2_70b_chat_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_70b_chat_3bit --hf_output_path $HF/2_70b_chat_3bit >> $LOG/2_70b_chat_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_70b_chat_2bit --hf_output_path $HF/2_70b_chat_2bit >> $LOG/2_70b_chat_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_13b_chat_4bit --hf_output_path $HF/2_13b_chat_4bit >> $LOG/2_13b_chat_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/2_13b_chat_3bit --hf_output_path $HF/2_13b_chat_3bit >> $LOG/2_13b_chat_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/2_13b_chat_2bit --hf_output_path $HF/2_13b_chat_2bit >> $LOG/2_13b_chat_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/2_7b_chat_4bit --hf_output_path $HF/2_7b_chat_4bit >> $LOG/2_7b_chat_4bit 2>&1 &
-wait
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_7b_chat_3bit --hf_output_path $HF/2_7b_chat_3bit >> $LOG/2_7b_chat_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_7b_chat_2bit --hf_output_path $HF/2_7b_chat_2bit >> $LOG/2_7b_chat_2bit 2>&1 &
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/2_70b_3bit >> $LOG/2_70b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path $HF/2_70b_4bit >> $LOG/2_70b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --hf_path $HF/2_13b_3bit >> $LOG/2_13b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/2_13b_4bit >> $LOG/2_13b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --hf_path $HF/2_13b_2bit >> $LOG/2_13b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --hf_path $HF/2_7b_3bit >> $LOG/2_7b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --hf_path $HF/2_7b_4bit >> $LOG/2_7b_4bit 2>&1 &
-wait
-CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/2_7b_2bit >> $LOG/2_7b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path $HF/2_70b_chat_3bit >> $LOG/2_70b_chat_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path $HF/2_70b_chat_4bit >> $LOG/2_70b_chat_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --hf_path $HF/2_70b_chat_2bit >> $LOG/2_70b_chat_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/2_13b_chat_3bit >> $LOG/2_13b_chat_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --hf_path $HF/2_13b_chat_4bit >> $LOG/2_13b_chat_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --hf_path $HF/2_13b_chat_2bit >> $LOG/2_13b_chat_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --hf_path $HF/2_7b_chat_3bit >> $LOG/2_7b_chat_3bit 2>&1 &
-wait
-CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/2_7b_chat_4bit >> $LOG/2_7b_chat_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path $HF/2_7b_chat_2bit >> $LOG/2_7b_chat_2bit 2>&1 &
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_3bit >> $LOG/2_70b_3bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_4bit >> $LOG/2_70b_4bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_3bit >> $LOG/2_13b_3bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_4bit >> $LOG/2_13b_4bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=5 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_2bit >> $LOG/2_13b_2bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=6 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_3bit >> $LOG/2_7b_3bit 2>&1 &     
-CUDA_VISIBLE_DEVICES=7 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_4bit >> $LOG/2_7b_4bit 2>&1 &
-wait
-CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_2bit >> $LOG/2_7b_2bit 2>&1 &     
-CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_chat_3bit >> $LOG/2_70b_chat_3bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_chat_4bit >> $LOG/2_70b_chat_4bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_chat_2bit >> $LOG/2_70b_chat_2bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_chat_3bit >> $LOG/2_13b_chat_3bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=5 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_chat_4bit >> $LOG/2_13b_chat_4bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=6 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_chat_2bit >> $LOG/2_13b_chat_2bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=7 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_chat_3bit >> $LOG/2_7b_chat_3bit 2>&1 &
-wait
-CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_chat_4bit >> $LOG/2_7b_chat_4bit 2>&1 &     
-CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_chat_2bit >> $LOG/2_7b_chat_2bit 2>&1 &
-wait
-
-
-
diff --git a/sbatch/finetune_orig.sh b/sbatch/finetune_orig.sh
deleted file mode 100644
index ed8a440..0000000
--- a/sbatch/finetune_orig.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-CKPT=/mnt/desa_data/checkpoints/finetune_albert
-HF=/mnt/desa_data/hfized/finetune_albert
-LOG=/mnt/desa_data/logs/finetune_albert
-HESS=/mnt/desa_data/hessians
-
-python finetune_susv.py --save_path $CKPT/2_70b_susv --codebook E8P12  --scale_override 0.9 --base_model meta-llama/Llama-2-70b-hf  --hessian_path $HESS/llama2_70b_6144/ --devset_size 640 --ft_valid_size 128 --ft_epochs 5 --ft_bs 4 --ft_update_freq 2 >> $LOG/2_70b_susv 2>&1
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_susv --hf_output_path $HF/2_70b_susv >> $LOG/2_70b_susv 2>&1
-CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/2_70b_susv >> $LOG/2_70b_susv 2>&1
-
-python finetune_susv.py --save_path $CKPT/2_13b_susv --codebook E8P12  --scale_override 0.9 --base_model meta-llama/Llama-2-13b-hf  --hessian_path $HESS/llama2_13b_6144/ --devset_size 640 --ft_valid_size 128 --ft_epochs 5 >> $LOG/2_13b_susv 2>&1
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_13b_susv --hf_output_path $HF/2_13b_susv >> $LOG/2_13b_susv 2>&1
-CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/2_13b_susv >> $LOG/2_13b_susv 2>&1
-
-python finetune_susv.py --save_path $CKPT/2_7b_susv --codebook E8P12  --scale_override 0.9 --base_model meta-llama/Llama-2-7b-hf  --hessian_path $HESS/llama2_7b_6144/ --devset_size 640 --ft_valid_size 128 --ft_epochs 5 >> $LOG/2_7b_susv 2>&1
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_7b_susv --hf_output_path $HF/2_7b_susv >> $LOG/2_7b_susv 2>&1
-CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/2_7b_susv >> $LOG/2_7b_susv 2>&1
-
-
-
-
diff --git a/sbatch/finetune_test.sh b/sbatch/finetune_test.sh
deleted file mode 100644
index 71b7446..0000000
--- a/sbatch/finetune_test.sh
+++ /dev/null
@@ -1,24 +0,0 @@
-CKPT=/mnt/desa_data/checkpoints/fttest
-HF=/mnt/desa_data/hfized/fttest
-LOG=/mnt/desa_data/logs/fttest
-HESS=/mnt/desa_data/hessians
-
-mkdir $CKPT
-mkdir $HF
-mkdir $LOG
-
-
-#python quantize_finetune_llama.py --save_path $CKPT/2_70b_2bit --codebook E8P12  --scale_override 0.9 --base_model meta-llama/Llama-2-70b-hf  --hessian_path $HESS/llama2_70b_6144/ --devset_size 384 --ft_valid_size 128 >> $LOG/2_70b_2bit 2>&1
-
-#CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_70b_2bit --hf_output_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1
-
-python finetune_e2e_llama.py --base_model meta-llama/Llama-2-70b-hf --hf_path $HF/2_70b_2bit --devset_size 384 --ft_valid_size 128 --ft_epochs 8  --ft_bs 1 --ctx_size 4096 --ft_update_freq 2 --ckpt_path $CKPT/2_70b_2bit --ft_grad_ckpt --ft_train_mode >> $LOG/2_70b_2bit 2>&1
-
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_70b_2bit --hf_output_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 
-
-CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 &   
-wait
-
-
-
diff --git a/sbatch/greedy_sweep.sh b/sbatch/greedy_sweep.sh
deleted file mode 100644
index 0827b9a..0000000
--- a/sbatch/greedy_sweep.sh
+++ /dev/null
@@ -1,99 +0,0 @@
-#!/bin/bash
-
-CKPT=/mnt/desa_data/checkpoints
-HF=/mnt/desa_data/hfized
-HESS=/mnt/desa_data/hessians
-LOG=/mnt/desa_data/logs
-
-'''
-python quantize_llama.py --save_path $CKPT/2_70b_e8p_2bit_gr0  --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 --quip_tune_iters 0 >> $LOG/2_70b_e8p_2bit_gr0 2>&1
-python quantize_llama.py --save_path $CKPT/2_70b_e8p_2bit_gr5  --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 --quip_tune_iters 5 >> $LOG/2_70b_e8p_2bit_gr5 2>&1
-python quantize_llama.py --save_path $CKPT/2_70b_e8p_2bit_gr10 --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 --quip_tune_iters 10 >> $LOG/2_70b_e8p_2bit_gr10 2>&1
-python quantize_llama.py --save_path $CKPT/2_70b_e8p_2bit_gr15 --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 --quip_tune_iters 15 >> $LOG/2_70b_e8p_2bit_gr15 2>&1
-python quantize_llama.py --save_path $CKPT/2_70b_e8p_2bit_gr20 --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 --quip_tune_iters 20 >> $LOG/2_70b_e8p_2bit_gr20 2>&1
-
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_e8p_2bit_gr0   --hf_output_path $HF/2_70b_e8p_2bit_gr0  >> $LOG/2_70b_e8p_2bit_gr0  2>&1 &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_70b_e8p_2bit_gr5   --hf_output_path $HF/2_70b_e8p_2bit_gr5  >> $LOG/2_70b_e8p_2bit_gr5  2>&1 &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_70b_e8p_2bit_gr10  --hf_output_path $HF/2_70b_e8p_2bit_gr10 >> $LOG/2_70b_e8p_2bit_gr10 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_70b_e8p_2bit_gr15  --hf_output_path $HF/2_70b_e8p_2bit_gr15 >> $LOG/2_70b_e8p_2bit_gr15 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_70b_e8p_2bit_gr20  --hf_output_path $HF/2_70b_e8p_2bit_gr20 >> $LOG/2_70b_e8p_2bit_gr20 2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --hf_path $HF/2_70b_e8p_2bit_gr0  >> $LOG/2_70b_e8p_2bit_gr0  2>&1 &
-CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --hf_path $HF/2_70b_e8p_2bit_gr5  >> $LOG/2_70b_e8p_2bit_gr5  2>&1 &
-CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --hf_path $HF/2_70b_e8p_2bit_gr10 >> $LOG/2_70b_e8p_2bit_gr10 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --hf_path $HF/2_70b_e8p_2bit_gr15 >> $LOG/2_70b_e8p_2bit_gr15 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python ppl_llama.py --hf_path $HF/2_70b_e8p_2bit_gr20 >> $LOG/2_70b_e8p_2bit_gr20 2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_llama.py  --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_gr0  >> $LOG/2_70b_e8p_2bit_gr0  2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_llama.py  --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_gr5  >> $LOG/2_70b_e8p_2bit_gr5  2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_llama.py  --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_gr10 >> $LOG/2_70b_e8p_2bit_gr10 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_llama.py  --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_gr15 >> $LOG/2_70b_e8p_2bit_gr15 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_llama.py  --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_gr20 >> $LOG/2_70b_e8p_2bit_gr20 2>&1 &
-wait					     
-'''
-
-python quantize_llama.py --save_path $CKPT/2_13b_e8p_2bit_gr0  --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144 --quip_tune_iters 0 >> $LOG/2_13b_e8p_2bit_gr0 2>&1
-python quantize_llama.py --save_path $CKPT/2_13b_e8p_2bit_gr5  --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144 --quip_tune_iters 5 >> $LOG/2_13b_e8p_2bit_gr5 2>&1
-python quantize_llama.py --save_path $CKPT/2_13b_e8p_2bit_gr10 --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144 --quip_tune_iters 10 >> $LOG/2_13b_e8p_2bit_gr10 2>&1
-python quantize_llama.py --save_path $CKPT/2_13b_e8p_2bit_gr15 --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144 --quip_tune_iters 15 >> $LOG/2_13b_e8p_2bit_gr15 2>&1
-python quantize_llama.py --save_path $CKPT/2_13b_e8p_2bit_gr20 --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144 --quip_tune_iters 20 >> $LOG/2_13b_e8p_2bit_gr20 2>&1
-
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_13b_e8p_2bit_gr0   --hf_output_path $HF/2_13b_e8p_2bit_gr0  >> $LOG/2_13b_e8p_2bit_gr0  2>&1 &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_13b_e8p_2bit_gr5   --hf_output_path $HF/2_13b_e8p_2bit_gr5  >> $LOG/2_13b_e8p_2bit_gr5  2>&1 &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_13b_e8p_2bit_gr10  --hf_output_path $HF/2_13b_e8p_2bit_gr10 >> $LOG/2_13b_e8p_2bit_gr10 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_13b_e8p_2bit_gr15  --hf_output_path $HF/2_13b_e8p_2bit_gr15 >> $LOG/2_13b_e8p_2bit_gr15 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_13b_e8p_2bit_gr20  --hf_output_path $HF/2_13b_e8p_2bit_gr20 >> $LOG/2_13b_e8p_2bit_gr20 2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --hf_path $HF/2_13b_e8p_2bit_gr0  >> $LOG/2_13b_e8p_2bit_gr0  2>&1 &
-CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --hf_path $HF/2_13b_e8p_2bit_gr5  >> $LOG/2_13b_e8p_2bit_gr5  2>&1 &
-CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --hf_path $HF/2_13b_e8p_2bit_gr10 >> $LOG/2_13b_e8p_2bit_gr10 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --hf_path $HF/2_13b_e8p_2bit_gr15 >> $LOG/2_13b_e8p_2bit_gr15 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python ppl_llama.py --hf_path $HF/2_13b_e8p_2bit_gr20 >> $LOG/2_13b_e8p_2bit_gr20 2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_llama.py  --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_e8p_2bit_gr0  >> $LOG/2_13b_e8p_2bit_gr0  2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_llama.py  --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_e8p_2bit_gr5  >> $LOG/2_13b_e8p_2bit_gr5  2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_llama.py  --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_e8p_2bit_gr10 >> $LOG/2_13b_e8p_2bit_gr10 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_llama.py  --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_e8p_2bit_gr15 >> $LOG/2_13b_e8p_2bit_gr15 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_llama.py  --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_e8p_2bit_gr20 >> $LOG/2_13b_e8p_2bit_gr20 2>&1 &
-wait
-
-
-python quantize_llama.py --save_path $CKPT/2_7b_e8p_2bit_gr0  --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144 --quip_tune_iters 0 >> $LOG/2_7b_e8p_2bit_gr0 2>&1
-python quantize_llama.py --save_path $CKPT/2_7b_e8p_2bit_gr5  --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144 --quip_tune_iters 5 >> $LOG/2_7b_e8p_2bit_gr5 2>&1
-python quantize_llama.py --save_path $CKPT/2_7b_e8p_2bit_gr10 --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144 --quip_tune_iters 10 >> $LOG/2_7b_e8p_2bit_gr10 2>&1
-python quantize_llama.py --save_path $CKPT/2_7b_e8p_2bit_gr15 --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144 --quip_tune_iters 15 >> $LOG/2_7b_e8p_2bit_gr15 2>&1
-python quantize_llama.py --save_path $CKPT/2_7b_e8p_2bit_gr20 --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-7b-hf --hessian_path $HESS/llama2_7b_6144 --quip_tune_iters 20 >> $LOG/2_7b_e8p_2bit_gr20 2>&1
-
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_7b_e8p_2bit_gr0   --hf_output_path $HF/2_7b_e8p_2bit_gr0  >> $LOG/2_7b_e8p_2bit_gr0  2>&1 &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_7b_e8p_2bit_gr5   --hf_output_path $HF/2_7b_e8p_2bit_gr5  >> $LOG/2_7b_e8p_2bit_gr5  2>&1 &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_7b_e8p_2bit_gr10  --hf_output_path $HF/2_7b_e8p_2bit_gr10 >> $LOG/2_7b_e8p_2bit_gr10 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_7b_e8p_2bit_gr15  --hf_output_path $HF/2_7b_e8p_2bit_gr15 >> $LOG/2_7b_e8p_2bit_gr15 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_7b_e8p_2bit_gr20  --hf_output_path $HF/2_7b_e8p_2bit_gr20 >> $LOG/2_7b_e8p_2bit_gr20 2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --hf_path $HF/2_7b_e8p_2bit_gr0  >> $LOG/2_7b_e8p_2bit_gr0  2>&1 &
-CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --hf_path $HF/2_7b_e8p_2bit_gr5  >> $LOG/2_7b_e8p_2bit_gr5  2>&1 &
-CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --hf_path $HF/2_7b_e8p_2bit_gr10 >> $LOG/2_7b_e8p_2bit_gr10 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --hf_path $HF/2_7b_e8p_2bit_gr15 >> $LOG/2_7b_e8p_2bit_gr15 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python ppl_llama.py --hf_path $HF/2_7b_e8p_2bit_gr20 >> $LOG/2_7b_e8p_2bit_gr20 2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_llama.py  --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_e8p_2bit_gr0  >> $LOG/2_7b_e8p_2bit_gr0  2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_llama.py  --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_e8p_2bit_gr5  >> $LOG/2_7b_e8p_2bit_gr5  2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_llama.py  --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_e8p_2bit_gr10 >> $LOG/2_7b_e8p_2bit_gr10 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_llama.py  --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_e8p_2bit_gr15 >> $LOG/2_7b_e8p_2bit_gr15 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_llama.py  --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_e8p_2bit_gr20 >> $LOG/2_7b_e8p_2bit_gr20 2>&1 &
-wait
diff --git a/sbatch/hilr_susv_test.sh b/sbatch/hilr_susv_test.sh
deleted file mode 100644
index 76250d5..0000000
--- a/sbatch/hilr_susv_test.sh
+++ /dev/null
@@ -1,49 +0,0 @@
-CKPT=/mnt/desa_data/checkpoints/finetune_llama_ehilr
-HF=/mnt/desa_data/hfized/finetune_llama_ehilr
-LOG=/mnt/desa_data/logs/finetune_llama_ehilr
-HESS=/mnt/desa_data/hessians
-
-mkdir $CKPT
-mkdir $HF
-mkdir $LOG
-
-python finetune_susv_adam.py --save_path $CKPT/2_7b_2bit --codebook E8P12  --scale_override 0.9 --base_model meta-llama/Llama-2-7b-hf  --hessian_path $HESS/llama2_7b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 --ft_bs 4 --ft_update_freq 2 >> $LOG/2_7b_2bit 2>&1
-
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_7b_2bit --hf_output_path $HF/2_7b_2bit >> $LOG/2_7b_2bit 2>&1
-
-python tune_susv_lmhead.py --base_model meta-llama/Llama-2-7b-hf --hf_path $HF/2_7b_2bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 3072 --ft_update_freq 2 --ckpt_path $CKPT/2_7b_2bit >> $LOG/2_7b_2bit 2>&1
-
-
-python finetune_susv_adam.py --save_path $CKPT/2_13b_2bit --codebook E8P12  --scale_override 0.9 --base_model meta-llama/Llama-2-13b-hf  --hessian_path $HESS/llama2_13b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 --ft_bs 4 --ft_update_freq 2 >> $LOG/2_13b_2bit 2>&1
-
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_13b_2bit --hf_output_path $HF/2_13b_2bit >> $LOG/2_13b_2bit 2>&1
-
-python tune_susv_lmhead.py --base_model meta-llama/Llama-2-13b-hf --hf_path $HF/2_13b_2bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 3072 --ft_update_freq 2 --ckpt_path $CKPT/2_13b_2bit >> $LOG/2_13b_2bit 2>&1
-
-
-python finetune_susv_adam.py --save_path $CKPT/2_70b_2bit --codebook E8P12  --scale_override 0.9 --base_model meta-llama/Llama-2-70b-hf  --hessian_path $HESS/llama2_70b_6144/ --devset_size 384 --ft_valid_size 128 --ft_epochs 5 --ft_lr 0.00005 --ft_bs 4 --ft_update_freq 2 >> $LOG/2_70b_2bit 2>&1
-
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_70b_2bit --hf_output_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1
-
-python tune_susv_lmhead.py --base_model meta-llama/Llama-2-70b-hf --hf_path $HF/2_70b_2bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.00001 --ft_bs 1 --ctx_size 3072 --ft_update_freq 2 --ckpt_path $CKPT/2_70b_2bit >> $LOG/2_70b_2bit 2>&1
-
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_70b_2bit --hf_output_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 &
-
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_13b_2bit --hf_output_path $HF/2_13b_2bit >> $LOG/2_13b_2bit 2>&1 &
-
-CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/2_7b_2bit --hf_output_path $HF/2_7b_2bit >> $LOG/2_7b_2bit 2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/2_13b_2bit >> $LOG/2_13b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_2bit >> $LOG/2_13b_2bit 2>&1 &
-
-CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path $HF/2_7b_2bit >> $LOG/2_7b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_2bit >> $LOG/2_7b_2bit 2>&1 &
-
-CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --hf_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 &
-wait
-
-
-
diff --git a/sbatch/icml_llama_eval.sh b/sbatch/icml_llama_eval.sh
deleted file mode 100644
index f059b36..0000000
--- a/sbatch/icml_llama_eval.sh
+++ /dev/null
@@ -1,80 +0,0 @@
-HF=/mnt/desa_data/hfized/icml_llama
-LOG=/mnt/desa_data/logs/icml_llama_eval
-
-CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_65b_3bit >> $LOG/1_65b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_65b_4bit >> $LOG/1_65b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_65b_2bit >> $LOG/1_65b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --hf_path $HF/2_70b_3bit >> $LOG/2_70b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/2_70b_4bit >> $LOG/2_70b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --hf_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --hf_path $HF/2_70b_chat_3bit >> $LOG/2_70b_chat_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --hf_path $HF/2_70b_chat_4bit >> $LOG/2_70b_chat_4bit 2>&1 &
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/2_70b_chat_2bit >> $LOG/2_70b_chat_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_30b_3bit >> $LOG/1_30b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_30b_4bit >> $LOG/1_30b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_30b_2bit >> $LOG/1_30b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_13b_3bit >> $LOG/1_13b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_13b_4bit >> $LOG/1_13b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_13b_2bit >> $LOG/1_13b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --hf_path $HF/2_13b_3bit >> $LOG/2_13b_3bit 2>&1 &
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/2_13b_4bit >> $LOG/2_13b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path $HF/2_13b_2bit >> $LOG/2_13b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path $HF/2_13b_chat_3bit >> $LOG/2_13b_chat_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --hf_path $HF/2_13b_chat_4bit >> $LOG/2_13b_chat_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/2_13b_chat_2bit >> $LOG/2_13b_chat_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_7b_3bit >> $LOG/1_7b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_7b_4bit >> $LOG/1_7b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_7b_2bit >> $LOG/1_7b_2bit 2>&1 &
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/2_7b_3bit >> $LOG/2_7b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path $HF/2_7b_4bit >> $LOG/2_7b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path $HF/2_7b_2bit >> $LOG/2_7b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --hf_path $HF/2_7b_chat_3bit >> $LOG/2_7b_chat_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/2_7b_chat_4bit >> $LOG/2_7b_chat_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --hf_path $HF/2_7b_chat_2bit >> $LOG/2_7b_chat_2bit 2>&1 &
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_3bit >> $LOG/1_65b_3bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_4bit >> $LOG/1_65b_4bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_2bit >> $LOG/1_65b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_3bit >> $LOG/2_70b_3bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_4bit >> $LOG/2_70b_4bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=5 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=6 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_chat_3bit >> $LOG/2_70b_chat_3bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=7 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_chat_4bit >> $LOG/2_70b_chat_4bit 2>&1 &
-
-CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_chat_2bit >> $LOG/2_70b_chat_2bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_3bit >> $LOG/1_30b_3bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_4bit >> $LOG/1_30b_4bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_2bit >> $LOG/1_30b_2bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_3bit >> $LOG/1_13b_3bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=5 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_4bit >> $LOG/1_13b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_2bit >> $LOG/1_13b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_3bit >> $LOG/2_13b_3bit 2>&1 &
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_4bit >> $LOG/2_13b_4bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_2bit >> $LOG/2_13b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_chat_3bit >> $LOG/2_13b_chat_3bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_chat_4bit >> $LOG/2_13b_chat_4bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_chat_2bit >> $LOG/2_13b_chat_2bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=5 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_3bit >> $LOG/1_7b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_4bit >> $LOG/1_7b_4bit 2>&1 &     
-CUDA_VISIBLE_DEVICES=7 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_2bit >> $LOG/1_7b_2bit 2>&1 &
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_3bit >> $LOG/2_7b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_4bit >> $LOG/2_7b_4bit 2>&1 &     
-CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_2bit >> $LOG/2_7b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_chat_3bit >> $LOG/2_7b_chat_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_chat_4bit >> $LOG/2_7b_chat_4bit 2>&1 &     
-CUDA_VISIBLE_DEVICES=5 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_chat_2bit >> $LOG/2_7b_chat_2bit 2>&1 &
-wait
-
-
-
diff --git a/sbatch/icml_ppl.sh b/sbatch/icml_ppl.sh
deleted file mode 100644
index b34d25c..0000000
--- a/sbatch/icml_ppl.sh
+++ /dev/null
@@ -1,49 +0,0 @@
-HF=/mnt/desa_data/hfized
-LOG=/mnt/desa_data/logs/icml_ppl
-
-CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/new_e8p/2_70b_e8p_2bit --seqlen 2048 >> $LOG/2_70b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path $HF/rvq/2_70b_e8prvq_3bit --seqlen 2048 >> $LOG/2_70b_e8prvq_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path $HF/rvq/2_70b_e8prvq_4bit --seqlen 2048 >> $LOG/2_70b_e8prvq_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --hf_path $HF/new_e8p/2_7b_e8p_2bit --seqlen 2048 >> $LOG/2_7b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/new_e8p/2_13b_e8p_2bit --seqlen 2048 >> $LOG/2_13b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --hf_path $HF/rvq/2_13b_e8prvq_3bit --seqlen 2048 >> $LOG/2_13b_e8prvq_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --hf_path $HF/rvq/2_13b_e8prvq_4bit --seqlen 2048 >> $LOG/2_13b_e8prvq_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --hf_path meta-llama/Llama-2-13b-hf --seqlen 2048 >> $LOG/2_13b_fp16 2>&1 & 
-
-wait
-
-CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path $HF/rvq/2_7b_e8prvq_3bit --seqlen 2048 >> $LOG/2_7b_e8prvq_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path $HF/rvq/2_7b_e8prvq_4bit --seqlen 2048 >> $LOG/2_7b_e8prvq_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --hf_path meta-llama/Llama-2-7b-hf --seqlen 2048 >> $LOG/2_7b_fp16 2>&1 & 
-CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/new_e8p/1_65b_e8p_2bit --seqlen 2048 >> $LOG/1_65b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --hf_path $HF/rvq/1_65b_e8prvq_3bit --seqlen 2048 >> $LOG/1_65b_e8prvq_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/rvq/1_65b_e8prvq_4bit --seqlen 2048 >> $LOG/1_65b_e8prvq_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6,7 python eval_ppl.py --hf_path meta-llama/Llama-2-70b-hf --seqlen 2048 >> $LOG/2_70b_fp16 2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/new_e8p/1_30b_e8p_2bit --seqlen 2048 >> $LOG/1_30b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path $HF/rvq/1_30b_e8prvq_3bit --seqlen 2048 >> $LOG/1_30b_e8prvq_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path $HF/rvq/1_30b_e8prvq_4bit --seqlen 2048 >> $LOG/1_30b_e8prvq_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --hf_path relaxml/Llama-1-30b-hf --seqlen 2048 >> $LOG/1_30b_fp16 2>&1 & 
-CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/new_e8p/1_13b_e8p_2bit --seqlen 2048 >> $LOG/1_13b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --hf_path $HF/rvq/1_13b_e8prvq_3bit --seqlen 2048 >> $LOG/1_13b_e8prvq_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --hf_path $HF/rvq/1_13b_e8prvq_4bit --seqlen 2048 >> $LOG/1_13b_e8prvq_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --hf_path relaxml/Llama-1-13b-hf --seqlen 2048 >> $LOG/1_13b_fp16 2>&1 & 
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/new_e8p/1_7b_e8p_2bit --seqlen 2048 >> $LOG/1_7b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path $HF/rvq/1_7b_e8prvq_3bit --seqlen 2048 >> $LOG/1_7b_e8prvq_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path $HF/rvq/1_7b_e8prvq_4bit --seqlen 2048 >> $LOG/1_7b_e8prvq_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --hf_path relaxml/Llama-1-7b-hf --seqlen 2048 >> $LOG/1_7b_fp16 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/falcon_180b_e8p_2bit --seqlen 2048 --no_use_cuda_graph >> $LOG/falcon_180b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --hf_path $HF/falcon_180b_e8prvq_3bit --seqlen 2048 --no_use_cuda_graph >> $LOG/falcon_180b_e8prvq_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6,7 python eval_ppl.py --hf_path $HF/falcon_180b_e8prvq_4bit --seqlen 2048 --no_use_cuda_graph >> $LOG/falcon_180b_e8prvq_4bit 2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0,1,2,3,4 python eval_ppl.py --hf_path tiiuae/falcon-180B --seqlen 2048 --no_use_cuda_graph >> $LOG/falcon_180b_fp16 2>&1 &
-CUDA_VISIBLE_DEVICES=5,6 python eval_ppl.py --hf_path relaxml/Llama-1-65b-hf --seqlen 2048 >> $LOG/1_65b_fp16 2>&1 & 
-
-wait
diff --git a/sbatch/llama1_hessian.sh b/sbatch/llama1_hessian.sh
deleted file mode 100644
index b85350d..0000000
--- a/sbatch/llama1_hessian.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-
-python hessian_offline.py --batch_size 4 --devset_size 6144 --ctx_size 2048 --base_model /work/desa_data/meta_llama1/huggingface_65B --save_path /work/desa_data/hessians/llama1_65b_6144
-
-python hessian_offline.py --batch_size 4 --devset_size 6144 --ctx_size 2048 --base_model /work/desa_data/meta_llama1/huggingface_30B --save_path /work/desa_data/hessians/llama1_30b_6144
-
-python hessian_offline.py --batch_size 4 --devset_size 6144 --ctx_size 2048 --base_model /work/desa_data/meta_llama1/huggingface_13B --save_path /work/desa_data/hessians/llama1_13b_6144
-
-python hessian_offline.py --batch_size 4 --devset_size 6144 --ctx_size 2048 --base_model /work/desa_data/meta_llama1/huggingface_7B --save_path /work/desa_data/hessians/llama1_7b_6144 
diff --git a/sbatch/llama1_nolr_test.sh b/sbatch/llama1_nolr_test.sh
deleted file mode 100644
index 3fb1bbe..0000000
--- a/sbatch/llama1_nolr_test.sh
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/bin/bash
-
-CKPT=/mnt/jerry_data/checkpoints
-HF=/mnt/jerry_data/hfized
-HESS=/mnt/jerry_data/hessians
-LOG=/mnt/jerry_data/logs
-L1=/mnt/jerry_data/meta_llama1
-
-
-CUDA_VISIBLE_DEVICES=0 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 2 --hf_path $HF/1_65b_hi_4bit_nolr  >> $LOG/1_65b_hi_4bit_nolr  2>&1 
-
-
-'''
-python quantize_llama.py --save_path $CKPT/1_65b_e8p_2bit_nolr --codebook E8P12 --lora_rank 0 --scale_override 0.9 --base_model $L1/huggingface_65B --hessian_path $HESS/llama1_65b_6144 >> $LOG/1_65b_e8p_2bit_nolr 2>&1
-python quantize_llama.py --save_path $CKPT/1_30b_e8p_2bit_nolr --codebook E8P12 --lora_rank 0 --scale_override 0.9 --base_model $L1/huggingface_30B --hessian_path $HESS/llama1_30b_6144 >> $LOG/1_30b_e8p_2bit_nolr 2>&1
-python quantize_llama.py --save_path $CKPT/1_13b_e8p_2bit_nolr --codebook E8P12 --lora_rank 0 --scale_override 0.9 --base_model $L1/huggingface_13B --hessian_path $HESS/llama1_13b_6144 >> $LOG/1_13b_e8p_2bit_nolr 2>&1
-python quantize_llama.py --save_path $CKPT/1_7b_e8p_2bit_nolr  --codebook E8P12 --lora_rank 0 --scale_override 0.9 --base_model $L1/huggingface_7B  --hessian_path $HESS/llama1_7b_6144  >> $LOG/1_7b_e8p_2bit_nolr 2>&1
-python quantize_llama.py --save_path $CKPT/1_65b_hi_4bit_nolr  --codebook HI4B1C  --lora_rank 0 --scale_override 2.7 --base_model $L1/huggingface_65B --hessian_path $HESS/llama1_65b_6144 >> $LOG/1_65b_hi_4bit_nolr 2>&1
-python quantize_llama.py --save_path $CKPT/1_30b_hi_4bit_nolr  --codebook HI4B1C  --lora_rank 0 --scale_override 2.7 --base_model $L1/huggingface_30B --hessian_path $HESS/llama1_30b_6144 >> $LOG/1_30b_hi_4bit_nolr 2>&1
-python quantize_llama.py --save_path $CKPT/1_13b_hi_4bit_nolr  --codebook HI4B1C  --lora_rank 0 --scale_override 2.7 --base_model $L1/huggingface_13B --hessian_path $HESS/llama1_13b_6144 >> $LOG/1_13b_hi_4bit_nolr 2>&1
-python quantize_llama.py --save_path $CKPT/1_7b_hi_4bit_nolr   --codebook HI4B1C  --lora_rank 0 --scale_override 2.7 --base_model $L1/huggingface_7B  --hessian_path $HESS/llama1_7b_6144  >> $LOG/1_7b_hi_4bit_nolr 2>&1
-
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/1_65b_e8p_2bit_nolr --hf_output_path $HF/1_65b_e8p_2bit_nolr & 
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/1_30b_e8p_2bit_nolr --hf_output_path $HF/1_30b_e8p_2bit_nolr &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/1_13b_e8p_2bit_nolr --hf_output_path $HF/1_13b_e8p_2bit_nolr &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/1_7b_e8p_2bit_nolr  --hf_output_path $HF/1_7b_e8p_2bit_nolr  &
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/1_65b_hi_4bit_nolr  --hf_output_path $HF/1_65b_hi_4bit_nolr  &
-CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/1_30b_hi_4bit_nolr  --hf_output_path $HF/1_30b_hi_4bit_nolr  &
-CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/1_13b_hi_4bit_nolr  --hf_output_path $HF/1_13b_hi_4bit_nolr  &
-CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/1_7b_hi_4bit_nolr   --hf_output_path $HF/1_7b_hi_4bit_nolr   &
-
-wait
-
-# perplexity
-CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_65b_e8p_2bit_nolr >> $LOG/1_65b_e8p_2bit_nolr 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_30b_e8p_2bit_nolr >> $LOG/1_30b_e8p_2bit_nolr 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_13b_e8p_2bit_nolr >> $LOG/1_13b_e8p_2bit_nolr 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_7b_e8p_2bit_nolr  >> $LOG/1_7b_e8p_2bit_nolr  2>&1 &
-CUDA_VISIBLE_DEVICES=4 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_65b_hi_4bit_nolr  >> $LOG/1_65b_hi_4bit_nolr  2>&1 &
-CUDA_VISIBLE_DEVICES=5 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_30b_hi_4bit_nolr  >> $LOG/1_30b_hi_4bit_nolr  2>&1 &
-CUDA_VISIBLE_DEVICES=6 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_13b_hi_4bit_nolr  >> $LOG/1_13b_hi_4bit_nolr  2>&1 &
-CUDA_VISIBLE_DEVICES=7 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_7b_hi_4bit_nolr   >> $LOG/1_7b_hi_4bit_nolr   2>&1 &
-
-wait
-
-# zero shot
-
-CUDA_VISIBLE_DEVICES=0 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_e8p_2bit_nolr >> $LOG/1_65b_e8p_2bit_nolr 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_e8p_2bit_nolr >> $LOG/1_30b_e8p_2bit_nolr 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_e8p_2bit_nolr >> $LOG/1_13b_e8p_2bit_nolr 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_e8p_2bit_nolr  >> $LOG/1_7b_e8p_2bit_nolr  2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_hi_4bit_nolr  >> $LOG/1_65b_hi_4bit_nolr  2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_hi_4bit_nolr  >> $LOG/1_30b_hi_4bit_nolr  2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_hi_4bit_nolr  >> $LOG/1_13b_hi_4bit_nolr  2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_hi_4bit_nolr   >> $LOG/1_7b_hi_4bit_nolr   2>&1 &
-
-wait
-'''
diff --git a/sbatch/llama2_chat_hessian.sh b/sbatch/llama2_chat_hessian.sh
deleted file mode 100644
index 13417bc..0000000
--- a/sbatch/llama2_chat_hessian.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/bash
-
-python hessian_offline.py --batch_size 4 --devset_size 6144 --ctx_size 4096 --base_model meta-llama/Llama-2-70b-chat-hf --save_path /work/desa_data/hessians/llama2_70b_chat_6144
-
-python hessian_offline.py --batch_size 4 --devset_size 6144 --ctx_size 4096 --base_model meta-llama/Llama-2-13b-chat-hf --save_path /work/desa_data/hessians/llama2_13b_chat_6144
-
-python hessian_offline.py --batch_size 4 --devset_size 6144 --ctx_size 4096 --base_model meta-llama/Llama-2-7b-chat-hf --save_path /work/desa_data/hessians/llama2_7b_chat_6144
-
diff --git a/sbatch/llama2_chat_quantize.sh b/sbatch/llama2_chat_quantize.sh
deleted file mode 100644
index 0602ab6..0000000
--- a/sbatch/llama2_chat_quantize.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/bin/bash
-
-CKPT=/mnt/desa_data/checkpoints
-HF=/mnt/desa_data/hfized
-HESS=/mnt/desa_data/hessians
-LOG=/mnt/desa_data/logs
-
-
-python quantize_llama.py --save_path $CKPT/2_70b_chat_e8p_2bit --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-70b-chat-hf --hessian_path $HESS/llama2_70b_chat_6144 >> $LOG/2_70b_chat_e8p_2bit 2>&1
-python quantize_llama.py --save_path $CKPT/2_13b_chat_e8p_2bit --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-13b-chat-hf --hessian_path $HESS/llama2_13b_chat_6144 >> $LOG/2_13b_chat_e8p_2bit 2>&1
-python quantize_llama.py --save_path $CKPT/2_7b_chat_e8p_2bit --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-7b-chat-hf --hessian_path $HESS/llama2_7b_chat_6144 >> $LOG/2_7b_chat_e8p_2bit 2>&1
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_chat_e8p_2bit --hf_output_path $HF/2_70b_chat_e8p_2bit >> $LOG/2_70b_chat_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_13b_chat_e8p_2bit --hf_output_path $HF/2_13b_chat_e8p_2bit >> $LOG/2_13b_chat_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_7b_chat_e8p_2bit --hf_output_path $HF/2_7b_chat_e8p_2bit >> $LOG/2_7b_chat_e8p_2bit 2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --hf_path $HF/2_70b_chat_e8p_2bit >> $LOG/2_70b_chat_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --hf_path $HF/2_13b_chat_e8p_2bit >> $LOG/2_13b_chat_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --hf_path $HF/2_7b_chat_e8p_2bit >> $LOG/2_7b_chat_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3,4 python ppl_llama.py --hf_path meta-llama/Llama-2-70b-chat-hf --no_use_cuda_graph >> $LOG/2_70b_chat_fp16 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python ppl_llama.py --hf_path meta-llama/Llama-2-13b-chat-hf >> $LOG/2_13b_chat_fp16 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python ppl_llama.py --hf_path meta-llama/Llama-2-7b-chat-hf >> $LOG/2_7b_chat_fp16 2>&1 &
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_llama.py  --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_chat_e8p_2bit >> $LOG/2_70b_chat_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_llama.py  --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_chat_e8p_2bit >> $LOG/2_13b_chat_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_llama.py  --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_chat_e8p_2bit >> $LOG/2_7b_chat_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3,4 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 2  --hf_path meta-llama/Llama-2-70b-chat-hf >> $LOG/2_70b_chat_fp16 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_llama.py  --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path meta-llama/Llama-2-13b-chat-hf >> $LOG/2_13b_chat_fp16 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_llama.py  --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path meta-llama/Llama-2-7b-chat-hf >> $LOG/2_7b_chat_fp16 2>&1 &
-wait					     
diff --git a/sbatch/llama2_nolr_test.sh b/sbatch/llama2_nolr_test.sh
deleted file mode 100644
index 8ea750b..0000000
--- a/sbatch/llama2_nolr_test.sh
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/bin/bash
-
-CKPT=/mnt/jerry_data/checkpoints
-HF=/mnt/jerry_data/hfized
-HESS=/mnt/jerry_data/hessians
-LOG=/mnt/jerry_data/logs
-
-CUDA_VISIBLE_DEVICES=1,2,3,4,5,6,7 python quantize_llama.py --save_path $CKPT/2_70b_hi_4bit_nolr --codebook HI4B1C --lora_rank 0 --scale_override 2.7 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 >> $LOG/2_70b_hi_4bit_nolr 2>&1
-
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_70b_hi_4bit_nolr --hf_output_path $HF/2_70b_hi_4bit_nolr 
-
-CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --hf_path $HF/2_70b_hi_4bit_nolr >> $LOG/2_70b_hi_4bit_nolr 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --hf_path $HF/2_13b_e8p_2bit_nolr >> $LOG/2_13b_e8p_2bit_nolr 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --hf_path $HF/2_7b_e8p_2bit_nolr  >> $LOG/2_7b_e8p_2bit_nolr  2>&1 &
-CUDA_VISIBLE_DEVICES=4 python ppl_llama.py --hf_path $HF/2_13b_hi_4bit_nolr  >> $LOG/2_13b_hi_4bit_nolr  2>&1 &
-CUDA_VISIBLE_DEVICES=5 python ppl_llama.py --hf_path $HF/2_7b_hi_4bit_nolr   >> $LOG/2_7b_hi_4bit_nolr   2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_hi_4bit_nolr >> $LOG/2_70b_hi_4bit_nolr 2>&1
-
-
-'''
-CUDA_VISIBLE_DEVICES=2,3,4,5,6,7 python quantize_llama.py --save_path $CKPT/2_13b_e8p_2bit_nolr --codebook E8P12 --lora_rank 0 --scale_override 0.9 --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144 >> $LOG/2_13b_e8p_2bit_nolr 2>&1
-CUDA_VISIBLE_DEVICES=2,3,4,5,6,7 python quantize_llama.py --save_path $CKPT/2_7b_e8p_2bit_nolr  --codebook E8P12 --lora_rank 0 --scale_override 0.9 --base_model meta-llama/Llama-2-7b-hf  --hessian_path $HESS/llama2_7b_6144  >> $LOG/2_7b_e8p_2bit_nolr 2>&1
-CUDA_VISIBLE_DEVICES=2,3,4,5,6,7 python quantize_llama.py --save_path $CKPT/2_13b_hi_4bit_nolr  --codebook HI4B1C  --lora_rank 0 --scale_override 2.7 --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144 >> $LOG/2_13b_hi_4bit_nolr 2>&1
-CUDA_VISIBLE_DEVICES=2,3,4,5,6,7 python quantize_llama.py --save_path $CKPT/2_7b_hi_4bit_nolr   --codebook HI4B1C  --lora_rank 0 --scale_override 2.7 --base_model meta-llama/Llama-2-7b-hf  --hessian_path $HESS/llama2_7b_6144  >> $LOG/2_7b_hi_4bit_nolr 2>&1
-
-
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_13b_e8p_2bit_nolr --hf_output_path $HF/2_13b_e8p_2bit_nolr &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_7b_e8p_2bit_nolr  --hf_output_path $HF/2_7b_e8p_2bit_nolr  &
-CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/2_13b_hi_4bit_nolr  --hf_output_path $HF/2_13b_hi_4bit_nolr  &
-CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/2_7b_hi_4bit_nolr   --hf_output_path $HF/2_7b_hi_4bit_nolr   &
-
-wait
-
-CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --seqlen 2048 --hf_path $HF/2_13b_e8p_2bit_nolr >> $LOG/2_13b_e8p_2bit_nolr 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --seqlen 2048 --hf_path $HF/2_7b_e8p_2bit_nolr  >> $LOG/2_7b_e8p_2bit_nolr  2>&1 &
-CUDA_VISIBLE_DEVICES=6 python ppl_llama.py --seqlen 2048 --hf_path $HF/2_13b_hi_4bit_nolr  >> $LOG/2_13b_hi_4bit_nolr  2>&1 &
-CUDA_VISIBLE_DEVICES=7 python ppl_llama.py --seqlen 2048 --hf_path $HF/2_7b_hi_4bit_nolr   >> $LOG/2_7b_hi_4bit_nolr   2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=2 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_e8p_2bit_nolr >> $LOG/2_13b_e8p_2bit_nolr 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_e8p_2bit_nolr  >> $LOG/2_7b_e8p_2bit_nolr  2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_hi_4bit_nolr  >> $LOG/2_13b_hi_4bit_nolr  2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_hi_4bit_nolr   >> $LOG/2_7b_hi_4bit_nolr   2>&1 &
-
-wait
-
-'''
diff --git a/sbatch/lr_sweep_1.sh b/sbatch/lr_sweep_1.sh
deleted file mode 100644
index b4d0511..0000000
--- a/sbatch/lr_sweep_1.sh
+++ /dev/null
@@ -1,46 +0,0 @@
-#!/bin/bash
-
-CKPT=/mnt/jerry_data/checkpoints
-HF=/mnt/jerry_data/hfized
-HESS=/mnt/jerry_data/hessians
-LOG=/mnt/jerry_data/logs
-
-
-'''
-python quantize_llama.py --save_path $CKPT/2_70b_e8p_2bit_fulllr64 --codebook E8P12 --scale_override 0.9 --lora_rank 64 --hessian_path $HESS/llama2_70b_6144 --full_svd >> $LOG/2_70b_e8p_2bit_fulllr64 2>&1
-python quantize_llama.py --save_path $CKPT/2_70b_e8p_2bit_fulllr32 --codebook E8P12 --scale_override 0.9 --lora_rank 32 --hessian_path $HESS/llama2_70b_6144 --full_svd >> $LOG/2_70b_e8p_2bit_fulllr32 2>&1
-python quantize_llama.py --save_path $CKPT/2_70b_e8p_2bit_fulllr16 --codebook E8P12 --scale_override 0.9 --lora_rank 16 --hessian_path $HESS/llama2_70b_6144 --full_svd >> $LOG/2_70b_e8p_2bit_fulllr16 2>&1
-python quantize_llama.py --save_path $CKPT/2_70b_e8p_2bit_fulllr8 --codebook E8P12 --scale_override 0.9 --lora_rank 8 --hessian_path $HESS/llama2_70b_6144 --full_svd >> $LOG/2_70b_e8p_2bit_fulllr8 2>&1
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_e8p_2bit_fulllr64 --hf_output_path $HF/2_70b_e8p_2bit_fulllr64 &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_70b_e8p_2bit_fulllr32 --hf_output_path $HF/2_70b_e8p_2bit_fulllr32 &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_70b_e8p_2bit_fulllr16 --hf_output_path $HF/2_70b_e8p_2bit_fulllr16 &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_70b_e8p_2bit_fulllr8  --hf_output_path $HF/2_70b_e8p_2bit_fulllr8  &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --seqlen 2048 --hf_path $HF/2_70b_e8p_2bit_fulllr64 >> $LOG/2_70b_e8p_2bit_fulllr64 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --seqlen 2048 --hf_path $HF/2_70b_e8p_2bit_fulllr32 >> $LOG/2_70b_e8p_2bit_fulllr32 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --seqlen 2048 --hf_path $HF/2_70b_e8p_2bit_fulllr16 >> $LOG/2_70b_e8p_2bit_fulllr16 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --seqlen 2048 --hf_path $HF/2_70b_e8p_2bit_fulllr8  >> $LOG/2_70b_e8p_2bit_fulllr8  2>&1 &
-
-wait
-'''
-
-
-CUDA_VISIBLE_DEVICES=0 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_fulllr64 >> $LOG/2_70b_e8p_2bit_fulllr64 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_fulllr32 >> $LOG/2_70b_e8p_2bit_fulllr32 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_fulllr16 >> $LOG/2_70b_e8p_2bit_fulllr16 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_fulllr8  >> $LOG/2_70b_e8p_2bit_fulllr8  2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_fulllr4 >> $LOG/2_70b_e8p_2bit_fulllr4 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_fulllr2 >> $LOG/2_70b_e8p_2bit_fulllr2 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_fulllr1 >> $LOG/2_70b_e8p_2bit_fulllr1 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_fulllr0 >> $LOG/2_70b_e8p_2bit_fulllr0  2>&1 &
-
-
-wait
diff --git a/sbatch/lr_sweep_2.sh b/sbatch/lr_sweep_2.sh
deleted file mode 100644
index 1d61403..0000000
--- a/sbatch/lr_sweep_2.sh
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/bin/bash
-
-CKPT=/mnt/jerry_data/checkpoints
-HF=/mnt/jerry_data/hfized
-HESS=/mnt/jerry_data/hessians
-LOG=/mnt/jerry_data/logs
-
-
-python quantize_llama.py --save_path $CKPT/2_70b_e8p_2bit_fulllr4 --codebook E8P12 --scale_override 0.9 --lora_rank 4 --hessian_path $HESS/llama2_70b_6144 --full_svd >> $LOG/2_70b_e8p_2bit_fulllr4 2>&1
-python quantize_llama.py --save_path $CKPT/2_70b_e8p_2bit_fulllr2 --codebook E8P12 --scale_override 0.9 --lora_rank 2 --hessian_path $HESS/llama2_70b_6144 --full_svd >> $LOG/2_70b_e8p_2bit_fulllr2 2>&1
-python quantize_llama.py --save_path $CKPT/2_70b_e8p_2bit_fulllr1 --codebook E8P12 --scale_override 0.9 --lora_rank 1 --hessian_path $HESS/llama2_70b_6144 --full_svd >> $LOG/2_70b_e8p_2bit_fulllr1 2>&1
-python quantize_llama.py --save_path $CKPT/2_70b_e8p_2bit_nolr --codebook E8P12 --scale_override 0.9 --lora_rank 0 --hessian_path $HESS/llama2_70b_6144 --full_svd >> $LOG/2_70b_e8p_2bit_nolr 2>&1
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_e8p_2bit_fulllr4 --hf_output_path $HF/2_70b_e8p_2bit_fulllr4 &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_70b_e8p_2bit_fulllr2 --hf_output_path $HF/2_70b_e8p_2bit_fulllr2 &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_70b_e8p_2bit_fulllr1 --hf_output_path $HF/2_70b_e8p_2bit_fulllr1 &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_70b_e8p_2bit_nolr  --hf_output_path $HF/2_70b_e8p_2bit_nolr &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --hf_path $HF/2_70b_e8p_2bit_fulllr4 >> $LOG/2_70b_e8p_2bit_fulllr4 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --hf_path $HF/2_70b_e8p_2bit_fulllr2 >> $LOG/2_70b_e8p_2bit_fulllr2 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --hf_path $HF/2_70b_e8p_2bit_fulllr1 >> $LOG/2_70b_e8p_2bit_fulllr1 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --hf_path $HF/2_70b_e8p_2bit_nolr  >> $LOG/2_70b_e8p_2bit_nolr  2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_fulllr4 >> $LOG/2_70b_e8p_2bit_fulllr4 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_fulllr2 >> $LOG/2_70b_e8p_2bit_fulllr2 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_fulllr1 >> $LOG/2_70b_e8p_2bit_fulllr1 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_nolr  >> $LOG/2_70b_e8p_2bit_nolr  2>&1 &
-
-wait
diff --git a/sbatch/mistral_hermes.sh b/sbatch/mistral_hermes.sh
deleted file mode 100644
index 94b28d6..0000000
--- a/sbatch/mistral_hermes.sh
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/bin/bash
-
-CKPT=/mnt/desa_data/checkpoints
-HF=/mnt/desa_data/hfized
-HESS=/mnt/desa_data/hessians
-LOG=/mnt/desa_data/logs
-L1=/mnt/desa_data/meta_llama1
-
-'''
-python quantize_llama.py --save_path $CKPT/mistral_7b_hi_4bit_packed --codebook HI4B1C --scale_override 2.7 --base_model mistralai/Mistral-7B-v0.1 --hessian_path $HESS/mistral_7b_4096 >> $LOG/mistral_7b_hi_4bit_packed 2>&1
-python quantize_llama.py --save_path $CKPT/openhermes_7b_hi_4bit_packed --codebook HI4B1C --scale_override 2.7 --base_model teknium/OpenHermes-2.5-Mistral-7B --hessian_path $HESS/openhermes_7b_4096 >> $LOG/openhermes_7b_hi_4bit_packed 2>&1
-python quantize_llama.py --save_path $CKPT/mistral_7b_e8p_2bit --codebook E8P12 --scale_override 0.9 --base_model mistralai/Mistral-7B-v0.1 --hessian_path $HESS/mistral_7b_4096 >> $LOG/mistral_7b_e8p_2bit 2>&1
-python quantize_llama.py --save_path $CKPT/openhermes_7b_e8p_2bit --codebook E8P12 --scale_override 0.9 --base_model teknium/OpenHermes-2.5-Mistral-7B --hessian_path $HESS/openhermes_7b_4096 >> $LOG/openhermes_7b_e8p_2bit 2>&1
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/mistral_7b_hi_4bit_packed    --hf_output_path $HF/mistral_7b_hi_4bit_packed    & 
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/openhermes_7b_hi_4bit_packed --hf_output_path $HF/openhermes_7b_hi_4bit_packed &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/mistral_7b_e8p_2bit          --hf_output_path $HF/mistral_7b_e8p_2bit          &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/openhermes_7b_e8p_2bit       --hf_output_path $HF/openhermes_7b_e8p_2bit       &
-
-wait
-'''
-# perplexity
-CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --no_use_cuda_graph --seqlen 8192 --hf_path $HF/mistral_7b_hi_4bit_packed    >> $LOG/mistral_7b_hi_4bit_packed    2>&1 &
-CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --no_use_cuda_graph --seqlen 8192 --hf_path $HF/openhermes_7b_hi_4bit_packed >> $LOG/openhermes_7b_hi_4bit_packed 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --no_use_cuda_graph --seqlen 8192 --hf_path $HF/mistral_7b_e8p_2bit          >> $LOG/mistral_7b_e8p_2bit          2>&1 &
-CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --no_use_cuda_graph --seqlen 8192 --hf_path $HF/openhermes_7b_e8p_2bit       >> $LOG/openhermes_7b_e8p_2bit       2>&1 &
-CUDA_VISIBLE_DEVICES=4 python ppl_llama.py --no_use_cuda_graph --seqlen 8192 --hf_path mistralai/Mistral-7B-v0.1        >> $LOG/mistral_7b_fp16 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python ppl_llama.py --no_use_cuda_graph --seqlen 8192 --hf_path teknium/OpenHermes-2.5-Mistral-7B >> $LOG/openhermes_7b_fp16 2>&1 &
-
-wait
-'''
-CUDA_VISIBLE_DEVICES=0 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/mistral_7b_hi_4bit_packed    >> $LOG/mistral_7b_hi_4bit_packed    2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/openhermes_7b_hi_4bit_packed >> $LOG/openhermes_7b_hi_4bit_packed 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/mistral_7b_e8p_2bit          >> $LOG/mistral_7b_e8p_2bit          2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/openhermes_7b_e8p_2bit       >> $LOG/openhermes_7b_e8p_2bit       2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path mistralai/Mistral-7B-v0.1        >> $LOG/mistral_7b_fp16 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path teknium/OpenHermes-2.5-Mistral-7B >> $LOG/openhermes_7b_fp16 2>&1 &
-
-wait
-'''
diff --git a/sbatch/mistral_hermes_hessian.sh b/sbatch/mistral_hermes_hessian.sh
deleted file mode 100644
index d89ed9a..0000000
--- a/sbatch/mistral_hermes_hessian.sh
+++ /dev/null
@@ -1,8 +0,0 @@
-#!/bin/bash
-
-python hessian_offline.py --batch_size 2 --devset_size 4096 --ctx_size 8192 --base_model mistralai/Mistral-7B-v0.1 --save_path /work/desa_data/hessians/mistral_7b_4096
-
-python hessian_offline.py --batch_size 2 --devset_size 4096 --ctx_size 8192 --base_model teknium/OpenHermes-2.5-Mistral-7B --save_path /work/desa_data/hessians/openhermes_7b_4096
-
-
-
diff --git a/sbatch/mixtral.sh b/sbatch/mixtral.sh
deleted file mode 100644
index aa329c9..0000000
--- a/sbatch/mixtral.sh
+++ /dev/null
@@ -1,115 +0,0 @@
-#!/bin/bash
-
-CKPT=/mnt/desa_data/checkpoints
-HF=/mnt/desa_data/hfized/jerry
-HESS=/mnt/desa_data/hessians
-LOG=/mnt/desa_data/logs
-L1=/mnt/desa_data/meta_llama1
-
-source ~/miniconda3/bin/activate quipv2_mixtral
-
-## mixtral 8192ctx
-# python quantize_mixtral.py --save_path $CKPT/mixtral_8x7b_e8p_2bit_RPv1_4096dev8192ctx \
-#     --codebook E8P12 --scale_override 0.9 --base_model mistralai/Mixtral-8x7B-v0.1 \
-#     --hessian_path $HESS/mixtral_8x7b_RPv1_4096dev8192ctx >> $LOG/mixtral_8x7b_e8p_2bit_RPv1_4096dev8192ctx 2>&1
-# python quantize_mixtral.py --save_path $CKPT/mixtral_8x7b_hi_4bit_RPv1_4096dev8192ctx \
-#     --codebook HI4B1C --scale_override 2.7 --base_model mistralai/Mixtral-8x7B-v0.1 \
-#     --hessian_path $HESS/mixtral_8x7b_RPv1_4096dev8192ctx >> $LOG/mixtral_8x7b_hi_4bit_RPv1_4096dev8192ctx 2>&1
-
-## mixtral 12288ctx
-# python quantize_mixtral.py --save_path $CKPT/mixtral_8x7b_e8p_2bit_RPv1_4096dev12288ctx \
-#     --codebook E8P12 --scale_override 0.9 --base_model mistralai/Mixtral-8x7B-v0.1 \
-#     --hessian_path $HESS/mixtral_8x7b_RPv1_4096dev12288ctx >> $LOG/mixtral_8x7b_e8p_2bit_RPv1_4096dev12288ctx 2>&1
-# python quantize_mixtral.py --save_path $CKPT/mixtral_8x7b_hi_4bit_RPv1_4096dev12288ctx \
-#     --codebook HI4B1C --scale_override 2.7 --base_model mistralai/Mixtral-8x7B-v0.1 \
-#     --hessian_path $HESS/mixtral_8x7b_RPv1_4096dev12288ctx >> $LOG/mixtral_8x7b_hi_4bit_RPv1_4096dev12288ctx 2>&1
-
-## mixtral-instruct 8192ctx
-# python quantize_mixtral.py --save_path $CKPT/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev8192ctx \
-#     --codebook E8P12 --scale_override 0.9 --base_model mistralai/Mixtral-8x7b-Instruct-v0.1 \
-#     --hessian_path $HESS/mixtral_8x7b_instruct_RPv1_4096dev8192ctx >> $LOG/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev8192ctx 2>&1
-# python quantize_mixtral.py --save_path $CKPT/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev8192ctx \
-#     --codebook HI4B1C --scale_override 2.7 --base_model mistralai/Mixtral-8x7b-Instruct-v0.1 \
-#     --hessian_path $HESS/mixtral_8x7b_instruct_RPv1_4096dev8192ctx >> $LOG/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev8192ctx 2>&1
-# 
-## mixtral-instruct 12288ctx
-python quantize_mixtral.py --save_path $CKPT/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev12288ctx \
-    --codebook E8P12 --scale_override 0.9 --base_model mistralai/Mixtral-8x7b-Instruct-v0.1 \
-    --hessian_path $HESS/mixtral_8x7b_instruct_RPv1_4096dev12288ctx >> $LOG/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev12288ctx 2>&1
-python quantize_mixtral.py --save_path $CKPT/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev12288ctx \
-    --codebook HI4B1C --scale_override 2.7 --base_model mistralai/Mixtral-8x7b-Instruct-v0.1 \
-    --hessian_path $HESS/mixtral_8x7b_instruct_RPv1_4096dev12288ctx >> $LOG/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev12288ctx 2>&1
-
-# CUDA_VISIBLE_DEVICES=0 python hfize_mixtral.py --quantized_path $CKPT/mixtral_8x7b_e8p_2bit_RPv1_4096dev8192ctx \
-#     --hf_output_path $HF/mixtral_8x7b_e8p_2bit_RPv1_4096dev8192ctx &
-# wait
-
-## hfize
-## mixtral 8192ctx
-# CUDA_VISIBLE_DEVICES=0 python hfize_mixtral.py --quantized_path $CKPT/mixtral_8x7b_e8p_2bit_RPv1_4096dev8192ctx \
-#     --hf_output_path $HF/mixtral_8x7b_e8p_2bit_RPv1_4096dev8192ctx & 
-# CUDA_VISIBLE_DEVICES=0 python hfize_mixtral.py --quantized_path $CKPT/mixtral_8x7b_hi_4bit_RPv1_4096dev8192ctx \
-#     --hf_output_path $HF/mixtral_8x7b_hi_4bit_RPv1_4096dev8192ctx & 
-# ## mixtral 12288ctx
-# CUDA_VISIBLE_DEVICES=1 python hfize_mixtral.py --quantized_path $CKPT/mixtral_8x7b_e8p_2bit_RPv1_4096dev12288ctx \
-#     --hf_output_path $HF/mixtral_8x7b_e8p_2bit_RPv1_4096dev12288ctx & 
-# CUDA_VISIBLE_DEVICES=2 python hfize_mixtral.py --quantized_path $CKPT/mixtral_8x7b_hi_4bit_RPv1_4096dev12288ctx \
-#     --hf_output_path $HF/mixtral_8x7b_hi_4bit_RPv1_4096dev12288ctx & 
-# ## mixtral-instruct 8192ctx
-# CUDA_VISIBLE_DEVICES=0 python hfize_mixtral.py --quantized_path $CKPT/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev8192ctx \
-#     --hf_output_path $HF/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev8192ctx & 
-# CUDA_VISIBLE_DEVICES=1 python hfize_mixtral.py --quantized_path $CKPT/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev8192ctx \
-#     --hf_output_path $HF/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev8192ctx & 
-## mixtral-instruct 12288ctx
-CUDA_VISIBLE_DEVICES=0 python hfize_mixtral.py --quantized_path $CKPT/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev12288ctx \
-    --hf_output_path $HF/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev12288ctx & 
-CUDA_VISIBLE_DEVICES=1 python hfize_mixtral.py --quantized_path $CKPT/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev12288ctx \
-    --hf_output_path $HF/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev12288ctx & 
-wait
-
-## perplexity
-## mixtral 8192ctx
-# CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --no_use_cuda_graph --seqlen 8192 \
-#     --hf_path $HF/mixtral_8x7b_e8p_2bit_RPv1_4096dev8192ctx >> $LOG/mixtral_8x7b_e8p_2bit_RPv1_4096dev8192ctx 2>&1 &
-# CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --no_use_cuda_graph --seqlen 8192 \
-#     --hf_path $HF/mixtral_8x7b_hi_4bit_RPv1_4096dev8192ctx >> $LOG/mixtral_8x7b_hi_4bit_RPv1_4096dev8192ctx 2>&1 &
-# ## mixtral 12288ctx
-# CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --no_use_cuda_graph --seqlen 8192 \
-#     --hf_path $HF/mixtral_8x7b_e8p_2bit_RPv1_4096dev12288ctx >> $LOG/mixtral_8x7b_e8p_2bit_RPv1_4096dev12288ctx 2>&1 &
-# CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --no_use_cuda_graph --seqlen 8192 \
-#     --hf_path $HF/mixtral_8x7b_hi_4bit_RPv1_4096dev12288ctx >> $LOG/mixtral_8x7b_hi_4bit_RPv1_4096dev12288ctx 2>&1 &
-# wait
-# ## mixtral-instruct 8192ctx
-# CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --no_use_cuda_graph --seqlen 8192 \
-#     --hf_path $HF/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev8192ctx >> $LOG/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev8192ctx 2>&1 &
-# CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --no_use_cuda_graph --seqlen 8192 \
-#     --hf_path $HF/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev8192ctx >> $LOG/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev8192ctx 2>&1 &
-## mixtral-instruct 12288ctx
-CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --no_use_cuda_graph --seqlen 8192 \
-    --hf_path $HF/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev12288ctx >> $LOG/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev12288ctx 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --no_use_cuda_graph --seqlen 8192 \
-    --hf_path $HF/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev12288ctx >> $LOG/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev12288ctx 2>&1 &
-wait
-
-## zeroshot
-## mixtral 8192ctx
-# CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande \
-#     --batch_size 4 --hf_path $HF/mixtral_8x7b_e8p_2bit_RPv1_4096dev8192ctx >> $LOG/mixtral_8x7b_e8p_2bit_RPv1_4096dev8192ctx 2>&1 &
-# CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande \
-#     --batch_size 4 --hf_path $HF/mixtral_8x7b_hi_4bit_RPv1_4096dev8192ctx >> $LOG/mixtral_8x7b_hi_4bit_RPv1_4096dev8192ctx 2>&1 &
-# ## mixtral 12288ctx
-# CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande \
-#     --batch_size 4 --hf_path $HF/mixtral_8x7b_e8p_2bit_RPv1_4096dev12288ctx >> $LOG/mixtral_8x7b_e8p_2bit_RPv1_4096dev12288ctx 2>&1 &
-# CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande \
-#     --batch_size 4 --hf_path $HF/mixtral_8x7b_hi_4bit_RPv1_4096dev12288ctx >> $LOG/mixtral_8x7b_hi_4bit_RPv1_4096dev12288ctx 2>&1 &
-# ## mixtral-instruct 8192ctx
-# CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande \
-#     --batch_size 4 --hf_path $HF/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev8192ctx >> $LOG/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev8192ctx 2>&1 &
-# CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande \
-#     --batch_size 4 --hf_path $HF/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev8192ctx >> $LOG/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev8192ctx 2>&1 &
-## mixtral-instruct 12288ctx
-CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande \
-    --batch_size 4 --hf_path $HF/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev12288ctx >> $LOG/mixtral_8x7b_instruct_e8p_2bit_RPv1_4096dev12288ctx 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande \
-    --batch_size 4 --hf_path $HF/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev12288ctx >> $LOG/mixtral_8x7b_instruct_hi_4bit_RPv1_4096dev12288ctx 2>&1 &
-wait
\ No newline at end of file
diff --git a/sbatch/mixtral_fp16.sh b/sbatch/mixtral_fp16.sh
deleted file mode 100644
index 56e2ca3..0000000
--- a/sbatch/mixtral_fp16.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/bin/bash
-
-LOG=/mnt/desa_data/logs
-
-source ~/miniconda3/bin/activate quipv2_mixtral
-
-# python eval_ppl.py --no_use_cuda_graph --seqlen 8192 --hf_path mistralai/Mixtral-8x7B-v0.1 >> $LOG/mixtral_8x7b_fp16 2>&1
-python eval_ppl.py --no_use_cuda_graph --seqlen 8192 --hf_path mistralai/Mixtral-8x7B-Instruct-v0.1 >> $LOG/mixtral_8x7b_instruct_fp16 2>&1
-
-# python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande \
-#     --batch_size 2 --hf_path mistralai/Mixtral-8x7B-v0.1 >> $LOG/mixtral_8x7b_fp16 2>&1
-python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande \
-    --batch_size 2 --hf_path mistralai/Mixtral-8x7B-Instruct-v0.1 >> $LOG/mixtral_8x7b_instruct_fp16 2>&1
\ No newline at end of file
diff --git a/sbatch/mixtral_hessian.sh b/sbatch/mixtral_hessian.sh
deleted file mode 100644
index 68b03da..0000000
--- a/sbatch/mixtral_hessian.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/bash
-
-source ~/miniconda3/bin/activate quipv2_mixtral
-
-# TOKENIZERS_PARALLELISM=false python hessian_offline_mixtral.py \
-#     --batch_size 2 --devset_size 64 --ctx_size 2048 --base_model mistralai/Mixtral-8x7B-v0.1 \
-#     --save_path /mnt/desa_data/hessians/mixtral_8x7b_RPv1_64dev2048ctx
-
-# TOKENIZERS_PARALLELISM=false python hessian_offline_mixtral.py \
-#     --batch_size 2 --devset_size 4096 --ctx_size 8192 --base_model mistralai/Mixtral-8x7B-v0.1 \
-#     --save_path /mnt/desa_data/hessians/mixtral_8x7b_RPv1_4096dev8192ctx
-
-# TOKENIZERS_PARALLELISM=false python hessian_offline_mixtral.py \
-#     --batch_size 1 --sample_proc 12 --devset_size 4096 --ctx_size 12288 \
-#     --save_activations --base_model mistralai/Mixtral-8x7B-v0.1 \
-#     --save_path /mnt/desa_data/hessians/mixtral_8x7b_RPv1_4096dev12288ctx
-
-# TOKENIZERS_PARALLELISM=false python hessian_offline_mixtral.py \
-#     --batch_size 1 --sample_proc 12 --devset_size 4096 --ctx_size 8192 \
-#     --save_activations --base_model mistralai/Mixtral-8x7B-Instruct-v0.1 \
-#     --save_path /mnt/desa_data/hessians/mixtral_8x7b_instruct_RPv1_4096dev8192ctx
-
-# TOKENIZERS_PARALLELISM=false python hessian_offline_mixtral.py \
-#     --batch_size 1 --sample_proc 12 --devset_size 4096 --ctx_size 12288 \
-#     --save_activations --base_model mistralai/Mixtral-8x7B-Instruct-v0.1 \
-#     --save_path /mnt/desa_data/hessians/mixtral_8x7b_instruct_RPv1_4096dev12288ctx
-
-TOKENIZERS_PARALLELISM=false python hessian_offline_mixtral.py \
-    --batch_size 1 --sample_proc 12 --devset_size 4096 --ctx_size 12288  \
-    --base_model mistralai/Mixtral-8x7B-v0.1 --dataset "togethercomputer/RedPajama-Data-V2" \
-    --save_path /mnt/desa_data/hessians/mixtral_8x7b_RPv2_4096dev12288ctx
\ No newline at end of file
diff --git a/sbatch/nofuse_test.sh b/sbatch/nofuse_test.sh
deleted file mode 100644
index af78130..0000000
--- a/sbatch/nofuse_test.sh
+++ /dev/null
@@ -1,47 +0,0 @@
-#!/bin/bash
-
-CKPT=/mnt/desa_data/checkpoints
-HF=/mnt/desa_data/hfized
-HESS=/mnt/desa_data/hessians
-LOG=/mnt/desa_data/logs
-L1=/mnt/desa_data/meta_llama1
-
-'''
-python quantize_llama_nofuse.py --save_path $CKPT/2_70b_e8p_2bit_nofuse --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 >> $LOG/2_70b_e8p_2bit_nofuse 2>&1
-python quantize_llama_nofuse.py --save_path $CKPT/2_13b_e8p_2bit_nofuse --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144 >> $LOG/2_13b_e8p_2bit_nofuse 2>&1
-python quantize_llama_nofuse.py --save_path $CKPT/2_7b_e8p_2bit_nofuse  --codebook E8P12 --scale_override 0.9 --base_model meta-llama/Llama-2-7b-hf  --hessian_path $HESS/llama2_7b_6144  >> $LOG/2_7b_e8p_2bit_nofuse 2>&1
-python quantize_llama_nofuse.py --save_path $CKPT/1_65b_e8p_2bit_nofuse --codebook E8P12 --scale_override 0.9 --base_model $L1/huggingface_65B --hessian_path $HESS/llama1_65b_6144 >> $LOG/1_65b_e8p_2bit_nofuse 2>&1
-python quantize_llama_nofuse.py --save_path $CKPT/1_30b_e8p_2bit_nofuse --codebook E8P12 --scale_override 0.9 --base_model $L1/huggingface_30B --hessian_path $HESS/llama1_30b_6144 >> $LOG/1_30b_e8p_2bit_nofuse 2>&1
-python quantize_llama_nofuse.py --save_path $CKPT/1_13b_e8p_2bit_nofuse --codebook E8P12 --scale_override 0.9 --base_model $L1/huggingface_13B --hessian_path $HESS/llama1_13b_6144 >> $LOG/1_13b_e8p_2bit_nofuse 2>&1
-python quantize_llama_nofuse.py --save_path $CKPT/1_7b_e8p_2bit_nofuse  --codebook E8P12 --scale_override 0.9 --base_model $L1/huggingface_7B  --hessian_path $HESS/llama1_7b_6144  >> $LOG/1_7b_e8p_2bit_nofuse 2>&1
-
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_e8p_2bit_nofuse --hf_output_path $HF/2_70b_e8p_2bit_nofuse >> $LOG/2_70b_e8p_2bit_nofuse 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_13b_e8p_2bit_nofuse --hf_output_path $HF/2_13b_e8p_2bit_nofuse >> $LOG/2_13b_e8p_2bit_nofuse 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_7b_e8p_2bit_nofuse --hf_output_path $HF/2_7b_e8p_2bit_nofuse >> $LOG/2_7b_e8p_2bit_nofuse 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/1_65b_e8p_2bit_nofuse --hf_output_path $HF/1_65b_e8p_2bit_nofuse >> $LOG/1_65b_e8p_2bit_nofuse 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/1_30b_e8p_2bit_nofuse --hf_output_path $HF/1_30b_e8p_2bit_nofuse >> $LOG/1_30b_e8p_2bit_nofuse 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/1_13b_e8p_2bit_nofuse --hf_output_path $HF/1_13b_e8p_2bit_nofuse >> $LOG/1_13b_e8p_2bit_nofuse 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/1_7b_e8p_2bit_nofuse --hf_output_path $HF/1_7b_e8p_2bit_nofuse >> $LOG/1_7b_e8p_2bit_nofuse 2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --hf_path $HF/2_70b_e8p_2bit_nofuse >> $LOG/2_70b_e8p_2bit_nofuse 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --hf_path $HF/2_13b_e8p_2bit_nofuse >> $LOG/2_13b_e8p_2bit_nofuse 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --hf_path $HF/2_7b_e8p_2bit_nofuse >> $LOG/2_7b_e8p_2bit_nofuse 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_65b_e8p_2bit_nofuse >> $LOG/1_65b_e8p_2bit_nofuse 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_30b_e8p_2bit_nofuse >> $LOG/1_30b_e8p_2bit_nofuse 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_13b_e8p_2bit_nofuse >> $LOG/1_13b_e8p_2bit_nofuse 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_7b_e8p_2bit_nofuse >> $LOG/1_7b_e8p_2bit_nofuse 2>&1 &
-
-wait
-'''
-CUDA_VISIBLE_DEVICES=0 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8p_2bit_nofuse >> $LOG/2_70b_e8p_2bit_nofuse 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_e8p_2bit_nofuse >> $LOG/2_13b_e8p_2bit_nofuse 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_e8p_2bit_nofuse >> $LOG/2_7b_e8p_2bit_nofuse 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_e8p_2bit_nofuse >> $LOG/1_65b_e8p_2bit_nofuse 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_e8p_2bit_nofuse >> $LOG/1_30b_e8p_2bit_nofuse 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_e8p_2bit_nofuse >> $LOG/1_13b_e8p_2bit_nofuse 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_e8p_2bit_nofuse >> $LOG/1_7b_e8p_2bit_nofuse 2>&1 &
-
-wait
diff --git a/sbatch/old/2_13b_e8p_2bit.sbatch b/sbatch/old/2_13b_e8p_2bit.sbatch
deleted file mode 100644
index 75ddcce..0000000
--- a/sbatch/old/2_13b_e8p_2bit.sbatch
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu
-#SBATCH --job-name=2_13b_e8p_2bit
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=64G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:1
-#SBATCH --constraint='gpu-high'
-#SBATCH --time=48:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-'''
-CKPT=checkpoints
-
-python quantize_llama.py \
-       --save_path $CKPT/2_13b_e8p_2bit \
-       --codebook E8P12 \
-       --sigma_reg2 1e-2 \
-       --scale_override 0.9 \
-       --base_model meta-llama/Llama-2-13b-hf \
-       --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_13b_6144
-
-python hfize_llama.py --quantized_path $CKPT/2_13b_e8p_2bit --hf_output_path hfized/2_13b_e8p_2bit
-
-'''
-
-python ppl_llama.py --hf_path hfized/2_13b_e8p_2bit
-python eval_llama.py --hf_path hfized/2_13b_e8p_2bit --batch_size 4 --tasks arc_challenge,arc_easy,boolq,piqa,winogrande
diff --git a/sbatch/old/2_70b_e8p_2bit.sh b/sbatch/old/2_70b_e8p_2bit.sh
deleted file mode 100644
index 3041282..0000000
--- a/sbatch/old/2_70b_e8p_2bit.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/bash
-
-CKPT=/mnt/jerry_data/checkpoints
-HF=/mnt/jerry_data/hfized
-HESS=/mnt/jerry_data/hessians
-LOG=/mnt/jerry_data/logs
-NAME=2_70b_e8p_2bit
-
-python quantize_llama.py --save_path $CKPT/$NAME --codebook E8P12 --sigma_reg2 1e-2 --scale 0.90 --hessian_path $HESS/llama2_70b_6144 >> $LOG/$NAME 2>&1
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/$NAME --hf_output_path $HF/$NAME >> $LOG/$NAME 2>&1
-CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --hf_path $HF/$NAME >> $LOG/$NAME 2>&1
-CUDA_VISIBLE_DEVICES=0 python eval_llama.py --hf_path $HF/$NAME --batch_size 4 --tasks arc_challenge,arc_easy,boolq,piqa,winogrande >> $LOG/$NAME 2>&1
diff --git a/sbatch/old/2_7b_e8p_2bit.sbatch b/sbatch/old/2_7b_e8p_2bit.sbatch
deleted file mode 100644
index b68b2f7..0000000
--- a/sbatch/old/2_7b_e8p_2bit.sbatch
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu
-#SBATCH --job-name=2_7b_e8p_2bit
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=64G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:1
-#SBATCH --constraint='gpu-high'
-#SBATCH --time=48:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-'''
-CKPT=checkpoints
-
-python quantize_llama.py \
-       --save_path $CKPT/2_7b_e8p_2bit \
-       --codebook E8P12 \
-       --sigma_reg2 1e-2 \
-       --scale_override 0.9 \
-       --base_model meta-llama/Llama-2-7b-hf \
-       --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_7b_6144
-
-python hfize_llama.py --quantized_path $CKPT/2_7b_e8p_2bit --hf_output_path hfized/2_7b_e8p_2bit
-
-'''
-
-python ppl_llama.py --hf_path hfized/2_7b_e8p_2bit
-python eval_llama.py --hf_path hfized/2_7b_e8p_2bit --batch_size 4 --tasks arc_challenge,arc_easy,boolq,piqa,winogrande
-
diff --git a/sbatch/old/d4.sbatch b/sbatch/old/d4.sbatch
deleted file mode 100644
index 33f3099..0000000
--- a/sbatch/old/d4.sbatch
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu,ellis
-#SBATCH --job-name=d4_fast2
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=96G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:1
-#SBATCH --constraint='gpu-high'
-#SBATCH --time=24:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-python quantize_llama.py --save_path checkpoints/d4_fast2 --codebook D4 
-python hfize_llama.py --quantized_path checkpoints/d4_fast2 --hf_output_path hfized/d4_fast2
-python ppl_llama.py --hf_path hfized/d4_fast2 --dataset c4
diff --git a/sbatch/old/d44b_13b.sbatch b/sbatch/old/d44b_13b.sbatch
deleted file mode 100644
index 3df4b34..0000000
--- a/sbatch/old/d44b_13b.sbatch
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu
-#SBATCH --job-name=d44b_13b
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=64G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:1
-#SBATCH --constraint='gpu-high'
-#SBATCH --time=48:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-'''
-CKPT=checkpoints
-
-python quantize_llama.py --save_path $CKPT/d44b_13b --codebook D44B --sigma_reg2 1e-2 --scale 3.4 --base_model meta-llama/Llama-2-13b-hf --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_13b_6144
-python hfize_llama.py --quantized_path $CKPT/d44b_13b --hf_output_path hfized/d44b_13b
-'''
-
-python ppl_llama.py --hf_path hfized/d44b_13b
-python eval_llama.py --hf_path hfized/d44b_13b --batch_size 4 --tasks arc_challenge,arc_easy,boolq,piqa,winogrande
-
diff --git a/sbatch/old/d44b_7b.sbatch b/sbatch/old/d44b_7b.sbatch
deleted file mode 100644
index 4170634..0000000
--- a/sbatch/old/d44b_7b.sbatch
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu
-#SBATCH --job-name=d44b_7b
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=96G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:1
-#SBATCH --constraint='gpu-high'
-#SBATCH --time=48:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-'''
-CKPT=checkpoints
-
-python quantize_llama.py --save_path $CKPT/d44b_7b --codebook D44B --sigma_reg2 1e-2 --scale 3.4 --base_model meta-llama/Llama-2-7b-hf --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_7b_6144
-python hfize_llama.py --quantized_path $CKPT/d44b_7b --hf_output_path hfized/d44b_7b
-'''
-
-python ppl_llama.py --hf_path hfized/d44b_7b
-python eval_llama.py --hf_path hfized/d44b_7b --batch_size 4 --tasks arc_challenge,arc_easy,boolq,piqa,winogrande
diff --git a/sbatch/old/d4_70b.sbatch b/sbatch/old/d4_70b.sbatch
deleted file mode 100644
index 8c2c5b1..0000000
--- a/sbatch/old/d4_70b.sbatch
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu
-#SBATCH --job-name=d4_70b
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=96G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:2
-#SBATCH --constraint='gpu-high'
-#SBATCH --time=48:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-CKPT=checkpoints
-
-python quantize_llama.py --save_path $CKPT/d4_70b --codebook D44B --sigma_reg2 1e-2 --scale 3.4 --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_70b_6144
-python hfize_llama.py --quantized_path $CKPT/d4_70b --hf_output_path hfized/d4_70b
-python ppl_llama.py --hf_path hfized/d4_70b
diff --git a/sbatch/old/d4_7b.sbatch b/sbatch/old/d4_7b.sbatch
deleted file mode 100644
index 21133ca..0000000
--- a/sbatch/old/d4_7b.sbatch
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu
-#SBATCH --job-name=d4_7b
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=96G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:1
-#SBATCH --constraint='gpu-high'
-#SBATCH --time=48:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-CKPT=checkpoints
-
-#python quantize_llama.py \
-#       --save_path $CKPT/d4_7b \
-#       --codebook D4 \
-#       --base_model meta-llama/Llama-2-7b-hf \
-#       --hessian_path /share/kuleshov/jc3464/quip/hessians/7b-chat-512dev-4096ctx \
-#       --use_fp64
-
-python hfize_llama.py --quantized_path $CKPT/d4_7b --hf_output_path hfized/d4_7b
-python ppl_llama.py --hf_path hfized/d4_7b --dataset c4
diff --git a/sbatch/old/e8.sbatch b/sbatch/old/e8.sbatch
deleted file mode 100644
index 5928752..0000000
--- a/sbatch/old/e8.sbatch
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu
-#SBATCH --job-name=e8
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=96G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:2
-#SBATCH --constraint='gpu-high'
-#SBATCH --exclude='coecis-compute-03'
-#SBATCH --time=48:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-#python quantize_llama.py --save_path checkpoints/e8 --codebook E8
-#python hfize_llama.py --quantized_path checkpoints/e8 --hf_output_path hfized/e8
-python ppl_llama.py --hf_path hfized/e8_fast --dataset c4
diff --git a/sbatch/old/e81b.sbatch b/sbatch/old/e81b.sbatch
deleted file mode 100644
index 065cdd4..0000000
--- a/sbatch/old/e81b.sbatch
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu
-#SBATCH --job-name=e81b
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=96G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:2
-#SBATCH --constraint='gpu-high'
-#SBATCH --time=48:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-CKPT=/share/desa/nfs02/quip_llama2/checkpoints/
-
-python quantize_llama.py --save_path $CKPT/e81b --codebook E81B
-python hfize_llama.py --quantized_path $CKPT/e81b --hf_output_path hfized/e81b
-python ppl_llama.py --hf_path hfized/e81b --dataset c4
diff --git a/sbatch/old/e8237b.sh b/sbatch/old/e8237b.sh
deleted file mode 100644
index 5ce97dc..0000000
--- a/sbatch/old/e8237b.sh
+++ /dev/null
@@ -1,66 +0,0 @@
-#!/bin/bash
-
-CKPT=/mnt/jerry_data/checkpoints
-HF=/mnt/jerry_data/hfized
-HESS=/mnt/jerry_data/hessians
-LOG=/mnt/jerry_data/logs
-L1=/mnt/jerry_data/meta_llama1
-
-
-python quantize_llama.py --save_path $CKPT/1_65b_e8p_2bit --codebook E8P12 --sigma_reg2 1e-2 --scale 0.9 --base_model $L1/huggingface_65B --hessian_path $HESS/llama1_65b_6144 >> $LOG/1_65b_e8p_2bit 2>&1
-python quantize_llama.py --save_path $CKPT/1_30b_e8p_2bit --codebook E8P12 --sigma_reg2 1e-2 --scale 0.9 --base_model $L1/huggingface_30B --hessian_path $HESS/llama1_30b_6144 >> $LOG/1_30b_e8p_2bit 2>&1
-python quantize_llama.py --save_path $CKPT/1_13b_e8p_2bit --codebook E8P12 --sigma_reg2 1e-2 --scale 0.9 --base_model $L1/huggingface_13B --hessian_path $HESS/llama1_13b_6144 >> $LOG/1_13b_e8p_2bit 2>&1
-python quantize_llama.py --save_path $CKPT/1_7b_e8p_2bit  --codebook E8P12 --sigma_reg2 1e-2 --scale 0.9 --base_model $L1/huggingface_7B  --hessian_path $HESS/llama1_7b_6144  >> $LOG/1_7b_e8p_2bit 2>&1
-python quantize_llama.py --save_path $CKPT/1_65b_d4_4bit  --codebook D44B  --sigma_reg2 1e-2 --scale 3.4 --base_model $L1/huggingface_65B --hessian_path $HESS/llama1_65b_6144 >> $LOG/1_65b_d4_4bit 2>&1
-python quantize_llama.py --save_path $CKPT/1_30b_d4_4bit  --codebook D44B  --sigma_reg2 1e-2 --scale 3.4 --base_model $L1/huggingface_30B --hessian_path $HESS/llama1_30b_6144 >> $LOG/1_30b_d4_4bit 2>&1
-python quantize_llama.py --save_path $CKPT/1_13b_d4_4bit  --codebook D44B  --sigma_reg2 1e-2 --scale 3.4 --base_model $L1/huggingface_13B --hessian_path $HESS/llama1_13b_6144 >> $LOG/1_13b_d4_4bit 2>&1
-python quantize_llama.py --save_path $CKPT/1_7b_d4_4bit   --codebook D44B  --sigma_reg2 1e-2 --scale 3.4 --base_model $L1/huggingface_7B  --hessian_path $HESS/llama1_7b_6144  >> $LOG/1_7b_d4_4bit 2>&1
-
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/1_65b_e8p_2bit --hf_output_path $HF/1_65b_e8p_2bit & 
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/1_30b_e8p_2bit --hf_output_path $HF/1_30b_e8p_2bit &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/1_13b_e8p_2bit --hf_output_path $HF/1_13b_e8p_2bit &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/1_7b_e8p_2bit  --hf_output_path $HF/1_7b_e8p_2bit  &
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/1_65b_d4_4bit  --hf_output_path $HF/1_65b_d4_4bit  &
-CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/1_30b_d4_4bit  --hf_output_path $HF/1_30b_d4_4bit  &
-CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/1_13b_d4_4bit  --hf_output_path $HF/1_13b_d4_4bit  &
-CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/1_7b_d4_4bit   --hf_output_path $HF/1_7b_d4_4bit   &
-
-wait
-
-
-# fp16 zero shot for llama1 and 2
-CUDA_VISIBLE_DEVICES=0,1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $L1/huggingface_65B >> $LOG/1_65b_fp16 2>&1 &
-CUDA_VISIBLE_DEVICES=2,3 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path meta-llama/Llama-2-70b-hf >> $LOG/2_70b_fp16 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $L1/huggingface_30B >> $LOG/1_30b_fp16 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $L1/huggingface_13B >> $LOG/1_13b_fp16 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path meta-llama/Llama-2-13b-hf >> $LOG/2_13b_fp16 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $L1/huggingface_7B >> $LOG/1_7b_fp16 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path meta-llama/Llama-2-7b-hf >> $LOG/2_7b_fp16 2>&1 &
-
-wait
-
-# perplexity
-CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_65b_e8p_2bit >> $LOG/1_65b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_30b_e8p_2bit >> $LOG/1_30b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_13b_e8p_2bit >> $LOG/1_13b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_7b_e8p_2bit  >> $LOG/1_7b_e8p_2bit  2>&1 &
-CUDA_VISIBLE_DEVICES=4 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_65b_d4_4bit  >> $LOG/1_65b_d4_4bit  2>&1 &
-CUDA_VISIBLE_DEVICES=5 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_30b_d4_4bit  >> $LOG/1_30b_d4_4bit  2>&1 &
-CUDA_VISIBLE_DEVICES=6 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_13b_d4_4bit  >> $LOG/1_13b_d4_4bit  2>&1 &
-CUDA_VISIBLE_DEVICES=7 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_7b_d4_4bit   >> $LOG/1_7b_d4_4bit   2>&1 &
-
-wait
-
-# zero shot
-
-CUDA_VISIBLE_DEVICES=0 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_e8p_2bit >> $LOG/1_65b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_e8p_2bit >> $LOG/1_30b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_e8p_2bit >> $LOG/1_13b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_e8p_2bit  >> $LOG/1_7b_e8p_2bit  2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_d4_4bit  >> $LOG/1_65b_d4_4bit  2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_d4_4bit  >> $LOG/1_30b_d4_4bit  2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_d4_4bit  >> $LOG/1_13b_d4_4bit  2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_d4_4bit   >> $LOG/1_7b_d4_4bit   2>&1 &
-
-wait
diff --git a/sbatch/old/e8_ocs.sbatch b/sbatch/old/e8_ocs.sbatch
deleted file mode 100644
index ba699ee..0000000
--- a/sbatch/old/e8_ocs.sbatch
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu
-#SBATCH --job-name=e8_ocs
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=48G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:2
-#SBATCH --constraint='gpu-high'
-#SBATCH --exclude='coecis-compute-03'
-#SBATCH --time=48:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-python quantize_llama.py --save_path checkpoints/e8_ocs --codebook E8 --outlier_channel_split
-python hfize_llama.py --quantized_path checkpoints/e8_ocs --hf_output_path hfized/e8_ocs
-python ppl_llama.py --hf_path hfized/e8_ocs_fast --dataset c4
diff --git a/sbatch/old/e8p12.sbatch b/sbatch/old/e8p12.sbatch
deleted file mode 100644
index 99057ce..0000000
--- a/sbatch/old/e8p12.sbatch
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu,ellis
-#SBATCH --job-name=e8p12_3
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=96G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:2
-#SBATCH --constraint='gpu-high'
-#SBATCH --time=48:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-CKPT=checkpoints
-
-#python quantize_llama.py --save_path $CKPT/e8p12_3 --codebook E8P12
-python hfize_llama.py --quantized_path $CKPT/e8p12_3 --hf_output_path hfized/e8p12_3
-python ppl_llama.py --hf_path hfized/e8p12_3 --dataset c4
diff --git a/sbatch/old/e8p_7b.sbatch b/sbatch/old/e8p_7b.sbatch
deleted file mode 100644
index a62516e..0000000
--- a/sbatch/old/e8p_7b.sbatch
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu
-#SBATCH --job-name=e8p_7b
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=64G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:1
-#SBATCH --constraint='gpu-high'
-#SBATCH --time=48:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-CKPT=checkpoints
-
-#python quantize_llama.py \
-#       --save_path $CKPT/e8p_7b \
-#       --codebook E8P12 \
-#       --sigma_reg2 5e-3 \
-#       --scale_override 0.9 \
-#       --base_model meta-llama/Llama-2-7b-hf \
-#       --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_7b \
-#       --use_fp64
-python hfize_llama.py --quantized_path $CKPT/e8p_7b --hf_output_path hfized/e8p_7b
-python hfize_llama.py --quantized_path $CKPT/e8p_7b --hf_output_path hfized/e8p_7b
-python ppl_llama.py --hf_path hfized/e8p_7b --dataset c4
diff --git a/sbatch/old/e8p_nofuse.sbatch b/sbatch/old/e8p_nofuse.sbatch
deleted file mode 100644
index f4f90a6..0000000
--- a/sbatch/old/e8p_nofuse.sbatch
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu
-#SBATCH --job-name=e8p_nofuse_ch
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=96G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:2
-#SBATCH --constraint='gpu-high'
-#SBATCH --time=48:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-CKPT=checkpoints
-
-python quantize_llama_nofuse.py --save_path $CKPT/e8p_nofuse_ch --codebook E8P12 --sigma_reg2 1e-2 --scale 0.9 --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_70b
-python hfize_llama_nofuse.py --quantized_path $CKPT/e8p_nofuse_ch --hf_output_path hfized/e8p_nofuse_ch
-python ppl_llama_nofuse.py --hf_path hfized/e8p_nofuse_ch --dataset c4
diff --git a/sbatch/old/e8s.sbatch b/sbatch/old/e8s.sbatch
deleted file mode 100644
index 38dd291..0000000
--- a/sbatch/old/e8s.sbatch
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu
-#SBATCH --job-name=e8s
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=48G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:1
-#SBATCH --constraint='gpu-high'
-#SBATCH --time=48:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-CKPT=/share/desa/nfs02/quip_llama2/checkpoints/
-
-#python quantize_llama.py --save_path $CKPT/e8s --codebook E8S
-#python hfize_llama.py --quantized_path $CKPT/e8s --hf_output_path hfized/e8s
-python ppl_llama.py --hf_path hfized/e8s --dataset c4
diff --git a/sbatch/old/eval_70b.sbatch b/sbatch/old/eval_70b.sbatch
deleted file mode 100644
index 6ff14ad..0000000
--- a/sbatch/old/eval_70b.sbatch
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu
-#SBATCH --job-name=eval_70b
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=64G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:1
-#SBATCH --constraint='gpu-high'
-#SBATCH --time=48:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-'''
-CKPT=checkpoints
-
-python quantize_llama.py \
-       --save_path $CKPT/2_7b_e8p_2bit \
-       --codebook E8P12 \
-       --sigma_reg2 1e-2 \
-       --scale_override 0.9 \
-       --base_model meta-llama/Llama-2-7b-hf \
-       --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_7b_6144
-
-python hfize_llama.py --quantized_path $CKPT/2_7b_e8p_2bit --hf_output_path hfized/2_7b_e8p_2bit
-
-'''
-
-python ppl_llama.py --hf_path hfized/e8p_090
-python eval_llama.py --hf_path hfized/e8p_090 --batch_size 4 --tasks arc_challenge,arc_easy,boolq,piqa,winogrande
-
diff --git a/sbatch/old/fp16_ppl.sbatch b/sbatch/old/fp16_ppl.sbatch
deleted file mode 100644
index 9679f45..0000000
--- a/sbatch/old/fp16_ppl.sbatch
+++ /dev/null
@@ -1,30 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu
-#SBATCH --job-name=fp16_ppl
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=256G
-#SBATCH --cpus-per-task=8
-#SBATCH --gres=gpu:4
-#SBATCH --constraint='gpu-high'
-#SBATCH --time=48:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-echo 'seqlen 2048'
-
-python ppl_llama.py --hf_path meta-llama/Llama-2-70b-hf --seqlen 2048
-python ppl_llama.py --hf_path meta-llama/Llama-2-13b-hf --seqlen 2048
-python ppl_llama.py --hf_path meta-llama/Llama-2-7b-hf --seqlen 2048
-
-
-echo 'seqlen 4096'
-
-python ppl_llama.py --hf_path meta-llama/Llama-2-70b-hf --seqlen 4096
-python ppl_llama.py --hf_path meta-llama/Llama-2-13b-hf --seqlen 4096
-python ppl_llama.py --hf_path meta-llama/Llama-2-7b-hf --seqlen 4096
-
-
diff --git a/sbatch/old/half_int.sbatch b/sbatch/old/half_int.sbatch
deleted file mode 100644
index 2133ef5..0000000
--- a/sbatch/old/half_int.sbatch
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu
-#SBATCH --job-name=half_int
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=96G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:2
-#SBATCH --constraint='gpu-high'
-#SBATCH --time=24:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-python quantize_llama.py --save_path checkpoints/half_int --codebook half_integer_2bit
-python hfize_llama.py --quantized_path checkpoints/half_int --hf_output_path hfized/half_int
-python ppl_llama.py --hf_path hfized/half_int --dataset c4
diff --git a/sbatch/old/half_int_4bit_1col.sh b/sbatch/old/half_int_4bit_1col.sh
deleted file mode 100644
index 16818bc..0000000
--- a/sbatch/old/half_int_4bit_1col.sh
+++ /dev/null
@@ -1,46 +0,0 @@
-#!/bin/bash
-
-CKPT=/mnt/jerry_data/checkpoints
-HF=/mnt/jerry_data/hfized
-HESS=/mnt/jerry_data/hessians
-LOG=/mnt/jerry_data/logs
-L1=/mnt/jerry_data/meta_llama1
-
-'''
-CUDA_VISIBLE_DEVICES=1,3,4,5,6,7 python quantize_llama.py --save_path $CKPT/2_70b_hi_4bit  --codebook HI4B1C  --sigma_reg2 1e-2 --scale 2.7 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 >> $LOG/2_70b_hi_4bit 2>&1
-CUDA_VISIBLE_DEVICES=1,3,4,5,6,7 python quantize_llama.py --save_path $CKPT/2_13b_hi_4bit  --codebook HI4B1C  --sigma_reg2 1e-2 --scale 2.7 --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144 >> $LOG/2_13b_hi_4bit 2>&1
-python quantize_llama.py --save_path $CKPT/2_7b_hi_4bit   --codebook HI4B1C  --sigma_reg2 1e-2 --scale 2.7 --base_model meta-llama/Llama-2-7b-hf  --hessian_path $HESS/llama2_7b_6144  >> $LOG/2_7b_hi_4bit 2>&1
-python quantize_llama.py --save_path $CKPT/1_65b_hi_4bit  --codebook HI4B1C  --sigma_reg2 1e-2 --scale 2.7 --base_model $L1/huggingface_65B --hessian_path $HESS/llama1_65b_6144 >> $LOG/1_65b_hi_4bit 2>&1
-python quantize_llama.py --save_path $CKPT/1_30b_hi_4bit  --codebook HI4B1C  --sigma_reg2 1e-2 --scale 2.7 --base_model $L1/huggingface_30B --hessian_path $HESS/llama1_30b_6144 >> $LOG/1_30b_hi_4bit 2>&1
-python quantize_llama.py --save_path $CKPT/1_13b_hi_4bit  --codebook HI4B1C  --sigma_reg2 1e-2 --scale 2.7 --base_model $L1/huggingface_13B --hessian_path $HESS/llama1_13b_6144 >> $LOG/1_13b_hi_4bit 2>&1
-python quantize_llama.py --save_path $CKPT/1_7b_hi_4bit   --codebook HI4B1C  --sigma_reg2 1e-2 --scale 2.7 --base_model $L1/huggingface_7B  --hessian_path $HESS/llama1_7b_6144  >> $LOG/1_7b_hi_4bit 2>&1
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_hi_4bit  --hf_output_path $HF/2_70b_hi_4bit  &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_13b_hi_4bit  --hf_output_path $HF/2_13b_hi_4bit  &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_7b_hi_4bit   --hf_output_path $HF/2_7b_hi_4bit   &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/1_65b_hi_4bit  --hf_output_path $HF/1_65b_hi_4bit  &
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/1_30b_hi_4bit  --hf_output_path $HF/1_30b_hi_4bit  &
-CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/1_13b_hi_4bit  --hf_output_path $HF/1_13b_hi_4bit  &
-CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/1_7b_hi_4bit   --hf_output_path $HF/1_7b_hi_4bit   &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --seqlen 2048 --hf_path $HF/2_70b_hi_4bit  >> $LOG/2_70b_hi_4bit  2>&1 &
-CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --seqlen 2048 --hf_path $HF/2_13b_hi_4bit  >> $LOG/2_13b_hi_4bit  2>&1 &
-CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --seqlen 2048 --hf_path $HF/2_7b_hi_4bit   >> $LOG/2_7b_hi_4bit   2>&1 &
-CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_65b_hi_4bit  >> $LOG/1_65b_hi_4bit  2>&1 &
-CUDA_VISIBLE_DEVICES=4 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_30b_hi_4bit  >> $LOG/1_30b_hi_4bit  2>&1 &
-CUDA_VISIBLE_DEVICES=5 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_13b_hi_4bit  >> $LOG/1_13b_hi_4bit  2>&1 &
-CUDA_VISIBLE_DEVICES=6 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_7b_hi_4bit   >> $LOG/1_7b_hi_4bit   2>&1 &
-
-wait
-'''
-CUDA_VISIBLE_DEVICES=0 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 1 --hf_path $HF/2_70b_hi_4bit  >> $LOG/2_70b_hi_4bit  2>&1 &
-#CUDA_VISIBLE_DEVICES=1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_hi_4bit  >> $LOG/2_13b_hi_4bit  2>&1 &
-#CUDA_VISIBLE_DEVICES=2 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_hi_4bit   >> $LOG/2_7b_hi_4bit   2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 1 --hf_path $HF/1_65b_hi_4bit  >> $LOG/1_65b_hi_4bit  2>&1 &
-#CUDA_VISIBLE_DEVICES=4 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_hi_4bit  >> $LOG/1_30b_hi_4bit  2>&1 &
-#CUDA_VISIBLE_DEVICES=5 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_hi_4bit  >> $LOG/1_13b_hi_4bit  2>&1 &
-#CUDA_VISIBLE_DEVICES=6 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_hi_4bit   >> $LOG/1_7b_hi_4bit   2>&1 &
-
-wait
diff --git a/sbatch/old/half_int_8col.sbatch b/sbatch/old/half_int_8col.sbatch
deleted file mode 100644
index 67b8936..0000000
--- a/sbatch/old/half_int_8col.sbatch
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu
-#SBATCH --job-name=half_int_8col
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=48G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:1
-#SBATCH --constraint='gpu-high'
-#SBATCH --time=24:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-CKPT=/share/desa/nfs02/quip_llama2/checkpoints/
-
-python quantize_llama.py --save_path $CKPT/half_int_8col --codebook half_integer_2bit_8col
-python hfize_llama.py --quantized_path $CKPT/half_int_8col --hf_output_path hfized/half_int_8col
-python ppl_llama.py --hf_path hfized/half_int_8col --dataset c4
diff --git a/sbatch/old/hessian.sbatch b/sbatch/old/hessian.sbatch
deleted file mode 100644
index 8c759e4..0000000
--- a/sbatch/old/hessian.sbatch
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu
-#SBATCH --job-name=hessian-70b
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=750G
-#SBATCH --cpus-per-task=12
-#SBATCH --gres=gpu:8
-#SBATCH --constraint='v100'
-#SBATCH --time=48:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-python hessian_offline2.py --batch_size 2 --devset_size 4096 --ctx_size 4096 --save_path /home/at676/two_bit_quant/hessians/llama2_70b
diff --git a/sbatch/old/hessian_7b.sbatch b/sbatch/old/hessian_7b.sbatch
deleted file mode 100644
index 462c64e..0000000
--- a/sbatch/old/hessian_7b.sbatch
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu
-#SBATCH --job-name=hessian-7b_13b
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=500G
-#SBATCH --cpus-per-task=8
-#SBATCH --gres=gpu:3
-#SBATCH --constraint='v100'
-#SBATCH --time=48:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-rm /scratch/*.pt
-
-python hessian_offline.py --batch_size 4 --devset_size 6144 --ctx_size 4096 --save_path /share/desa/nfs01/quip_llama2/hessians_llama2_7b_6144 --scratch_path /scratch --base_model meta-llama/Llama-2-7b-hf
-
-rm /scratch/*.pt
-
-python hessian_offline.py --batch_size 2 --devset_size 6144 --ctx_size 4096 --save_path /share/desa/nfs01/quip_llama2/hessians_llama2_13b_6144 --scratch_path /scratch --base_model meta-llama/Llama-2-13b-hf
-
diff --git a/sbatch/old/hessian_together.sbatch b/sbatch/old/hessian_together.sbatch
deleted file mode 100644
index 913a69b..0000000
--- a/sbatch/old/hessian_together.sbatch
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=hessian_llama_70b
-#SBATCH --nodes=1
-#SBATCH --time=24:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-
-cd /work/albert
-
-python hessian_offline.py --batch_size 4 --devset_size 4096 --ctx_size 4096 --save_path /work/albert/two_bit_quant/hessians/llama2_70b --scratch_path /dev/shm
diff --git a/sbatch/old/kick_eval.sh b/sbatch/old/kick_eval.sh
deleted file mode 100644
index d86cd7f..0000000
--- a/sbatch/old/kick_eval.sh
+++ /dev/null
@@ -1,71 +0,0 @@
-
-#!/bin/bash
-
-# output directory
-# LEAVE COMMENTED OUT SO DON'T ACCIDENTALLY OVERWRITE
-logs_dirname="slurm_out/eval_ablate1"
-mkdir --parents $logs_dirname
-
-MEM="32G"
-CONST="gpu-mid"
-TIME="64"
-CPU="8"
-GPU="1"
-BS="4"
-
-MODELS=(
-    # "meta-llama/Llama-2-7b-chat-hf" #\
-    "hfized/7b-chat_all" \
-    "hfized/7b-chat_baseline_hada"\
-    "hfized/7b-chat_lora"\
-    "hfized/7b-chat_ocs"\
-    "hfized/7b-chat_rescaleWH"\
-    "hfized/7b-chat_rescaleWH_lora"
-)
-TASKS=("piqa" "winogrande" "arc_easy" "arc_challenge" "boolq")
-
-# main loop
-for mo_dir in "${MODELS[@]}"
-do
-for ta in "${TASKS[@]}"
-do
-# save file 
-# mo_name=$(basename "$mo_dir")
-mo_head=$(echo "$mo_dir" | cut -d / -f 1)
-mo_name=$(echo "$mo_dir" | cut -d / -f 2)
-jobname="${mo_head}_${mo_name}_${ta}"
-echo $jobname
-# slurm helper
-slurm_helper="
-#!/bin/bash
-\n#SBATCH --job-name=${jobname}
-\n#SBATCH -N 1
-\n#SBATCH -c ${CPU}
-\n#SBATCH --mail-type=FAIL
-\n#SBATCH --mail-user=jc3464@cornell.edu
-\n#SBATCH --partition=gpu
-\n#SBATCH --gres=gpu:${GPU}
-\n#SBATCH --mem=${MEM}
-\n#SBATCH --constraint=${CONST}
-\n#SBATCH -t ${TIME}:00:00
-\n#SBATCH -o ${logs_dirname}/${jobname}_%j.out
-\n#SBATCH -e ${logs_dirname}/${jobname}_%j.err
-\n\n
-\nsource ~/.bashrc
-\nsource ~/anaconda3/etc/profile.d/conda.sh
-\nconda activate smoothquant
-\n
-\necho jobname: $jobname
-\n\n
-\necho '-------------------------------------'
-\npython eval_llama.py --hf_path ${mo_dir} --tasks ${ta} --batch_size ${BS} --output_path ${logs_dirname}/${jobname}.json
-"
-# add slurm header to helper.sh
-temp_file=$(mktemp)
-echo -en $slurm_helper > $temp_file
-echo $temp_file
-# run on slurm
-sbatch --requeue $temp_file
-
-done
-done
\ No newline at end of file
diff --git a/sbatch/old/kick_l1.sh b/sbatch/old/kick_l1.sh
deleted file mode 100644
index ec5895a..0000000
--- a/sbatch/old/kick_l1.sh
+++ /dev/null
@@ -1,98 +0,0 @@
-
-#!/bin/bash
-
-# output directory
-# LEAVE COMMENTED OUT SO DON'T ACCIDENTALLY OVERWRITE
-dirname="checkpoints/llama1"
-logs_dirname="slurm_out/llama1"
-mkdir --parents $dirname
-mkdir --parents $logs_dirname
-
-MODELS=(
-    # "7b"\
-    "13b"\
-    "30b"\
-    "65b"
-    )
-MEMS=(
-    # "32G"\
-    "64G"\
-    "160G"\
-    "160G")
-CONSTS=(
-    # "gpu-mid"\
-    "gpu-mid"\
-    "gpu-mid"\
-    "gpu-high"
-    )
-TIME="64"
-CPU="8"
-GPUS=(
-    # "2"\
-    "2"\
-    "2"\
-    "1"
-    )
-
-HESSIAN_PATHS=(
-    # "hessians/llama1-7b-2048dev-2048ctx"\
-    "hessians/llama1-13b-2048dev-2048ctx"\
-    "hessians/llama1-30b-2048dev-2048ctx"\
-    "hessians/llama1-65b-2048dev-2048ctx"
-)
-EXTRA_ARGS=(
-    # "--lora_rank 128 --rescale_WH --outlier_channel_split --ocs_down_size 16384"\
-    "--lora_rank 128 --rescale_WH --outlier_channel_split --ocs_down_size 16384"\
-    "--lora_rank 128 --rescale_WH --outlier_channel_split --ocs_down_size 32768"\
-    "--lora_rank 128 --rescale_WH --outlier_channel_split --ocs_down_size 32768"
-    )
-NAMES=(
-    # "lora128_rescaleWH_ocs2-14"\
-    "lora128_rescaleWH_ocs2-14"\
-    "lora128_rescaleWH_ocs2-15"\
-    "lora128_rescaleWH_ocs2-15"
-    )
-
-
-# main loop
-for idx in "${!MODELS[@]}"
-do
-# save files
-jobname="${MODELS[$idx]}_${NAMES[$idx]}"
-# slurm helper
-slurm_helper="
-#!/bin/bash
-\n#SBATCH --job-name=${jobname}
-\n#SBATCH -N 1
-\n#SBATCH -c ${CPU}
-\n#SBATCH --mail-type=FAIL
-\n#SBATCH --mail-user=jc3464@cornell.edu
-\n#SBATCH --partition=gpu
-\n#SBATCH --gres=gpu:${GPUS[$idx]}
-\n#SBATCH --mem=${MEMS[$idx]}
-\n#SBATCH --constraint=${CONSTS[$idx]}
-\n#SBATCH -t ${TIME}:00:00
-\n#SBATCH -o ${logs_dirname}/${jobname}_%j.out
-\n#SBATCH -e ${logs_dirname}/${jobname}_%j.err
-\n\n
-\nsource ~/.bashrc
-\nsource ~/anaconda3/etc/profile.d/conda.sh
-\nconda activate smoothquant
-\n
-\necho jobname: $jobname
-\necho extra args: ${EXTRA_ARGS[$idx]}
-\n\n
-\necho '-------------------------------------'
-\npython quantize_llama.py --base_model decapoda-research/llama-${MODELS[$idx]}-hf 
-${EXTRA_ARGS[$idx]} \
---save_path ${dirname}/${jobname} \
---hessian_path ${HESSIAN_PATHS[$idx]}
-"
-# add slurm header to helper.sh
-temp_file=$(mktemp)
-echo -en $slurm_helper > $temp_file
-echo $temp_file
-# run on slurm
-# sbatch --requeue $temp_file
-
-done
\ No newline at end of file
diff --git a/sbatch/old/kick_l2.sh b/sbatch/old/kick_l2.sh
deleted file mode 100644
index d636c95..0000000
--- a/sbatch/old/kick_l2.sh
+++ /dev/null
@@ -1,66 +0,0 @@
-
-#!/bin/bash
-
-# output directory
-# LEAVE COMMENTED OUT SO DON'T ACCIDENTALLY OVERWRITE
-dirname="checkpoints/llama1"
-logs_dirname="slurm_out/llama1"
-mkdir --parents $dirname
-mkdir --parents $logs_dirname
-
-# MODEL="7b-chat"
-# HESSIAN_PATH="hessians/7b-chat-512dev-4096ctx"
-# MODEL="13b-chat"
-# HESSIAN_PATH="hessians/13b-chat-512dev-4096ctx"
-MEM="32G"
-CONST="gpu-mid"
-TIME="64"
-CPU="8"
-GPU="2"
-
-EXTRA_ARGS=("--lora_rank -1 --rescale_WH")
-NAMES=("rescaleWHv2")
-
-
-# main loop
-for idx in "${!EXTRA_ARGS[@]}"
-do
-# save files
-jobname="${MODEL}_${NAMES[$idx]}"
-# slurm helper
-slurm_helper="
-#!/bin/bash
-\n#SBATCH --job-name=${jobname}
-\n#SBATCH -N 1
-\n#SBATCH -c ${CPU}
-\n#SBATCH --mail-type=FAIL
-\n#SBATCH --mail-user=jc3464@cornell.edu
-\n#SBATCH --partition=gpu
-\n#SBATCH --gres=gpu:${GPU}
-\n#SBATCH --mem=${MEM}
-\n#SBATCH --constraint=${CONST}
-\n#SBATCH -t ${TIME}:00:00
-\n#SBATCH -o ${logs_dirname}/${jobname}_%j.out
-\n#SBATCH -e ${logs_dirname}/${jobname}_%j.err
-\n\n
-\nsource ~/.bashrc
-\nsource ~/anaconda3/etc/profile.d/conda.sh
-\nconda activate smoothquant
-\n
-\necho jobname: $jobname
-\necho extra args: ${EXTRA_ARGS[$idx]}
-\n\n
-\necho '-------------------------------------'
-\npython quantize_llama.py --base_model meta-llama/Llama-2-${MODEL}-hf
-${EXTRA_ARGS[$idx]} \
---save_path ${dirname}/${jobname} \
---hessian_path $HESSIAN_PATH
-"
-# add slurm header to helper.sh
-temp_file=$(mktemp)
-echo -en $slurm_helper > $temp_file
-echo $temp_file
-# run on slurm
-sbatch --requeue $temp_file
-
-done
\ No newline at end of file
diff --git a/sbatch/old/kick_off.sh b/sbatch/old/kick_off.sh
deleted file mode 100644
index d556a15..0000000
--- a/sbatch/old/kick_off.sh
+++ /dev/null
@@ -1,79 +0,0 @@
-
-#!/bin/bash
-
-# output directory
-# LEAVE COMMENTED OUT SO DON'T ACCIDENTALLY OVERWRITE
-# dirname="hessians"
-# logs_dirname="slurm_out/hessians"
-# mkdir --parents $dirname
-# mkdir --parents $logs_dirname
-
-TIME="240"
-CPU="8"
-GPU="1"
-# GPU="v100:1|a100:1"
-GPUCONST="v100|a100"
-
-
-TITLES=(
-    "llama1-7b" "llama1-13b"\
-    "llama1-30b" "llama1-65b"
-    )
-MODELS=(
-    'decapoda-research/llama-7b-hf' 'decapoda-research/llama-13b-hf' \
-    'decapoda-research/llama-30b-hf' 'decapoda-research/llama-65b-hf' 
-    )
-# GPUCONSTS=("gpu-mid" "gpu-mid" "gpu-high" "gpu-high")
-MEMS=(
-    "64G" "100G"\
-    "160G" "200G"
-    )
-BSS=(
-    "4" "4"\
-    "4" "4"
-    )
-DEV="2048"
-CTX="2048"
-
-
-# main loop
-for idx in "${!MODELS[@]}"
-do
-# save files
-jobname="$Hessian_${TITLES[$idx]}"
-echo $jobname
-# slurm helper
-slurm_helper="
-#!/bin/bash
-\n#SBATCH --job-name=${jobname}
-\n#SBATCH -N 1
-\n#SBATCH -c ${CPU}
-\n#SBATCH --mail-type=FAIL
-\n#SBATCH --mail-user=jc3464@cornell.edu
-\n#SBATCH --partition=gpu
-\n#SBATCH --gres=gpu:${GPU}
-\n#SBATCH --constraint=\"${GPUCONST}\"
-\n#SBATCH --mem=${MEMS[$idx]}
-\n#SBATCH -t ${TIME}:00:00
-\n#SBATCH -o ${logs_dirname}/${jobname}_%j.out
-\n#SBATCH -e ${logs_dirname}/${jobname}_%j.err
-\n\n
-\nsource ~/.bashrc
-\nsource ~/anaconda3/etc/profile.d/conda.sh
-\nconda activate smoothquant
-\n
-\necho jobname: $jobname
-\n\n
-\necho '-------------------------------------'
-\npython hessian_offline.py --devset_size ${DEV} --ctx_size ${CTX} --batch_size ${BSS[$idx]}
---base_model ${MODELS[$idx]} --save_path ${dirname}/${TITLES[idx]}-${DEV}dev-${CTX}ctx
-"
-# add slurm header to helper.sh
-temp_file=$(mktemp)
-echo -en $slurm_helper > $temp_file
-echo $temp_file
-# run on slurm
-sbatch --requeue $temp_file
-
-done
-# \n#SBATCH --constraint=${GPUCONSTS[$idx]}
\ No newline at end of file
diff --git a/sbatch/old/kmeans_8col.sbatch b/sbatch/old/kmeans_8col.sbatch
deleted file mode 100644
index 68587de..0000000
--- a/sbatch/old/kmeans_8col.sbatch
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu
-#SBATCH --job-name=kmeans_8col
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=48G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:1
-#SBATCH --constraint='gpu-high'
-#SBATCH --time=24:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-CKPT=/share/desa/nfs02/quip_llama2/checkpoints
-
-#python quantize_llama.py --save_path $CKPT/kmeans_8col --codebook kmeans_8col
-#python hfize_llama.py --quantized_path $CKPT/kmeans_8col --hf_output_path hfized/kmeans_8col
-python ppl_llama.py --hf_path hfized/kmeans_8col --dataset c4
diff --git a/sbatch/old/kmeans_np_8col.sbatch b/sbatch/old/kmeans_np_8col.sbatch
deleted file mode 100644
index 41866b0..0000000
--- a/sbatch/old/kmeans_np_8col.sbatch
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu
-#SBATCH --job-name=kmeans_np_8col
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=48G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:1
-#SBATCH --constraint='gpu-high'
-#SBATCH --time=24:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-CKPT=/share/desa/nfs02/quip_llama2/checkpoints
-
-python quantize_llama.py --save_path $CKPT/kmeans_np_8col --codebook kmeans_8col
-python hfize_llama.py --quantized_path $CKPT/kmeans_np_8col --hf_output_path hfized/kmeans_np_8col
-python ppl_llama.py --hf_path hfized/kmeans_np_8col --dataset c4
diff --git a/sbatch/old/kmedoid_8col.sbatch b/sbatch/old/kmedoid_8col.sbatch
deleted file mode 100644
index ded606b..0000000
--- a/sbatch/old/kmedoid_8col.sbatch
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu
-#SBATCH --job-name=kmedoid_8col
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=48G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:1
-#SBATCH --constraint='gpu-high'
-#SBATCH --time=24:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-CKPT=/share/desa/nfs02/quip_llama2/checkpoints
-
-python quantize_llama.py --save_path $CKPT/kmedoid_8col --codebook kmedoid_8col
-python hfize_llama.py --quantized_path $CKPT/kmedoid_8col --hf_output_path hfized/kmedoid_8col
-python ppl_llama.py --hf_path hfized/kmedoid_8col --dataset c4
diff --git a/sbatch/old/llama1.sh b/sbatch/old/llama1.sh
deleted file mode 100644
index 5ce97dc..0000000
--- a/sbatch/old/llama1.sh
+++ /dev/null
@@ -1,66 +0,0 @@
-#!/bin/bash
-
-CKPT=/mnt/jerry_data/checkpoints
-HF=/mnt/jerry_data/hfized
-HESS=/mnt/jerry_data/hessians
-LOG=/mnt/jerry_data/logs
-L1=/mnt/jerry_data/meta_llama1
-
-
-python quantize_llama.py --save_path $CKPT/1_65b_e8p_2bit --codebook E8P12 --sigma_reg2 1e-2 --scale 0.9 --base_model $L1/huggingface_65B --hessian_path $HESS/llama1_65b_6144 >> $LOG/1_65b_e8p_2bit 2>&1
-python quantize_llama.py --save_path $CKPT/1_30b_e8p_2bit --codebook E8P12 --sigma_reg2 1e-2 --scale 0.9 --base_model $L1/huggingface_30B --hessian_path $HESS/llama1_30b_6144 >> $LOG/1_30b_e8p_2bit 2>&1
-python quantize_llama.py --save_path $CKPT/1_13b_e8p_2bit --codebook E8P12 --sigma_reg2 1e-2 --scale 0.9 --base_model $L1/huggingface_13B --hessian_path $HESS/llama1_13b_6144 >> $LOG/1_13b_e8p_2bit 2>&1
-python quantize_llama.py --save_path $CKPT/1_7b_e8p_2bit  --codebook E8P12 --sigma_reg2 1e-2 --scale 0.9 --base_model $L1/huggingface_7B  --hessian_path $HESS/llama1_7b_6144  >> $LOG/1_7b_e8p_2bit 2>&1
-python quantize_llama.py --save_path $CKPT/1_65b_d4_4bit  --codebook D44B  --sigma_reg2 1e-2 --scale 3.4 --base_model $L1/huggingface_65B --hessian_path $HESS/llama1_65b_6144 >> $LOG/1_65b_d4_4bit 2>&1
-python quantize_llama.py --save_path $CKPT/1_30b_d4_4bit  --codebook D44B  --sigma_reg2 1e-2 --scale 3.4 --base_model $L1/huggingface_30B --hessian_path $HESS/llama1_30b_6144 >> $LOG/1_30b_d4_4bit 2>&1
-python quantize_llama.py --save_path $CKPT/1_13b_d4_4bit  --codebook D44B  --sigma_reg2 1e-2 --scale 3.4 --base_model $L1/huggingface_13B --hessian_path $HESS/llama1_13b_6144 >> $LOG/1_13b_d4_4bit 2>&1
-python quantize_llama.py --save_path $CKPT/1_7b_d4_4bit   --codebook D44B  --sigma_reg2 1e-2 --scale 3.4 --base_model $L1/huggingface_7B  --hessian_path $HESS/llama1_7b_6144  >> $LOG/1_7b_d4_4bit 2>&1
-
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/1_65b_e8p_2bit --hf_output_path $HF/1_65b_e8p_2bit & 
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/1_30b_e8p_2bit --hf_output_path $HF/1_30b_e8p_2bit &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/1_13b_e8p_2bit --hf_output_path $HF/1_13b_e8p_2bit &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/1_7b_e8p_2bit  --hf_output_path $HF/1_7b_e8p_2bit  &
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/1_65b_d4_4bit  --hf_output_path $HF/1_65b_d4_4bit  &
-CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/1_30b_d4_4bit  --hf_output_path $HF/1_30b_d4_4bit  &
-CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/1_13b_d4_4bit  --hf_output_path $HF/1_13b_d4_4bit  &
-CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/1_7b_d4_4bit   --hf_output_path $HF/1_7b_d4_4bit   &
-
-wait
-
-
-# fp16 zero shot for llama1 and 2
-CUDA_VISIBLE_DEVICES=0,1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $L1/huggingface_65B >> $LOG/1_65b_fp16 2>&1 &
-CUDA_VISIBLE_DEVICES=2,3 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path meta-llama/Llama-2-70b-hf >> $LOG/2_70b_fp16 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $L1/huggingface_30B >> $LOG/1_30b_fp16 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $L1/huggingface_13B >> $LOG/1_13b_fp16 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path meta-llama/Llama-2-13b-hf >> $LOG/2_13b_fp16 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $L1/huggingface_7B >> $LOG/1_7b_fp16 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path meta-llama/Llama-2-7b-hf >> $LOG/2_7b_fp16 2>&1 &
-
-wait
-
-# perplexity
-CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_65b_e8p_2bit >> $LOG/1_65b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_30b_e8p_2bit >> $LOG/1_30b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_13b_e8p_2bit >> $LOG/1_13b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_7b_e8p_2bit  >> $LOG/1_7b_e8p_2bit  2>&1 &
-CUDA_VISIBLE_DEVICES=4 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_65b_d4_4bit  >> $LOG/1_65b_d4_4bit  2>&1 &
-CUDA_VISIBLE_DEVICES=5 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_30b_d4_4bit  >> $LOG/1_30b_d4_4bit  2>&1 &
-CUDA_VISIBLE_DEVICES=6 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_13b_d4_4bit  >> $LOG/1_13b_d4_4bit  2>&1 &
-CUDA_VISIBLE_DEVICES=7 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_7b_d4_4bit   >> $LOG/1_7b_d4_4bit   2>&1 &
-
-wait
-
-# zero shot
-
-CUDA_VISIBLE_DEVICES=0 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_e8p_2bit >> $LOG/1_65b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_e8p_2bit >> $LOG/1_30b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_e8p_2bit >> $LOG/1_13b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_e8p_2bit  >> $LOG/1_7b_e8p_2bit  2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_d4_4bit  >> $LOG/1_65b_d4_4bit  2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_d4_4bit  >> $LOG/1_30b_d4_4bit  2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_d4_4bit  >> $LOG/1_13b_d4_4bit  2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_d4_4bit   >> $LOG/1_7b_d4_4bit   2>&1 &
-
-wait
diff --git a/sbatch/old/main_check.sbatch b/sbatch/old/main_check.sbatch
deleted file mode 100644
index bd280f9..0000000
--- a/sbatch/old/main_check.sbatch
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=ellis,gpu
-#SBATCH --job-name=main_check
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=96G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:2
-#SBATCH --constraint='gpu-high'
-#SBATCH --time=72:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-#python quantize_llama.py --save_path checkpoints/main_check
-python hfize_llama.py --quantized_path checkpoints/main_check --hf_output_path hfized/main_check
-python ppl_llama.py --hf_path hfized/main_check --dataset c4
diff --git a/sbatch/old/ocs.sbatch b/sbatch/old/ocs.sbatch
deleted file mode 100644
index 11a1c4d..0000000
--- a/sbatch/old/ocs.sbatch
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu,ellis
-#SBATCH --job-name=ocs
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=64G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:1
-#SBATCH --constraint='gpu-high|gpu-mid'
-#SBATCH --time=72:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-#python quantize_llama.py --save_path checkpoints/ocs --outlier_channel_split
-#python hfize_llama.py --quantized_path checkpoints/ocs --hf_output_path hfized/ocs
-python c4_llama.py --hf_path hfized/ocs
diff --git a/sbatch/old/perp.sbatch b/sbatch/old/perp.sbatch
deleted file mode 100644
index 9ccd48d..0000000
--- a/sbatch/old/perp.sbatch
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=llama2_perp
-#SBATCH -N 1
-#SBATCH -n 8
-#SBATCH --mem=254G
-#SBATCH --partition=gpu
-#SBATCH --gres=gpu:1
-#SBATCH --constraint="gpu-high"
-#SBATCH --exclude=nikola-compute-[15-16]
-#SBATCH -t 72:00:00
-#SBATCH -o log_perp_noq.out                  # Name of stdout output     log file (%j expands to jobID)
-#SBATCH -e log_perp_noq.err                  # Name of stderr output     log file (%j expands to jobID)
-python3 perp.py | tee -a tee_perp_noq.out
diff --git a/sbatch/old/plot.sbatch b/sbatch/old/plot.sbatch
deleted file mode 100644
index c5610ca..0000000
--- a/sbatch/old/plot.sbatch
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=ellis
-#SBATCH --job-name=plot
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=256G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:1
-#SBATCH --constraint='gpu-high'
-#SBATCH --time=48:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-cd /home/at676/two_bit_quant/scripts
-python plot_lattice.py
-
diff --git a/sbatch/old/quant_sink.sbatch b/sbatch/old/quant_sink.sbatch
deleted file mode 100644
index bbe9447..0000000
--- a/sbatch/old/quant_sink.sbatch
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=llama2_sinkhorn
-#SBATCH -N 1
-#SBATCH -n 8
-#SBATCH --mem=254G
-#SBATCH --partition=gpu
-#SBATCH --gres=gpu:1
-#SBATCH --constraint="gpu-high"
-#SBATCH --exclude=nikola-compute-[15-18],badfellow,ellis-compute-[01-02]
-#SBATCH -t 72:00:00
-#SBATCH -o log_quant_sinkscale.out                  # Name of stdout output     log file (%j expands to jobID)
-#SBATCH -e log_quant_sinkscale.err                  # Name of stderr output     log file (%j expands to jobID)
-python3 quant_sinkscale.py | tee -a tee_quant_sinkscale.out
diff --git a/sbatch/old/run.sbatch b/sbatch/old/run.sbatch
deleted file mode 100644
index 34e2666..0000000
--- a/sbatch/old/run.sbatch
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/bin/bash
-#SBATCH --job-name=llama2
-#SBATCH -N 1
-#SBATCH -n 8
-#SBATCH --mem=254G
-#SBATCH --partition=gpu
-#SBATCH --gres=gpu:1
-#SBATCH --constraint="gpu-high"
-#SBATCH --exclude=nikola-compute-[15-18],badfellow,ellis-compute-[01-02]
-#SBATCH -t 72:00:00
-#SBATCH -o log_hess_70b.out                  # Name of stdout output     log file (%j expands to jobID)
-#SBATCH -e log_hess_70b.err                  # Name of stderr output     log file (%j expands to jobID)
-python3 hessian.py | tee -a hess_70b.out
diff --git a/sbatch/old/scale_tests/e8p_s075.sbatch b/sbatch/old/scale_tests/e8p_s075.sbatch
deleted file mode 100644
index d37b9c5..0000000
--- a/sbatch/old/scale_tests/e8p_s075.sbatch
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu
-#SBATCH --job-name=e8p_s075
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=64G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:1
-#SBATCH --constraint='gpu-high'
-#SBATCH --time=48:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-CKPT=checkpoints
-
-python quantize_llama.py --save_path $CKPT/e8p_s075 --codebook E8P12 --sigma_reg2 1e-2 --scale 0.75 --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_70b_6144
-python hfize_llama.py --quantized_path $CKPT/e8p_s075 --hf_output_path hfized/e8p_s075
-python ppl_llama.py --hf_path hfized/e8p_s075 --dataset c4
diff --git a/sbatch/old/scale_tests/e8p_s080.sbatch b/sbatch/old/scale_tests/e8p_s080.sbatch
deleted file mode 100644
index c5eea4f..0000000
--- a/sbatch/old/scale_tests/e8p_s080.sbatch
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu
-#SBATCH --job-name=e8p_s080
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=64G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:1
-#SBATCH --constraint='gpu-high'
-#SBATCH --time=48:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-CKPT=checkpoints
-
-python quantize_llama.py --save_path $CKPT/e8p_s080 --codebook E8P12 --sigma_reg2 1e-2 --scale 0.80 --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_70b_6144
-python hfize_llama.py --quantized_path $CKPT/e8p_s080 --hf_output_path hfized/e8p_s080
-python ppl_llama.py --hf_path hfized/e8p_s080 --dataset c4
diff --git a/sbatch/old/scale_tests/e8p_s085.sbatch b/sbatch/old/scale_tests/e8p_s085.sbatch
deleted file mode 100644
index aaf97a9..0000000
--- a/sbatch/old/scale_tests/e8p_s085.sbatch
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu
-#SBATCH --job-name=e8p_s085
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=64G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:1
-#SBATCH --constraint='gpu-high'
-#SBATCH --time=48:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-CKPT=checkpoints
-
-python quantize_llama.py --save_path $CKPT/e8p_s085 --codebook E8P12 --sigma_reg2 1e-2 --scale 0.85 --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_70b_6144
-python hfize_llama.py --quantized_path $CKPT/e8p_s085 --hf_output_path hfized/e8p_s085
-python ppl_llama.py --hf_path hfized/e8p_s085 --dataset c4
diff --git a/sbatch/old/scale_tests/e8p_s090.sbatch b/sbatch/old/scale_tests/e8p_s090.sbatch
deleted file mode 100644
index 11a0139..0000000
--- a/sbatch/old/scale_tests/e8p_s090.sbatch
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu
-#SBATCH --job-name=e8p_s090
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=64G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:1
-#SBATCH --constraint='gpu-high'
-#SBATCH --time=48:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-CKPT=checkpoints
-
-python quantize_llama.py --save_path $CKPT/e8p_s090 --codebook E8P12 --sigma_reg2 1e-2 --scale 0.90 --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_70b_6144
-python hfize_llama.py --quantized_path $CKPT/e8p_s090 --hf_output_path hfized/e8p_s090
-python ppl_llama.py --hf_path hfized/e8p_s090 --dataset c4
diff --git a/sbatch/old/scale_tests/e8p_s095.sbatch b/sbatch/old/scale_tests/e8p_s095.sbatch
deleted file mode 100644
index 4cb78a3..0000000
--- a/sbatch/old/scale_tests/e8p_s095.sbatch
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu
-#SBATCH --job-name=e8p_s095
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=64G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:1
-#SBATCH --constraint='gpu-high'
-#SBATCH --time=48:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-CKPT=checkpoints
-
-python quantize_llama.py --save_path $CKPT/e8p_s095 --codebook E8P12 --sigma_reg2 1e-2 --scale 0.95 --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_70b_6144
-python hfize_llama.py --quantized_path $CKPT/e8p_s095 --hf_output_path hfized/e8p_s095
-python ppl_llama.py --hf_path hfized/e8p_s095 --dataset c4
diff --git a/sbatch/old/scale_tests/e8p_s100.sbatch b/sbatch/old/scale_tests/e8p_s100.sbatch
deleted file mode 100644
index f0ad2b8..0000000
--- a/sbatch/old/scale_tests/e8p_s100.sbatch
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu
-#SBATCH --job-name=e8p_s100
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=64G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:1
-#SBATCH --constraint='gpu-high'
-#SBATCH --time=48:00:00
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-CKPT=checkpoints
-
-python quantize_llama.py --save_path $CKPT/e8p_s100 --codebook E8P12 --sigma_reg2 1e-2 --scale 1.00 --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_70b_6144
-python hfize_llama.py --quantized_path $CKPT/e8p_s100 --hf_output_path hfized/e8p_s100
-python ppl_llama.py --hf_path hfized/e8p_s100 --dataset c4
diff --git a/sbatch/old/scale_tests/e8p_s105.sbatch b/sbatch/old/scale_tests/e8p_s105.sbatch
deleted file mode 100644
index fc636e6..0000000
--- a/sbatch/old/scale_tests/e8p_s105.sbatch
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu
-#SBATCH --job-name=e8p_s105
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=64G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:1
-#SBATCH --constraint='gpu-high'
-#SBATCH --time=48:05:05
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-CKPT=checkpoints
-
-python quantize_llama.py --save_path $CKPT/e8p_s105 --codebook E8P12 --sigma_reg2 1e-2 --scale 1.05 --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_70b_6144
-python hfize_llama.py --quantized_path $CKPT/e8p_s105 --hf_output_path hfized/e8p_s105
-python ppl_llama.py --hf_path hfized/e8p_s105 --dataset c4
diff --git a/sbatch/old/scale_tests/e8p_s110.sbatch b/sbatch/old/scale_tests/e8p_s110.sbatch
deleted file mode 100644
index c3722be..0000000
--- a/sbatch/old/scale_tests/e8p_s110.sbatch
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-#SBATCH --partition=gpu
-#SBATCH --job-name=e8p_s110
-#SBATCH --mail-type=FAIL
-#SBATCH --mail-user=at676@cornell.edu
-#SBATCH --ntasks=1
-#SBATCH --mem=64G
-#SBATCH --cpus-per-task=4
-#SBATCH --gres=gpu:1
-#SBATCH --constraint='gpu-high'
-#SBATCH --time=48:10:10
-#SBATCH --output=slurm_out/%x_%j.out
-#SBATCH --err=slurm_out/%x_%j.err
-#SBATCH --requeue
-#SBATCH --open-mode=append
-
-CKPT=checkpoints
-
-python quantize_llama.py --save_path $CKPT/e8p_s110 --codebook E8P12 --sigma_reg2 1e-2 --scale 1.10 --hessian_path /share/desa/nfs01/quip_llama2/hessians_llama2_70b_6144
-python hfize_llama.py --quantized_path $CKPT/e8p_s110 --hf_output_path hfized/e8p_s110
-python ppl_llama.py --hf_path hfized/e8p_s110 --dataset c4
diff --git a/sbatch/old/scale_tests/run_all_d4.sh b/sbatch/old/scale_tests/run_all_d4.sh
deleted file mode 100644
index e3c5c9f..0000000
--- a/sbatch/old/scale_tests/run_all_d4.sh
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/bin/bash
-
-CKPT=checkpoints
-HF=hfized
-'''
-CUDA_VISIBLE_DEVICES=0 python quantize_llama.py --save_path $CKPT/d4_s110  --codebook D4 --sigma_reg2 1e-2 --scale 1.10 --hessian_path hessians/llama2_70b_6144 > slurm_out/d4_s110.log 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python quantize_llama.py --save_path $CKPT/d4_s115  --codebook D4 --sigma_reg2 1e-2 --scale 1.15 --hessian_path hessians/llama2_70b_6144 > slurm_out/d4_s115.log 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python quantize_llama.py --save_path $CKPT/d4_s121  --codebook D4 --sigma_reg2 1e-2 --scale 1.21 --hessian_path hessians/llama2_70b_6144 > slurm_out/d4_s121.log 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python quantize_llama.py --save_path $CKPT/d4_s120  --codebook D4 --sigma_reg2 1e-2 --scale 1.20 --hessian_path hessians/llama2_70b_6144 > slurm_out/d4_s120.log 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python quantize_llama.py --save_path $CKPT/d4_s125  --codebook D4 --sigma_reg2 1e-2 --scale 1.25 --hessian_path hessians/llama2_70b_6144 > slurm_out/d4_s125.log 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python quantize_llama.py --save_path $CKPT/d4_s130  --codebook D4 --sigma_reg2 1e-2 --scale 1.30 --hessian_path hessians/llama2_70b_6144 > slurm_out/d4_s130.log 2>&1 &
-
-wait
-'''
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/d4_s110  --hf_output_path $HF/d4_s110  >> slurm_out/d4_s110.log 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/d4_s115  --hf_output_path $HF/d4_s115  >> slurm_out/d4_s115.log 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/d4_s121  --hf_output_path $HF/d4_s121  >> slurm_out/d4_s121.log 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/d4_s120  --hf_output_path $HF/d4_s120  >> slurm_out/d4_s120.log 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/d4_s125  --hf_output_path $HF/d4_s125  >> slurm_out/d4_s125.log 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/d4_s130 --hf_output_path $HF/d4_s130 >> slurm_out/d4_s130.log 2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --hf_path $HF/d4_s110   >> slurm_out/d4_s110.log 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --hf_path $HF/d4_s115   >> slurm_out/d4_s115.log 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --hf_path $HF/d4_s121   >> slurm_out/d4_s121.log 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --hf_path $HF/d4_s120   >> slurm_out/d4_s120.log 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python ppl_llama.py --hf_path $HF/d4_s125   >> slurm_out/d4_s125.log 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python ppl_llama.py --hf_path $HF/d4_s130   >> slurm_out/d4_s130.log 2>&1 &
-
-wait
diff --git a/sbatch/old/scale_tests/run_all_e8.sh b/sbatch/old/scale_tests/run_all_e8.sh
deleted file mode 100644
index 812003c..0000000
--- a/sbatch/old/scale_tests/run_all_e8.sh
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/bash
-
-CKPT=checkpoints
-HF=hfized
-'''
-CUDA_VISIBLE_DEVICES=0 python quantize_llama.py --save_path $CKPT/e8p_s075  --codebook E8P12 --sigma_reg2 1e-2 --scale 0.75 --hessian_path hessians/llama2_70b_6144 > slurm_out/e8p_s075.log 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python quantize_llama.py --save_path $CKPT/e8p_s080  --codebook E8P12 --sigma_reg2 1e-2 --scale 0.80 --hessian_path hessians/llama2_70b_6144 > slurm_out/e8p_s080.log 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python quantize_llama.py --save_path $CKPT/e8p_s085  --codebook E8P12 --sigma_reg2 1e-2 --scale 0.85 --hessian_path hessians/llama2_70b_6144 > slurm_out/e8p_s085.log 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python quantize_llama.py --save_path $CKPT/e8p_s090  --codebook E8P12 --sigma_reg2 1e-2 --scale 0.90 --hessian_path hessians/llama2_70b_6144 > slurm_out/e8p_s090.log 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python quantize_llama.py --save_path $CKPT/e8p_s095  --codebook E8P12 --sigma_reg2 1e-2 --scale 0.95 --hessian_path hessians/llama2_70b_6144 > slurm_out/e8p_s095.log 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python quantize_llama.py --save_path $CKPT/e8p_s0100 --codebook E8P12 --sigma_reg2 1e-2 --scale 1.00 --hessian_path hessians/llama2_70b_6144 > slurm_out/e8p_s100.log 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python quantize_llama.py --save_path $CKPT/e8p_s0105 --codebook E8P12 --sigma_reg2 1e-2 --scale 1.05 --hessian_path hessians/llama2_70b_6144 > slurm_out/e8p_s105.log 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python quantize_llama.py --save_path $CKPT/e8p_s0103 --codebook E8P12 --sigma_reg2 1e-2 --scale 1.03 --hessian_path hessians/llama2_70b_6144 > slurm_out/e8p_s103.log 2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/e8p_s075  --hf_output_path $HF/e8p_s075  >> slurm_out/e8p_s075.log 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/e8p_s080  --hf_output_path $HF/e8p_s080  >> slurm_out/e8p_s080.log 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/e8p_s085  --hf_output_path $HF/e8p_s085  >> slurm_out/e8p_s085.log 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/e8p_s090  --hf_output_path $HF/e8p_s090  >> slurm_out/e8p_s090.log 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/e8p_s095  --hf_output_path $HF/e8p_s095  >> slurm_out/e8p_s095.log 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/e8p_s0100 --hf_output_path $HF/e8p_s0100 >> slurm_out/e8p_s100.log 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/e8p_s0105 --hf_output_path $HF/e8p_s0105 >> slurm_out/e8p_s105.log 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/e8p_s0103 --hf_output_path $HF/e8p_s0103 >> slurm_out/e8p_s103.log 2>&1 &
-
-wait
-'''
-CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --hf_path $HF/e8p_s075 >> slurm_out/e8p_s075.log 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --hf_path $HF/e8p_s080 >> slurm_out/e8p_s080.log 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --hf_path $HF/e8p_s085 >> slurm_out/e8p_s085.log 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --hf_path $HF/e8p_s090 >> slurm_out/e8p_s090.log 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python ppl_llama.py --hf_path $HF/e8p_s095 >> slurm_out/e8p_s095.log 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python ppl_llama.py --hf_path $HF/e8p_s0100 >> slurm_out/e8p_s100.log 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python ppl_llama.py --hf_path $HF/e8p_s0105 >> slurm_out/e8p_s105.log 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python ppl_llama.py --hf_path $HF/e8p_s0103 >> slurm_out/e8p_s103.log 2>&1 &
-
-wait
diff --git a/sbatch/packed_4bit.sh b/sbatch/packed_4bit.sh
deleted file mode 100644
index 0e28dc8..0000000
--- a/sbatch/packed_4bit.sh
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/bin/bash
-
-CKPT=/mnt/desa_data/checkpoints
-HF=/mnt/desa_data/hfized
-HESS=/mnt/desa_data/hessians
-LOG=/mnt/desa_data/logs
-L1=/mnt/desa_data/meta_llama1
-
-
-python quantize_llama.py --save_path $CKPT/2_70b_hi_4bit_packed  --codebook HI4B1C --scale_override 2.7 --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 >> $LOG/2_70b_hi_4bit_packed 2>&1
-python quantize_llama.py --save_path $CKPT/2_13b_hi_4bit_packed  --codebook HI4B1C --scale_override 2.7 --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144 >> $LOG/2_13b_hi_4bit_packed 2>&1
-python quantize_llama.py --save_path $CKPT/2_7b_hi_4bit_packed   --codebook HI4B1C --scale_override 2.7 --base_model meta-llama/Llama-2-7b-hf  --hessian_path $HESS/llama2_7b_6144  >> $LOG/2_7b_hi_4bit_packed 2>&1
-python quantize_llama.py --save_path $CKPT/1_65b_hi_4bit_packed  --codebook HI4B1C --scale_override 2.7 --base_model $L1/huggingface_65B --hessian_path $HESS/llama1_65b_6144 >> $LOG/1_65b_hi_4bit_packed 2>&1
-python quantize_llama.py --save_path $CKPT/1_30b_hi_4bit_packed  --codebook HI4B1C --scale_override 2.7 --base_model $L1/huggingface_30B --hessian_path $HESS/llama1_30b_6144 >> $LOG/1_30b_hi_4bit_packed 2>&1
-python quantize_llama.py --save_path $CKPT/1_13b_hi_4bit_packed  --codebook HI4B1C --scale_override 2.7 --base_model $L1/huggingface_13B --hessian_path $HESS/llama1_13b_6144 >> $LOG/1_13b_hi_4bit_packed 2>&1
-python quantize_llama.py --save_path $CKPT/1_7b_hi_4bit_packed   --codebook HI4B1C --scale_override 2.7 --base_model $L1/huggingface_7B  --hessian_path $HESS/llama1_7b_6144  >> $LOG/1_7b_hi_4bit_packed 2>&1
-
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_hi_4bit_packed --hf_output_path $HF/2_70b_hi_4bit_packed & 
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_13b_hi_4bit_packed --hf_output_path $HF/2_13b_hi_4bit_packed &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_7b_hi_4bit_packed  --hf_output_path $HF/2_7b_hi_4bit_packed  &
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/1_65b_hi_4bit_packed --hf_output_path $HF/1_65b_hi_4bit_packed &
-CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/1_30b_hi_4bit_packed --hf_output_path $HF/1_30b_hi_4bit_packed &
-CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/1_13b_hi_4bit_packed --hf_output_path $HF/1_13b_hi_4bit_packed &
-CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/1_7b_hi_4bit_packed  --hf_output_path $HF/1_7b_hi_4bit_packed  &
-
-wait
-
-# perplexity
-CUDA_VISIBLE_DEVICES=0 python ppl_llama.py --seqlen 4096 --hf_path $HF/2_70b_hi_4bit_packed >> $LOG/2_70b_hi_4bit_packed 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python ppl_llama.py --seqlen 4096 --hf_path $HF/2_13b_hi_4bit_packed >> $LOG/2_13b_hi_4bit_packed 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python ppl_llama.py --seqlen 4096 --hf_path $HF/2_7b_hi_4bit_packed  >> $LOG/2_7b_hi_4bit_packed  2>&1 &
-CUDA_VISIBLE_DEVICES=3 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_65b_hi_4bit_packed >> $LOG/1_65b_hi_4bit_packed 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_30b_hi_4bit_packed >> $LOG/1_30b_hi_4bit_packed 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_13b_hi_4bit_packed >> $LOG/1_13b_hi_4bit_packed 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python ppl_llama.py --seqlen 2048 --hf_path $HF/1_7b_hi_4bit_packed  >> $LOG/1_7b_hi_4bit_packed  2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_hi_4bit_packed >> $LOG/2_70b_hi_4bit_packed 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_hi_4bit_packed >> $LOG/2_13b_hi_4bit_packed 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_hi_4bit_packed  >> $LOG/2_7b_hi_4bit_packed  2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_hi_4bit_packed >> $LOG/1_65b_hi_4bit_packed 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_hi_4bit_packed >> $LOG/1_30b_hi_4bit_packed 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_hi_4bit_packed >> $LOG/1_13b_hi_4bit_packed 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_hi_4bit_packed  >> $LOG/1_7b_hi_4bit_packed  2>&1 &
-
-wait
-
diff --git a/sbatch/remain.sh b/sbatch/remain.sh
deleted file mode 100644
index 5510e63..0000000
--- a/sbatch/remain.sh
+++ /dev/null
@@ -1,65 +0,0 @@
-CKPT=/mnt/desa_data/checkpoints/finetune_llama_adamw
-HF=/mnt/desa_data/hfized/finetune_llama_adamw
-LOG=/mnt/desa_data/logs/finetune_llama_adamw
-HESS=/mnt/desa_data/hessians
-'''
-CUDA_VISIBLE_DEVICES=0,1,2,3 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-7b-hf --hf_path $HF/2_7b_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.000003   --ft_opt adam --ft_bs 1 --ctx_size 4096 --ckpt_path $CKPT/2_7b_4bit >> $LOG/2_7b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4,5,6,7 python tune_susv_lmhead.py --base_model meta-llama/Llama-2-13b-hf --hf_path $HF/2_13b_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.000003 --ft_opt adam --ft_bs 1 --ctx_size 4096  --ckpt_path $CKPT/2_13b_4bit >> $LOG/2_13b_4bit 2>&1 &
-wait
-
-python tune_susv_lmhead.py --base_model meta-llama/Llama-2-70b-hf --hf_path $HF/2_70b_3bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.000003 --ft_bs 1 --ctx_size 3072 --ft_opt adam --ckpt_path $CKPT/2_70b_3bit >> $LOG/2_70b_3bit 2>&1
-python tune_susv_lmhead.py --base_model meta-llama/Llama-2-70b-hf --hf_path $HF/2_70b_4bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.000003 --ft_bs 1 --ctx_size 3072 --ft_opt adam --ckpt_path $CKPT/2_70b_4bit >> $LOG/2_70b_4bit 2>&1
-python tune_susv_lmhead.py --base_model meta-llama/Llama-2-70b-hf --hf_path $HF/2_70b_2bit --devset_size 240 --ft_valid_size 40 --ft_epochs 8 --ft_lr 0.000003 --ft_bs 1 --ctx_size 3072 --ft_opt adam --ckpt_path $CKPT/2_70b_2bit >> $LOG/2_70b_2bit 2>&1
-
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/2_70b_3bit --hf_output_path $HF/2_70b_3bit >> $LOG/2_70b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/2_70b_2bit --hf_output_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_70b_4bit --hf_output_path $HF/2_70b_4bit >> $LOG/2_70b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/2_13b_4bit --hf_output_path $HF/2_13b_4bit >> $LOG/2_13b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/2_7b_4bit --hf_output_path $HF/2_7b_4bit >> $LOG/2_7b_4bit 2>&1 &
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/2_70b_3bit >> $LOG/2_70b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path $HF/2_70b_4bit >> $LOG/2_70b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --hf_path $HF/2_13b_4bit >> $LOG/2_13b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path $HF/2_7b_4bit >> $LOG/2_7b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --hf_path $HF/2_70b_3bit --seqlen 2048 >> $LOG/2_70b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --hf_path $HF/2_70b_4bit --seqlen 2048 >> $LOG/2_70b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --hf_path $HF/2_70b_2bit --seqlen 2048 >> $LOG/2_70b_2bit 2>&1 &
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path $HF/2_13b_4bit --seqlen 2048 >> $LOG/2_13b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path $HF/2_7b_4bit --seqlen 2048 >> $LOG/2_7b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path /mnt/desa_data/hfized/finetune_llama/2_7b_2bit --seqlen 2048 >> /mnt/desa_data/logs/finetune_llama/2_7b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --hf_path /mnt/desa_data/hfized/finetune_llama/2_70b_4bit --seqlen 2048 >> /mnt/desa_data/logs/finetune_llama/2_70b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path /mnt/desa_data/hfized/finetune_llama/2_70b_3bit --seqlen 2048 >> /mnt/desa_data/logs/finetune_llama/2_70b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --hf_path /mnt/desa_data/hfized/finetune_llama/2_70b_2bit --seqlen 2048 >> /mnt/desa_data/logs/finetune_llama/2_70b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --hf_path /mnt/desa_data/hfized/finetune_llama/2_13b_4bit --seqlen 2048 >> /mnt/desa_data/logs/finetune_llama/2_13b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --hf_path /mnt/desa_data/hfized/finetune_llama/2_13b_3bit --seqlen 2048 >> /mnt/desa_data/logs/finetune_llama/2_13b_3bit 2>&1 &
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path /mnt/desa_data/hfized/finetune_llama/2_13b_2bit --seqlen 2048 >> /mnt/desa_data/logs/finetune_llama/2_13b_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path /mnt/desa_data/hfized/finetune_llama/2_7b_4bit --seqlen 2048 >> /mnt/desa_data/logs/finetune_llama/2_7b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path /mnt/desa_data/hfized/finetune_llama/2_7b_3bit --seqlen 2048 >> /mnt/desa_data/logs/finetune_llama/2_7b_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --hf_path /mnt/desa_data/hfized/new_e8p/2_70b_e8p_2bit >> /mnt/desa_data/logs/new_e8p/2_70b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --hf_path /mnt/desa_data/hfized/new_e8p/2_13b_e8p_2bit >> /mnt/desa_data/logs/new_e8p/2_13b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --hf_path /mnt/desa_data/hfized/new_e8p/2_7b_e8p_2bit >> /mnt/desa_data/logs/new_e8p/2_7b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --hf_path /mnt/desa_data/hfized/rvq/2_70b_e8prvq_3bit >> /mnt/desa_data/logs/rvq/2_70b_e8prvq_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --hf_path /mnt/desa_data/hfized/rvq/2_13b_e8prvq_3bit >> /mnt/desa_data/logs/rvq/2_13b_e8prvq_3bit 2>&1 &
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --hf_path /mnt/desa_data/hfized/rvq/2_7b_e8prvq_3bit >> /mnt/desa_data/logs/rvq/2_7b_e8prvq_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --hf_path /mnt/desa_data/hfized/rvq/2_70b_e8prvq_4bit >> /mnt/desa_data/logs/rvq/2_70b_e8prvq_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --hf_path /mnt/desa_data/hfized/rvq/2_13b_e8prvq_4bit >> /mnt/desa_data/logs/rvq/2_13b_e8prvq_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --hf_path /mnt/desa_data/hfized/rvq/2_7b_e8prvq_4bit >> /mnt/desa_data/logs/rvq/2_7b_e8prvq_4bit 2>&1 &
-'''
-CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_4bit >> $LOG/2_13b_4bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_4bit >> $LOG/2_7b_4bit 2>&1 &     
-CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path /mnt/desa_data/hfized/finetune_llama/2_7b_4bit >> /mnt/desa_data/logs/finetune_llama/2_7b_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path /mnt/desa_data/hfized/finetune_llama/2_70b_4bit >> /mnt/desa_data/logs/finetune_llama/2_70b_4bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_3bit >> $LOG/2_70b_3bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=5 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_4bit >> $LOG/2_70b_4bit 2>&1 &   
-CUDA_VISIBLE_DEVICES=6 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_2bit >> $LOG/2_70b_2bit 2>&1 &   
-wait
-
-
diff --git a/sbatch/rvq3b.sh b/sbatch/rvq3b.sh
deleted file mode 100644
index 36531dd..0000000
--- a/sbatch/rvq3b.sh
+++ /dev/null
@@ -1,79 +0,0 @@
-#!/bin/bash
-
-CKPT=/mnt/desa_data/checkpoints/rvq
-HF=/mnt/desa_data/hfized/rvq
-HESS=/mnt/desa_data/hessians
-LOG=/mnt/desa_data/logs/rvq
-
-mkdir $CKPT
-mkdir $HF
-mkdir $LOG
-
-python quantize_llama.py --no_eval --quip_tune_iters 0 --save_path $CKPT/2_70b_e8prvq_3bit  --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 >> $LOG/2_70b_e8prvq_3bit 2>&1
-python quantize_llama.py --no_eval --quip_tune_iters 0 --save_path $CKPT/2_13b_e8prvq_3bit  --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144 >> $LOG/2_13b_e8prvq_3bit 2>&1
-python quantize_llama.py --no_eval --quip_tune_iters 0 --save_path $CKPT/2_7b_e8prvq_3bit   --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-7b-hf  --hessian_path $HESS/llama2_7b_6144  >> $LOG/2_7b_e8prvq_3bit 2>&1
-python quantize_llama.py --no_eval --quip_tune_iters 0 --save_path $CKPT/2_70b_chat_e8prvq_3bit  --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-70b-chat-hf --hessian_path $HESS/llama2_70b_chat_6144 >> $LOG/2_70b_chat_e8prvq_3bit 2>&1
-python quantize_llama.py --no_eval --quip_tune_iters 0 --save_path $CKPT/2_13b_chat_e8prvq_3bit  --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-13b-chat-hf --hessian_path $HESS/llama2_13b_chat_6144 >> $LOG/2_13b_chat_e8prvq_3bit 2>&1
-python quantize_llama.py --no_eval --quip_tune_iters 0 --save_path $CKPT/2_7b_chat_e8prvq_3bit   --codebook E8P12RVQ3B --base_model meta-llama/Llama-2-7b-chat-hf  --hessian_path $HESS/llama2_7b_chat_6144  >> $LOG/2_7b_chat_e8prvq_3bit 2>&1
-python quantize_llama.py --no_eval --quip_tune_iters 0 --save_path $CKPT/1_65b_e8prvq_3bit  --codebook E8P12RVQ3B --base_model relaxml/Llama-1-65b-hf --hessian_path $HESS/llama1_65b_6144 >> $LOG/1_65b_e8prvq_3bit 2>&1
-python quantize_llama.py --no_eval --quip_tune_iters 0 --save_path $CKPT/1_30b_e8prvq_3bit  --codebook E8P12RVQ3B --base_model relaxml/Llama-1-30b-hf --hessian_path $HESS/llama1_30b_6144 >> $LOG/1_30b_e8prvq_3bit 2>&1
-python quantize_llama.py --no_eval --quip_tune_iters 0 --save_path $CKPT/1_13b_e8prvq_3bit  --codebook E8P12RVQ3B --base_model relaxml/Llama-1-13b-hf --hessian_path $HESS/llama1_13b_6144 >> $LOG/1_13b_e8prvq_3bit 2>&1
-python quantize_llama.py --no_eval --quip_tune_iters 0 --save_path $CKPT/1_7b_e8prvq_3bit   --codebook E8P12RVQ3B --base_model relaxml/Llama-1-7b-hf  --hessian_path $HESS/llama1_7b_6144  >> $LOG/1_7b_e8prvq_3bit 2>&1
-python quantize_llama.py --no_eval --quip_tune_iters 0 --save_path $CKPT/mistral_7b_e8prvq_3bit --codebook E8P12RVQ3B --base_model mistralai/Mistral-7B-v0.1 --hessian_path $HESS/mistral_7b_4096 >> $LOG/mistral_7b_e8prvq_3bit 2>&1
-python quantize_llama.py --no_eval --quip_tune_iters 0 --save_path $CKPT/openhermes_7b_e8prvq_3bit --codebook E8P12RVQ3B --base_model teknium/OpenHermes-2.5-Mistral-7B --hessian_path $HESS/openhermes_7b_4096 >> $LOG/openhermes_7b_e8prvq_3bit 2>&1
-
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_e8prvq_3bit --hf_output_path $HF/2_70b_e8prvq_3bit &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_70b_chat_e8prvq_3bit --hf_output_path $HF/2_70b_chat_e8prvq_3bit &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/1_65b_e8prvq_3bit --hf_output_path $HF/1_65b_e8prvq_3bit &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/1_30b_e8prvq_3bit --hf_output_path $HF/1_30b_e8prvq_3bit &
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_13b_e8prvq_3bit --hf_output_path $HF/2_13b_e8prvq_3bit &
-CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/2_13b_chat_e8prvq_3bit --hf_output_path $HF/2_13b_chat_e8prvq_3bit &
-CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/1_13b_e8prvq_3bit --hf_output_path $HF/1_13b_e8prvq_3bit &
-CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/2_7b_e8prvq_3bit  --hf_output_path $HF/2_7b_e8prvq_3bit  &
-wait
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_7b_chat_e8prvq_3bit  --hf_output_path $HF/2_7b_chat_e8prvq_3bit  &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/1_7b_e8prvq_3bit  --hf_output_path $HF/1_7b_e8prvq_3bit  &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/mistral_7b_e8prvq_3bit  --hf_output_path $HF/mistral_7b_e8prvq_3bit  &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/openhermes_7b_e8prvq_3bit  --hf_output_path $HF/openhermes_7b_e8prvq_3bit  &
-
-wait
-
-# perplexity
-CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --seqlen 4096 --hf_path $HF/2_70b_e8prvq_3bit >> $LOG/2_70b_e8prvq_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --seqlen 4096 --hf_path $HF/2_13b_e8prvq_3bit >> $LOG/2_13b_e8prvq_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --seqlen 4096 --hf_path $HF/2_7b_e8prvq_3bit  >> $LOG/2_7b_e8prvq_3bit  2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --seqlen 4096 --hf_path $HF/2_70b_chat_e8prvq_3bit >> $LOG/2_70b_chat_e8prvq_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --seqlen 4096 --hf_path $HF/2_13b_chat_e8prvq_3bit >> $LOG/2_13b_chat_e8prvq_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --seqlen 4096 --hf_path $HF/2_7b_chat_e8prvq_3bit  >> $LOG/2_7b_chat_e8prvq_3bit  2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_65b_e8prvq_3bit >> $LOG/1_65b_e8prvq_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_30b_e8prvq_3bit >> $LOG/1_30b_e8prvq_3bit 2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_13b_e8prvq_3bit >> $LOG/1_13b_e8prvq_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_7b_e8prvq_3bit  >> $LOG/1_7b_e8prvq_3bit  2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --no_use_cuda_graph --seqlen 8192 --hf_path $HF/mistral_7b_e8prvq_3bit >> $LOG/mistral_7b_e8prvq_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --no_use_cuda_graph --seqlen 8192 --hf_path $HF/openhermes_7b_e8prvq_3bit  >> $LOG/openhermes_7b_e8prvq_3bit  2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8prvq_3bit >> $LOG/2_70b_e8prvq_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_e8prvq_3bit >> $LOG/2_13b_e8prvq_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_e8prvq_3bit  >> $LOG/2_7b_e8prvq_3bit  2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_chat_e8prvq_3bit >> $LOG/2_70b_chat_e8prvq_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_chat_e8prvq_3bit >> $LOG/2_13b_chat_e8prvq_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_chat_e8prvq_3bit  >> $LOG/2_7b_chat_e8prvq_3bit  2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_e8prvq_3bit >> $LOG/1_65b_e8prvq_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_e8prvq_3bit >> $LOG/1_30b_e8prvq_3bit 2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_e8prvq_3bit >> $LOG/1_13b_e8prvq_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_e8prvq_3bit  >> $LOG/1_7b_e8prvq_3bit  2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/mistral_7b_e8prvq_3bit >> $LOG/mistral_7b_e8prvq_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/openhermes_7b_e8prvq_3bit  >> $LOG/openhermes_7b_e8prvq_3bit  2>&1 &
-
-wait
-
diff --git a/sbatch/rvq4b.sh b/sbatch/rvq4b.sh
deleted file mode 100644
index 9515152..0000000
--- a/sbatch/rvq4b.sh
+++ /dev/null
@@ -1,79 +0,0 @@
-#!/bin/bash
-
-CKPT=/mnt/desa_data/checkpoints/rvq
-HF=/mnt/desa_data/hfized/rvq
-HESS=/mnt/desa_data/hessians
-LOG=/mnt/desa_data/logs/rvq
-
-mkdir $CKPT
-mkdir $HF
-mkdir $LOG
-
-python quantize_llama.py --quip_tune_iters 0 --scale_override 0.9 --resid_scale_override 3.6 --save_path $CKPT/2_70b_e8prvq_4bit  --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-70b-hf --hessian_path $HESS/llama2_70b_6144 >> $LOG/2_70b_e8prvq_4bit 2>&1
-python quantize_llama.py --quip_tune_iters 0 --scale_override 0.9 --resid_scale_override 3.45 --save_path $CKPT/2_13b_e8prvq_4bit  --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-13b-hf --hessian_path $HESS/llama2_13b_6144 >> $LOG/2_13b_e8prvq_4bit 2>&1
-python quantize_llama.py --quip_tune_iters 0 --scale_override 0.9 --resid_scale_override 3.6 --save_path $CKPT/2_7b_e8prvq_4bit   --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-7b-hf  --hessian_path $HESS/llama2_7b_6144  >> $LOG/2_7b_e8prvq_4bit 2>&1
-python quantize_llama.py --quip_tune_iters 0 --scale_override 0.9 --resid_scale_override 3.6 --save_path $CKPT/2_70b_chat_e8prvq_4bit  --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-70b-chat-hf --hessian_path $HESS/llama2_70b_chat_6144 >> $LOG/2_70b_chat_e8prvq_4bit 2>&1
-python quantize_llama.py --quip_tune_iters 0 --scale_override 1.03 --resid_scale_override 3.3 --save_path $CKPT/2_13b_chat_e8prvq_4bit  --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-13b-chat-hf --hessian_path $HESS/llama2_13b_chat_6144 >> $LOG/2_13b_chat_e8prvq_4bit 2>&1
-python quantize_llama.py --quip_tune_iters 0 --scale_override 1.03 --resid_scale_override 3.6 --save_path $CKPT/2_7b_chat_e8prvq_4bit   --codebook E8P12RVQ4B --base_model meta-llama/Llama-2-7b-chat-hf  --hessian_path $HESS/llama2_7b_chat_6144  >> $LOG/2_7b_chat_e8prvq_4bit 2>&1
-python quantize_llama.py --quip_tune_iters 0 --scale_override 0.9 --resid_scale_override 3.45 --save_path $CKPT/1_65b_e8prvq_4bit  --codebook E8P12RVQ4B --base_model relaxml/Llama-1-65b-hf --hessian_path $HESS/llama1_65b_6144 >> $LOG/1_65b_e8prvq_4bit 2>&1
-python quantize_llama.py --quip_tune_iters 0 --scale_override 0.9 --resid_scale_override 3.45 --save_path $CKPT/1_30b_e8prvq_4bit  --codebook E8P12RVQ4B --base_model relaxml/Llama-1-30b-hf --hessian_path $HESS/llama1_30b_6144 >> $LOG/1_30b_e8prvq_4bit 2>&1
-python quantize_llama.py --quip_tune_iters 0 --scale_override 0.9 --resid_scale_override 3.6 --save_path $CKPT/1_13b_e8prvq_4bit  --codebook E8P12RVQ4B --base_model relaxml/Llama-1-13b-hf --hessian_path $HESS/llama1_13b_6144 >> $LOG/1_13b_e8prvq_4bit 2>&1
-python quantize_llama.py --quip_tune_iters 0 --scale_override 0.85 --resid_scale_override 3.45 --save_path $CKPT/1_7b_e8prvq_4bit   --codebook E8P12RVQ4B --base_model relaxml/Llama-1-7b-hf  --hessian_path $HESS/llama1_7b_6144  >> $LOG/1_7b_e8prvq_4bit 2>&1
-python quantize_llama.py --quip_tune_iters 0 --scale_override 0.9 --resid_scale_override 3.3 --save_path $CKPT/mistral_7b_e8prvq_4bit --codebook E8P12RVQ4B --base_model mistralai/Mistral-7B-v0.1 --hessian_path $HESS/mistral_7b_4096 >> $LOG/mistral_7b_e8prvq_4bit 2>&1
-python quantize_llama.py --quip_tune_iters 0 --scale_override 1.03 --resid_scale_override 3.3 --save_path $CKPT/openhermes_7b_e8prvq_4bit --codebook E8P12RVQ4B --base_model teknium/OpenHermes-2.5-Mistral-7B --hessian_path $HESS/openhermes_7b_4096 >> $LOG/openhermes_7b_e8prvq_4bit 2>&1
-
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_70b_e8prvq_4bit --hf_output_path $HF/2_70b_e8prvq_4bit &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/2_70b_chat_e8prvq_4bit --hf_output_path $HF/2_70b_chat_e8prvq_4bit &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/1_65b_e8prvq_4bit --hf_output_path $HF/1_65b_e8prvq_4bit &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/1_30b_e8prvq_4bit --hf_output_path $HF/1_30b_e8prvq_4bit &
-CUDA_VISIBLE_DEVICES=4 python hfize_llama.py --quantized_path $CKPT/2_13b_e8prvq_4bit --hf_output_path $HF/2_13b_e8prvq_4bit &
-CUDA_VISIBLE_DEVICES=5 python hfize_llama.py --quantized_path $CKPT/2_13b_chat_e8prvq_4bit --hf_output_path $HF/2_13b_chat_e8prvq_4bit &
-CUDA_VISIBLE_DEVICES=6 python hfize_llama.py --quantized_path $CKPT/1_13b_e8prvq_4bit --hf_output_path $HF/1_13b_e8prvq_4bit &
-CUDA_VISIBLE_DEVICES=7 python hfize_llama.py --quantized_path $CKPT/2_7b_e8prvq_4bit  --hf_output_path $HF/2_7b_e8prvq_4bit  &
-wait
-
-CUDA_VISIBLE_DEVICES=0 python hfize_llama.py --quantized_path $CKPT/2_7b_chat_e8prvq_4bit  --hf_output_path $HF/2_7b_chat_e8prvq_4bit  &
-CUDA_VISIBLE_DEVICES=1 python hfize_llama.py --quantized_path $CKPT/1_7b_e8prvq_4bit  --hf_output_path $HF/1_7b_e8prvq_4bit  &
-CUDA_VISIBLE_DEVICES=2 python hfize_llama.py --quantized_path $CKPT/mistral_7b_e8prvq_4bit  --hf_output_path $HF/mistral_7b_e8prvq_4bit  &
-CUDA_VISIBLE_DEVICES=3 python hfize_llama.py --quantized_path $CKPT/openhermes_7b_e8prvq_4bit  --hf_output_path $HF/openhermes_7b_e8prvq_4bit  &
-
-wait
-
-# perplexity
-CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --seqlen 4096 --hf_path $HF/2_70b_e8prvq_4bit >> $LOG/2_70b_e8prvq_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --seqlen 4096 --hf_path $HF/2_13b_e8prvq_4bit >> $LOG/2_13b_e8prvq_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --seqlen 4096 --hf_path $HF/2_7b_e8prvq_4bit  >> $LOG/2_7b_e8prvq_4bit  2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --seqlen 4096 --hf_path $HF/2_70b_chat_e8prvq_4bit >> $LOG/2_70b_chat_e8prvq_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_ppl.py --seqlen 4096 --hf_path $HF/2_13b_chat_e8prvq_4bit >> $LOG/2_13b_chat_e8prvq_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_ppl.py --seqlen 4096 --hf_path $HF/2_7b_chat_e8prvq_4bit  >> $LOG/2_7b_chat_e8prvq_4bit  2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_65b_e8prvq_4bit >> $LOG/1_65b_e8prvq_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_30b_e8prvq_4bit >> $LOG/1_30b_e8prvq_4bit 2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_13b_e8prvq_4bit >> $LOG/1_13b_e8prvq_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_ppl.py --seqlen 2048 --hf_path $HF/1_7b_e8prvq_4bit  >> $LOG/1_7b_e8prvq_4bit  2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_ppl.py --no_use_cuda_graph --seqlen 8192 --hf_path $HF/mistral_7b_e8prvq_4bit >> $LOG/mistral_7b_e8prvq_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_ppl.py --no_use_cuda_graph --seqlen 8192 --hf_path $HF/openhermes_7b_e8prvq_4bit  >> $LOG/openhermes_7b_e8prvq_4bit  2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_e8prvq_4bit >> $LOG/2_70b_e8prvq_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_e8prvq_4bit >> $LOG/2_13b_e8prvq_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_e8prvq_4bit  >> $LOG/2_7b_e8prvq_4bit  2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_70b_chat_e8prvq_4bit >> $LOG/2_70b_chat_e8prvq_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_13b_chat_e8prvq_4bit >> $LOG/2_13b_chat_e8prvq_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/2_7b_chat_e8prvq_4bit  >> $LOG/2_7b_chat_e8prvq_4bit  2>&1 &
-CUDA_VISIBLE_DEVICES=6 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_65b_e8prvq_4bit >> $LOG/1_65b_e8prvq_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=7 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_30b_e8prvq_4bit >> $LOG/1_30b_e8prvq_4bit 2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=0 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_13b_e8prvq_4bit >> $LOG/1_13b_e8prvq_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=1 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/1_7b_e8prvq_4bit  >> $LOG/1_7b_e8prvq_4bit  2>&1 &
-CUDA_VISIBLE_DEVICES=2 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/mistral_7b_e8prvq_4bit >> $LOG/mistral_7b_e8prvq_4bit 2>&1 &
-CUDA_VISIBLE_DEVICES=3 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/openhermes_7b_e8prvq_4bit  >> $LOG/openhermes_7b_e8prvq_4bit  2>&1 &
-
-wait
-
diff --git a/sbatch/sc_sweep.sh b/sbatch/sc_sweep.sh
deleted file mode 100644
index 4f5232e..0000000
--- a/sbatch/sc_sweep.sh
+++ /dev/null
@@ -1,85 +0,0 @@
-#!/bin/bash
-
-CKPT=/mnt/desa_data/checkpoints
-HF=/mnt/desa_data/hfized
-HESS=/mnt/desa_data/hessians
-LOG=/mnt/desa_data/logs
-L1=/mnt/desa_data/meta_llama1
-
-function sc_sweep {
-    # NPRE $1
-    # BMO $2
-    SC_LS=("0.80" "0.85" "0.90" "0.95" "1.00")
-    NPOST_LS=("080" "085" "090" "095" "100")
-    for idx in "${!SC_LS[@]}"
-    do
-    python quantize_llama.py --save_path $CKPT/${1}_e8p_2bit_sc${NPOST_LS[$idx]} --codebook E8P12 --scale_override ${SC_LS[$idx]} \
-        --base_model meta-llama/$2 --hessian_path $HESS/llama${1}_6144 >> $LOG/${1}_e8p_2bit_sc${NPOST_LS[$idx]} 2>&1
-    done
-    for idx in "${!SC_LS[@]}"
-    do
-    CUDA_VISIBLE_DEVICES=$idx python hfize_llama.py --quantized_path $CKPT/${1}_e8p_2bit_sc${NPOST_LS[$idx]} \
-        --hf_output_path $HF/${1}_e8p_2bit_sc${NPOST_LS[$idx]} &
-    done
-    wait
-    # perplexity
-    for idx in "${!SC_LS[@]}"
-    do
-    CUDA_VISIBLE_DEVICES=$idx python ppl_llama.py --seqlen 4096 --hf_path $HF/${1}_e8p_2bit_sc${NPOST_LS[$idx]} \
-        >> $LOG/${1}_e8p_2bit_sc${NPOST_LS[$idx]} 2>&1 &
-    done
-    wait 
-    # zeroshot
-    for idx in "${!SC_LS[@]}"
-    do
-    CUDA_VISIBLE_DEVICES=$idx python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 \
-        --hf_path $HF/${1}_e8p_2bit_sc${NPOST_LS[$idx]} >> $LOG/${1}_e8p_2bit_sc${NPOST_LS[$idx]} 2>&1 &
-    done
-}
-
-# sc_sweep "2_70b" "Llama-2-70b-hf"
-# sc_sweep "2_13b" "Llama-2-13b-hf"
-# sc_sweep "2_7b"  "Llama-2-7b-hf"
-# 
-# sc_sweep "2_70b_chat" "Llama-2-70b-chat-hf"
-# sc_sweep "2_13b_chat" "Llama-2-13b-chat-hf"
-# sc_sweep "2_7b_chat"  "Llama-2-7b-chat-hf"
-
-function sc_sweep_hi {
-    # NPRE $1
-    # BMO $2
-    SC_LS=("2.4" "2.55" "2.7" "2.85" "3")
-    NPOST_LS=("240" "255" "270" "285" "300")
-    for idx in "${!SC_LS[@]}"
-    do
-    python quantize_llama.py --save_path $CKPT/${1}_hi_4bit_sc${NPOST_LS[$idx]} --codebook HI4B1C --scale_override ${SC_LS[$idx]} \
-        --base_model meta-llama/$2 --hessian_path $HESS/llama${1}_6144 >> $LOG/${1}_hi_4bit_sc${NPOST_LS[$idx]} 2>&1
-    done
-    for idx in "${!SC_LS[@]}"
-    do
-    CUDA_VISIBLE_DEVICES=$idx python hfize_llama.py --quantized_path $CKPT/${1}_hi_4bit_sc${NPOST_LS[$idx]} \
-        --hf_output_path $HF/${1}_hi_4bit_sc${NPOST_LS[$idx]} &
-    done
-    wait
-    # perplexity
-    for idx in "${!SC_LS[@]}"
-    do
-    CUDA_VISIBLE_DEVICES=$idx python ppl_llama.py --seqlen 4096 --hf_path $HF/${1}_hi_4bit_sc${NPOST_LS[$idx]} \
-        >> $LOG/${1}_hi_4bit_sc${NPOST_LS[$idx]} 2>&1 &
-    done
-    wait 
-    # zeroshot
-    for idx in "${!SC_LS[@]}"
-    do
-    CUDA_VISIBLE_DEVICES=$idx python eval_llama.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 \
-        --hf_path $HF/${1}_hi_4bit_sc${NPOST_LS[$idx]} >> $LOG/${1}_hi_4bit_sc${NPOST_LS[$idx]} 2>&1 &
-    done
-}
-
-sc_sweep_hi "2_70b" "Llama-2-70b-hf"
-sc_sweep_hi "2_13b" "Llama-2-13b-hf"
-sc_sweep_hi "2_7b"  "Llama-2-7b-hf"
-
-sc_sweep_hi "2_70b_chat" "Llama-2-70b-chat-hf"
-sc_sweep_hi "2_13b_chat" "Llama-2-13b-chat-hf"
-sc_sweep_hi "2_7b_chat"  "Llama-2-7b-chat-hf"
\ No newline at end of file
diff --git a/sbatch/zeroshot_falcon.sh b/sbatch/zeroshot_falcon.sh
deleted file mode 100644
index 2c95409..0000000
--- a/sbatch/zeroshot_falcon.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-HF=/mnt/desa_data/hfized
-LOG=/mnt/desa_data/logs
-
-CUDA_VISIBLE_DEVICES=4 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/falcon_180b_e8p_2bit/ >> $LOG/falcon_180b_e8p_2bit 2>&1 &
-CUDA_VISIBLE_DEVICES=5 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/falcon_180b_e8prvq_3bit/ >> $LOG/falcon_180b_e8prvq_3bit 2>&1 &
-CUDA_VISIBLE_DEVICES=6,7 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path $HF/falcon_180b_e8prvq_4bit/ >> $LOG/falcon_180b_e8prvq_4bit 2>&1 &
-
-wait
-
-CUDA_VISIBLE_DEVICES=2,3,4,5,6,7 python eval_zeroshot.py --tasks arc_challenge,arc_easy,boolq,piqa,winogrande --batch_size 4 --hf_path tiiuae/falcon-180B >> $LOG/falcon_180b_fp16 2>&1