update

StiphyJay · Oct 16, 2023 · 504833c · 504833c
1 parent 65d3f82
commit 504833c
Show file tree

Hide file tree

Showing 2 changed files with 846 additions and 0 deletions.
diff --git a/cluster/test_time.yaml b/cluster/test_time.yaml
@@ -0,0 +1,48 @@
+description: 'quantization_profile'  # short label for this experiment config
+
+target:
+  service: aml
+  name: A100-80G-PCIE-westus3  # active target; inactive alternatives follow
+  # name: tscience-a100-80g-eastus
+  # name: V10032G
+  # name: A100EastUS
+  # name: openai-A10080G
+  # name: A10080G
+  # name: gpu-v100-32g
+  # name: gpu-a100-80g
+
+
+environment:
+  # Base image tag: PyTorch 2.0.1, CUDA 11.7, cuDNN 8, devel variant.
+  image: pytorch/pytorch:2.0.1-cuda11.7-cudnn8-devel
+  image_setup:
+    - apt-get -y update
+    - apt-get -y install wget git git-lfs  # one invocation instead of three
+  setup:
+    - pip install transformers==4.31
+    - pip install accelerate==0.23
+    # NOTE(review): PEFT is installed unpinned from the repo's default branch; pin a tag or commit for reproducible runs.
+    - pip install git+https://github.com/huggingface/peft
+    - pip install evaluate scikit-learn scipy typing_extensions einops
+    - pip install datasets sentencepiece setuptools rouge-score nltk openai
+    - pip install tensorboard tensorboardX
+    - pip install bitsandbytes
+
+storage:
+  output:
+    mount_dir: /mnt/t-qingzhang  # the job's --output_dir is a path under this mount
+    storage_account_name: tsinterns
+    container_name: t-qingzhang
+
+code:
+  local_dir: '../'  # parent directory; presumably relative to this config file - TODO confirm
+
+jobs:  # list items indented under the key, matching image_setup/setup/command
+  - name: gsm8k_llama2_13b_4bit_profile
+    sku: 1xG4  # NOTE(review): command pins CUDA_VISIBLE_DEVICES=0; if this sku allocates more than one GPU the rest sit idle - confirm
+    process_count_per_node: 1
+    submit_args:
+      container_args:
+        cpus: 32
+    command:
+      - CUDA_VISIBLE_DEVICES=0 python test_time.py --model_name_or_path LoftQ/Llama-2-13b-hf-bit4-rank64 --output_dir /mnt/t-qingzhang/exp_results/test_time/llama-2-13b/gsm8k/bit4-rank64_ft_real --learning_rate 1e-4  --seed 202 --dataset_name gsm8k --dataset_config main --pad_to_max_length --max_source_length 128 --max_target_length 256 --num_train_epochs 5 --per_device_train_batch_size 4 --per_device_eval_batch_size 4 --gradient_accumulation_steps 4 --with_tracking --report_to tensorboard