diff --git a/config_hub/finetune/README.md b/config_hub/finetune/README.md
index 1f53b18a84..96af2970d5 100644
--- a/config_hub/finetune/README.md
+++ b/config_hub/finetune/README.md
@@ -1,10 +1,11 @@
 ## Config files
 
-The table below lists the performances you can expect from the provided config files. Note that you can achieve lower memory consumption by lowering the micro batch size as needed. See the [Dealing with out-of-memory (OOM) errors](../../tutorials/oom.md) on lowering the memory requirements.
+The table below lists the performances you can expect from the provided config files. Note that you can achieve lower memory consumption by lowering the micro batch size as needed. In addition, you can lower the rank (`lora_r`) in the LoRA configuration files and disable LoRA for certain layers (for example, setting `lora_projection` and other LoRA layer-specific parameters to `false`).
+For more information, see [Dealing with out-of-memory (OOM) errors](../../tutorials/oom.md) for guidance on lowering the memory requirements.
 
 |                       | Size | Dataset   | Epochs | Val loss | Peak memory | Max seq length | Micro batch size | Precision | Training runtime |
 | --------------------- | ---- | --------- | ------ | -------- | ----------- | -------------- | ---------------- | --------- | ---------------- |
-| tiny-llama/lora.yaml  | 1.1B | Alpaca 2k | 4      | 1.053    | 10.54 GB    | 512            | 8                | bfloat16  | 9.24 min (A10G)  |
-| tiny-llama/qlora.yaml | 1.1B | Alpaca 2k | 4      | 1.074    | 13.32 GB    | 512            | 8                | bfloat16  | 9.89 min (A10G)  |
+| tiny-llama/lora.yaml  | 1.1B | Alpaca 2k | 3      | 1.038    | 13.50 GB    | 512            | 8                | bfloat16  | 8.06 min (A10G)  |
+| tiny-llama/qlora.yaml | 1.1B | Alpaca 2k | 3      | 1.056    | 16.24 GB    | 512            | 8                | bfloat16  | 8.74 min (A10G)  |
 | tiny-llama/full.yaml  | 1.1B | Alpaca 2k | 1      | 1.105    | 14.10 GB    | 512            | 4                | bfloat16  | 2.59 min (A10G)  |
 | llama-2-7b/qlora.yaml | 7B   | Alpaca 2k | 4      | 0.814    | 13.68 GB    | 512            | 2                | bfloat16  | 45.68 min (A10G) |
diff --git a/config_hub/finetune/tiny-llama/lora.yaml b/config_hub/finetune/tiny-llama/lora.yaml
index 82f41cb161..4991900954 100644
--- a/config_hub/finetune/tiny-llama/lora.yaml
+++ b/config_hub/finetune/tiny-llama/lora.yaml
@@ -27,19 +27,19 @@ lora_dropout: 0.05
 lora_query: true
 
 # Whether to apply LoRA to the key weights in attention. (type: bool, default: False)
-lora_key: false
+lora_key: true
 
 # Whether to apply LoRA to the value weights in attention. (type: bool, default: True)
 lora_value: true
 
 # Whether to apply LoRA to the output projection in the attention block. (type: bool, default: False)
-lora_projection: false
+lora_projection: true
 
 # Whether to apply LoRA to the weights of the MLP in the attention block. (type: bool, default: False)
-lora_mlp: false
+lora_mlp: true
 
 # Whether to apply LoRA to output head in GPT. (type: bool, default: False)
-lora_head: false
+lora_head: true
 
 # Data-related arguments. If not provided, the default is ``litgpt.data.Alpaca``.
 data:
@@ -71,7 +71,7 @@ train:
   lr_warmup_steps: 10
 
   # Number of epochs to train on (type: Optional[int], default: 5)
-  epochs: 4
+  epochs: 3
 
   # Total number of tokens to train on (type: Optional[int], default: null)
   max_tokens:
diff --git a/config_hub/finetune/tiny-llama/qlora.yaml b/config_hub/finetune/tiny-llama/qlora.yaml
index 9851055f8e..1e8cf20b8a 100644
--- a/config_hub/finetune/tiny-llama/qlora.yaml
+++ b/config_hub/finetune/tiny-llama/qlora.yaml
@@ -27,19 +27,19 @@ lora_dropout: 0.05
 lora_query: true
 
 # Whether to apply LoRA to the key weights in attention. (type: bool, default: False)
-lora_key: false
+lora_key: true
 
 # Whether to apply LoRA to the value weights in attention. (type: bool, default: True)
 lora_value: true
 
 # Whether to apply LoRA to the output projection in the attention block. (type: bool, default: False)
-lora_projection: false
+lora_projection: true
 
 # Whether to apply LoRA to the weights of the MLP in the attention block. (type: bool, default: False)
-lora_mlp: false
+lora_mlp: true
 
 # Whether to apply LoRA to output head in GPT. (type: bool, default: False)
-lora_head: false
+lora_head: true
 
 # Data-related arguments. If not provided, the default is ``litgpt.data.Alpaca``.
 data:
@@ -71,7 +71,7 @@ train:
   lr_warmup_steps: 10
 
  # Number of epochs to train on (type: Optional[int], default: 5)
-  epochs: 4
+  epochs: 3
 
   # Total number of tokens to train on (type: Optional[int], default: null)
   max_tokens:
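
The updated README sentence names the memory-saving knobs (`lora_r`, micro batch size, per-layer LoRA toggles) without showing them in context. Below is a minimal, hypothetical sketch of such overrides for a config like `config_hub/finetune/tiny-llama/lora.yaml`; the specific values are illustrative assumptions, not settings validated in this PR, and the `train.micro_batch_size` key name is assumed from the config layout rather than taken from the diff above.

```yaml
# Hypothetical memory-reduction tweaks for a LoRA config
# (e.g. config_hub/finetune/tiny-llama/lora.yaml). Values are illustrative only.

# Lower the LoRA rank to shrink the adapter weights and optimizer state.
lora_r: 4

# Disable LoRA for selected layers to reduce the number of trainable parameters.
lora_projection: false
lora_mlp: false
lora_head: false

train:
  # Lower the micro batch size (8 in the table above) to reduce peak activation memory.
  micro_batch_size: 4
```

These keys live in the shipped config files, so they can be edited in place; disabling LoRA layers or lowering the rank trades some quality for memory, so the validation loss in the README table should be re-checked after such changes.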