Restructure repo into directories as Lit-LLaMA (Lightning-AI#86)

carmocca authored Jun 2, 2023
1 parent dcccbcc commit a905731
Showing 27 changed files with 88 additions and 122 deletions.
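At a glance, the renames in this commit move the top-level scripts into task directories:

- `chat.py` → `chat/base.py`
- `generate.py` → `generate/base.py`
- `generate_adapter.py` → `generate/adapter.py`
- `generate_adapter_v2.py` → `generate/adapter_v2.py`
- `finetune_adapter.py` → `finetune/adapter.py`
- `train_redpajama.py` → `pretrain/redpajama.py`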
8 changes: 4 additions & 4 deletions README.md
@@ -97,7 +97,7 @@ To generate text predictions, you need to download the model weights. **If you d
 Run inference:
 
 ```bash
-python generate.py --prompt "Hello, my name is"
+python generate/base.py --prompt "Hello, my name is"
 ```
 
 This will run the 3B pre-trained model and require ~7 GB of GPU memory using the `bfloat16` datatype.
@@ -107,7 +107,7 @@ This will run the 3B pre-trained model and require ~7 GB of GPU memory using the
 You can also chat with the model interactively:
 
 ```bash
-python chat.py
+python chat/base.py
 ```
 
 ### Run large models on smaller consumer devices
@@ -116,7 +116,7 @@ We support LLM.int8 and GPTQ.int4 inference by following [this guide](howto/infe
 
 ## Finetune the model
 
-We provide a simple training script `finetune_adapter.py` that instruction-tunes a pretrained model on the [Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset.
+We provide a simple training script `finetune/adapter.py` that instruction-tunes a pretrained model on the [Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset.
 
 1. Download the data and generate an instruction tuning dataset:
 
@@ -129,7 +129,7 @@ python scripts/prepare_alpaca.py
 [Adapter](https://arxiv.org/abs/2303.16199):
 
 ```bash
-python finetune_adapter.py
+python finetune/adapter.py
 ```
 
 The finetuning requires at least one GPU with ~12 GB memory (GTX 3060).
4 changes: 4 additions & 0 deletions chat.py → chat/base.py
@@ -9,6 +9,10 @@
 import lightning as L
 import torch
 
+# support running without installing as a package
+wd = Path(__file__).parent.parent.resolve()
+sys.path.append(str(wd))
+
 from lit_parrot import Parrot, Tokenizer, Config
 from lit_parrot.utils import EmptyInitOnDevice, lazy_load, check_valid_checkpoint_dir
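This `sys.path` block recurs in every moved script: since each one now sits one level below the repository root, it prepends that root so `import lit_parrot` works from a plain checkout, without installing the project as a package. A minimal sketch of the mechanics (assuming, as in `chat/base.py`, the script lives one directory below the root):

```python
# e.g. chat/base.py: running `python chat/base.py` puts only `chat/`
# on sys.path, so sibling top-level packages are not importable
import sys
from pathlib import Path

wd = Path(__file__).parent.parent.resolve()  # chat/ -> repository root
sys.path.append(str(wd))

from lit_parrot import Config  # now resolves through the appended root
```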
7 changes: 6 additions & 1 deletion finetune_adapter.py → finetune/adapter.py
@@ -1,5 +1,6 @@
 import os
 import shutil
+import sys
 import time
 from pathlib import Path
 from typing import Literal
@@ -10,7 +11,11 @@
 from lightning.fabric.accelerators.mps import MPSAccelerator
 from lightning.fabric.strategies import DeepSpeedStrategy
 
-from generate import generate
+# support running without installing as a package
+wd = Path(__file__).parent.parent.resolve()
+sys.path.append(str(wd))
+
+from generate.base import generate
 from lit_parrot.adapter import Parrot, Config, mark_only_adapter_as_trainable, adapter_state_from_state_dict
 from lit_parrot.tokenizer import Tokenizer
 from lit_parrot.utils import lazy_load, check_valid_checkpoint_dir
File renamed without changes.
10 changes: 7 additions & 3 deletions generate_adapter.py → generate/adapter.py
@@ -8,7 +8,11 @@
 import lightning as L
 import torch
 
-from generate import generate
+# support running without installing as a package
+wd = Path(__file__).parent.parent.resolve()
+sys.path.append(str(wd))
+
+from generate.base import generate
 from lit_parrot import Tokenizer
 from lit_parrot.adapter import Parrot, Config
 from lit_parrot.utils import EmptyInitOnDevice, lazy_load, check_valid_checkpoint_dir
@@ -27,12 +31,12 @@ def main(
 ) -> None:
     """Generates a response based on a given instruction and an optional input.
     This script will only work with checkpoints from the instruction-tuned Parrot-Adapter model.
-    See `finetune_adapter.py`.
+    See `finetune/adapter.py`.
     Args:
         prompt: The prompt/instruction (Alpaca style).
         adapter_path: Path to the checkpoint with trained adapter weights, which are the output of
-            `finetune_adapter.py`.
+            `finetune/adapter.py`.
         checkpoint_dir: The path to the checkpoint folder with pretrained Parrot weights.
         input: Optional input (Alpaca style).
         quantize: Whether to quantize the model and using which method:
4 changes: 2 additions & 2 deletions generate_adapter_v2.py → generate/adapter_v2.py
@@ -28,12 +28,12 @@ def main(
 ) -> None:
     """Generates a response based on a given instruction and an optional input.
     This script will only work with checkpoints from the instruction-tuned Parrot-AdapterV2 model.
-    See `finetune_adapter_v2.py`.
+    See `finetune/adapter_v2.py`.
     Args:
         prompt: The prompt/instruction (Alpaca style).
         adapter_path: Path to the checkpoint with trained adapter weights, which are the output of
-            `finetune_adapter_v2.py`.
+            `finetune/adapter_v2.py`.
         checkpoint_dir: The path to the checkpoint folder with pretrained Parrot weights.
         input: Optional input (Alpaca style).
         quantize: Whether to quantize the model and using which method:
4 changes: 4 additions & 0 deletions generate.py → generate/base.py
@@ -8,6 +8,10 @@
 import lightning as L
 import torch
 
+# support running without installing as a package
+wd = Path(__file__).parent.parent.resolve()
+sys.path.append(str(wd))
+
 from lit_parrot import Parrot, Tokenizer, Config
 from lit_parrot.utils import EmptyInitOnDevice, lazy_load, check_valid_checkpoint_dir
2 changes: 1 addition & 1 deletion howto/download_pythia.md
@@ -45,5 +45,5 @@ python scripts/convert_hf_checkpoint.py --checkpoint_dir checkpoints/EleutherAI/
 You're done! To execute the model just run:
 
 ```bash
-python generate.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/EleutherAI/pythia-1b
+python generate/base.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/EleutherAI/pythia-1b
 ```
2 changes: 1 addition & 1 deletion howto/download_redpajama_incite.md
@@ -34,5 +34,5 @@ python scripts/convert_hf_checkpoint.py --checkpoint_dir checkpoints/togethercom
 You're done! To execute the model just run:
 
 ```bash
-python generate.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/togethercomputer/RedPajama-INCITE-Base-3B-v1
+python generate/base.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/togethercomputer/RedPajama-INCITE-Base-3B-v1
 ```
2 changes: 1 addition & 1 deletion howto/download_stablelm.md
@@ -32,5 +32,5 @@ python scripts/convert_hf_checkpoint.py --checkpoint_dir checkpoints/stabilityai
 You're done! To execute the model just run:
 
 ```bash
-python generate.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b
+python generate/base.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b
 ```
18 changes: 9 additions & 9 deletions howto/finetune_adapter.md
@@ -26,13 +26,13 @@ or [prepare your own dataset](#tune-on-your-dataset).
 ## Running the finetuning
 
 ```bash
-python finetune_adapter.py --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b
+python finetune/adapter.py --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b
 ```
 
 or for Adapter V2
 
 ```bash
-python finetune_adapter_v2.py --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b
+python finetune/adapter_v2.py --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b
 ```
 
 The finetuning requires at least one GPU with ~12 GB memory.
@@ -49,20 +49,20 @@ micro_batch_size = 4
 This script will save checkpoints periodically to the `out_dir` directory. If you are finetuning different models or on your own dataset, you can specify an output directory with your preferred name:
 
 ```bash
-python finetune_adapter.py --out_dir out/adapter/my-model-finetuned
+python finetune/adapter.py --out_dir out/adapter/my-model-finetuned
 ```
 
 or for Adapter V2
 
 ```bash
-python finetune_adapter_v2.py --out_dir out/adapter_v2/my-model-finetuned
+python finetune/adapter_v2.py --out_dir out/adapter_v2/my-model-finetuned
 ```
 
 If your GPU does not support `bfloat16`, you can pass the `--precision 32-true` argument.
 For instance, to fine-tune on MPS (the GPU on modern Macs), you can run
 
 ```bash
-python finetune_adapter.py --out_dir out/adapter/my-model-finetuned --precision 32-true
+python finetune/adapter.py --out_dir out/adapter/my-model-finetuned --precision 32-true
 ```
 
 Note that `mps` as the accelerator will be picked up automatically by Fabric when running on a modern Mac.
@@ -72,15 +72,15 @@ Note that `mps` as the accelerator will be picked up automatically by Fabric whe
 You can test the finetuned model with your own instructions by running:
 
 ```bash
-python generate_adapter.py \
+python generate/adapter.py \
     --prompt "Recommend a movie to watch on the weekend." \
     --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b
 ```
 
 or for Adapter V2
 
 ```bash
-python generate_adapter_v2.py \
+python generate/adapter_v2.py \
     --prompt "Recommend a movie to watch on the weekend." \
     --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b
 ```
@@ -123,10 +123,10 @@ With only a few modifications, you can prepare and train on your own instruction
 python scripts/prepare_mydata.py --destination_path data/mydata/
 ```
-5. Run `finetune_adapter.py` by passing in the location of your data (and optionally other parameters):
+5. Run `finetune/adapter.py` by passing in the location of your data (and optionally other parameters):
 ```bash
-python finetune_adapter.py \
+python finetune/adapter.py \
     --data_dir data/mydata/ \
     --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b \
     --out_dir data/mydata-finetuned
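To make step 4 above concrete, a `scripts/prepare_mydata.py` can emit Alpaca-style records (`instruction`/`input`/`output` keys). The sketch below is hypothetical: the CSV input and file names are assumptions, and it skips any tokenization or train/test splitting that the repo's real `scripts/prepare_alpaca.py` performs.

```python
# hypothetical scripts/prepare_mydata.py: convert a CSV with
# instruction,input,output columns into Alpaca-style JSON records
import csv
import json
from pathlib import Path


def prepare(
    csv_path: Path = Path("data/mydata/raw.csv"),
    destination_path: Path = Path("data/mydata/"),
) -> None:
    destination_path.mkdir(parents=True, exist_ok=True)
    with open(csv_path, newline="", encoding="utf-8") as f:
        records = [
            {"instruction": row["instruction"], "input": row["input"], "output": row["output"]}
            for row in csv.DictReader(f)
        ]
    (destination_path / "mydata.json").write_text(json.dumps(records, indent=2))


if __name__ == "__main__":
    prepare()
```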
4 changes: 2 additions & 2 deletions howto/inference.md
@@ -3,7 +3,7 @@
 We demonstrate how to run inference (next token prediction) with the Parrot base model in the [`generate.py`](generate.py) script:
 
 ```bash
-python generate.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b
+python generate/base.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b
 ```
 Output:
 ```
@@ -19,7 +19,7 @@ This will run the 3B pre-trained model and require ~7 GB of GPU memory using the
 You can also chat with the model interactively:
 
 ```bash
-python chat.py --checkpoint_dir checkpoints/stabilityai/stablelm-tuned-alpha-3b
+python chat/base.py --checkpoint_dir checkpoints/stabilityai/stablelm-tuned-alpha-3b
 ```
 
 This script can work with any checkpoint. For the best chat-like experience, we recommend using it with a checkpoints
2 changes: 1 addition & 1 deletion howto/tpus.md
@@ -48,7 +48,7 @@ Since you created a new machine, you'll probably need to download the weights. Y
 Generation works out-of-the-box with TPUs:
 
 ```shell
-python3 generate.py --prompt "Hello, my name is" --num_samples 3
+python3 generate/base.py --prompt "Hello, my name is" --num_samples 3
 ```
 
 This command will take ~17s for the first generation as XLA needs to compile the graph.
5 changes: 5 additions & 0 deletions train_redpajama.py → pretrain/redpajama.py
@@ -1,5 +1,6 @@
 import glob
 import math
+import sys
 import time
 from functools import partial
 from pathlib import Path
@@ -11,6 +12,10 @@
 from torch.distributed.fsdp.wrap import transformer_auto_wrap_policy
 from torch.utils.data import DataLoader
 
+# support running without installing as a package
+wd = Path(__file__).parent.parent.resolve()
+sys.path.append(str(wd))
+
 from lit_parrot.model import Block, Parrot, Config
 from lit_parrot.packed_dataset import PackedDataset, CombinedDataset
 from lit_parrot.utils import save_model_checkpoint
8 changes: 2 additions & 6 deletions tests/conftest.py
@@ -6,11 +6,7 @@
 wd = Path(__file__).parent.parent.absolute()
 
 
-@pytest.fixture()
-def lit_parrot():
+@pytest.fixture(autouse=True)
+def add_wd_to_path():
     # this adds support for running tests without the package installed
     sys.path.append(str(wd))
-
-    import lit_parrot
-
-    return lit_parrot
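Net effect of this change: tests no longer request a `lit_parrot` fixture that appends the repo root and returns the module; an autouse fixture now extends `sys.path` for every test, and tests import `lit_parrot` directly. Reassembled from the hunk above, the resulting `tests/conftest.py` is roughly (the two imports at the top are assumed from context):

```python
# tests/conftest.py after this commit, reconstructed from the diff above
import sys
from pathlib import Path

import pytest

wd = Path(__file__).parent.parent.absolute()


@pytest.fixture(autouse=True)
def add_wd_to_path():
    # this adds support for running tests without the package installed
    sys.path.append(str(wd))
```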
2 changes: 1 addition & 1 deletion tests/test_adapter.py
@@ -6,7 +6,7 @@
 
 @pytest.mark.skipif(sys.platform == "win32", reason="EmptyInitOnDevice on CPU not working for Windows.")
 @pytest.mark.parametrize("name", ["pythia-70m", "stablelm-base-alpha-3b"])
-def test_config_identical(name, lit_parrot):
+def test_config_identical(name):
     import lit_parrot.adapter as parrot_adapter
     import lit_parrot.model as parrot
     from lit_parrot.utils import EmptyInitOnDevice
8 changes: 2 additions & 6 deletions tests/test_adapter_v2.py
@@ -1,25 +1,21 @@
 import sys
 from dataclasses import asdict
 
 import pytest
 
 
 @pytest.mark.skipif(sys.platform == "win32", reason="EmptyInitOnDevice on CPU not working for Windows.")
 @pytest.mark.parametrize("name", ["pythia-70m", "stablelm-base-alpha-3b"])
-def test_config_identical(name, lit_parrot):
+def test_config_identical(name):
     import torch.nn as nn
     import lit_parrot.adapter as parrot_adapter
     from lit_parrot.adapter_v2 import adapter_v2_linear_with_bias_and_scale
     import lit_parrot.model as parrot
     from lit_parrot.utils import EmptyInitOnDevice
 
     base_config = asdict(parrot.Config.from_name(name))
     adapter_config = asdict(parrot_adapter.Config.from_name(name))
 
     with EmptyInitOnDevice():
         base_model = parrot.Parrot.from_name(name)
         adapter_model = parrot_adapter.Parrot.from_name(name)
 
     for module in adapter_model.modules():
         if isinstance(module, nn.Linear):
             adapter_v2_linear_with_bias_and_scale(module)
16 changes: 2 additions & 14 deletions tests/test_chat.py
@@ -1,4 +1,3 @@
-import functools
 import subprocess
 import sys
 from itertools import repeat
@@ -8,17 +7,6 @@
 import pytest
 import torch
 
-wd = Path(__file__).parent.parent.absolute()
-
-
-@functools.lru_cache(maxsize=1)
-def load_script():
-    sys.path.append(str(wd))
-
-    import chat
-
-    return chat
-
 
 @pytest.mark.parametrize(
     ("generated", "stop_tokens", "expected"),
@@ -30,7 +18,7 @@ def load_script():
     ],
 )
 def test_generate(generated, stop_tokens, expected):
-    chat = load_script()
+    import chat.base as chat
 
     input_idx = torch.tensor([5, 3])
     max_returned_tokens = len(input_idx) + 8
@@ -55,7 +43,7 @@ def multinomial(*_, **__):
 
 
 def test_cli():
-    cli_path = wd / "chat.py"
+    cli_path = Path(__file__).parent.parent / "chat" / "base.py"
     output = subprocess.check_output([sys.executable, cli_path, "-h"])
    output = str(output.decode())
     assert "Starts a conversation" in output
6 changes: 0 additions & 6 deletions tests/test_config.py
@@ -1,9 +1,3 @@
-from pathlib import Path
-
-
-wd = Path(__file__).parent.parent.absolute()
-
-
 def test_config():
     from lit_parrot import Config
 
14 changes: 4 additions & 10 deletions tests/test_convert_hf_checkpoint.py
@@ -1,24 +1,18 @@
-import sys
 from pathlib import Path
 from unittest import mock
 
 import pytest
 
-wd = (Path(__file__).parent.parent / "scripts").absolute()
-
-
 def test_convert_hf_checkpoint(tmp_path):
-    sys.path.append(str(wd))
-
-    import convert_hf_checkpoint
+    from scripts.convert_hf_checkpoint import convert_hf_checkpoint
 
     with pytest.raises(ValueError, match="to contain .bin"):
-        convert_hf_checkpoint.convert_hf_checkpoint(checkpoint_dir=tmp_path, model_name="pythia-70m")
+        convert_hf_checkpoint(checkpoint_dir=tmp_path, model_name="pythia-70m")
 
     bin_file = tmp_path / "foo.bin"
     bin_file.touch()
-    with mock.patch("convert_hf_checkpoint.lazy_load") as load:
-        convert_hf_checkpoint.convert_hf_checkpoint(checkpoint_dir=tmp_path, model_name="pythia-70m")
+    with mock.patch("scripts.convert_hf_checkpoint.lazy_load") as load:
+        convert_hf_checkpoint(checkpoint_dir=tmp_path, model_name="pythia-70m")
     load.assert_called_with(bin_file)
 
     assert {p.name for p in tmp_path.glob("*")} == {"foo.bin", "lit_config.json", "lit_model.pth"}
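The changed patch target is worth a note: `unittest.mock.patch` must name the attribute where it is looked up, so once the converter is imported as `scripts.convert_hf_checkpoint`, `lazy_load` is patched there rather than in its defining module. A self-contained illustration of the rule, using only the stdlib (not the repo's code):

```python
import json
from unittest import mock

# mock.patch replaces a name at its lookup site. Patching "json.dumps"
# affects code that resolves dumps through the json module -- the same
# reason the test above patches "scripts.convert_hf_checkpoint.lazy_load"
# once the converter is imported through the scripts package.
with mock.patch("json.dumps", return_value="stub") as m:
    assert json.dumps({"a": 1}) == "stub"
m.assert_called_once_with({"a": 1})
```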