Restructure repo into directories as Lit-LLaMA (Lightning-AI#86)

carmocca authored Jun 2, 2023
1 parent dcccbcc commit a905731
Showing 27 changed files with 88 additions and 122 deletions.
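At a glance, the renames in this commit move the top-level scripts into task directories:

- `chat.py` → `chat/base.py`
- `generate.py` → `generate/base.py`
- `generate_adapter.py` → `generate/adapter.py`
- `generate_adapter_v2.py` → `generate/adapter_v2.py`
- `finetune_adapter.py` → `finetune/adapter.py`
- `train_redpajama.py` → `pretrain/redpajama.py`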
8 changes: 4 additions & 4 deletions README.md
@@ -97,7 +97,7 @@ To generate text predictions, you need to download the model weights. **If you d
 Run inference:
 
 ```bash
-python generate.py --prompt "Hello, my name is"
+python generate/base.py --prompt "Hello, my name is"
 ```
 
 This will run the 3B pre-trained model and require ~7 GB of GPU memory using the `bfloat16` datatype.
@@ -107,7 +107,7 @@ This will run the 3B pre-trained model and require ~7 GB of GPU memory using the
 You can also chat with the model interactively:
 
 ```bash
-python chat.py
+python chat/base.py
 ```
 
 ### Run large models on smaller consumer devices
@@ -116,7 +116,7 @@ We support LLM.int8 and GPTQ.int4 inference by following [this guide](howto/infe
 
 ## Finetune the model
 
-We provide a simple training script `finetune_adapter.py` that instruction-tunes a pretrained model on the [Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset.
+We provide a simple training script `finetune/adapter.py` that instruction-tunes a pretrained model on the [Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset.
 
 1. Download the data and generate an instruction tuning dataset:
 
@@ -129,7 +129,7 @@ python scripts/prepare_alpaca.py
 [Adapter](https://arxiv.org/abs/2303.16199):
 
 ```bash
-python finetune_adapter.py
+python finetune/adapter.py
 ```
 
 The finetuning requires at least one GPU with ~12 GB memory (GTX 3060).
4 changes: 4 additions & 0 deletions chat.py → chat/base.py
@@ -9,6 +9,10 @@
 import lightning as L
 import torch
 
+# support running without installing as a package
+wd = Path(__file__).parent.parent.resolve()
+sys.path.append(str(wd))
+
 from lit_parrot import Parrot, Tokenizer, Config
 from lit_parrot.utils import EmptyInitOnDevice, lazy_load, check_valid_checkpoint_dir
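This `sys.path` block recurs in every moved script: since each one now sits one level below the repository root, it prepends that root so `import lit_parrot` works from a plain checkout, without installing the project as a package. A minimal sketch of the mechanics (assuming, as in `chat/base.py`, the script lives one directory below the root):

```python
# e.g. chat/base.py: running `python chat/base.py` puts only `chat/`
# on sys.path, so sibling top-level packages are not importable
import sys
from pathlib import Path

wd = Path(__file__).parent.parent.resolve()  # chat/ -> repository root
sys.path.append(str(wd))

from lit_parrot import Config  # now resolves through the appended root
```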
7 changes: 6 additions & 1 deletion finetune_adapter.py → finetune/adapter.py
@@ -1,5 +1,6 @@
 import os
 import shutil
+import sys
 import time
 from pathlib import Path
 from typing import Literal
@@ -10,7 +11,11 @@
 from lightning.fabric.accelerators.mps import MPSAccelerator
 from lightning.fabric.strategies import DeepSpeedStrategy
 
-from generate import generate
+# support running without installing as a package
+wd = Path(__file__).parent.parent.resolve()
+sys.path.append(str(wd))
+
+from generate.base import generate
 from lit_parrot.adapter import Parrot, Config, mark_only_adapter_as_trainable, adapter_state_from_state_dict
 from lit_parrot.tokenizer import Tokenizer
 from lit_parrot.utils import lazy_load, check_valid_checkpoint_dir
File renamed without changes.
10 changes: 7 additions & 3 deletions generate_adapter.py → generate/adapter.py
@@ -8,7 +8,11 @@
 import lightning as L
 import torch
 
-from generate import generate
+# support running without installing as a package
+wd = Path(__file__).parent.parent.resolve()
+sys.path.append(str(wd))
+
+from generate.base import generate
 from lit_parrot import Tokenizer
 from lit_parrot.adapter import Parrot, Config
 from lit_parrot.utils import EmptyInitOnDevice, lazy_load, check_valid_checkpoint_dir
@@ -27,12 +31,12 @@ def main(
 ) -> None:
     """Generates a response based on a given instruction and an optional input.
     This script will only work with checkpoints from the instruction-tuned Parrot-Adapter model.
-    See `finetune_adapter.py`.
+    See `finetune/adapter.py`.
     Args:
         prompt: The prompt/instruction (Alpaca style).
         adapter_path: Path to the checkpoint with trained adapter weights, which are the output of
-            `finetune_adapter.py`.
+            `finetune/adapter.py`.
         checkpoint_dir: The path to the checkpoint folder with pretrained Parrot weights.
         input: Optional input (Alpaca style).
         quantize: Whether to quantize the model and using which method:
4 changes: 2 additions & 2 deletions generate_adapter_v2.py → generate/adapter_v2.py
@@ -28,12 +28,12 @@ def main(
 ) -> None:
     """Generates a response based on a given instruction and an optional input.
     This script will only work with checkpoints from the instruction-tuned Parrot-AdapterV2 model.
-    See `finetune_adapter_v2.py`.
+    See `finetune/adapter_v2.py`.
     Args:
         prompt: The prompt/instruction (Alpaca style).
         adapter_path: Path to the checkpoint with trained adapter weights, which are the output of
-            `finetune_adapter_v2.py`.
+            `finetune/adapter_v2.py`.
         checkpoint_dir: The path to the checkpoint folder with pretrained Parrot weights.
         input: Optional input (Alpaca style).
         quantize: Whether to quantize the model and using which method:
4 changes: 4 additions & 0 deletions generate.py → generate/base.py
@@ -8,6 +8,10 @@
 import lightning as L
 import torch
 
+# support running without installing as a package
+wd = Path(__file__).parent.parent.resolve()
+sys.path.append(str(wd))
+
 from lit_parrot import Parrot, Tokenizer, Config
 from lit_parrot.utils import EmptyInitOnDevice, lazy_load, check_valid_checkpoint_dir
2 changes: 1 addition & 1 deletion howto/download_pythia.md
@@ -45,5 +45,5 @@ python scripts/convert_hf_checkpoint.py --checkpoint_dir checkpoints/EleutherAI/
 You're done! To execute the model just run:
 
 ```bash
-python generate.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/EleutherAI/pythia-1b
+python generate/base.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/EleutherAI/pythia-1b
 ```
2 changes: 1 addition & 1 deletion howto/download_redpajama_incite.md
@@ -34,5 +34,5 @@ python scripts/convert_hf_checkpoint.py --checkpoint_dir checkpoints/togethercom
 You're done! To execute the model just run:
 
 ```bash
-python generate.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/togethercomputer/RedPajama-INCITE-Base-3B-v1
+python generate/base.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/togethercomputer/RedPajama-INCITE-Base-3B-v1
 ```
2 changes: 1 addition & 1 deletion howto/download_stablelm.md
@@ -32,5 +32,5 @@ python scripts/convert_hf_checkpoint.py --checkpoint_dir checkpoints/stabilityai
 You're done! To execute the model just run:
 
 ```bash
-python generate.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b
+python generate/base.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b
 ```
18 changes: 9 additions & 9 deletions howto/finetune_adapter.md
@@ -26,13 +26,13 @@ or [prepare your own dataset](#tune-on-your-dataset).
 ## Running the finetuning
 
 ```bash
-python finetune_adapter.py --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b
+python finetune/adapter.py --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b
 ```
 
 or for Adapter V2
 
 ```bash
-python finetune_adapter_v2.py --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b
+python finetune/adapter_v2.py --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b
 ```
 
 The finetuning requires at least one GPU with ~12 GB memory.
@@ -49,20 +49,20 @@ micro_batch_size = 4
 This script will save checkpoints periodically to the `out_dir` directory. If you are finetuning different models or on your own dataset, you can specify an output directory with your preferred name:
 
 ```bash
-python finetune_adapter.py --out_dir out/adapter/my-model-finetuned
+python finetune/adapter.py --out_dir out/adapter/my-model-finetuned
 ```
 
 or for Adapter V2
 
 ```bash
-python finetune_adapter_v2.py --out_dir out/adapter_v2/my-model-finetuned
+python finetune/adapter_v2.py --out_dir out/adapter_v2/my-model-finetuned
 ```
 
 If your GPU does not support `bfloat16`, you can pass the `--precision 32-true` argument.
 For instance, to fine-tune on MPS (the GPU on modern Macs), you can run
 
 ```bash
-python finetune_adapter.py --out_dir out/adapter/my-model-finetuned --precision 32-true
+python finetune/adapter.py --out_dir out/adapter/my-model-finetuned --precision 32-true
 ```
 
 Note that `mps` as the accelerator will be picked up automatically by Fabric when running on a modern Mac.
@@ -72,15 +72,15 @@ Note that `mps` as the accelerator will be picked up automatically by Fabric whe
 You can test the finetuned model with your own instructions by running:
 
 ```bash
-python generate_adapter.py \
+python generate/adapter.py \
     --prompt "Recommend a movie to watch on the weekend." \
     --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b
 ```
 
 or for Adapter V2
 
 ```bash
-python generate_adapter_v2.py \
+python generate/adapter_v2.py \
     --prompt "Recommend a movie to watch on the weekend." \
     --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b
 ```
@@ -123,10 +123,10 @@ With only a few modifications, you can prepare and train on your own instruction
 python scripts/prepare_mydata.py --destination_path data/mydata/
 ```
-5. Run `finetune_adapter.py` by passing in the location of your data (and optionally other parameters):
+5. Run `finetune/adapter.py` by passing in the location of your data (and optionally other parameters):
 ```bash
-python finetune_adapter.py \
+python finetune/adapter.py \
     --data_dir data/mydata/ \
     --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b \
     --out_dir data/mydata-finetuned
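To make step 4 above concrete, a `scripts/prepare_mydata.py` can emit Alpaca-style records (`instruction`/`input`/`output` keys). The sketch below is hypothetical: the CSV input and file names are assumptions, and it skips any tokenization or train/test splitting that the repo's real `scripts/prepare_alpaca.py` performs.

```python
# hypothetical scripts/prepare_mydata.py: convert a CSV with
# instruction,input,output columns into Alpaca-style JSON records
import csv
import json
from pathlib import Path


def prepare(
    csv_path: Path = Path("data/mydata/raw.csv"),
    destination_path: Path = Path("data/mydata/"),
) -> None:
    destination_path.mkdir(parents=True, exist_ok=True)
    with open(csv_path, newline="", encoding="utf-8") as f:
        records = [
            {"instruction": row["instruction"], "input": row["input"], "output": row["output"]}
            for row in csv.DictReader(f)
        ]
    (destination_path / "mydata.json").write_text(json.dumps(records, indent=2))


if __name__ == "__main__":
    prepare()
```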
4 changes: 2 additions & 2 deletions howto/inference.md
@@ -3,7 +3,7 @@
 We demonstrate how to run inference (next token prediction) with the Parrot base model in the [`generate.py`](generate.py) script:
 
 ```bash
-python generate.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b
+python generate/base.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b
 ```
 Output:
 ```
@@ -19,7 +19,7 @@ This will run the 3B pre-trained model and require ~7 GB of GPU memory using the
 You can also chat with the model interactively:
 
 ```bash
-python chat.py --checkpoint_dir checkpoints/stabilityai/stablelm-tuned-alpha-3b
+python chat/base.py --checkpoint_dir checkpoints/stabilityai/stablelm-tuned-alpha-3b
 ```
 
 This script can work with any checkpoint. For the best chat-like experience, we recommend using it with a checkpoints
2 changes: 1 addition & 1 deletion howto/tpus.md
@@ -48,7 +48,7 @@ Since you created a new machine, you'll probably need to download the weights. Y
 Generation works out-of-the-box with TPUs:
 
 ```shell
-python3 generate.py --prompt "Hello, my name is" --num_samples 3
+python3 generate/base.py --prompt "Hello, my name is" --num_samples 3
 ```
 
 This command will take ~17s for the first generation as XLA needs to compile the graph.
5 changes: 5 additions & 0 deletions train_redpajama.py → pretrain/redpajama.py
@@ -1,5 +1,6 @@
 import glob
 import math
+import sys
 import time
 from functools import partial
 from pathlib import Path
@@ -11,6 +12,10 @@
 from torch.distributed.fsdp.wrap import transformer_auto_wrap_policy
 from torch.utils.data import DataLoader
 
+# support running without installing as a package
+wd = Path(__file__).parent.parent.resolve()
+sys.path.append(str(wd))
+
 from lit_parrot.model import Block, Parrot, Config
 from lit_parrot.packed_dataset import PackedDataset, CombinedDataset
 from lit_parrot.utils import save_model_checkpoint
8 changes: 2 additions & 6 deletions tests/conftest.py
@@ -6,11 +6,7 @@
 wd = Path(__file__).parent.parent.absolute()
 
 
-@pytest.fixture()
-def lit_parrot():
+@pytest.fixture(autouse=True)
+def add_wd_to_path():
     # this adds support for running tests without the package installed
     sys.path.append(str(wd))
-
-    import lit_parrot
-
-    return lit_parrot
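Net effect of this change: tests no longer request a `lit_parrot` fixture that appends the repo root and returns the module; an autouse fixture now extends `sys.path` for every test, and tests import `lit_parrot` directly. Reassembled from the hunk above, the resulting `tests/conftest.py` is roughly (the two imports at the top are assumed from context):

```python
# tests/conftest.py after this commit, reconstructed from the diff above
import sys
from pathlib import Path

import pytest

wd = Path(__file__).parent.parent.absolute()


@pytest.fixture(autouse=True)
def add_wd_to_path():
    # this adds support for running tests without the package installed
    sys.path.append(str(wd))
```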
2 changes: 1 addition & 1 deletion tests/test_adapter.py
@@ -6,7 +6,7 @@
 
 @pytest.mark.skipif(sys.platform == "win32", reason="EmptyInitOnDevice on CPU not working for Windows.")
 @pytest.mark.parametrize("name", ["pythia-70m", "stablelm-base-alpha-3b"])
-def test_config_identical(name, lit_parrot):
+def test_config_identical(name):
     import lit_parrot.adapter as parrot_adapter
     import lit_parrot.model as parrot
     from lit_parrot.utils import EmptyInitOnDevice
8 changes: 2 additions & 6 deletions tests/test_adapter_v2.py
@@ -1,25 +1,21 @@
 import sys
 from dataclasses import asdict
 
 import pytest
 
 
 @pytest.mark.skipif(sys.platform == "win32", reason="EmptyInitOnDevice on CPU not working for Windows.")
 @pytest.mark.parametrize("name", ["pythia-70m", "stablelm-base-alpha-3b"])
-def test_config_identical(name, lit_parrot):
+def test_config_identical(name):
     import torch.nn as nn
     import lit_parrot.adapter as parrot_adapter
     from lit_parrot.adapter_v2 import adapter_v2_linear_with_bias_and_scale
     import lit_parrot.model as parrot
     from lit_parrot.utils import EmptyInitOnDevice
 
     base_config = asdict(parrot.Config.from_name(name))
     adapter_config = asdict(parrot_adapter.Config.from_name(name))
 
     with EmptyInitOnDevice():
         base_model = parrot.Parrot.from_name(name)
         adapter_model = parrot_adapter.Parrot.from_name(name)
 
     for module in adapter_model.modules():
         if isinstance(module, nn.Linear):
             adapter_v2_linear_with_bias_and_scale(module)
16 changes: 2 additions & 14 deletions tests/test_chat.py
@@ -1,4 +1,3 @@
-import functools
 import subprocess
 import sys
 from itertools import repeat
@@ -8,17 +7,6 @@
 import pytest
 import torch
 
-wd = Path(__file__).parent.parent.absolute()
-
-
-@functools.lru_cache(maxsize=1)
-def load_script():
-    sys.path.append(str(wd))
-
-    import chat
-
-    return chat
-
 
 @pytest.mark.parametrize(
     ("generated", "stop_tokens", "expected"),
@@ -30,7 +18,7 @@ def load_script():
     ],
 )
 def test_generate(generated, stop_tokens, expected):
-    chat = load_script()
+    import chat.base as chat
 
     input_idx = torch.tensor([5, 3])
     max_returned_tokens = len(input_idx) + 8
@@ -55,7 +43,7 @@ def multinomial(*_, **__):
 
 
 def test_cli():
-    cli_path = wd / "chat.py"
+    cli_path = Path(__file__).parent.parent / "chat" / "base.py"
     output = subprocess.check_output([sys.executable, cli_path, "-h"])
    output = str(output.decode())
     assert "Starts a conversation" in output
6 changes: 0 additions & 6 deletions tests/test_config.py
@@ -1,9 +1,3 @@
-from pathlib import Path
-
-
-wd = Path(__file__).parent.parent.absolute()
-
-
 def test_config():
     from lit_parrot import Config
 
14 changes: 4 additions & 10 deletions tests/test_convert_hf_checkpoint.py
@@ -1,24 +1,18 @@
-import sys
 from pathlib import Path
 from unittest import mock
 
 import pytest
 
-wd = (Path(__file__).parent.parent / "scripts").absolute()
-
-
 def test_convert_hf_checkpoint(tmp_path):
-    sys.path.append(str(wd))
-
-    import convert_hf_checkpoint
+    from scripts.convert_hf_checkpoint import convert_hf_checkpoint
 
     with pytest.raises(ValueError, match="to contain .bin"):
-        convert_hf_checkpoint.convert_hf_checkpoint(checkpoint_dir=tmp_path, model_name="pythia-70m")
+        convert_hf_checkpoint(checkpoint_dir=tmp_path, model_name="pythia-70m")
 
     bin_file = tmp_path / "foo.bin"
     bin_file.touch()
-    with mock.patch("convert_hf_checkpoint.lazy_load") as load:
-        convert_hf_checkpoint.convert_hf_checkpoint(checkpoint_dir=tmp_path, model_name="pythia-70m")
+    with mock.patch("scripts.convert_hf_checkpoint.lazy_load") as load:
+        convert_hf_checkpoint(checkpoint_dir=tmp_path, model_name="pythia-70m")
     load.assert_called_with(bin_file)
 
     assert {p.name for p in tmp_path.glob("*")} == {"foo.bin", "lit_config.json", "lit_model.pth"}
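The changed patch target is worth a note: `unittest.mock.patch` must name the attribute where it is looked up, so once the converter is imported as `scripts.convert_hf_checkpoint`, `lazy_load` is patched there rather than in its defining module. A self-contained illustration of the rule, using only the stdlib (not the repo's code):

```python
import json
from unittest import mock

# mock.patch replaces a name at its lookup site. Patching "json.dumps"
# affects code that resolves dumps through the json module -- the same
# reason the test above patches "scripts.convert_hf_checkpoint.lazy_load"
# once the converter is imported through the scripts package.
with mock.patch("json.dumps", return_value="stub") as m:
    assert json.dumps({"a": 1}) == "stub"
m.assert_called_once_with({"a": 1})
```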