From a9057312c8b121225e02922263144e70817c9c99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Fri, 2 Jun 2023 19:36:32 +0200 Subject: [PATCH] Restructure repo into directories as Lit-LLaMA (#86) --- README.md | 8 ++--- chat.py => chat/base.py | 4 +++ finetune_adapter.py => finetune/adapter.py | 7 +++- .../adapter_v2.py | 0 generate_adapter.py => generate/adapter.py | 10 ++++-- .../adapter_v2.py | 4 +-- generate.py => generate/base.py | 4 +++ howto/download_pythia.md | 2 +- howto/download_redpajama_incite.md | 2 +- howto/download_stablelm.md | 2 +- howto/finetune_adapter.md | 18 +++++----- howto/inference.md | 4 +-- howto/tpus.md | 2 +- train_redpajama.py => pretrain/redpajama.py | 5 +++ tests/conftest.py | 8 ++--- tests/test_adapter.py | 2 +- tests/test_adapter_v2.py | 8 ++--- tests/test_chat.py | 16 ++------- tests/test_config.py | 6 ---- tests/test_convert_hf_checkpoint.py | 14 +++----- tests/test_generate.py | 18 ++-------- tests/test_model.py | 34 +++++++------------ tests/test_packed_dataset.py | 2 -- tests/test_prepare_redpajama.py | 14 +++----- tests/test_rope.py | 4 ++- tests/test_tokenizer.py | 4 ++- tests/test_utils.py | 8 ++--- 27 files changed, 88 insertions(+), 122 deletions(-) rename chat.py => chat/base.py (98%) rename finetune_adapter.py => finetune/adapter.py (98%) rename finetune_adapter_v2.py => finetune/adapter_v2.py (100%) rename generate_adapter.py => generate/adapter.py (95%) rename generate_adapter_v2.py => generate/adapter_v2.py (98%) rename generate.py => generate/base.py (98%) rename train_redpajama.py => pretrain/redpajama.py (98%) diff --git a/README.md b/README.md index 6b8cd13b78..25586f4a28 100644 --- a/README.md +++ b/README.md @@ -97,7 +97,7 @@ To generate text predictions, you need to download the model weights. **If you d Run inference: ```bash -python generate.py --prompt "Hello, my name is" +python generate/base.py --prompt "Hello, my name is" ``` This will run the 3B pre-trained model and require ~7 GB of GPU memory using the `bfloat16` datatype. @@ -107,7 +107,7 @@ This will run the 3B pre-trained model and require ~7 GB of GPU memory using the You can also chat with the model interactively: ```bash -python chat.py +python chat/base.py ``` ### Run large models on smaller consumer devices @@ -116,7 +116,7 @@ We support LLM.int8 and GPTQ.int4 inference by following [this guide](howto/infe ## Finetune the model -We provide a simple training script `finetune_adapter.py` that instruction-tunes a pretrained model on the [Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset. +We provide a simple training script `finetune/adapter.py` that instruction-tunes a pretrained model on the [Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset. 1. Download the data and generate an instruction tuning dataset: @@ -129,7 +129,7 @@ python scripts/prepare_alpaca.py [Adapter](https://arxiv.org/abs/2303.16199): ```bash -python finetune_adapter.py +python finetune/adapter.py ``` The finetuning requires at least one GPU with ~12 GB memory (GTX 3060). 
diff --git a/chat.py b/chat/base.py similarity index 98% rename from chat.py rename to chat/base.py index abfc1cdd8a..5065ac8b9a 100644 --- a/chat.py +++ b/chat/base.py @@ -9,6 +9,10 @@ import lightning as L import torch +# support running without installing as a package +wd = Path(__file__).parent.parent.resolve() +sys.path.append(str(wd)) + from lit_parrot import Parrot, Tokenizer, Config from lit_parrot.utils import EmptyInitOnDevice, lazy_load, check_valid_checkpoint_dir diff --git a/finetune_adapter.py b/finetune/adapter.py similarity index 98% rename from finetune_adapter.py rename to finetune/adapter.py index b9df2d66de..7e8af11d40 100644 --- a/finetune_adapter.py +++ b/finetune/adapter.py @@ -1,5 +1,6 @@ import os import shutil +import sys import time from pathlib import Path from typing import Literal @@ -10,7 +11,11 @@ from lightning.fabric.accelerators.mps import MPSAccelerator from lightning.fabric.strategies import DeepSpeedStrategy -from generate import generate +# support running without installing as a package +wd = Path(__file__).parent.parent.resolve() +sys.path.append(str(wd)) + +from generate.base import generate from lit_parrot.adapter import Parrot, Config, mark_only_adapter_as_trainable, adapter_state_from_state_dict from lit_parrot.tokenizer import Tokenizer from lit_parrot.utils import lazy_load, check_valid_checkpoint_dir diff --git a/finetune_adapter_v2.py b/finetune/adapter_v2.py similarity index 100% rename from finetune_adapter_v2.py rename to finetune/adapter_v2.py diff --git a/generate_adapter.py b/generate/adapter.py similarity index 95% rename from generate_adapter.py rename to generate/adapter.py index c882ed83c0..8e4b680b99 100644 --- a/generate_adapter.py +++ b/generate/adapter.py @@ -8,7 +8,11 @@ import lightning as L import torch -from generate import generate +# support running without installing as a package +wd = Path(__file__).parent.parent.resolve() +sys.path.append(str(wd)) + +from generate.base import generate from lit_parrot import Tokenizer from lit_parrot.adapter import Parrot, Config from lit_parrot.utils import EmptyInitOnDevice, lazy_load, check_valid_checkpoint_dir @@ -27,12 +31,12 @@ def main( ) -> None: """Generates a response based on a given instruction and an optional input. This script will only work with checkpoints from the instruction-tuned Parrot-Adapter model. - See `finetune_adapter.py`. + See `finetune/adapter.py`. Args: prompt: The prompt/instruction (Alpaca style). adapter_path: Path to the checkpoint with trained adapter weights, which are the output of - `finetune_adapter.py`. + `finetune/adapter.py`. checkpoint_dir: The path to the checkpoint folder with pretrained Parrot weights. input: Optional input (Alpaca style). quantize: Whether to quantize the model and using which method: diff --git a/generate_adapter_v2.py b/generate/adapter_v2.py similarity index 98% rename from generate_adapter_v2.py rename to generate/adapter_v2.py index 7a89582bac..49cc200abd 100644 --- a/generate_adapter_v2.py +++ b/generate/adapter_v2.py @@ -28,12 +28,12 @@ def main( ) -> None: """Generates a response based on a given instruction and an optional input. This script will only work with checkpoints from the instruction-tuned Parrot-AdapterV2 model. - See `finetune_adapter_v2.py`. + See `finetune/adapter_v2.py`. Args: prompt: The prompt/instruction (Alpaca style). adapter_path: Path to the checkpoint with trained adapter weights, which are the output of - `finetune_adapter_v2.py`. + `finetune/adapter_v2.py`. 
checkpoint_dir: The path to the checkpoint folder with pretrained Parrot weights. input: Optional input (Alpaca style). quantize: Whether to quantize the model and using which method: diff --git a/generate.py b/generate/base.py similarity index 98% rename from generate.py rename to generate/base.py index f9afe93b8d..8d762187f5 100644 --- a/generate.py +++ b/generate/base.py @@ -8,6 +8,10 @@ import lightning as L import torch +# support running without installing as a package +wd = Path(__file__).parent.parent.resolve() +sys.path.append(str(wd)) + from lit_parrot import Parrot, Tokenizer, Config from lit_parrot.utils import EmptyInitOnDevice, lazy_load, check_valid_checkpoint_dir diff --git a/howto/download_pythia.md b/howto/download_pythia.md index 382dc136d1..5e48c52876 100644 --- a/howto/download_pythia.md +++ b/howto/download_pythia.md @@ -45,5 +45,5 @@ python scripts/convert_hf_checkpoint.py --checkpoint_dir checkpoints/EleutherAI/ You're done! To execute the model just run: ```bash -python generate.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/EleutherAI/pythia-1b +python generate/base.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/EleutherAI/pythia-1b ``` diff --git a/howto/download_redpajama_incite.md b/howto/download_redpajama_incite.md index ad4cea9c22..31a3d690f8 100644 --- a/howto/download_redpajama_incite.md +++ b/howto/download_redpajama_incite.md @@ -34,5 +34,5 @@ python scripts/convert_hf_checkpoint.py --checkpoint_dir checkpoints/togethercom You're done! To execute the model just run: ```bash -python generate.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/togethercomputer/RedPajama-INCITE-Base-3B-v1 +python generate/base.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/togethercomputer/RedPajama-INCITE-Base-3B-v1 ``` diff --git a/howto/download_stablelm.md b/howto/download_stablelm.md index 9f66f2bbd6..cc43706f52 100644 --- a/howto/download_stablelm.md +++ b/howto/download_stablelm.md @@ -32,5 +32,5 @@ python scripts/convert_hf_checkpoint.py --checkpoint_dir checkpoints/stabilityai You're done! To execute the model just run: ```bash -python generate.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b +python generate/base.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b ``` diff --git a/howto/finetune_adapter.md b/howto/finetune_adapter.md index 6a84fc7c9a..c2d25386b0 100644 --- a/howto/finetune_adapter.md +++ b/howto/finetune_adapter.md @@ -26,13 +26,13 @@ or [prepare your own dataset](#tune-on-your-dataset). ## Running the finetuning ```bash -python finetune_adapter.py --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b +python finetune/adapter.py --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b ``` or for Adapter V2 ```bash -python finetune_adapter_v2.py --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b +python finetune/adapter_v2.py --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b ``` The finetuning requires at least one GPU with ~12 GB memory. @@ -49,20 +49,20 @@ micro_batch_size = 4 This script will save checkpoints periodically to the `out_dir` directory. 
If you are finetuning different models or on your own dataset, you can specify an output directory with your preferred name: ```bash -python finetune_adapter.py --out_dir out/adapter/my-model-finetuned +python finetune/adapter.py --out_dir out/adapter/my-model-finetuned ``` or for Adapter V2 ```bash -python finetune_adapter_v2.py --out_dir out/adapter_v2/my-model-finetuned +python finetune/adapter_v2.py --out_dir out/adapter_v2/my-model-finetuned ``` If your GPU does not support `bfloat16`, you can pass the `--precision 32-true` argument. For instance, to fine-tune on MPS (the GPU on modern Macs), you can run ```bash -python finetune_adapter.py --out_dir out/adapter/my-model-finetuned --precision 32-true +python finetune/adapter.py --out_dir out/adapter/my-model-finetuned --precision 32-true ``` Note that `mps` as the accelerator will be picked up automatically by Fabric when running on a modern Mac. @@ -72,7 +72,7 @@ Note that `mps` as the accelerator will be picked up automatically by Fabric whe You can test the finetuned model with your own instructions by running: ```bash -python generate_adapter.py \ +python generate/adapter.py \ --prompt "Recommend a movie to watch on the weekend." \ --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b ``` @@ -80,7 +80,7 @@ python generate_adapter.py \ or for Adapter V2 ```bash -python generate_adapter_v2.py \ +python generate/adapter_v2.py \ --prompt "Recommend a movie to watch on the weekend." \ --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b ``` @@ -123,10 +123,10 @@ With only a few modifications, you can prepare and train on your own instruction python scripts/prepare_mydata.py --destination_path data/mydata/ ``` -5. Run `finetune_adapter.py` by passing in the location of your data (and optionally other parameters): +5. Run `finetune/adapter.py` by passing in the location of your data (and optionally other parameters): ```bash - python finetune_adapter.py \ + python finetune/adapter.py \ --data_dir data/mydata/ \ --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b \ --out_dir data/mydata-finetuned diff --git a/howto/inference.md b/howto/inference.md index c4abef72e0..8b3e2dfdb6 100644 --- a/howto/inference.md +++ b/howto/inference.md @@ -3,7 +3,7 @@ We demonstrate how to run inference (next token prediction) with the Parrot base model in the [`generate.py`](generate.py) script: ```bash -python generate.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b +python generate/base.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b ``` Output: ``` @@ -19,7 +19,7 @@ This will run the 3B pre-trained model and require ~7 GB of GPU memory using the You can also chat with the model interactively: ```bash -python chat.py --checkpoint_dir checkpoints/stabilityai/stablelm-tuned-alpha-3b +python chat/base.py --checkpoint_dir checkpoints/stabilityai/stablelm-tuned-alpha-3b ``` This script can work with any checkpoint. For the best chat-like experience, we recommend using it with a checkpoints diff --git a/howto/tpus.md b/howto/tpus.md index efe1f38ad8..23138f8f43 100644 --- a/howto/tpus.md +++ b/howto/tpus.md @@ -48,7 +48,7 @@ Since you created a new machine, you'll probably need to download the weights. 
Generation works out-of-the-box with TPUs: ```shell -python3 generate.py --prompt "Hello, my name is" --num_samples 3 +python3 generate/base.py --prompt "Hello, my name is" --num_samples 3 ``` This command will take ~17s for the first generation as XLA needs to compile the graph. diff --git a/train_redpajama.py b/pretrain/redpajama.py similarity index 98% rename from train_redpajama.py rename to pretrain/redpajama.py index fc3ebb0499..addecea01d 100644 --- a/train_redpajama.py +++ b/pretrain/redpajama.py @@ -1,5 +1,6 @@ import glob import math +import sys import time from functools import partial from pathlib import Path @@ -11,6 +12,10 @@ from torch.distributed.fsdp.wrap import transformer_auto_wrap_policy from torch.utils.data import DataLoader +# support running without installing as a package +wd = Path(__file__).parent.parent.resolve() +sys.path.append(str(wd)) + from lit_parrot.model import Block, Parrot, Config from lit_parrot.packed_dataset import PackedDataset, CombinedDataset from lit_parrot.utils import save_model_checkpoint diff --git a/tests/conftest.py b/tests/conftest.py index a4a917b3f9..702d56a0a8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,11 +6,7 @@ wd = Path(__file__).parent.parent.absolute() -@pytest.fixture() -def lit_parrot(): +@pytest.fixture(autouse=True) +def add_wd_to_path(): # this adds support for running tests without the package installed sys.path.append(str(wd)) - - import lit_parrot - - return lit_parrot diff --git a/tests/test_adapter.py b/tests/test_adapter.py index be392f13c9..605ebd6769 100644 --- a/tests/test_adapter.py +++ b/tests/test_adapter.py @@ -6,7 +6,7 @@ @pytest.mark.skipif(sys.platform == "win32", reason="EmptyInitOnDevice on CPU not working for Windows.") @pytest.mark.parametrize("name", ["pythia-70m", "stablelm-base-alpha-3b"]) -def test_config_identical(name, lit_parrot): +def test_config_identical(name): import lit_parrot.adapter as parrot_adapter import lit_parrot.model as parrot from lit_parrot.utils import EmptyInitOnDevice diff --git a/tests/test_adapter_v2.py b/tests/test_adapter_v2.py index 0fe946a145..77ef250653 100644 --- a/tests/test_adapter_v2.py +++ b/tests/test_adapter_v2.py @@ -1,25 +1,21 @@ import sys -from dataclasses import asdict import pytest @pytest.mark.skipif(sys.platform == "win32", reason="EmptyInitOnDevice on CPU not working for Windows.") @pytest.mark.parametrize("name", ["pythia-70m", "stablelm-base-alpha-3b"]) -def test_config_identical(name, lit_parrot): +def test_config_identical(name): import torch.nn as nn import lit_parrot.adapter as parrot_adapter from lit_parrot.adapter_v2 import adapter_v2_linear_with_bias_and_scale import lit_parrot.model as parrot from lit_parrot.utils import EmptyInitOnDevice - base_config = asdict(parrot.Config.from_name(name)) - adapter_config = asdict(parrot_adapter.Config.from_name(name)) - with EmptyInitOnDevice(): base_model = parrot.Parrot.from_name(name) adapter_model = parrot_adapter.Parrot.from_name(name) - + for module in adapter_model.modules(): if isinstance(module, nn.Linear): adapter_v2_linear_with_bias_and_scale(module) diff --git a/tests/test_chat.py b/tests/test_chat.py index a1f08ebf49..d26fd9062b 100644 --- a/tests/test_chat.py +++ b/tests/test_chat.py @@ -1,4 +1,3 @@ -import functools import subprocess import sys from itertools import repeat @@ -8,17 +7,6 @@ import pytest import torch -wd = Path(__file__).parent.parent.absolute() - - -@functools.lru_cache(maxsize=1) -def load_script(): - sys.path.append(str(wd)) - - import chat - -
return chat - @pytest.mark.parametrize( ("generated", "stop_tokens", "expected"), @@ -30,7 +18,7 @@ def load_script(): ], ) def test_generate(generated, stop_tokens, expected): - chat = load_script() + import chat.base as chat input_idx = torch.tensor([5, 3]) max_returned_tokens = len(input_idx) + 8 @@ -55,7 +43,7 @@ def multinomial(*_, **__): def test_cli(): - cli_path = wd / "chat.py" + cli_path = Path(__file__).parent.parent / "chat" / "base.py" output = subprocess.check_output([sys.executable, cli_path, "-h"]) output = str(output.decode()) assert "Starts a conversation" in output diff --git a/tests/test_config.py b/tests/test_config.py index 3fe4b7d348..b58fda3db7 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,9 +1,3 @@ -from pathlib import Path - - -wd = Path(__file__).parent.parent.absolute() - - def test_config(): from lit_parrot import Config diff --git a/tests/test_convert_hf_checkpoint.py b/tests/test_convert_hf_checkpoint.py index 8f19c213b1..4db2e4d693 100644 --- a/tests/test_convert_hf_checkpoint.py +++ b/tests/test_convert_hf_checkpoint.py @@ -1,24 +1,18 @@ -import sys -from pathlib import Path from unittest import mock import pytest -wd = (Path(__file__).parent.parent / "scripts").absolute() - def test_convert_hf_checkpoint(tmp_path): - sys.path.append(str(wd)) - - import convert_hf_checkpoint + from scripts.convert_hf_checkpoint import convert_hf_checkpoint with pytest.raises(ValueError, match="to contain .bin"): - convert_hf_checkpoint.convert_hf_checkpoint(checkpoint_dir=tmp_path, model_name="pythia-70m") + convert_hf_checkpoint(checkpoint_dir=tmp_path, model_name="pythia-70m") bin_file = tmp_path / "foo.bin" bin_file.touch() - with mock.patch("convert_hf_checkpoint.lazy_load") as load: - convert_hf_checkpoint.convert_hf_checkpoint(checkpoint_dir=tmp_path, model_name="pythia-70m") + with mock.patch("scripts.convert_hf_checkpoint.lazy_load") as load: + convert_hf_checkpoint(checkpoint_dir=tmp_path, model_name="pythia-70m") load.assert_called_with(bin_file) assert {p.name for p in tmp_path.glob("*")} == {"foo.bin", "lit_config.json", "lit_model.pth"} diff --git a/tests/test_generate.py b/tests/test_generate.py index 949a00e8d8..1f46a53702 100644 --- a/tests/test_generate.py +++ b/tests/test_generate.py @@ -1,4 +1,3 @@ -import functools import json import os import subprocess @@ -12,17 +11,6 @@ import pytest import torch -wd = Path(__file__).parent.parent.absolute() - - -@functools.lru_cache(maxsize=1) -def load_generate_script(): - sys.path.append(str(wd)) - - import generate - - return generate - @pytest.fixture() def fake_checkpoint_dir(tmp_path): @@ -38,7 +26,7 @@ def fake_checkpoint_dir(tmp_path): @pytest.mark.parametrize("max_seq_length", (10, 20 + 5)) def test_generate(max_seq_length): - generate = load_generate_script() + import generate.base as generate from lit_parrot import Parrot, Config @@ -69,7 +57,7 @@ def multinomial(*args, **kwargs): @mock.patch("torch.cuda.is_bf16_supported", return_value=False) def test_main(_, fake_checkpoint_dir, monkeypatch): - generate = load_generate_script() + import generate.base as generate config_path = fake_checkpoint_dir / "lit_config.json" config = {"block_size": 128, "vocab_size": 50, "n_layer": 2, "n_head": 4, "n_embd": 8, "rotary_percentage": 1} @@ -114,7 +102,7 @@ def setup_module(self, *_): def test_cli(): - cli_path = wd / "generate.py" + cli_path = Path(__file__).parent.parent / "generate" / "base.py" output = subprocess.check_output([sys.executable, cli_path, "-h"]) output = str(output.decode()) 
assert "Generates text samples" in output diff --git a/tests/test_model.py b/tests/test_model.py index 589578ef31..cc6d900ec3 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -1,34 +1,24 @@ -import functools -import os +import sys from pathlib import Path from urllib.request import urlretrieve -import torch import pytest -import sys - +import torch from transformers import GPTNeoXForCausalLM, PretrainedConfig - wd = Path(__file__).parent.parent.absolute() -@functools.lru_cache(maxsize=1) -def load_convert_script(): - sys.path.append(str(wd / "scripts")) - - import convert_hf_checkpoint - - return convert_hf_checkpoint - - @torch.inference_mode() @pytest.mark.parametrize("rotary_pct", (0.25, 1)) @pytest.mark.parametrize("batch_size", (1, 3)) @pytest.mark.parametrize("n_embd", (16, 32)) @pytest.mark.parametrize("parallel_residual", (False, True)) @pytest.mark.parametrize("kv_cache", (False, True)) -def test_against_hf_model(rotary_pct, batch_size, n_embd, parallel_residual, kv_cache, lit_parrot) -> None: +def test_against_hf_model(rotary_pct, batch_size, n_embd, parallel_residual, kv_cache) -> None: + import lit_parrot + from scripts.convert_hf_checkpoint import copy_weights_gpt_neox + block_size = 64 # https://huggingface.co/stabilityai/stablelm-base-alpha-3b/blob/main/config.json#L24 vocab_size = 100 @@ -64,9 +54,8 @@ def test_against_hf_model(rotary_pct, batch_size, n_embd, parallel_residual, kv_ state_dict = {} theirs_model = GPTNeoXForCausalLM(theirs_config) - convert_hf_checkpoint = load_convert_script() # load the hf initialization into our model - convert_hf_checkpoint.copy_weights_gpt_neox(state_dict, theirs_model.state_dict()) + copy_weights_gpt_neox(state_dict, theirs_model.state_dict()) ours_model = lit_parrot.Parrot(ours_config) ours_model.load_state_dict(state_dict) @@ -100,7 +89,7 @@ def test_against_hf_model(rotary_pct, batch_size, n_embd, parallel_residual, kv_ @torch.inference_mode() -def test_against_original_falcon_40b(lit_parrot): +def test_against_original_falcon_40b(): file_path = wd / "tests" / "original_falcon_40b.py" url = "https://gist.githubusercontent.com/carmocca/feed39b1bc65a29f73c1cecc58a01167/raw/a9a65f2b93716b3c09ec9f354d535ae5953de08f/original_falcon_40b.py" if not file_path.is_file(): @@ -134,7 +123,8 @@ def test_against_original_falcon_40b(lit_parrot): @pytest.mark.skipif(not torch.cuda.is_available(), reason="Requires CUDA") @pytest.mark.xfail(raises=AssertionError) # https://github.com/Lightning-AI/lit-parrot/issues/13 @torch.inference_mode() -def test_model_bfloat16(lit_parrot) -> None: +def test_model_bfloat16() -> None: + import lit_parrot from lit_parrot.utils import EmptyInitOnDevice block_size = 64 @@ -164,7 +154,9 @@ def test_model_bfloat16(lit_parrot) -> None: @pytest.mark.skipif(sys.platform in ("win32", "darwin"), reason="torch.compile not supported on this platform") @torch.inference_mode() -def test_model_compile(lit_parrot): +def test_model_compile(): + import lit_parrot + config = lit_parrot.Config(block_size=8, vocab_size=8, n_layer=2, n_head=2, n_embd=4) model = lit_parrot.Parrot(config) model.apply(model._init_weights) diff --git a/tests/test_packed_dataset.py b/tests/test_packed_dataset.py index be30c3a2b0..541a8140d9 100644 --- a/tests/test_packed_dataset.py +++ b/tests/test_packed_dataset.py @@ -46,8 +46,6 @@ def test_packed_dataset(tmp_path): vocab_size=tokenizer.vocab_size, ) - text_ids = [] - for text in texts: text_ids = tokenizer.encode(text) print(len(text_ids)) diff --git a/tests/test_prepare_redpajama.py 
b/tests/test_prepare_redpajama.py index dfa3fdd32f..3d8bdfca62 100644 --- a/tests/test_prepare_redpajama.py +++ b/tests/test_prepare_redpajama.py @@ -5,8 +5,6 @@ from pathlib import Path from unittest import mock -wd = (Path(__file__).parent.parent / "scripts").absolute() - import requests @@ -17,8 +15,6 @@ def maybe_get_file(url, file_path): def test_prepare_sample(tmp_path): - sys.path.append(str(wd)) - tmp_path.mkdir(parents=True, exist_ok=True) vocabulary_path = tmp_path / "tokenizer.json" @@ -40,7 +36,7 @@ def test_prepare_sample(tmp_path): jsonl_sample = "\n".join([json.dumps(el) for el in [sample] * 2]) - import prepare_redpajama + import scripts.prepare_redpajama as prepare_redpajama for filename in prepare_redpajama.filenames_sample: with open(source_path / filename, "w") as f: @@ -78,8 +74,6 @@ def test_prepare_sample(tmp_path): def test_prepare_full(tmp_path): - sys.path.append(str(wd)) - tmp_path.mkdir(parents=True, exist_ok=True) vocabulary_path = tmp_path / "tokenizer.json" @@ -101,7 +95,7 @@ def test_prepare_full(tmp_path): jsonl_sample = "\n".join([json.dumps(el) for el in [sample] * 2]) - import prepare_redpajama + import scripts.prepare_redpajama as prepare_redpajama arxiv_file = source_path / "arxiv" / "arxiv_0.jsonl" arxiv_file.parent.mkdir(parents=True, exist_ok=True) @@ -120,7 +114,7 @@ def test_prepare_full(tmp_path): "common_crawl": "common_crawl/*", } - with mock.patch("prepare_redpajama.filename_sets", filename_sets): + with mock.patch.object(prepare_redpajama, "filename_sets", filename_sets): prepare_redpajama.prepare( source_path=source_path, vocabulary_path=vocabulary_path, @@ -155,7 +149,7 @@ def test_prepare_full(tmp_path): def test_cli(): - cli_path = wd / "prepare_redpajama.py" + cli_path = Path(__file__).parent.parent / "scripts" / "prepare_redpajama.py" output = subprocess.check_output([sys.executable, cli_path, "-h"]) output = str(output.decode()) assert 'Prepare the "Red Pajama"' in output diff --git a/tests/test_rope.py b/tests/test_rope.py index 80983e72ae..4939c35960 100644 --- a/tests/test_rope.py +++ b/tests/test_rope.py @@ -3,7 +3,9 @@ @torch.inference_mode() -def test_rope(lit_parrot): +def test_rope(): + import lit_parrot + bs, seq_len, n_head, n_embed = 1, 6, 2, 8 head_size = n_embed // n_head x = torch.randint(0, 10000, size=(bs, n_head, seq_len, head_size)).float() diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py index 42981bba3b..988a85ab8a 100644 --- a/tests/test_tokenizer.py +++ b/tests/test_tokenizer.py @@ -5,7 +5,9 @@ from transformers import AutoTokenizer -def test_tokenizer_against_hf(lit_parrot): +def test_tokenizer_against_hf(): + import lit_parrot + hf_tokenizer = AutoTokenizer.from_pretrained("StabilityAI/stablelm-base-alpha-3b") # hacky way to access the data loaded by the above folder = Path(hf_tokenizer.init_kwargs["special_tokens_map_file"]).parent diff --git a/tests/test_utils.py b/tests/test_utils.py index 9383451121..813b3625a7 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -11,7 +11,7 @@ class ATensor(torch.Tensor): pass -def test_lazy_load_basic(lit_parrot): +def test_lazy_load_basic(): import lit_parrot.utils with tempfile.TemporaryDirectory() as tmpdirname: @@ -30,7 +30,7 @@ def test_lazy_load_basic(lit_parrot): torch.testing.assert_close(actual, expected) -def test_lazy_load_subclass(lit_parrot): +def test_lazy_load_subclass(): import lit_parrot.utils with tempfile.TemporaryDirectory() as tmpdirname: @@ -46,7 +46,7 @@ def test_lazy_load_subclass(lit_parrot): 
torch.testing.assert_close(actual._load_tensor(), expected) -def test_find_multiple(lit_parrot): +def test_find_multiple(): from lit_parrot.utils import find_multiple assert find_multiple(17, 5) == 20 @@ -59,7 +59,7 @@ def test_find_multiple(lit_parrot): @pytest.mark.skipif(sys.platform == "win32", reason="match fails on windows. why did they have to use backslashes?") -def test_check_valid_checkpoint_dir(lit_parrot, tmp_path): +def test_check_valid_checkpoint_dir(tmp_path): from lit_parrot.utils import check_valid_checkpoint_dir os.chdir(tmp_path)
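
A note on the `# support running without installing as a package` block that this patch adds to every relocated script (`chat/base.py`, `finetune/adapter.py`, `generate/adapter.py`, `generate/base.py`, `pretrain/redpajama.py`): the sketch below shows the pattern in isolation, assuming the script sits one directory below the repository root and that `lit_parrot/` and `generate/` live at that root, as in `finetune/adapter.py`.

```python
# minimal sketch of the bootstrap each relocated script performs
import sys
from pathlib import Path

# resolve the repository root (the parent of this script's directory)
# and append it to sys.path, so the repo is importable without `pip install`
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))

# with the root on sys.path, a script can use package-style absolute imports
# of its siblings, which is why `from generate import generate` becomes:
from generate.base import generate
from lit_parrot import Parrot, Tokenizer, Config
```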
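
On the test side, the `lit_parrot` fixture is gone: `tests/conftest.py` now uses an autouse fixture that appends the repository root to `sys.path`, so tests import `lit_parrot` and the `scripts` modules directly by their package paths. One consequence is that mock targets must follow the new paths, either as a dotted string (`"scripts.convert_hf_checkpoint.lazy_load"`) or via `mock.patch.object` on the imported module, as `test_prepare_redpajama.py` does. A small sketch, assuming the repository root is on `sys.path` (which the autouse fixture provides during tests):

```python
# sketch: two equivalent ways to patch a module attribute after the move
from unittest import mock

import scripts.prepare_redpajama as prepare_redpajama

# patch by object reference: robust to how the module was imported
with mock.patch.object(prepare_redpajama, "filename_sets", {"common_crawl": "common_crawl/*"}):
    assert prepare_redpajama.filename_sets == {"common_crawl": "common_crawl/*"}

# patch by the new dotted path, mirroring test_convert_hf_checkpoint.py
with mock.patch("scripts.prepare_redpajama.filename_sets", {"common_crawl": "common_crawl/*"}):
    assert prepare_redpajama.filename_sets == {"common_crawl": "common_crawl/*"}
```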