From a9057312c8b121225e02922263144e70817c9c99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20Mochol=C3=AD?= Date: Fri, 2 Jun 2023 19:36:32 +0200 Subject: [PATCH] Restructure repo into directories as Lit-LLaMA (#86) --- README.md | 8 ++--- chat.py => chat/base.py | 4 +++ finetune_adapter.py => finetune/adapter.py | 7 +++- .../adapter_v2.py | 0 generate_adapter.py => generate/adapter.py | 10 ++++-- .../adapter_v2.py | 4 +-- generate.py => generate/base.py | 4 +++ howto/download_pythia.md | 2 +- howto/download_redpajama_incite.md | 2 +- howto/download_stablelm.md | 2 +- howto/finetune_adapter.md | 18 +++++----- howto/inference.md | 4 +-- howto/tpus.md | 2 +- train_redpajama.py => pretrain/redpajama.py | 5 +++ tests/conftest.py | 8 ++--- tests/test_adapter.py | 2 +- tests/test_adapter_v2.py | 8 ++--- tests/test_chat.py | 16 ++------- tests/test_config.py | 6 ---- tests/test_convert_hf_checkpoint.py | 14 +++----- tests/test_generate.py | 18 ++-------- tests/test_model.py | 34 +++++++------------ tests/test_packed_dataset.py | 2 -- tests/test_prepare_redpajama.py | 14 +++----- tests/test_rope.py | 4 ++- tests/test_tokenizer.py | 4 ++- tests/test_utils.py | 8 ++--- 27 files changed, 88 insertions(+), 122 deletions(-) rename chat.py => chat/base.py (98%) rename finetune_adapter.py => finetune/adapter.py (98%) rename finetune_adapter_v2.py => finetune/adapter_v2.py (100%) rename generate_adapter.py => generate/adapter.py (95%) rename generate_adapter_v2.py => generate/adapter_v2.py (98%) rename generate.py => generate/base.py (98%) rename train_redpajama.py => pretrain/redpajama.py (98%) diff --git a/README.md b/README.md index 6b8cd13b78..25586f4a28 100644 --- a/README.md +++ b/README.md @@ -97,7 +97,7 @@ To generate text predictions, you need to download the model weights. **If you d Run inference: ```bash -python generate.py --prompt "Hello, my name is" +python generate/base.py --prompt "Hello, my name is" ``` This will run the 3B pre-trained model and require ~7 GB of GPU memory using the `bfloat16` datatype. @@ -107,7 +107,7 @@ This will run the 3B pre-trained model and require ~7 GB of GPU memory using the You can also chat with the model interactively: ```bash -python chat.py +python chat/base.py ``` ### Run large models on smaller consumer devices @@ -116,7 +116,7 @@ We support LLM.int8 and GPTQ.int4 inference by following [this guide](howto/infe ## Finetune the model -We provide a simple training script `finetune_adapter.py` that instruction-tunes a pretrained model on the [Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset. +We provide a simple training script `finetune/adapter.py` that instruction-tunes a pretrained model on the [Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset. 1. Download the data and generate an instruction tuning dataset: @@ -129,7 +129,7 @@ python scripts/prepare_alpaca.py [Adapter](https://arxiv.org/abs/2303.16199): ```bash -python finetune_adapter.py +python finetune/adapter.py ``` The finetuning requires at least one GPU with ~12 GB memory (GTX 3060). 
diff --git a/chat.py b/chat/base.py similarity index 98% rename from chat.py rename to chat/base.py index abfc1cdd8a..5065ac8b9a 100644 --- a/chat.py +++ b/chat/base.py @@ -9,6 +9,10 @@ import lightning as L import torch +# support running without installing as a package +wd = Path(__file__).parent.parent.resolve() +sys.path.append(str(wd)) + from lit_parrot import Parrot, Tokenizer, Config from lit_parrot.utils import EmptyInitOnDevice, lazy_load, check_valid_checkpoint_dir diff --git a/finetune_adapter.py b/finetune/adapter.py similarity index 98% rename from finetune_adapter.py rename to finetune/adapter.py index b9df2d66de..7e8af11d40 100644 --- a/finetune_adapter.py +++ b/finetune/adapter.py @@ -1,5 +1,6 @@ import os import shutil +import sys import time from pathlib import Path from typing import Literal @@ -10,7 +11,11 @@ from lightning.fabric.accelerators.mps import MPSAccelerator from lightning.fabric.strategies import DeepSpeedStrategy -from generate import generate +# support running without installing as a package +wd = Path(__file__).parent.parent.resolve() +sys.path.append(str(wd)) + +from generate.base import generate from lit_parrot.adapter import Parrot, Config, mark_only_adapter_as_trainable, adapter_state_from_state_dict from lit_parrot.tokenizer import Tokenizer from lit_parrot.utils import lazy_load, check_valid_checkpoint_dir diff --git a/finetune_adapter_v2.py b/finetune/adapter_v2.py similarity index 100% rename from finetune_adapter_v2.py rename to finetune/adapter_v2.py diff --git a/generate_adapter.py b/generate/adapter.py similarity index 95% rename from generate_adapter.py rename to generate/adapter.py index c882ed83c0..8e4b680b99 100644 --- a/generate_adapter.py +++ b/generate/adapter.py @@ -8,7 +8,11 @@ import lightning as L import torch -from generate import generate +# support running without installing as a package +wd = Path(__file__).parent.parent.resolve() +sys.path.append(str(wd)) + +from generate.base import generate from lit_parrot import Tokenizer from lit_parrot.adapter import Parrot, Config from lit_parrot.utils import EmptyInitOnDevice, lazy_load, check_valid_checkpoint_dir @@ -27,12 +31,12 @@ def main( ) -> None: """Generates a response based on a given instruction and an optional input. This script will only work with checkpoints from the instruction-tuned Parrot-Adapter model. - See `finetune_adapter.py`. + See `finetune/adapter.py`. Args: prompt: The prompt/instruction (Alpaca style). adapter_path: Path to the checkpoint with trained adapter weights, which are the output of - `finetune_adapter.py`. + `finetune/adapter.py`. checkpoint_dir: The path to the checkpoint folder with pretrained Parrot weights. input: Optional input (Alpaca style). quantize: Whether to quantize the model and using which method: diff --git a/generate_adapter_v2.py b/generate/adapter_v2.py similarity index 98% rename from generate_adapter_v2.py rename to generate/adapter_v2.py index 7a89582bac..49cc200abd 100644 --- a/generate_adapter_v2.py +++ b/generate/adapter_v2.py @@ -28,12 +28,12 @@ def main( ) -> None: """Generates a response based on a given instruction and an optional input. This script will only work with checkpoints from the instruction-tuned Parrot-AdapterV2 model. - See `finetune_adapter_v2.py`. + See `finetune/adapter_v2.py`. Args: prompt: The prompt/instruction (Alpaca style). adapter_path: Path to the checkpoint with trained adapter weights, which are the output of - `finetune_adapter_v2.py`. + `finetune/adapter_v2.py`. 
checkpoint_dir: The path to the checkpoint folder with pretrained Parrot weights. input: Optional input (Alpaca style). quantize: Whether to quantize the model and using which method: diff --git a/generate.py b/generate/base.py similarity index 98% rename from generate.py rename to generate/base.py index f9afe93b8d..8d762187f5 100644 --- a/generate.py +++ b/generate/base.py @@ -8,6 +8,10 @@ import lightning as L import torch +# support running without installing as a package +wd = Path(__file__).parent.parent.resolve() +sys.path.append(str(wd)) + from lit_parrot import Parrot, Tokenizer, Config from lit_parrot.utils import EmptyInitOnDevice, lazy_load, check_valid_checkpoint_dir diff --git a/howto/download_pythia.md b/howto/download_pythia.md index 382dc136d1..5e48c52876 100644 --- a/howto/download_pythia.md +++ b/howto/download_pythia.md @@ -45,5 +45,5 @@ python scripts/convert_hf_checkpoint.py --checkpoint_dir checkpoints/EleutherAI/ You're done! To execute the model just run: ```bash -python generate.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/EleutherAI/pythia-1b +python generate/base.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/EleutherAI/pythia-1b ``` diff --git a/howto/download_redpajama_incite.md b/howto/download_redpajama_incite.md index ad4cea9c22..31a3d690f8 100644 --- a/howto/download_redpajama_incite.md +++ b/howto/download_redpajama_incite.md @@ -34,5 +34,5 @@ python scripts/convert_hf_checkpoint.py --checkpoint_dir checkpoints/togethercom You're done! To execute the model just run: ```bash -python generate.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/togethercomputer/RedPajama-INCITE-Base-3B-v1 +python generate/base.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/togethercomputer/RedPajama-INCITE-Base-3B-v1 ``` diff --git a/howto/download_stablelm.md b/howto/download_stablelm.md index 9f66f2bbd6..cc43706f52 100644 --- a/howto/download_stablelm.md +++ b/howto/download_stablelm.md @@ -32,5 +32,5 @@ python scripts/convert_hf_checkpoint.py --checkpoint_dir checkpoints/stabilityai You're done! To execute the model just run: ```bash -python generate.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b +python generate/base.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b ``` diff --git a/howto/finetune_adapter.md b/howto/finetune_adapter.md index 6a84fc7c9a..c2d25386b0 100644 --- a/howto/finetune_adapter.md +++ b/howto/finetune_adapter.md @@ -26,13 +26,13 @@ or [prepare your own dataset](#tune-on-your-dataset). ## Running the finetuning ```bash -python finetune_adapter.py --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b +python finetune/adapter.py --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b ``` or for Adapter V2 ```bash -python finetune_adapter_v2.py --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b +python finetune/adapter_v2.py --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b ``` The finetuning requires at least one GPU with ~12 GB memory. @@ -49,20 +49,20 @@ micro_batch_size = 4 This script will save checkpoints periodically to the `out_dir` directory. 
If you are finetuning different models or on your own dataset, you can specify an output directory with your preferred name: ```bash -python finetune_adapter.py --out_dir out/adapter/my-model-finetuned +python finetune/adapter.py --out_dir out/adapter/my-model-finetuned ``` or for Adapter V2 ```bash -python finetune_adapter_v2.py --out_dir out/adapter_v2/my-model-finetuned +python finetune/adapter_v2.py --out_dir out/adapter_v2/my-model-finetuned ``` If your GPU does not support `bfloat16`, you can pass the `--precision 32-true` argument. For instance, to fine-tune on MPS (the GPU on modern Macs), you can run ```bash -python finetune_adapter.py --out_dir out/adapter/my-model-finetuned --precision 32-true +python finetune/adapter.py --out_dir out/adapter/my-model-finetuned --precision 32-true ``` Note that `mps` as the accelerator will be picked up automatically by Fabric when running on a modern Mac. @@ -72,7 +72,7 @@ Note that `mps` as the accelerator will be picked up automatically by Fabric whe You can test the finetuned model with your own instructions by running: ```bash -python generate_adapter.py \ +python generate/adapter.py \ --prompt "Recommend a movie to watch on the weekend." \ --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b ``` @@ -80,7 +80,7 @@ python generate_adapter.py \ or for Adapter V2 ```bash -python generate_adapter_v2.py \ +python generate/adapter_v2.py \ --prompt "Recommend a movie to watch on the weekend." \ --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b ``` @@ -123,10 +123,10 @@ With only a few modifications, you can prepare and train on your own instruction python scripts/prepare_mydata.py --destination_path data/mydata/ ``` -5. Run `finetune_adapter.py` by passing in the location of your data (and optionally other parameters): +5. Run `finetune/adapter.py` by passing in the location of your data (and optionally other parameters): ```bash - python finetune_adapter.py \ + python finetune/adapter.py \ --data_dir data/mydata/ \ --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b \ --out_dir data/mydata-finetuned diff --git a/howto/inference.md b/howto/inference.md index c4abef72e0..8b3e2dfdb6 100644 --- a/howto/inference.md +++ b/howto/inference.md @@ -3,7 +3,7 @@ We demonstrate how to run inference (next token prediction) with the Parrot base model in the [`generate.py`](generate.py) script: ```bash -python generate.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b +python generate/base.py --prompt "Hello, my name is" --checkpoint_dir checkpoints/stabilityai/stablelm-base-alpha-3b ``` Output: ``` @@ -19,7 +19,7 @@ This will run the 3B pre-trained model and require ~7 GB of GPU memory using the You can also chat with the model interactively: ```bash -python chat.py --checkpoint_dir checkpoints/stabilityai/stablelm-tuned-alpha-3b +python chat/base.py --checkpoint_dir checkpoints/stabilityai/stablelm-tuned-alpha-3b ``` This script can work with any checkpoint. For the best chat-like experience, we recommend using it with a checkpoints diff --git a/howto/tpus.md b/howto/tpus.md index efe1f38ad8..23138f8f43 100644 --- a/howto/tpus.md +++ b/howto/tpus.md @@ -48,7 +48,7 @@ Since you created a new machine, you'll probably need to download the weights. 
Generation works out-of-the-box with TPUs: ```shell -python3 generate.py --prompt "Hello, my name is" --num_samples 3 +python3 generate/base.py --prompt "Hello, my name is" --num_samples 3 ``` This command will take ~17s for the first generation as XLA needs to compile the graph. diff --git a/train_redpajama.py b/pretrain/redpajama.py similarity index 98% rename from train_redpajama.py rename to pretrain/redpajama.py index fc3ebb0499..addecea01d 100644 --- a/train_redpajama.py +++ b/pretrain/redpajama.py @@ -1,5 +1,6 @@ import glob import math +import sys import time from functools import partial from pathlib import Path @@ -11,6 +12,10 @@ from torch.distributed.fsdp.wrap import transformer_auto_wrap_policy from torch.utils.data import DataLoader +# support running without installing as a package +wd = Path(__file__).parent.parent.resolve() +sys.path.append(str(wd)) + from lit_parrot.model import Block, Parrot, Config from lit_parrot.packed_dataset import PackedDataset, CombinedDataset from lit_parrot.utils import save_model_checkpoint diff --git a/tests/conftest.py b/tests/conftest.py index a4a917b3f9..702d56a0a8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,11 +6,7 @@ wd = Path(__file__).parent.parent.absolute() -@pytest.fixture() -def lit_parrot(): +@pytest.fixture(autouse=True) +def add_wd_to_path(): # this adds support for running tests without the package installed sys.path.append(str(wd)) - - import lit_parrot - - return lit_parrot diff --git a/tests/test_adapter.py b/tests/test_adapter.py index be392f13c9..605ebd6769 100644 --- a/tests/test_adapter.py +++ b/tests/test_adapter.py @@ -6,7 +6,7 @@ @pytest.mark.skipif(sys.platform == "win32", reason="EmptyInitOnDevice on CPU not working for Windows.") @pytest.mark.parametrize("name", ["pythia-70m", "stablelm-base-alpha-3b"]) -def test_config_identical(name, lit_parrot): +def test_config_identical(name): import lit_parrot.adapter as parrot_adapter import lit_parrot.model as parrot from lit_parrot.utils import EmptyInitOnDevice diff --git a/tests/test_adapter_v2.py b/tests/test_adapter_v2.py index 0fe946a145..77ef250653 100644 --- a/tests/test_adapter_v2.py +++ b/tests/test_adapter_v2.py @@ -1,25 +1,21 @@ import sys -from dataclasses import asdict import pytest @pytest.mark.skipif(sys.platform == "win32", reason="EmptyInitOnDevice on CPU not working for Windows.") @pytest.mark.parametrize("name", ["pythia-70m", "stablelm-base-alpha-3b"]) -def test_config_identical(name, lit_parrot): +def test_config_identical(name): import torch.nn as nn import lit_parrot.adapter as parrot_adapter from lit_parrot.adapter_v2 import adapter_v2_linear_with_bias_and_scale import lit_parrot.model as parrot from lit_parrot.utils import EmptyInitOnDevice - base_config = asdict(parrot.Config.from_name(name)) - adapter_config = asdict(parrot_adapter.Config.from_name(name)) - with EmptyInitOnDevice(): base_model = parrot.Parrot.from_name(name) adapter_model = parrot_adapter.Parrot.from_name(name) - + for module in adapter_model.modules(): if isinstance(module, nn.Linear): adapter_v2_linear_with_bias_and_scale(module) diff --git a/tests/test_chat.py b/tests/test_chat.py index a1f08ebf49..d26fd9062b 100644 --- a/tests/test_chat.py +++ b/tests/test_chat.py @@ -1,4 +1,3 @@ -import functools import subprocess import sys from itertools import repeat @@ -8,17 +7,6 @@ import pytest import torch -wd = Path(__file__).parent.parent.absolute() - - -@functools.lru_cache(maxsize=1) -def load_script(): - sys.path.append(str(wd)) - - import chat - -
return chat - @pytest.mark.parametrize( ("generated", "stop_tokens", "expected"), @@ -30,7 +18,7 @@ def load_script(): ], ) def test_generate(generated, stop_tokens, expected): - chat = load_script() + import chat.base as chat input_idx = torch.tensor([5, 3]) max_returned_tokens = len(input_idx) + 8 @@ -55,7 +43,7 @@ def multinomial(*_, **__): def test_cli(): - cli_path = wd / "chat.py" + cli_path = Path(__file__).parent.parent / "chat" / "base.py" output = subprocess.check_output([sys.executable, cli_path, "-h"]) output = str(output.decode()) assert "Starts a conversation" in output diff --git a/tests/test_config.py b/tests/test_config.py index 3fe4b7d348..b58fda3db7 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,9 +1,3 @@ -from pathlib import Path - - -wd = Path(__file__).parent.parent.absolute() - - def test_config(): from lit_parrot import Config diff --git a/tests/test_convert_hf_checkpoint.py b/tests/test_convert_hf_checkpoint.py index 8f19c213b1..4db2e4d693 100644 --- a/tests/test_convert_hf_checkpoint.py +++ b/tests/test_convert_hf_checkpoint.py @@ -1,24 +1,18 @@ -import sys -from pathlib import Path from unittest import mock import pytest -wd = (Path(__file__).parent.parent / "scripts").absolute() - def test_convert_hf_checkpoint(tmp_path): - sys.path.append(str(wd)) - - import convert_hf_checkpoint + from scripts.convert_hf_checkpoint import convert_hf_checkpoint with pytest.raises(ValueError, match="to contain .bin"): - convert_hf_checkpoint.convert_hf_checkpoint(checkpoint_dir=tmp_path, model_name="pythia-70m") + convert_hf_checkpoint(checkpoint_dir=tmp_path, model_name="pythia-70m") bin_file = tmp_path / "foo.bin" bin_file.touch() - with mock.patch("convert_hf_checkpoint.lazy_load") as load: - convert_hf_checkpoint.convert_hf_checkpoint(checkpoint_dir=tmp_path, model_name="pythia-70m") + with mock.patch("scripts.convert_hf_checkpoint.lazy_load") as load: + convert_hf_checkpoint(checkpoint_dir=tmp_path, model_name="pythia-70m") load.assert_called_with(bin_file) assert {p.name for p in tmp_path.glob("*")} == {"foo.bin", "lit_config.json", "lit_model.pth"} diff --git a/tests/test_generate.py b/tests/test_generate.py index 949a00e8d8..1f46a53702 100644 --- a/tests/test_generate.py +++ b/tests/test_generate.py @@ -1,4 +1,3 @@ -import functools import json import os import subprocess @@ -12,17 +11,6 @@ import pytest import torch -wd = Path(__file__).parent.parent.absolute() - - -@functools.lru_cache(maxsize=1) -def load_generate_script(): - sys.path.append(str(wd)) - - import generate - - return generate - @pytest.fixture() def fake_checkpoint_dir(tmp_path): @@ -38,7 +26,7 @@ def fake_checkpoint_dir(tmp_path): @pytest.mark.parametrize("max_seq_length", (10, 20 + 5)) def test_generate(max_seq_length): - generate = load_generate_script() + import generate.base as generate from lit_parrot import Parrot, Config @@ -69,7 +57,7 @@ def multinomial(*args, **kwargs): @mock.patch("torch.cuda.is_bf16_supported", return_value=False) def test_main(_, fake_checkpoint_dir, monkeypatch): - generate = load_generate_script() + import generate.base as generate config_path = fake_checkpoint_dir / "lit_config.json" config = {"block_size": 128, "vocab_size": 50, "n_layer": 2, "n_head": 4, "n_embd": 8, "rotary_percentage": 1} @@ -114,7 +102,7 @@ def setup_module(self, *_): def test_cli(): - cli_path = wd / "generate.py" + cli_path = Path(__file__).parent.parent / "generate" / "base.py" output = subprocess.check_output([sys.executable, cli_path, "-h"]) output = str(output.decode()) 
assert "Generates text samples" in output diff --git a/tests/test_model.py b/tests/test_model.py index 589578ef31..cc6d900ec3 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -1,34 +1,24 @@ -import functools -import os +import sys from pathlib import Path from urllib.request import urlretrieve -import torch import pytest -import sys - +import torch from transformers import GPTNeoXForCausalLM, PretrainedConfig - wd = Path(__file__).parent.parent.absolute() -@functools.lru_cache(maxsize=1) -def load_convert_script(): - sys.path.append(str(wd / "scripts")) - - import convert_hf_checkpoint - - return convert_hf_checkpoint - - @torch.inference_mode() @pytest.mark.parametrize("rotary_pct", (0.25, 1)) @pytest.mark.parametrize("batch_size", (1, 3)) @pytest.mark.parametrize("n_embd", (16, 32)) @pytest.mark.parametrize("parallel_residual", (False, True)) @pytest.mark.parametrize("kv_cache", (False, True)) -def test_against_hf_model(rotary_pct, batch_size, n_embd, parallel_residual, kv_cache, lit_parrot) -> None: +def test_against_hf_model(rotary_pct, batch_size, n_embd, parallel_residual, kv_cache) -> None: + import lit_parrot + from scripts.convert_hf_checkpoint import copy_weights_gpt_neox + block_size = 64 # https://huggingface.co/stabilityai/stablelm-base-alpha-3b/blob/main/config.json#L24 vocab_size = 100 @@ -64,9 +54,8 @@ def test_against_hf_model(rotary_pct, batch_size, n_embd, parallel_residual, kv_ state_dict = {} theirs_model = GPTNeoXForCausalLM(theirs_config) - convert_hf_checkpoint = load_convert_script() # load the hf initialization into our model - convert_hf_checkpoint.copy_weights_gpt_neox(state_dict, theirs_model.state_dict()) + copy_weights_gpt_neox(state_dict, theirs_model.state_dict()) ours_model = lit_parrot.Parrot(ours_config) ours_model.load_state_dict(state_dict) @@ -100,7 +89,7 @@ def test_against_hf_model(rotary_pct, batch_size, n_embd, parallel_residual, kv_ @torch.inference_mode() -def test_against_original_falcon_40b(lit_parrot): +def test_against_original_falcon_40b(): file_path = wd / "tests" / "original_falcon_40b.py" url = "https://gist.githubusercontent.com/carmocca/feed39b1bc65a29f73c1cecc58a01167/raw/a9a65f2b93716b3c09ec9f354d535ae5953de08f/original_falcon_40b.py" if not file_path.is_file(): @@ -134,7 +123,8 @@ def test_against_original_falcon_40b(lit_parrot): @pytest.mark.skipif(not torch.cuda.is_available(), reason="Requires CUDA") @pytest.mark.xfail(raises=AssertionError) # https://github.com/Lightning-AI/lit-parrot/issues/13 @torch.inference_mode() -def test_model_bfloat16(lit_parrot) -> None: +def test_model_bfloat16() -> None: + import lit_parrot from lit_parrot.utils import EmptyInitOnDevice block_size = 64 @@ -164,7 +154,9 @@ def test_model_bfloat16(lit_parrot) -> None: @pytest.mark.skipif(sys.platform in ("win32", "darwin"), reason="torch.compile not supported on this platform") @torch.inference_mode() -def test_model_compile(lit_parrot): +def test_model_compile(): + import lit_parrot + config = lit_parrot.Config(block_size=8, vocab_size=8, n_layer=2, n_head=2, n_embd=4) model = lit_parrot.Parrot(config) model.apply(model._init_weights) diff --git a/tests/test_packed_dataset.py b/tests/test_packed_dataset.py index be30c3a2b0..541a8140d9 100644 --- a/tests/test_packed_dataset.py +++ b/tests/test_packed_dataset.py @@ -46,8 +46,6 @@ def test_packed_dataset(tmp_path): vocab_size=tokenizer.vocab_size, ) - text_ids = [] - for text in texts: text_ids = tokenizer.encode(text) print(len(text_ids)) diff --git a/tests/test_prepare_redpajama.py 
b/tests/test_prepare_redpajama.py index dfa3fdd32f..3d8bdfca62 100644 --- a/tests/test_prepare_redpajama.py +++ b/tests/test_prepare_redpajama.py @@ -5,8 +5,6 @@ from pathlib import Path from unittest import mock -wd = (Path(__file__).parent.parent / "scripts").absolute() - import requests @@ -17,8 +15,6 @@ def maybe_get_file(url, file_path): def test_prepare_sample(tmp_path): - sys.path.append(str(wd)) - tmp_path.mkdir(parents=True, exist_ok=True) vocabulary_path = tmp_path / "tokenizer.json" @@ -40,7 +36,7 @@ def test_prepare_sample(tmp_path): jsonl_sample = "\n".join([json.dumps(el) for el in [sample] * 2]) - import prepare_redpajama + import scripts.prepare_redpajama as prepare_redpajama for filename in prepare_redpajama.filenames_sample: with open(source_path / filename, "w") as f: @@ -78,8 +74,6 @@ def test_prepare_sample(tmp_path): def test_prepare_full(tmp_path): - sys.path.append(str(wd)) - tmp_path.mkdir(parents=True, exist_ok=True) vocabulary_path = tmp_path / "tokenizer.json" @@ -101,7 +95,7 @@ def test_prepare_full(tmp_path): jsonl_sample = "\n".join([json.dumps(el) for el in [sample] * 2]) - import prepare_redpajama + import scripts.prepare_redpajama as prepare_redpajama arxiv_file = source_path / "arxiv" / "arxiv_0.jsonl" arxiv_file.parent.mkdir(parents=True, exist_ok=True) @@ -120,7 +114,7 @@ def test_prepare_full(tmp_path): "common_crawl": "common_crawl/*", } - with mock.patch("prepare_redpajama.filename_sets", filename_sets): + with mock.patch.object(prepare_redpajama, "filename_sets", filename_sets): prepare_redpajama.prepare( source_path=source_path, vocabulary_path=vocabulary_path, @@ -155,7 +149,7 @@ def test_prepare_full(tmp_path): def test_cli(): - cli_path = wd / "prepare_redpajama.py" + cli_path = Path(__file__).parent.parent / "scripts" / "prepare_redpajama.py" output = subprocess.check_output([sys.executable, cli_path, "-h"]) output = str(output.decode()) assert 'Prepare the "Red Pajama"' in output diff --git a/tests/test_rope.py b/tests/test_rope.py index 80983e72ae..4939c35960 100644 --- a/tests/test_rope.py +++ b/tests/test_rope.py @@ -3,7 +3,9 @@ @torch.inference_mode() -def test_rope(lit_parrot): +def test_rope(): + import lit_parrot + bs, seq_len, n_head, n_embed = 1, 6, 2, 8 head_size = n_embed // n_head x = torch.randint(0, 10000, size=(bs, n_head, seq_len, head_size)).float() diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py index 42981bba3b..988a85ab8a 100644 --- a/tests/test_tokenizer.py +++ b/tests/test_tokenizer.py @@ -5,7 +5,9 @@ from transformers import AutoTokenizer -def test_tokenizer_against_hf(lit_parrot): +def test_tokenizer_against_hf(): + import lit_parrot + hf_tokenizer = AutoTokenizer.from_pretrained("StabilityAI/stablelm-base-alpha-3b") # hacky way to access the data loaded by the above folder = Path(hf_tokenizer.init_kwargs["special_tokens_map_file"]).parent diff --git a/tests/test_utils.py b/tests/test_utils.py index 9383451121..813b3625a7 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -11,7 +11,7 @@ class ATensor(torch.Tensor): pass -def test_lazy_load_basic(lit_parrot): +def test_lazy_load_basic(): import lit_parrot.utils with tempfile.TemporaryDirectory() as tmpdirname: @@ -30,7 +30,7 @@ def test_lazy_load_basic(lit_parrot): torch.testing.assert_close(actual, expected) -def test_lazy_load_subclass(lit_parrot): +def test_lazy_load_subclass(): import lit_parrot.utils with tempfile.TemporaryDirectory() as tmpdirname: @@ -46,7 +46,7 @@ def test_lazy_load_subclass(lit_parrot): 
torch.testing.assert_close(actual._load_tensor(), expected) -def test_find_multiple(lit_parrot): +def test_find_multiple(): from lit_parrot.utils import find_multiple assert find_multiple(17, 5) == 20 @@ -59,7 +59,7 @@ def test_find_multiple(lit_parrot): @pytest.mark.skipif(sys.platform == "win32", reason="match fails on windows. why did they have to use backslashes?") -def test_check_valid_checkpoint_dir(lit_parrot, tmp_path): +def test_check_valid_checkpoint_dir(tmp_path): from lit_parrot.utils import check_valid_checkpoint_dir os.chdir(tmp_path)
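
A note on the `# support running without installing as a package` block that this patch adds to every relocated script (`chat/base.py`, `finetune/adapter.py`, `generate/adapter.py`, `generate/base.py`, `pretrain/redpajama.py`): the sketch below shows the pattern in isolation, assuming the script sits one directory below the repository root and that `lit_parrot/` and `generate/` live at that root, as in `finetune/adapter.py`.

```python
# minimal sketch of the bootstrap each relocated script performs
import sys
from pathlib import Path

# resolve the repository root (the parent of this script's directory)
# and append it to sys.path, so the repo is importable without `pip install`
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))

# with the root on sys.path, a script can use package-style absolute imports
# of its siblings, which is why `from generate import generate` becomes:
from generate.base import generate
from lit_parrot import Parrot, Tokenizer, Config
```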
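
On the test side, the `lit_parrot` fixture is gone: `tests/conftest.py` now uses an autouse fixture that appends the repository root to `sys.path`, so tests import `lit_parrot` and the `scripts` modules directly by their package paths. One consequence is that mock targets must follow the new paths, either as a dotted string (`"scripts.convert_hf_checkpoint.lazy_load"`) or via `mock.patch.object` on the imported module, as `test_prepare_redpajama.py` does. A small sketch, assuming the repository root is on `sys.path` (which the autouse fixture provides during tests):

```python
# sketch: two equivalent ways to patch a module attribute after the move
from unittest import mock

import scripts.prepare_redpajama as prepare_redpajama

# patch by object reference: robust to how the module was imported
with mock.patch.object(prepare_redpajama, "filename_sets", {"common_crawl": "common_crawl/*"}):
    assert prepare_redpajama.filename_sets == {"common_crawl": "common_crawl/*"}

# patch by the new dotted path, mirroring test_convert_hf_checkpoint.py
with mock.patch("scripts.prepare_redpajama.filename_sets", {"common_crawl": "common_crawl/*"}):
    assert prepare_redpajama.filename_sets == {"common_crawl": "common_crawl/*"}
```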