Skip to content

Commit

Permalink
Make generate_adapter.py work with --quantize argument (Lightning-AI#…)
Browse files Browse the repository at this point in the history
  • Loading branch information
awaelchli authored Apr 17, 2023
1 parent 89f285e commit f8cf484
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 2 deletions.
4 changes: 3 additions & 1 deletion generate_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,13 @@ def main(
print("Loading model ...", file=sys.stderr)
t0 = time.time()
model = LLaMA(LLaMAConfig()) # TODO: Support different model sizes

# 1. Load the pretrained weights
pretrained_checkpoint = torch.load(pretrained_path)
model.load_state_dict(pretrained_checkpoint, strict=False)
# 2. Load the fine-tuned adapter weights
adapter_checkpoint = torch.load(adapter_path)
adapter_checkpoint = torch.load(adapter_path, map_location=torch.device("cpu"))

model.load_state_dict(adapter_checkpoint, strict=False)
print(f"Time to load model: {time.time() - t0:.02f} seconds.", file=sys.stderr)

Expand Down
2 changes: 1 addition & 1 deletion lit_llama/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
self.rope_cache = build_rope_cache(
seq_len=self.block_size,
n_elem=self.n_embd // self.n_head,
dtype=self.c_attn.weight.dtype,
dtype=x.dtype,
device=x.device,
)

Expand Down

0 comments on commit f8cf484

Please sign in to comment.