Skip to content

Commit

Permalink
Make generate_adapter.py work with --quantize argument (Lightning-AI#…)
Browse files Browse the repository at this point in the history
  • Loading branch information
awaelchli authored Apr 17, 2023
1 parent 89f285e commit f8cf484
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 2 deletions.
4 changes: 3 additions & 1 deletion generate_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,13 @@ def main(
print("Loading model ...", file=sys.stderr)
t0 = time.time()
model = LLaMA(LLaMAConfig()) # TODO: Support different model sizes

# 1. Load the pretrained weights
pretrained_checkpoint = torch.load(pretrained_path)
model.load_state_dict(pretrained_checkpoint, strict=False)
# 2. Load the fine-tuned adapter weights
adapter_checkpoint = torch.load(adapter_path)
adapter_checkpoint = torch.load(adapter_path, map_location=torch.device("cpu"))

model.load_state_dict(adapter_checkpoint, strict=False)
print(f"Time to load model: {time.time() - t0:.02f} seconds.", file=sys.stderr)

Expand Down
2 changes: 1 addition & 1 deletion lit_llama/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
self.rope_cache = build_rope_cache(
seq_len=self.block_size,
n_elem=self.n_embd // self.n_head,
dtype=self.c_attn.weight.dtype,
dtype=x.dtype,
device=x.device,
)

Expand Down

0 comments on commit f8cf484

Please sign in to comment.