Skip to content

Commit

Permalink
Update prepare_alpaca.py to enable data_file_name specification (Ligh…
Browse files Browse the repository at this point in the history
  • Loading branch information
mosheber authored Apr 17, 2023
1 parent 15d5eee commit 945ffb3
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion scripts/prepare_alpaca.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def prepare(
max_seq_length: int = 256,
seed: int = 42,
mask_inputs: bool = False, # as in alpaca-lora
+ data_file_name: str = DATA_FILE_NAME
) -> None:
"""Prepare the Alpaca dataset for instruction tuning.
Expand All @@ -34,7 +35,7 @@ def prepare(
"""

destination_path.mkdir(parents=True, exist_ok=True)
- file_path = destination_path / DATA_FILE_NAME
+ file_path = destination_path / data_file_name
download(file_path)

# TODO: If we don't have the Meta weights, where do we get the tokenizer from?
Expand Down

0 comments on commit 945ffb3

Please sign in to comment.