Skip to content

Commit

Permalink
Print task list in litgpt evaluate (Lightning-AI#1258)
Browse files Browse the repository at this point in the history
Co-authored-by: Carlos Mocholí <[email protected]>
  • Loading branch information
rasbt and carmocca authored Apr 11, 2024
1 parent 5dc541e commit 9475ec4
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 6 deletions.
16 changes: 14 additions & 2 deletions litgpt/eval/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ def prepare_results(results, save_filepath, print_results=True):


def convert_and_evaluate(
checkpoint_dir: str,
checkpoint_dir: Path,
tasks: Optional[str] = None,
out_dir: Optional[str] = None,
force_conversion: bool = False,
tasks: Optional[str] = "hellaswag,truthfulqa_mc2,mmlu",
num_fewshot: Optional[int] = None,
batch_size: int = 1,
device: Optional[str] = None,
Expand Down Expand Up @@ -69,6 +69,18 @@ def convert_and_evaluate(

from lm_eval import evaluator

if tasks is None:
from lm_eval.tasks import TaskManager
taskm = TaskManager()
print("\n".join(taskm.task_index.keys()))
print(
"\n\nTo evaluate multiple tasks, you can chain the task names "
"listed above via a comma-separated list."
"\nFor example: `--tasks 'hellaswag,truthfulqa_mc2,mmlu'`. "
"\nTo search for a specific task, use `litgpt evaluate | grep task_name`."
)
return

checkpoint_dir = Path(checkpoint_dir)

if out_dir is None:
Expand Down
9 changes: 5 additions & 4 deletions tutorials/evaluation.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ specify in the following evaluation command:
litgpt evaluate \
--checkpoint_dir checkpoints/microsoft/phi-2/ \
--batch_size 4 \
--tasks "hellaswag,truthfulqa_mc2,mmlu" \
--out_dir evaluate_model/
```

Expand Down Expand Up @@ -65,21 +66,20 @@ litgpt evaluate \
--checkpoint_dir checkpoints/microsoft/phi-2/ \
--batch_size 4 \
--out_dir evaluate_model/ \
--tasks "hellaswag,truthfulqa_mc2,mmlu" \
--force_conversion true
```

&nbsp;

> [!TIP]
> By default, `litgpt evaluate` will evaluate a model on 3 tasks,
> corresponding to the setting `--tasks "hellaswag,truthfulqa_mc2,mmlu"`.
> Run `litgpt evaluate --checkpoint_dir ...` without specifying `--tasks` to print a list
> of the supported tasks.
> [!TIP]
> The evaluation may take a long time, and for testing purposes, you may want to reduce the number of tasks
> or set a limit for the number of examples per task, for example, `--limit 10`.
A list of supported tasks can be found [here](https://github.com/EleutherAI/lm-evaluation-harness/blob/master/docs/task_table.md).




Expand All @@ -101,5 +101,6 @@ litgpt finetune lora \
litgpt evaluate \
--checkpoint_dir lora_model/final \
--batch_size 4 \
--tasks "hellaswag,truthfulqa_mc2,mmlu" \
--out_dir evaluate_model/ \
```

0 comments on commit 9475ec4

Please sign in to comment.