Skip to content

Commit

Permalink
Print task list in litgpt evaluate (Lightning-AI#1258)
Browse files Browse the repository at this point in the history
Co-authored-by: Carlos Mocholí <[email protected]>
  • Loading branch information
rasbt and carmocca authored Apr 11, 2024
1 parent 5dc541e commit 9475ec4
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 6 deletions.
16 changes: 14 additions & 2 deletions litgpt/eval/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ def prepare_results(results, save_filepath, print_results=True):


def convert_and_evaluate(
checkpoint_dir: str,
checkpoint_dir: Path,
tasks: Optional[str] = None,
out_dir: Optional[str] = None,
force_conversion: bool = False,
tasks: Optional[str] = "hellaswag,truthfulqa_mc2,mmlu",
num_fewshot: Optional[int] = None,
batch_size: int = 1,
device: Optional[str] = None,
Expand Down Expand Up @@ -69,6 +69,18 @@ def convert_and_evaluate(

from lm_eval import evaluator

if tasks is None:
from lm_eval.tasks import TaskManager
taskm = TaskManager()
print("\n".join(taskm.task_index.keys()))
print(
"\n\nTo evaluate multiple tasks, you can chain the task names "
"listed above via a comma-separated list."
"\nFor example: `--tasks 'hellaswag,truthfulqa_mc2,mmlu'`. "
"\nTo search for a specific task, use `litgpt evaluate | grep task_name`."
)
return

checkpoint_dir = Path(checkpoint_dir)

if out_dir is None:
Expand Down
9 changes: 5 additions & 4 deletions tutorials/evaluation.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ specify in the following evaluation command:
litgpt evaluate \
--checkpoint_dir checkpoints/microsoft/phi-2/ \
--batch_size 4 \
--tasks "hellaswag,truthfulqa_mc2,mmlu" \
--out_dir evaluate_model/
```

Expand Down Expand Up @@ -65,21 +66,20 @@ litgpt evaluate \
--checkpoint_dir checkpoints/microsoft/phi-2/ \
--batch_size 4 \
--out_dir evaluate_model/ \
--tasks "hellaswag,truthfulqa_mc2,mmlu" \
--force_conversion true
```

&nbsp;

> [!TIP]
> By default, `litgpt evaluate` will evaluate a model on 3 tasks,
> corresponding to the setting `--tasks "hellaswag,truthfulqa_mc2,mmlu"`.
> Run `litgpt evaluate --checkpoint_dir ...` without specifying `--tasks` to print a list
> of the supported tasks.
> [!TIP]
> The evaluation may take a long time, and for testing purposes, you may want to reduce the number of tasks
> or set a limit for the number of examples per task, for example, `--limit 10`.
A list of supported tasks can be found [here](https://github.com/EleutherAI/lm-evaluation-harness/blob/master/docs/task_table.md).




Expand All @@ -101,5 +101,6 @@ litgpt finetune lora \
litgpt evaluate \
--checkpoint_dir lora_model/final \
--batch_size 4 \
--tasks "hellaswag,truthfulqa_mc2,mmlu" \
--out_dir evaluate_model/ \
```

0 comments on commit 9475ec4

Please sign in to comment.