Dec branch commit #584

Open · wants to merge 25 commits into base: master

The diff below shows changes from 2 of the 25 commits.
fff7336
First Commit for SAT Solving
leyanpan Oct 14, 2023
087a633
Add updates for partial training
leyanpan Oct 19, 2023
1c2647e
Modify gitignore
leyanpan Oct 20, 2023
e9caadb
add large training set
leyanpan Nov 5, 2023
432ea20
add test for Large CDCL
leyanpan Nov 6, 2023
fe26dc3
add new prediction file
Nov 7, 2023
7260b24
add diff dataset
leyanpan Nov 7, 2023
209b732
Merge branch 'master' of github.com:leyanpan/nanoGPT_SAT
leyanpan Nov 7, 2023
3b40a81
add 20-layer model
Nov 10, 2023
983eeef
Fix prediction files
Nov 10, 2023
fa795f2
add LTL dataset
leyanpan Nov 29, 2023
e304737
Update Code for binary classification
leyanpan Jan 18, 2024
022e649
Update .gitignore and remove large files
leyanpan Jan 18, 2024
43ecb20
make server change
leyanpan Jan 18, 2024
7e80dad
Merge remote-tracking branch 'origin/main'
leyanpan Jan 18, 2024
32aeac3
Update code for classification evaluation
leyanpan Jan 23, 2024
02799e7
Merge branch 'main' of github.gatech.edu:LLM-Formal-Reasoning/nanoGPT…
leyanpan Jan 23, 2024
6596324
Windows Client Changes
leyanpan Jan 23, 2024
c2db691
Merge pull request #2 from LLM-Formal-Reasoning/main
leyanpan Jan 23, 2024
f62d523
Merge branch 'master' of github.gatech.edu:LLM-Formal-Reasoning/nanoG…
leyanpan Jan 23, 2024
d6e824d
Add debug option and debug logs
leyanpan Jan 27, 2024
01d2d3c
Update model
leyanpan Dec 20, 2024
ba3b143
Updates
leyanpan Jan 5, 2025
da59cf2
added requirements defined here - Option 1 - Flag
cesposo Jan 5, 2025
6fd8ccc
added the mechanisms for reqs from December 18 meeting
cesposo Jan 5, 2025
30 changes: 18 additions & 12 deletions eval_sat.py
@@ -25,6 +25,7 @@
 compile = False # use PyTorch 2.0 to compile the model to be faster
 eval = True
 sep = ' ' # separator between tokens during decoding, default: nothing, i.e. join with empty string
+class_only = False # only generate the class token
 exec(open('configurator.py').read()) # overrides from command line or config file
 if sep == 'SPACE':
     sep = ' '
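The `sep == 'SPACE'` check exists because the `exec`-based override mechanism takes `key=value` strings from the command line, where a literal space is awkward to pass, so `SPACE` serves as a sentinel. A minimal torch-free sketch of this pattern (a simplified stand-in, not the actual `configurator.py`):

```python
# Simplified stand-in for nanoGPT-style config overriding (assumption:
# illustrative only; the real configurator.py exec's key=value pairs
# directly into module globals).
def apply_overrides(config, args):
    """Apply command-line style 'key=value' overrides to a config dict."""
    for arg in args:
        key, val = arg.split('=', 1)
        if key not in config:
            raise KeyError(f"unknown config key: {key}")
        old = config[key]
        if isinstance(old, bool):
            # bool('False') would be True, so compare the string explicitly
            config[key] = (val == 'True')
        else:
            # coerce the string to the existing value's type
            config[key] = type(old)(val)
    # 'SPACE' is a sentinel: a literal ' ' is hard to pass through a shell
    if config.get('sep') == 'SPACE':
        config['sep'] = ' '
    return config
```

For example, `apply_overrides({'sep': ' ', 'class_only': False}, ['sep=SPACE', 'class_only=True'])` leaves `sep` as a single space and enables `class_only`.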
@@ -135,19 +136,24 @@ def line_sat(line, sep=' '):
             continue
         sample_cnt += 1
         prompt = (torch.tensor(prompt, dtype=torch.long, device=device)[None, ...])
-        y = model.generate(prompt, max_new_tokens, temperature=temperature, top_k=top_k, stop=encode(['[SEP]']))
-        res_str = decode(y[0].tolist())
-        print(res_str)
-        print('---------------')
-        if eval:
+        if class_only:
             true_label.append(label)
-            res = line_sat(res_str, sep)
-            if res is None:
-                res = not label
-            pred_label.append(res)
-        if output_file is not None:
-            with open(output_file, 'a', encoding='utf-8') as f:
-                f.write(res_str + '\n')
+            y = model.classify(prompt)
+            pred_label.append(y[0].item() == stoi['SAT'])
+        else:
+            y = model.generate(prompt, max_new_tokens, temperature=temperature, top_k=top_k, stop=encode(['[SEP]']))
+            res_str = decode(y[0].tolist())
+            print(res_str)
+            print('---------------')
+            if eval:
+                true_label.append(label)
+                res = line_sat(res_str, sep)
+                if res is None:
+                    res = not label
+                pred_label.append(res)
+            if output_file is not None:
+                with open(output_file, 'a', encoding='utf-8') as f:
+                    f.write(res_str + '\n')
 
 if eval_labels is not None:
     with open(eval_labels, 'r') as f:
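In the `class_only` path, evaluation reduces to a single token-id comparison: the model's predicted class-token id is checked against the id of `SAT`. A torch-free sketch of that comparison (the `stoi` mapping and logits here are illustrative; the real mapping is loaded from the dataset's metadata, not shown in this PR):

```python
# Illustrative vocabulary; assumption: a hypothetical stand-in for the
# dataset's real token-to-id mapping (stoi).
stoi = {'SAT': 0, 'UNSAT': 1, '[SEP]': 2}

def predict_is_sat(vocab_logits, stoi):
    """Mimic the class_only branch of eval_sat.py: argmax over the
    vocabulary logits, then compare the winning id to stoi['SAT']."""
    pred_id = max(range(len(vocab_logits)), key=lambda i: vocab_logits[i])
    return pred_id == stoi['SAT']
```

With the toy vocabulary above, `predict_is_sat([2.1, 0.3, -1.0], stoi)` returns `True` because index 0 (`SAT`) has the largest logit.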
9 changes: 9 additions & 0 deletions model.py
@@ -348,3 +348,12 @@ def generate(self, idx, max_new_tokens, temperature=1.0, top_k=None, stop=None):
                 break
 
         return idx
+
+    @torch.no_grad()
+    def classify(self, idx):
+        """
+        Take a conditioning sequence of indices idx (LongTensor of shape (b,t)) and use the first position to predict a single token.
+        """
+        logits, _ = self(idx)
+        logits = logits[:, 0, :]
+        return logits.argmax(dim=-1)
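Note on the `logits[:, 0, :]` indexing: in upstream nanoGPT, the forward pass with no targets typically returns logits only for the final sequence position (a length-1 time dimension), so index 0 selects the last token's prediction rather than the literal first input position. A shape-level, torch-free sketch of the final argmax step (plain nested lists standing in for a `(batch, 1, vocab)` tensor):

```python
# Sketch of classify's last two lines, with nested lists in place of
# tensors (assumption: the returned time dimension has length 1, as in
# upstream nanoGPT's inference path).
def argmax_class(logits_batch):
    """For each batch row, take position 0 of the (length-1) returned
    sequence and argmax over the vocabulary dimension."""
    preds = []
    for row in logits_batch:            # row: list of positions
        vocab_logits = row[0]           # position 0 = the only position
        preds.append(max(range(len(vocab_logits)), key=lambda i: vocab_logits[i]))
    return preds
```

For a batch of two rows, `argmax_class([[[0.1, 2.0, -1.0]], [[3.0, 0.0, 1.0]]])` yields `[1, 0]`, one predicted class-token id per sample, matching the shape `eval_sat.py` consumes via `y[0].item()`.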