
Commit

Update transformer mask comment
Update names for consistency with code

Co-authored-by: ruanslv <[email protected]>
flu0r1ne and ruanslv authored Nov 13, 2023
1 parent e9077bd commit 6b3154b
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions llama/model.py
@@ -481,8 +481,8 @@ def forward(self, tokens: torch.Tensor, start_pos: int):
 
             # When performing key-value caching, we compute the attention scores
             # only for the new sequence. Thus, the matrix of scores is of size
-            # (seq_len, total_len), and the only masked entries are (i, j) for
-            # j > cached_len + i, since row i corresponds to token cached_len + i.
+            # (seqlen, cache_len + seqlen), and the only masked entries are (i, j) for
+            # j > cache_len + i, since row i corresponds to token cache_len + i.
             mask = torch.hstack([
                 torch.zeros((seqlen, start_pos), device=tokens.device),
                 mask
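The updated comment describes the shape and masked region of the attention-score mask when key-value caching is used. As a rough, self-contained sketch (not the repository's code; the cache_len and seqlen values below are assumed purely for illustration), the mask can be built and inspected like this:

import torch

cache_len = 4   # assumed value: number of tokens already in the KV cache (start_pos)
seqlen = 3      # assumed value: number of new tokens in this forward pass

# Causal mask over the new tokens only: (seqlen, seqlen), upper triangle is -inf.
mask = torch.full((seqlen, seqlen), float("-inf"))
mask = torch.triu(mask, diagonal=1)

# Prepend zero columns for the cached positions, giving shape (seqlen, cache_len + seqlen).
# Entry (i, j) is -inf exactly when j > cache_len + i, since row i corresponds to
# token cache_len + i.
mask = torch.hstack([torch.zeros((seqlen, cache_len)), mask])

print(mask.shape)  # torch.Size([3, 7])
print(mask)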
