Merge branch 'main' into develop

ZetangForward · Nov 5, 2023 · 2cbba31
2 parents 5c41a4a + 89149fb
commit 2cbba31
Show file tree

Hide file tree

Showing 2 changed files with 2 additions and 1 deletion.
diff --git a/docs/MODEL_ZOO.md b/docs/MODEL_ZOO.md
@@ -15,7 +15,7 @@ The model weights below are *merged* weights. You do not need to apply delta. Th
 | LLaVA-1.5 | 7B | lora-1e | [liuhaotian/llava-v1.5-7b-lora](https://huggingface.co/liuhaotian/llava-v1.5-7b-lora) | 79.1 | 63.0 | 47.8 | 68.4 | 58.2 | 86.4 | 1476.9 | 66.1 | 58.9 | 60.1 | 67.9 | 30.2 |
 | LLaVA-1.5 | 13B | lora-1e | [liuhaotian/llava-v1.5-13b-lora](https://huggingface.co/liuhaotian/llava-v1.5-13b-lora) | 80.0 | 63.3 | 58.9 | 71.2 | 60.2 | 86.7 | 1541.7 | 68.5 | 61.5 | 61.3 | 69.5 | 38.3 |
 
-Training logs: [wandb](https://api.wandb.ai/links/lht/6orh56wc).
+Base model: Vicuna v1.5. Training logs: [wandb](https://api.wandb.ai/links/lht/6orh56wc).
 
 <p align="center">
   <img src="../images/llava_v1_5_radar.jpg" width="500px"> <br>

diff --git a/llava/model/llava_arch.py b/llava/model/llava_arch.py
@@ -103,6 +103,7 @@ def prepare_inputs_labels_for_multimodal(
         if vision_tower is None or images is None or input_ids.shape[1] == 1:
             if past_key_values is not None and vision_tower is not None and images is not None and input_ids.shape[1] == 1:
                 attention_mask = torch.ones((attention_mask.shape[0], past_key_values[-1][-1].shape[-2] + 1), dtype=attention_mask.dtype, device=attention_mask.device)
+                position_ids = torch.sum(attention_mask, dim=1).unsqueeze(-1) - 1
             return input_ids, position_ids, attention_mask, past_key_values, None, labels
 
         if type(images) is list or images.ndim == 5: