Improved inverting for attn masks (EricLBuehler#811)
EricLBuehler authored Oct 1, 2024

Verified: this commit was created on GitHub.com and signed with GitHub's verified signature.
1 parent ce02618 · commit 2ec3bcb
Showing 2 changed files with 3 additions and 8 deletions.
4 changes: 1 addition & 3 deletions mistralrs-core/src/vision_models/mllama/mod.rs
```diff
@@ -52,9 +52,7 @@ fn prepare_cross_attention_mask(
     cross_attn_mask = cross_attn_mask.unsqueeze(1)?;
 
     // Invert the mask
-    let inverted_cross_attn_mask = (1. - cross_attn_mask)?
-        .to_dtype(DType::F32)?
-        .to_dtype(dtype)?;
+    let inverted_cross_attn_mask = (1. - cross_attn_mask.to_dtype(DType::F32)?.to_dtype(dtype)?)?;
     const NEG_INF_VALUE: f64 = -1e15;
     cross_attn_mask = masked_fill(
         &inverted_cross_attn_mask,
```
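Before this change, the mask was inverted first and the F32 → dtype round-trip applied afterwards; the new code casts first and performs the `1. - x` inversion in the model's compute dtype. A minimal sketch of the resulting pattern, assuming the `candle_core` crate that mistral.rs builds on (the mask values and the `dtype` choice here are illustrative, not taken from the model):

```rust
use candle_core::{DType, Device, Result, Tensor};

fn main() -> Result<()> {
    let device = Device::Cpu;
    // Toy cross-attention mask: 1.0 where attention is allowed, 0.0 elsewhere
    // (illustrative values; the real mask comes from the image/text layout).
    let cross_attn_mask = Tensor::new(&[[1f32, 0.], [1., 1.]], &device)?;
    let dtype = DType::BF16; // stand-in for the model's compute dtype

    // Cast first, then invert, as in the committed code: allowed positions
    // become 0.0 and blocked positions become 1.0, ready to be filled with
    // a large negative value such as -1e15.
    let inverted = (1. - cross_attn_mask.to_dtype(DType::F32)?.to_dtype(dtype)?)?;
    println!("{inverted}");
    Ok(())
}
```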
7 changes: 2 additions & 5 deletions mistralrs-core/src/vision_models/mllama/vision.rs
```diff
@@ -353,14 +353,11 @@ fn _prepare_aspect_ratio_attention_mask(
     )?;
 
     // Invert the mask
-    attention_mask = (1. - attention_mask)?;
+    attention_mask = (1. - attention_mask.to_dtype(DType::F32)?.to_dtype(dtype)?)?;
 
     // Reshape to 2d and create 4d attn mask
     // (batch_size, 1, max_num_tiles * target_length, max_num_tiles * target_length)
-    attention_mask = attention_mask
-        .reshape((bs, max_num_tiles * target_length, 1))?
-        .to_dtype(DType::F32)?
-        .to_dtype(dtype)?;
+    attention_mask = attention_mask.reshape((bs, max_num_tiles * target_length, 1))?;
     attention_mask =
         attention_mask.matmul(&attention_mask.transpose(D::Minus1, D::Minus2)?.mul(-1e15)?)?;
     attention_mask
```
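The vision-side hunk applies the same cast-then-invert pattern and drops the dtype conversions from the reshape, since the mask is already in the target dtype by the time the `matmul` builds the bias. That `matmul` is an outer product: with the inverted mask reshaped to (bs, len, 1), multiplying by its own transpose scaled by -1e15 yields a (bs, len, len) additive bias that is -1e15 wherever both the query and key positions are padding. A toy sketch of that construction, again assuming `candle_core` with illustrative values:

```rust
use candle_core::{Device, Result, Tensor, D};

fn main() -> Result<()> {
    let device = Device::Cpu;
    // Toy inverted tile mask for one sequence of length 4:
    // 0.0 marks a real position, 1.0 marks padding (already inverted).
    // Values and shapes are illustrative, not taken from the model.
    let mask = Tensor::new(&[0f32, 0., 1., 1.], &device)?.reshape((1, 4, 1))?;

    // Outer product with the transpose scaled by -1e15, mirroring the diff:
    // entry (i, j) is -1e15 only where both positions i and j are padding,
    // producing a (bs, len, len) additive attention bias.
    let bias = mask.matmul(&mask.transpose(D::Minus1, D::Minus2)?.mul(-1e15)?)?;
    println!("{bias}");
    Ok(())
}
```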
