From 8d3d4c227e70dc0e3f3bc1bac9fd8899765d2182 Mon Sep 17 00:00:00 2001 From: turboderp <11859846+turboderp@users.noreply.github.com> Date: Tue, 27 Aug 2024 21:48:30 +0200 Subject: [PATCH] Ensure logit padding happens on default stream --- exllamav2/model.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/exllamav2/model.py b/exllamav2/model.py index 77ae01dd..6eb90df3 100644 --- a/exllamav2/model.py +++ b/exllamav2/model.py @@ -989,6 +989,9 @@ def forward_chunk(self, if self.tp_context: self.tp_context.wait_streams() + if x is not None and x.is_cuda: + torch.cuda.set_stream(torch.cuda.default_stream(x.device)) + # Apply logit scale # if x is not None and self.config.logit_scale != 1: