
dtype asserts
Andrei Panferov committed Feb 7, 2024
1 parent f019b4e commit d90c43b
Showing 2 changed files with 7 additions and 1 deletion.
6 changes: 6 additions & 0 deletions inference_lib/src/aqlm/inference_kernels/kernel_selector.py
@@ -18,10 +18,16 @@ def forward_pass_quantized_linear(
         case (True, 1, 65536, 1, 8):
             from .cuda_kernel import CUDA_KERNEL
 
+            assert (
+                input.dtype == torch.float16
+            ), f"please load the model with `torch_dtype=torch.float16`, as {input.dtype} is not supported on GPU yet"
             return CUDA_KERNEL.code1x16_matmat(input, codes, codebooks, scales) + (bias if bias is not None else 0)
         case (True, 2, 256, 1, 8):
             from .cuda_kernel import CUDA_KERNEL
 
+            assert (
+                input.dtype == torch.float16
+            ), f"please load the model with `torch_dtype=torch.float16`, as {input.dtype} is not supported on GPU yet"
             return CUDA_KERNEL.code2x8_matmat(input, codes, codebooks, scales) + (bias if bias is not None else 0)
         case (True, _, _, _, _):
             from .triton_kernel import triton_matmul
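
These asserts turn an opaque CUDA dtype mismatch into an actionable error: the 1x16 and 2x8 CUDA kernels expect fp16 activations, so the message tells the caller to reload the model in half precision. A minimal caller-side sketch, assuming the model is loaded through Hugging Face transformers (the checkpoint id below is a placeholder, not a real model name):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder checkpoint id; substitute the AQLM-quantized model you actually use.
MODEL_ID = "some-org/aqlm-quantized-model"

# Loading in float16 keeps activations in the dtype the CUDA kernels accept,
# so the new assert in forward_pass_quantized_linear passes on GPU.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

inputs = tokenizer("Hello, world", return_tensors="pt").to(model.device)
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=8)
print(tokenizer.decode(output[0], skip_special_tokens=True))
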
2 changes: 1 addition & 1 deletion inference_lib/src/aqlm/inference_kernels/numba_kernel.py
@@ -27,7 +27,7 @@ def numba_gemm_lut(
     assert in_features % in_group_size == 0
     assert codebook_size == 2**8
     assert codes.dtype == torch.int8
-    assert input.dtype == torch.float32 and codebooks.dtype == torch.float32
+    assert input.dtype == torch.float32 and codebooks.dtype == torch.float32, f"please load the model with `torch_dtype=torch.float32`, as {input.dtype} is not supported for CPU"
 
     kernel_key = (in_group_size, out_features, in_features, num_codebooks)
     if kernel_key not in COMPILED_KERNELS:
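
The CPU (Numba) kernel, by contrast, only supports fp32, so its companion message points at torch.float32. A matching sketch under the same assumptions (placeholder checkpoint id) for a CPU-only machine:

import torch
from transformers import AutoModelForCausalLM

# Placeholder checkpoint id; the numba_gemm_lut path asserts float32 activations,
# so the model is loaded in full precision when running on CPU.
model = AutoModelForCausalLM.from_pretrained(
    "some-org/aqlm-quantized-model",
    torch_dtype=torch.float32,
)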
