Skip to content

Commit

Permalink
cuda streams
Browse files: browse the repository at this point in the history
  • Loading branch information
BlackSamorez committed Feb 21, 2024
1 parent b0683b2 commit 4810b6c
Showing 1 changed file with 5 additions and 2 deletions.
7 changes: 5 additions & 2 deletions inference_lib/src/aqlm/inference_kernels/cuda_kernel.cu
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
#include <cuda.h>
#include <cuda_fp16.h>
#include <cuda_runtime.h>
#include <c10/cuda/CUDAStream.h>

#include <iostream>

Expand Down Expand Up @@ -166,7 +167,8 @@ void code1x16_matvec_cuda(

int blocks = ceildiv(prob_m, thread_m);
int threads = 32 * thread_m;
Code1x16MatVec<<<blocks, threads>>>(
cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream();
Code1x16MatVec<<<blocks, threads, 16*32*9, stream>>>(
(const int4*) A,
(const int4*) B,
(int4*) C,
Expand Down Expand Up @@ -199,7 +201,8 @@ void code2x8_matvec_cuda(
cudaFuncSetAttribute(
Code2x8MatVec, cudaFuncAttributeMaxDynamicSharedMemorySize, shared
);
Code2x8MatVec<<<blocks, threads, shared>>>(
cudaStream_t stream = at::cuda::getCurrentCUDAStream().stream();
Code2x8MatVec<<<blocks, threads, shared, stream>>>(
(const int4*) A,
(const int4*) B,
(int4*) C,
Expand Down

0 comments on commit 4810b6c

Please sign in to comment.