-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkernel.cu
53 lines (37 loc) · 1.59 KB
/
kernel.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#include <stdio.h>
#define TILE_SIZE 16
__global__ void matAdd(int dim, const float *A, const float *B, float* C) {
    /********************************************************************
    *
    * Compute C = A + B element-wise over flat float arrays.
    *
    * dim  : number of elements to add. NOTE: despite its name this is a
    *        flat element count, not a matrix side length -- the index of
    *        every element is compared directly against it. For an
    *        (n x n) matrix the caller passes n*n. Values <= 0 do nothing.
    * A, B : input arrays holding at least dim elements each
    * C    : output array holding at least dim elements
    *
    * All three pointers are dereferenced on the device, so they must be
    * device-accessible.
    *
    * Launch layout: 1D grid of 1D blocks. Any grid/block size is valid;
    * each thread starts at its global index and advances by the total
    * number of launched threads until the element count is exhausted.
    * No shared memory is used.
    *
    ********************************************************************/

    // Index in size_t: blockIdx.x * blockDim.x is an unsigned product that
    // can exceed INT_MAX when the grid is rounded up past a large element
    // count; narrowing it to int could yield a negative index that slips
    // through the bounds check.
    const size_t count  = (dim > 0) ? (size_t)dim : 0;
    const size_t stride = (size_t)gridDim.x * blockDim.x;

    for (size_t i = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
         i < count;
         i += stride) {
        C[i] = A[i] + B[i];
    }
}
// Launches matAdd to compute C = A + B for (dim x dim) float matrices stored
// as flat arrays of dim*dim elements.
//
//   dim     : matrix side length; dim <= 0 is a no-op
//   A, B    : input matrices
//   C       : output matrix
//
// The pointers are handed straight to the kernel, so they must be
// device-accessible. No synchronization is performed here. A failed launch is
// reported on stderr; the error is only peeked at, not cleared, so the
// caller's own CUDA error checks still observe it.
void basicMatAdd(int dim, const float *A, const float *B, float *C)
{
    // Nothing to add; this also avoids a zero-block grid, which is an
    // invalid launch configuration.
    if (dim <= 0) return;

    // Initialize thread block and kernel grid dimensions ---------------------

    const int num_thrd = 1 << 10; // 1024 threads per block

    // Element count in size_t: dim*dim overflows int once dim exceeds 46340.
    const size_t total = (size_t)dim * (size_t)dim;

    // The kernel takes an int element count, so large matrices are processed
    // in chunks. 2^30 elements per launch keeps the count, the rounded-up
    // thread index and the block count comfortably inside int range.
    const size_t max_chunk = (size_t)1 << 30;

    // Invoke CUDA kernel -----------------------------------------------------

    for (size_t offset = 0; offset < total; offset += max_chunk) {
        const size_t remaining = total - offset;
        const int count = (int)(remaining < max_chunk ? remaining : max_chunk);
        const int num_blk = (count + num_thrd - 1) / num_thrd; // ceil-div

        matAdd<<<num_blk, num_thrd>>>(count, A + offset, B + offset, C + offset);

        // Kernel launches do not return a status; query it explicitly.
        const cudaError_t err = cudaPeekAtLastError();
        if (err != cudaSuccess) {
            fprintf(stderr, "basicMatAdd: matAdd launch failed: %s\n",
                    cudaGetErrorString(err));
            return;
        }
    }
}