-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathcset_pointer.cu
150 lines (119 loc) · 4.64 KB
/
cset_pointer.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
/*
-- MAGMA (version 2.1.0) --
Univ. of Tennessee, Knoxville
Univ. of California, Berkeley
Univ. of Colorado, Denver
@date August 2016
@generated from magmablas/zset_pointer.cu, normal z -> c, Tue Aug 30 09:38:39 2016
@author Azzam Haidar
@author Tingxing Dong
*/
#include "magma_internal.h"
/******************************************************************************/
/*
    Device kernel, indexed by blockIdx.x only: block i writes

        output_array[i] = input + i * batch_offset + row + column * lda

    i.e. the address of the element at offset (row, column) inside the i-th
    matrix, where matrix i begins i * batch_offset elements after input and
    consecutive columns are lda elements apart.

    The displacement is accumulated in signed 64-bit arithmetic. blockIdx.x is
    unsigned, so if magma_int_t is 32 bits wide the product
    blockIdx.x * batch_offset would be evaluated as an unsigned 32-bit value:
    it wraps modulo 2^32 for large batches and turns a negative batch_offset
    into a huge positive displacement. column * lda could likewise overflow
    a 32-bit int for large matrices.
*/
__global__ void kernel_cset_pointer(
    magmaFloatComplex **output_array,
    magmaFloatComplex *input,
    magma_int_t lda,
    magma_int_t row, magma_int_t column,
    magma_int_t batch_offset)
{
    // Widen before multiplying so neither product is truncated to 32 bits.
    const long long batch_id      = blockIdx.x;
    const long long matrix_start  = batch_id * batch_offset;
    const long long element_shift = row + static_cast<long long>(column) * lda;

    output_array[blockIdx.x] = input + (matrix_start + element_shift);
    //printf("==> kernel_set_pointer input_array %p output_array %p \n",input+ blockIdx.x * batch_offset,output_array[blockIdx.x]);
}
/***************************************************************************//**
    Purpose
    -------
    Converts matrices stored consecutively in a single array into an array of
    pointers, one pointer per matrix. For i = 0, ..., batchCount-1:

        output_array[i] = input + i * batch_offset + row + column * lda

    so output_array[i] addresses the element at offset (row, column) of the
    i-th matrix, where consecutive matrices start batch_offset elements apart.
    With row = column = 0 each pointer refers to the start of its matrix.

    Arguments
    ----------
    @param[out]
    output_array  Array of pointers, dimension (batchCount).
                  It is written by the GPU kernel, so it must be accessible
                  from the device.

    @param[in]
    input         COMPLEX array holding the matrices one after another.
                  Only its address is used; the data is not read here.

    @param[in]
    lda           INTEGER
                  Leading dimension of each matrix: the distance, in elements,
                  between consecutive columns.

    @param[in]
    row           INTEGER
                  Row offset, inside each matrix, of the element each output
                  pointer refers to.

    @param[in]
    column        INTEGER
                  Column offset, inside each matrix, of the element each
                  output pointer refers to.

    @param[in]
    batch_offset  INTEGER
                  Distance, in elements, between the starts of two
                  consecutive matrices in the input array.

    @param[in]
    batchCount    INTEGER
                  The number of matrices to operate on. If batchCount <= 0
                  the routine returns without launching anything.

    @param[in]
    queue         magma_queue_t
                  Queue to execute in; the kernel is launched on its CUDA
                  stream and this routine does not synchronize afterwards.
*******************************************************************************/
extern "C"
void magma_cset_pointer(
    magmaFloatComplex **output_array,
    magmaFloatComplex *input,
    magma_int_t lda,
    magma_int_t row, magma_int_t column,
    magma_int_t batch_offset,
    magma_int_t batchCount,
    magma_queue_t queue)
{
    // A grid with no blocks is an invalid launch configuration and would
    // leave a CUDA error behind for an unrelated later check to trip over.
    if (batchCount <= 0)
        return;

    // One single-thread block per matrix: the kernel indexes by blockIdx.x.
    kernel_cset_pointer
        <<< batchCount, 1, 0, queue->cuda_stream() >>>
        (output_array, input, lda, row, column, batch_offset);
}
/******************************************************************************/
/*
    Device kernel, indexed by blockIdx.x only: block i reads input_array[i]
    and writes

        output_array[i] = input_array[i] + row + column * lda

    The displacement is computed in signed 64-bit arithmetic so that
    column * lda cannot overflow if magma_int_t is 32 bits wide.

    NOTE(review): the name keeps a 'z' prefix although the kernel operates on
    magmaFloatComplex; presumably an artifact of generating this file from
    the z-precision source. It is left unchanged because the host wrapper
    launches it by this name.
*/
__global__ void zdisplace_pointers_kernel(magmaFloatComplex **output_array,
               magmaFloatComplex **input_array, magma_int_t lda,
               magma_int_t row, magma_int_t column)
{
    magmaFloatComplex *base = input_array[blockIdx.x];

    // Widen before multiplying so the product is not truncated to 32 bits.
    const long long element_shift = row + static_cast<long long>(column) * lda;

    output_array[blockIdx.x] = base + element_shift;
}
/***************************************************************************//**
    Purpose
    -------
    Computes the displaced pointer for every matrix in a batch and stores it
    in output_array. input_array contains the pointers to the initial
    positions. For i = 0, ..., batchCount-1:

        output_array[i] = input_array[i] + row + lda * column;

    Arguments
    ----------
    @param[out]
    output_array  Array of pointers, dimension (batchCount).
                  Entry i receives input_array[i] displaced to the element at
                  offset (row, column). It is written by the GPU kernel, so
                  it must be accessible from the device.

    @param[in]
    input_array   Array of pointers, dimension (batchCount).
                  Each entry points to a COMPLEX matrix with leading
                  dimension lda. The array is read by the GPU kernel, so it
                  must be accessible from the device.

    @param[in]
    lda           INTEGER
                  Leading dimension of each matrix: the distance, in elements,
                  between consecutive columns.

    @param[in]
    row           INTEGER
                  Row offset of the element each output pointer refers to.

    @param[in]
    column        INTEGER
                  Column offset of the element each output pointer refers to.

    @param[in]
    batchCount    INTEGER
                  The number of matrices to operate on. If batchCount <= 0
                  the routine returns without launching anything.

    @param[in]
    queue         magma_queue_t
                  Queue to execute in; the kernel is launched on its CUDA
                  stream and this routine does not synchronize afterwards.
*******************************************************************************/
extern "C"
void magma_cdisplace_pointers(magmaFloatComplex **output_array,
               magmaFloatComplex **input_array, magma_int_t lda,
               magma_int_t row, magma_int_t column,
               magma_int_t batchCount, magma_queue_t queue)
{
    // A grid with no blocks is an invalid launch configuration and would
    // leave a CUDA error behind for an unrelated later check to trip over.
    if (batchCount <= 0)
        return;

    // One single-thread block per matrix: the kernel indexes by blockIdx.x.
    zdisplace_pointers_kernel
        <<< batchCount, 1, 0, queue->cuda_stream() >>>
        (output_array, input_array, lda, row, column);
}