-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathcgemv_conj.cu
132 lines (104 loc) · 3.35 KB
/
cgemv_conj.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
/*
-- MAGMA (version 2.1.0) --
Univ. of Tennessee, Knoxville
Univ. of California, Berkeley
Univ. of Colorado, Denver
@date August 2016
@author Stan Tomov
@generated from magmablas/zgemv_conj.cu, normal z -> c, Tue Aug 30 09:38:28 2016
*/
#include "magma_internal.h"
#include "commonblas_c.h"
#define num_threads 256
__global__ void
cgemv_conj_kernel(
int m, int n, magmaFloatComplex alpha,
const magmaFloatComplex * __restrict__ A, int lda,
const magmaFloatComplex * __restrict__ x, int incx, magmaFloatComplex beta,
magmaFloatComplex * __restrict__ y, int incy)
{
int ind = blockIdx.x*num_threads + threadIdx.x;
A += ind;
if ( ind < m ) {
magmaFloatComplex res = MAGMA_C_ZERO;
#pragma unroll
for( int i=0; i < n; i ++ ) {
res += A[0] * MAGMA_C_CONJ(x[0]);
A += lda;
x += incx;
}
y[ind*incy] = alpha * res + beta * y[ind*incy];
}
}
/***************************************************************************//**
Purpose
-------
CGEMV_CONJ performs the matrix-vector operation
y := alpha*A*conj(x) + beta*y,
where alpha and beta are scalars, x and y are vectors and A is an
m by n matrix.
Arguments
----------
@param[in]
m INTEGER
On entry, m specifies the number of rows of the matrix A.
@param[in]
n INTEGER
On entry, n specifies the number of columns of the matrix A
@param[in]
alpha COMPLEX
On entry, ALPHA specifies the scalar alpha.
@param[in]
dA COMPLEX array of dimension ( LDDA, n ) on the GPU.
@param[in]
ldda INTEGER
LDDA specifies the leading dimension of A.
@param[in]
dx COMPLEX array of dimension n
@param[in]
incx Specifies the increment for the elements of X.
INCX must not be zero.
@param[in]
beta DOUBLE REAL
On entry, BETA specifies the scalar beta. When BETA is
supplied as zero then Y need not be set on input.
@param[out]
dy REAL array of dimension m
@param[in]
incy Specifies the increment for the elements of Y.
INCY must not be zero.
@param[in]
queue magma_queue_t
Queue to execute in.
@ingroup magma_gemv
*******************************************************************************/
extern "C" void
magmablas_cgemv_conj_q(
magma_int_t m, magma_int_t n, magmaFloatComplex alpha,
magmaFloatComplex_const_ptr dA, magma_int_t ldda,
magmaFloatComplex_const_ptr dx, magma_int_t incx,
magmaFloatComplex beta,
magmaFloatComplex_ptr dy, magma_int_t incy,
magma_queue_t queue )
{
magma_int_t info = 0;
if ( m < 0 )
info = -1;
else if ( n < 0 )
info = -2;
else if ( ldda < m )
info = -5;
else if ( incx == 0 )
info = -7;
else if ( incy == 0 )
info = -10;
if (info != 0) {
magma_xerbla( __func__, -(info) );
return; //info;
}
magma_int_t blocks = magma_ceildiv( m, num_threads );
dim3 grid(blocks, 1, 1);
dim3 threads(num_threads, 1, 1);
cgemv_conj_kernel<<< grid, threads, 0, queue->cuda_stream() >>>
(m, n, alpha, dA, ldda, dx, incx, beta, dy, incy);
}