-
Notifications
You must be signed in to change notification settings - Fork 13
/
conpar2.c
executable file
·294 lines (256 loc) · 8.02 KB
/
conpar2.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
#include "auto_f2c.h"
#include "auto_c.h"
#include "auto_types.h"
#ifdef TIME
#include <unistd.h>
#include <sys/time.h>
#include <sys/resource.h>
static double time_start (void) {
struct rusage time;
double seconds,microseconds;
getrusage(RUSAGE_SELF,&time);
seconds = (double)time.ru_utime.tv_sec;
microseconds = (double)time.ru_utime.tv_usec;
return seconds + microseconds/1e6;
}
static double time_end(double start) {
struct rusage time;
double seconds,microseconds;
getrusage(RUSAGE_SELF,&time);
seconds = (double)time.ru_utime.tv_sec;
microseconds = (double)time.ru_utime.tv_usec;
return (seconds + microseconds/1e6)-start;
}
#endif
/*This is the process function. It is meant to be called either
on a SMP using shared memory, or wrapped inside another
routine for message passing*/
void *conpar_process(void * arg)
{
integer icf_dim1, irf_dim1, d_dim1;
integer a_dim1, a_dim2, b_dim1, b_dim2, c_dim1, c_dim2;
/* Local variables */
integer ipiv, jpiv, itmp;
doublereal tpiv;
integer i, l, k1, k2, m1, m2, ic, ir;
doublereal rm;
integer ir1, irp;
doublereal piv;
integer icp1;
integer *nov, *nra, *nca;
doublereal *a;
integer *ncb;
doublereal *b;
integer *nbc, *nrc;
doublereal *c, *d;
integer *irf, *icf;
integer loop_start,loop_end;
nov = ((conpar_parallel_arglist *)arg)->nov;
nra = ((conpar_parallel_arglist *)arg)->nra;
nca = ((conpar_parallel_arglist *)arg)->nca;
a = ((conpar_parallel_arglist *)arg)->a;
ncb = ((conpar_parallel_arglist *)arg)->ncb;
b = ((conpar_parallel_arglist *)arg)->b;
nbc = ((conpar_parallel_arglist *)arg)->nbc;
nrc = ((conpar_parallel_arglist *)arg)->nrc;
c = ((conpar_parallel_arglist *)arg)->c;
d = ((conpar_parallel_arglist *)arg)->d;
irf = ((conpar_parallel_arglist *)arg)->irf;
icf = ((conpar_parallel_arglist *)arg)->icf;
loop_start = ((conpar_parallel_arglist *)arg)->loop_start;
loop_end = ((conpar_parallel_arglist *)arg)->loop_end;
/* In the default case we don't need to do anything special */
if(global_conpar_type == CONPAR_DEFAULT) {
;
}
/* In the message passing case we set d to be
0.0, do a sum here, and then do the final
sum (with the true copy of d) in the
master */
else if (global_conpar_type == CONPAR_MPI) {
for(i=0;i<(*ncb)*(*nrc);i++)
d[i]=0.0;
}
/* In the shared memory case we create a local
variable for doing this threads part of the
sum, then we do a final sum into shared memory
at the end */
else if (global_conpar_type == CONPAR_PTHREADS) {
;
}
/* Note that the summation of the adjacent overlapped part of C */
/* is delayed until REDUCE, in order to merge it with other communications.*/
/* NA is the local NTST. */
irf_dim1 = *nra;
icf_dim1 = *nca;
d_dim1 = *ncb;
a_dim1 = *nca;
a_dim2 = *nra;
b_dim1 = *ncb;
b_dim2 = *nra;
c_dim1 = *nca;
c_dim2 = *nrc;
/* Condensation of parameters (Elimination of local variables). */
m1 = *nov + 1;
m2 = *nca - *nov;
for (i = loop_start;i < loop_end; i++) {
for (ic = m1; ic <= m2; ++ic) {
ir1 = ic - *nov + 1;
irp = ir1 - 1;
icp1 = ic + 1;
/* **Search for pivot (Complete pivoting) */
piv = 0.0;
ipiv = irp;
jpiv = ic;
for (k1 = irp; k1 <= *nra; ++k1) {
int irf_k1_i = irf[-1 + k1 + i*irf_dim1];
for (k2 = ic; k2 <= m2; ++k2) {
int icf_k2_i = icf[-1 + k2 + i*icf_dim1];
tpiv = a[-1 + icf_k2_i + a_dim1*(-1 + irf_k1_i + a_dim2*i)];
if (tpiv < 0.0) {
tpiv = -tpiv;
}
if (piv < tpiv) {
piv = tpiv;
ipiv = k1;
jpiv = k2;
}
}
}
/* **Move indices */
itmp = icf[-1 + ic + i*icf_dim1];
icf[-1 + ic + i*icf_dim1] = icf[-1 + jpiv + i*icf_dim1];
icf[-1 + jpiv + i*icf_dim1] = itmp;
itmp = irf[-1 + irp + i*irf_dim1];
irf[-1 + irp + i*irf_dim1] = irf[-1 + ipiv + i*irf_dim1];
irf[-1 + ipiv + i*irf_dim1] = itmp;
{
int icf_ic_i = icf[-1 + ic + i*icf_dim1];
int irf_irp_i = irf[-1 + irp + i*irf_dim1];
int a_offset2 = a_dim1*(-1 + irf_irp_i + a_dim2*i);
int b_offset2 = b_dim1*(-1 + irf_irp_i + b_dim2*i);
/* **End of pivoting; elimination starts here */
for (ir = ir1; ir <= *nra; ++ir) {
int irf_ir_i = irf[-1 + ir + i*irf_dim1];
int a_offset1 = a_dim1*(-1 + irf_ir_i + a_dim2*i);
int b_offset1 = b_dim1*(-1 + irf_ir_i + b_dim2*i);
rm = a[-1 + icf_ic_i + a_dim1*(-1 + irf_ir_i + a_dim2*i)]/a[-1 + icf_ic_i + a_dim1*(-1 + irf_irp_i + a_dim2*i)];
a[-1 + icf_ic_i + a_dim1*(-1 + irf_ir_i + a_dim2*i)] = rm;
if (rm != (double)0.) {
for (l = 0; l < *nov; ++l) {
a[l + a_offset1] -= rm * a[l + a_offset2];
}
for (l = icp1 -1; l < *nca; ++l) {
int icf_l_i = icf[l + i*icf_dim1];
a[-1 + icf_l_i + a_offset1] -= rm * a[-1 + icf_l_i + a_offset2];
}
for (l = 0; l < *ncb; ++l) {
b[l + b_offset1] -= rm * b[l + b_offset2];
}
}
}
for (ir = *nbc + 1; ir <= *nrc; ++ir) {
int c_offset1 = c_dim1*(-1 + ir + c_dim2*i);
int d_offset1 = (-1 + ir)*d_dim1;
rm = c[-1 + icf_ic_i + c_dim1*(-1 + ir + c_dim2*i)]/a[-1 + icf_ic_i + a_dim1*(-1 + irf_irp_i + a_dim2*i)];
c[-1 + icf_ic_i + c_dim1*(-1 + ir + c_dim2*i)]=rm;
if (rm != (double)0.) {
for (l = 0; l < *nov; ++l) {
c[l + c_offset1] -= rm * a[l + a_offset2];
}
for (l = icp1 -1 ; l < *nca; ++l) {
int icf_l_i = icf[l + i*icf_dim1];
c[-1 + icf_l_i + c_offset1] -= rm * a[-1 + icf_l_i + a_offset2];
}
for (l = 0; l < *ncb; ++l) {
/*
A little explanation of what is going on here
is in order I believe. This array is
created by a summation across all workers,
hence it needs a mutex to avoid concurrent
writes (in the shared memory case) or a summation
in the master (in the message passing case).
Since mutex's can be somewhat slow, we will do the
summation into a local variable, and then do a
final summation back into global memory when the
main loop is done.
*/
/* Nothing special for the default case */
if(global_conpar_type == CONPAR_DEFAULT) {
d[l + d_offset1] -= rm * b[l + b_offset2];
}
/* In the message passing case we sum into d,
which is a local variable initialized to 0.0.
We then sum our part with the masters part
in the master. */
else if (global_conpar_type == CONPAR_MPI) {
d[l + d_offset1] -= rm * b[l + b_offset2];
}
/* In the shared memory case we sum into a local
variable our contribution, and then sum
into shared memory at the end (inside a mutex */
else if (global_conpar_type == CONPAR_PTHREADS) {
;
}
}
}
}
}
}
}
return NULL;
}
int
conpar_default_wrapper(integer *nov, integer *na, integer *nra, integer *nca, doublereal *a, integer *ncb, doublereal *b, integer *nbc, integer *nrc, doublereal *c, doublereal *d, integer *irf, integer *icf)
{
conpar_parallel_arglist data;
data.nov = nov;
data.nra = nra;
data.nca = nca;
data.a = a;
data.ncb = ncb;
data.b = b;
data.nbc = nbc;
data.nrc = nrc;
data.c = c;
data.d = d;
data.irf = irf;
data.icf = icf;
data.loop_start = 0;
data.loop_end = *na;
conpar_process(&data);
return 0;
}
int
conpar(integer *nov, integer *na, integer *nra, integer *nca, doublereal *a, integer *ncb, doublereal *b, integer *nbc, integer *nrc, doublereal *c, doublereal *d, integer *irf, integer *icf)
{
/* Aliases for the dimensions of the arrays */
integer icf_dim1, irf_dim1;
/* Local variables */
integer i,j;
integer nex;
/*< NEX=NCA-2*NOV >*/
irf_dim1 = *nra;
icf_dim1 = *nca;
/* Function Body */
nex = *nca - (*nov << 1);
if (nex == 0) {
return 0;
}
/* Initialization */
for (i = 0; i <*na; ++i) {
for (j = 0; j < *nra; ++j) {
irf[j + i * irf_dim1] = j+1;
}
for (j = 0; j < *nca; ++j) {
icf[j + i * icf_dim1] = j+1;
}
}
switch(global_conpar_type) {
default:
conpar_default_wrapper(nov, na, nra, nca, a,
ncb, b, nbc, nrc, c, d,irf, icf);
break;
}
return 0;
}