Skip to content

Commit c8f9918

Browse files
committed
part 2
1 parent d68d518 commit c8f9918

File tree

2 files changed

+61
-3
lines changed

2 files changed

+61
-3
lines changed

asst1/prog2_vecintrin/main.cpp

+54-3
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,37 @@ void clampedExpVector(float* values, int* exponents, float* output, int N) {
249249
// Your solution should work for any value of
250250
// N and VECTOR_WIDTH, not just when VECTOR_WIDTH divides N
251251
//
252+
// Added body of clampedExpVector (the signature is only visible in the hunk header; the
// start of the function is not shown in this diff). For each chunk of up to VECTOR_WIDTH
// elements it starts res at 1, multiplies res by the loaded value while the loaded
// exponent is still > 0, replaces results above 9.999999f with 9.999999f, and stores
// the chunk to output. Lines such as "253+" are diff line-number markers, not code.
// NOTE(review): the exact masking semantics of the _cs149_* intrinsics are not shown
// here; comments below follow the intrinsic names and the author's notes - confirm
// against the intrinsics header.
__cs149_vec_float res,reg_x; // res: per-lane running result; reg_x: values loaded for this chunk
253+
__cs149_vec_int reg_exp,ONE=_cs149_vset_int(1); // reg_exp: per-lane remaining exponent; ONE: decrement step
254+
__cs149_mask selected,not_zero; // selected: lanes of this chunk in use; not_zero: lanes with exponent still > 0
255+
float TEN = 9.999999f; // clamp value written back; NOTE(review): the name suggests 10 but the value is 9.999999f
256+
__cs149_vec_float TRUNCATED= _cs149_vset_float(9.999999f); // comparison threshold; same literal as TEN
257+
__cs149_vec_int ZERO = _cs149_vset_int(0); // right-hand side for the "exponent > 0" comparisons
258+
// (disabled) const int MULTIPLE_BOUND = N - (N % VECTOR_WIDTH); // last index with i % VECTOR_WIDTH == 0
252259

260+
for(int i=0 ; i< N; i+= VECTOR_WIDTH){ // one iteration per chunk of VECTOR_WIDTH elements
261+
int N_SELECT = std::min(i+VECTOR_WIDTH,N) - i; // elements left in this chunk; smaller than VECTOR_WIDTH only on a partial last chunk. NOTE(review): std::min needs <algorithm> - confirm it is included
262+
selected = _cs149_init_ones(N_SELECT); // presumably enables the first N_SELECT lanes - confirm
263+
_cs149_vload_float(reg_x,values+i,selected); // x = values[i..] for the selected lanes
264+
_cs149_vload_int(reg_exp,exponents+i,selected); // exp = exponents[i..] for the selected lanes
265+
266+
// (disabled) _cs149_veq_int(is_zero,reg_exp,ZERO,selected); // if exp[i] == 0
267+
_cs149_vset_float(res,1.0f,selected); // res[i] = 1, which is already the final answer for lanes whose exponent is 0
268+
// (disabled) not_zero = _cs149_mask_not(is_zero); // else
269+
not_zero = _cs149_init_ones(); // first half of building an all-zero mask (inverted on the next line)
270+
not_zero = _cs149_mask_not(not_zero);// per the author's note, not_zero is now 0...0
271+
_cs149_vgt_int(not_zero,reg_exp,ZERO,selected);// not_zero = (exp > 0); per the author's note only lanes in `selected` are affected
272+
// (disabled) _cs149_vmove_float(res,reg_x,not_zero);
273+
while (_cs149_cntbits(not_zero) >0) // repeat while at least one lane still has exponent > 0
274+
{
275+
_cs149_vmult_float(res,res,reg_x,not_zero); // res *= x on the still-active lanes
276+
_cs149_vsub_int(reg_exp,reg_exp,ONE,not_zero); // exp -= 1 on the still-active lanes
277+
_cs149_vgt_int(not_zero,reg_exp,ZERO,not_zero); // re-test exp > 0, masked by the current active lanes, so finished lanes drop out
278+
}
279+
_cs149_vgt_float(not_zero,res,TRUNCATED,selected);// reuse not_zero as the clamp mask: res[i] > 9.999999f
280+
_cs149_vset_float(res,TEN,not_zero); // res[i] = 9.999999f on the lanes flagged above
281+
_cs149_vstore_float(output+i,res,selected); // write back only the selected lanes of this chunk
282+
}
253283
}
254284

255285
// returns the sum of all elements in values
@@ -270,11 +300,32 @@ float arraySumVector(float* values, int N) {
270300
//
271301
// CS149 STUDENTS TODO: Implement your vectorized version of arraySumSerial here
272302
//
273-
274-
for (int i=0; i<N; i+=VECTOR_WIDTH) {
303+
// Added body of arraySumVector (the signature is only visible in the hunk header). For
// each chunk of up to VECTOR_WIDTH elements it loads the chunk into a zero-filled
// vector, collapses that vector with repeated hadd + interleave steps, and adds lane 0
// to the scalar total `ans`. Lines such as "305+" are diff line-number markers, not code.
// NOTE(review): the reduction loop runs once per chunk; summing chunks into a vector
// accumulator and reducing once after the for loop would presumably do less work - confirm
// against the assignment's expected operation count.
float ans =0; // scalar running total, returned after the loop
275304

305+
__cs149_mask selected; // lanes of the current chunk that map to real elements
306+
__cs149_vec_float reg_x; // current chunk, then its partial sums during the reduction
307+
308+
for (int i=0; i<N; i+=VECTOR_WIDTH) { // one iteration per chunk of VECTOR_WIDTH elements
309+
int N_SELECT = VECTOR_WIDTH; // default: a full chunk
310+
if(i+VECTOR_WIDTH > N) // last chunk would run past the end of the array
311+
N_SELECT = N-i; // keep only the elements that remain
312+
selected = _cs149_init_ones(N_SELECT); // presumably enables the first N_SELECT lanes - confirm
313+
reg_x = _cs149_vset_float(0); // zero-fill first, presumably so lanes not loaded below add nothing to the sum
314+
_cs149_vload_float(reg_x,values+i,selected); // load values[i..] into the selected lanes
315+
316+
int vec_width = VECTOR_WIDTH; // halved each pass; controls the number of reduction passes
317+
318+
while (vec_width > 1) // log2(VECTOR_WIDTH) passes when VECTOR_WIDTH is a power of two
319+
{
320+
321+
// One reduction pass. Presumably hadd sums adjacent lane pairs and interleave
// regroups the lanes so the next pass combines the partial sums - confirm.
322+
_cs149_hadd_float(reg_x,reg_x);
323+
_cs149_interleave_float(reg_x,reg_x);
324+
vec_width >>=1; // halve the remaining width
325+
}
326+
ans += reg_x.value[0]; // reads lane 0 directly from the vector's `value` member; assumes it now holds the chunk sum
276327
}
277328

278-
return 0.0;
329+
return ans;
279330
}
280331

asst1/write-up.md

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# prog 1
2+
3+
## plot speed up/thread
4+
5+
```sh
6+
./run.sh 1 | ./plot.py
7+
```

0 commit comments

Comments
 (0)