Skip to content

Commit

Permalink
Neon: Make gcc actually generate VMLA instructions for sparse mul
Browse files Browse the repository at this point in the history
Otherwise gcc was splitting the multiply-accumulate (VMLA) into a separate multiply and an add
  • Loading branch information
jmvalin committed Mar 20, 2019
1 parent 06b2a21 commit aee5df3
Showing 1 changed file with 5 additions and 5 deletions.
10 changes: 5 additions & 5 deletions src/vec_neon.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,13 +187,13 @@ static void sparse_sgemv_accum16(float *out, const float *w, int rows, const int

for (j=0;j<cols;j++)
{
- float xj= x[*idx++];
+ float32x4_t xj= vld1q_dup_f32(&x[*idx++]);
float32x4_t wvec;

- wvec = vld1q_f32(&w[0]); y0_3 = vmlaq_n_f32(y0_3, wvec, xj);
- wvec = vld1q_f32(&w[4]); y4_7 = vmlaq_n_f32(y4_7, wvec, xj);
- wvec = vld1q_f32(&w[8]); y8_11 = vmlaq_n_f32(y8_11, wvec, xj);
- wvec = vld1q_f32(&w[12]); y12_15 = vmlaq_n_f32(y12_15, wvec, xj);
+ wvec = vld1q_f32(&w[0]); y0_3 = vmlaq_f32(y0_3, wvec, xj);
+ wvec = vld1q_f32(&w[4]); y4_7 = vmlaq_f32(y4_7, wvec, xj);
+ wvec = vld1q_f32(&w[8]); y8_11 = vmlaq_f32(y8_11, wvec, xj);
+ wvec = vld1q_f32(&w[12]); y12_15 = vmlaq_f32(y12_15, wvec, xj);

w += 16;
}
Expand Down

0 comments on commit aee5df3

Please sign in to comment.