Skip to content

Commit

Permalink
lib/raid6: arm: optimize away a mask operation in NEON recovery routine
Browse files Browse the repository at this point in the history
The NEON recovery code was modeled after the x86 SIMD code, and for
some reason, that code uses a 16 bit wide signed shift and a mask to
perform what amounts to a 8 bit unsigned shift. So fold the ops
together.

Signed-off-by: Ard Biesheuvel <[email protected]>
Signed-off-by: Catalin Marinas <[email protected]>
  • Loading branch information
Ard Biesheuvel authored and ctmarinas committed Feb 28, 2019
1 parent 1ad3935 commit 335ebe3
Showing 1 changed file with 6 additions and 6 deletions.
12 changes: 6 additions & 6 deletions lib/raid6/recov_neon_inner.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,14 @@ void __raid6_2data_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dp,
px = veorq_u8(vld1q_u8(p), vld1q_u8(dp));
vx = veorq_u8(vld1q_u8(q), vld1q_u8(dq));

vy = (uint8x16_t)vshrq_n_s16((int16x8_t)vx, 4);
vy = vshrq_n_u8(vx, 4);
vx = vqtbl1q_u8(qm0, vandq_u8(vx, x0f));
vy = vqtbl1q_u8(qm1, vandq_u8(vy, x0f));
vy = vqtbl1q_u8(qm1, vy);
qx = veorq_u8(vx, vy);

vy = (uint8x16_t)vshrq_n_s16((int16x8_t)px, 4);
vy = vshrq_n_u8(px, 4);
vx = vqtbl1q_u8(pm0, vandq_u8(px, x0f));
vy = vqtbl1q_u8(pm1, vandq_u8(vy, x0f));
vy = vqtbl1q_u8(pm1, vy);
vx = veorq_u8(vx, vy);
db = veorq_u8(vx, qx);

Expand Down Expand Up @@ -97,9 +97,9 @@ void __raid6_datap_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dq,

vx = veorq_u8(vld1q_u8(q), vld1q_u8(dq));

vy = (uint8x16_t)vshrq_n_s16((int16x8_t)vx, 4);
vy = vshrq_n_u8(vx, 4);
vx = vqtbl1q_u8(qm0, vandq_u8(vx, x0f));
vy = vqtbl1q_u8(qm1, vandq_u8(vy, x0f));
vy = vqtbl1q_u8(qm1, vy);
vx = veorq_u8(vx, vy);
vy = veorq_u8(vx, vld1q_u8(p));

Expand Down

0 comments on commit 335ebe3

Please sign in to comment.