Skip to content

Commit

Permalink
md/raid6: delta syndrome for ARM NEON
Browse files Browse the repository at this point in the history
This implements XOR syndrome calculation using NEON intrinsics.
As before, the module can be built for ARM and arm64 from the
same source.

Relative performance on a Cortex-A57 based system:

  raid6: int64x1  gen()   905 MB/s
  raid6: int64x1  xor()   881 MB/s
  raid6: int64x2  gen()  1343 MB/s
  raid6: int64x2  xor()  1286 MB/s
  raid6: int64x4  gen()  1896 MB/s
  raid6: int64x4  xor()  1321 MB/s
  raid6: int64x8  gen()  1773 MB/s
  raid6: int64x8  xor()  1165 MB/s
  raid6: neonx1   gen()  1834 MB/s
  raid6: neonx1   xor()  1278 MB/s
  raid6: neonx2   gen()  2528 MB/s
  raid6: neonx2   xor()  1942 MB/s
  raid6: neonx4   gen()  2888 MB/s
  raid6: neonx4   xor()  2334 MB/s
  raid6: neonx8   gen()  2957 MB/s
  raid6: neonx8   xor()  2232 MB/s
  raid6: using algorithm neonx8 gen() 2957 MB/s
  raid6: .... xor() 2232 MB/s, rmw enabled

Cc: Markus Stockhausen <[email protected]>
Cc: Neil Brown <[email protected]>
Signed-off-by: Ard Biesheuvel <[email protected]>
Signed-off-by: NeilBrown <[email protected]>
  • Loading branch information
Ard Biesheuvel authored and NeilBrown committed Aug 31, 2015
1 parent 199dc6e commit 0e833e6
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 1 deletion.
13 changes: 12 additions & 1 deletion lib/raid6/neon.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,20 @@
(unsigned long)bytes, ptrs); \
kernel_neon_end(); \
} \
static void raid6_neon ## _n ## _xor_syndrome(int disks, \
int start, int stop, \
size_t bytes, void **ptrs) \
{ \
void raid6_neon ## _n ## _xor_syndrome_real(int, \
int, int, unsigned long, void**); \
kernel_neon_begin(); \
raid6_neon ## _n ## _xor_syndrome_real(disks, \
start, stop, (unsigned long)bytes, ptrs); \
kernel_neon_end(); \
} \
struct raid6_calls const raid6_neonx ## _n = { \
raid6_neon ## _n ## _gen_syndrome, \
NULL, /* XOR not yet implemented */ \
raid6_neon ## _n ## _xor_syndrome, \
raid6_have_neon, \
"neonx" #_n, \
0 \
Expand Down
46 changes: 46 additions & 0 deletions lib/raid6/neon.uc
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
* neon.uc - RAID-6 syndrome calculation using ARM NEON instructions
*
* Copyright (C) 2012 Rob Herring
* Copyright (C) 2015 Linaro Ltd. <[email protected]>
*
* Based on altivec.uc:
* Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
Expand Down Expand Up @@ -78,3 +79,48 @@ void raid6_neon$#_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
vst1q_u8(&q[d+NSIZE*$$], wq$$);
}
}

void raid6_neon$#_xor_syndrome_real(int disks, int start, int stop,
unsigned long bytes, void **ptrs)
{
uint8_t **dptr = (uint8_t **)ptrs;
uint8_t *p, *q;
int d, z, z0;

register unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
const unative_t x1d = NBYTES(0x1d);

z0 = stop; /* P/Q right side optimization */
p = dptr[disks-2]; /* XOR parity */
q = dptr[disks-1]; /* RS syndrome */

for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
wq$$ = vld1q_u8(&dptr[z0][d+$$*NSIZE]);
wp$$ = veorq_u8(vld1q_u8(&p[d+$$*NSIZE]), wq$$);

/* P/Q data pages */
for ( z = z0-1 ; z >= start ; z-- ) {
wd$$ = vld1q_u8(&dptr[z][d+$$*NSIZE]);
wp$$ = veorq_u8(wp$$, wd$$);
w2$$ = MASK(wq$$);
w1$$ = SHLBYTE(wq$$);

w2$$ = vandq_u8(w2$$, x1d);
w1$$ = veorq_u8(w1$$, w2$$);
wq$$ = veorq_u8(w1$$, wd$$);
}
/* P/Q left side optimization */
for ( z = start-1 ; z >= 0 ; z-- ) {
w2$$ = MASK(wq$$);
w1$$ = SHLBYTE(wq$$);

w2$$ = vandq_u8(w2$$, x1d);
wq$$ = veorq_u8(w1$$, w2$$);
}
w1$$ = vld1q_u8(&q[d+NSIZE*$$]);
wq$$ = veorq_u8(wq$$, w1$$);

vst1q_u8(&p[d+NSIZE*$$], wp$$);
vst1q_u8(&q[d+NSIZE*$$], wq$$);
}
}

0 comments on commit 0e833e6

Please sign in to comment.