Skip to content

Commit

Permalink
crypto: arm64/aegis128 - use explicit vector load for permute vectors
Browse files Browse the repository at this point in the history
When building the new aegis128 NEON code in big endian mode, Clang
complains about the const uint8x16_t permute vectors in the following
way:

  crypto/aegis128-neon-inner.c:58:40: warning: vector initializers are not
      compatible with NEON intrinsics in big endian mode
      [-Wnonportable-vector-initialization]
                static const uint8x16_t shift_rows = {
                                                     ^
  crypto/aegis128-neon-inner.c:58:40: note: consider using vld1q_u8() to
      initialize a vector from memory, or vcombine_u8(vcreate_u8(), vcreate_u8())
      to initialize from integer constants

Since the same issue applies to the uint8x16x4_t loads of the AES Sbox,
update those references as well. However, since GCC does not implement
the vld1q_u8_x4() intrinsic, switch from IS_ENABLED() to a preprocessor
conditional to conditionally include this code.

Reported-by: Nathan Chancellor <[email protected]>
Signed-off-by: Ard Biesheuvel <[email protected]>
Tested-by: Nathan Chancellor <[email protected]>
Signed-off-by: Herbert Xu <[email protected]>
  • Loading branch information
Ard Biesheuvel authored and herbertx committed Aug 30, 2019
1 parent 309b77e commit 389139b
Showing 1 changed file with 19 additions and 19 deletions.
38 changes: 19 additions & 19 deletions crypto/aegis128-neon-inner.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ struct aegis128_state {
uint8x16_t v[5];
};

extern const uint8x16x4_t crypto_aes_sbox[];
extern const uint8_t crypto_aes_sbox[];

static struct aegis128_state aegis128_load_state_neon(const void *state)
{
Expand Down Expand Up @@ -55,39 +55,39 @@ uint8x16_t aegis_aes_round(uint8x16_t w)

#ifdef CONFIG_ARM64
if (!__builtin_expect(aegis128_have_aes_insn, 1)) {
static const uint8x16_t shift_rows = {
static const uint8_t shift_rows[] = {
0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3,
0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb,
};
static const uint8x16_t ror32by8 = {
static const uint8_t ror32by8[] = {
0x1, 0x2, 0x3, 0x0, 0x5, 0x6, 0x7, 0x4,
0x9, 0xa, 0xb, 0x8, 0xd, 0xe, 0xf, 0xc,
};
uint8x16_t v;

// shift rows
w = vqtbl1q_u8(w, shift_rows);
w = vqtbl1q_u8(w, vld1q_u8(shift_rows));

// sub bytes
if (!IS_ENABLED(CONFIG_CC_IS_GCC)) {
v = vqtbl4q_u8(crypto_aes_sbox[0], w);
v = vqtbx4q_u8(v, crypto_aes_sbox[1], w - 0x40);
v = vqtbx4q_u8(v, crypto_aes_sbox[2], w - 0x80);
v = vqtbx4q_u8(v, crypto_aes_sbox[3], w - 0xc0);
} else {
asm("tbl %0.16b, {v16.16b-v19.16b}, %1.16b" : "=w"(v) : "w"(w));
w -= 0x40;
asm("tbx %0.16b, {v20.16b-v23.16b}, %1.16b" : "+w"(v) : "w"(w));
w -= 0x40;
asm("tbx %0.16b, {v24.16b-v27.16b}, %1.16b" : "+w"(v) : "w"(w));
w -= 0x40;
asm("tbx %0.16b, {v28.16b-v31.16b}, %1.16b" : "+w"(v) : "w"(w));
}
#ifndef CONFIG_CC_IS_GCC
v = vqtbl4q_u8(vld1q_u8_x4(crypto_aes_sbox), w);
v = vqtbx4q_u8(v, vld1q_u8_x4(crypto_aes_sbox + 0x40), w - 0x40);
v = vqtbx4q_u8(v, vld1q_u8_x4(crypto_aes_sbox + 0x80), w - 0x80);
v = vqtbx4q_u8(v, vld1q_u8_x4(crypto_aes_sbox + 0xc0), w - 0xc0);
#else
asm("tbl %0.16b, {v16.16b-v19.16b}, %1.16b" : "=w"(v) : "w"(w));
w -= 0x40;
asm("tbx %0.16b, {v20.16b-v23.16b}, %1.16b" : "+w"(v) : "w"(w));
w -= 0x40;
asm("tbx %0.16b, {v24.16b-v27.16b}, %1.16b" : "+w"(v) : "w"(w));
w -= 0x40;
asm("tbx %0.16b, {v28.16b-v31.16b}, %1.16b" : "+w"(v) : "w"(w));
#endif

// mix columns
w = (v << 1) ^ (uint8x16_t)(((int8x16_t)v >> 7) & 0x1b);
w ^= (uint8x16_t)vrev32q_u16((uint16x8_t)v);
w ^= vqtbl1q_u8(v ^ w, ror32by8);
w ^= vqtbl1q_u8(v ^ w, vld1q_u8(ror32by8));

return w;
}
Expand Down

0 comments on commit 389139b

Please sign in to comment.