Skip to content

Commit

Permalink
crypto: algapi - make crypto_xor() and crypto_inc() alignment agnostic
Browse files Browse the repository at this point in the history
Instead of unconditionally forcing 4 byte alignment for all generic
chaining modes that rely on crypto_xor() or crypto_inc() (which may
result in unnecessary copying of data when the underlying hardware
can perform unaligned accesses efficiently), make those functions
deal with unaligned input explicitly, but only if the Kconfig symbol
HAVE_EFFICIENT_UNALIGNED_ACCESS is not set. This will allow us to drop
the alignmasks from the CBC, CMAC, CTR, CTS, PCBC and SEQIV drivers.

For crypto_inc(), this simply involves making the 4-byte stride
conditional on HAVE_EFFICIENT_UNALIGNED_ACCESS being set, given that
it typically operates on 16 byte buffers.

For crypto_xor(), an algorithm is implemented that simply runs through
the input using the largest strides possible if unaligned accesses are
allowed. If they are not, an optimal sequence of memory accesses is
emitted that takes the relative alignment of the input buffers into
account, e.g., if the relative misalignment of dst and src is 4 bytes,
the entire xor operation will be completed using 4 byte loads and stores
(modulo unaligned bits at the start and end). Note that all expressions
involving relalign are simply eliminated by the compiler when
HAVE_EFFICIENT_UNALIGNED_ACCESS is defined.

Signed-off-by: Ard Biesheuvel <[email protected]>
Signed-off-by: Herbert Xu <[email protected]>
  • Loading branch information
Ard Biesheuvel authored and herbertx committed Feb 11, 2017
1 parent 7d6e910 commit db91af0
Show file tree
Hide file tree
Showing 8 changed files with 70 additions and 34 deletions.
68 changes: 50 additions & 18 deletions crypto/algapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -962,34 +962,66 @@ void crypto_inc(u8 *a, unsigned int size)
__be32 *b = (__be32 *)(a + size);
u32 c;

for (; size >= 4; size -= 4) {
c = be32_to_cpu(*--b) + 1;
*b = cpu_to_be32(c);
if (c)
return;
}
if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
!((unsigned long)b & (__alignof__(*b) - 1)))
for (; size >= 4; size -= 4) {
c = be32_to_cpu(*--b) + 1;
*b = cpu_to_be32(c);
if (c)
return;
}

crypto_inc_byte(a, size);
}
EXPORT_SYMBOL_GPL(crypto_inc);

static inline void crypto_xor_byte(u8 *a, const u8 *b, unsigned int size)
void __crypto_xor(u8 *dst, const u8 *src, unsigned int len)
{
for (; size; size--)
*a++ ^= *b++;
}
int relalign = 0;

if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
int size = sizeof(unsigned long);
int d = ((unsigned long)dst ^ (unsigned long)src) & (size - 1);

relalign = d ? 1 << __ffs(d) : size;

/*
* If we care about alignment, process as many bytes as
* needed to advance dst and src to values whose alignments
* equal their relative alignment. This will allow us to
* process the remainder of the input using optimal strides.
*/
while (((unsigned long)dst & (relalign - 1)) && len > 0) {
*dst++ ^= *src++;
len--;
}
}

void crypto_xor(u8 *dst, const u8 *src, unsigned int size)
{
u32 *a = (u32 *)dst;
u32 *b = (u32 *)src;
while (IS_ENABLED(CONFIG_64BIT) && len >= 8 && !(relalign & 7)) {
*(u64 *)dst ^= *(u64 *)src;
dst += 8;
src += 8;
len -= 8;
}

for (; size >= 4; size -= 4)
*a++ ^= *b++;
while (len >= 4 && !(relalign & 3)) {
*(u32 *)dst ^= *(u32 *)src;
dst += 4;
src += 4;
len -= 4;
}

while (len >= 2 && !(relalign & 1)) {
*(u16 *)dst ^= *(u16 *)src;
dst += 2;
src += 2;
len -= 2;
}

crypto_xor_byte((u8 *)a, (u8 *)b, size);
while (len--)
*dst++ ^= *src++;
}
EXPORT_SYMBOL_GPL(crypto_xor);
EXPORT_SYMBOL_GPL(__crypto_xor);

unsigned int crypto_alg_extsize(struct crypto_alg *alg)
{
Expand Down
3 changes: 0 additions & 3 deletions crypto/cbc.c
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,6 @@ static int crypto_cbc_create(struct crypto_template *tmpl, struct rtattr **tb)
inst->alg.base.cra_blocksize = alg->cra_blocksize;
inst->alg.base.cra_alignmask = alg->cra_alignmask;

/* We access the data as u32s when xoring. */
inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;

inst->alg.ivsize = alg->cra_blocksize;
inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize;
inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize;
Expand Down
3 changes: 1 addition & 2 deletions crypto/cmac.c
Original file line number Diff line number Diff line change
Expand Up @@ -260,8 +260,7 @@ static int cmac_create(struct crypto_template *tmpl, struct rtattr **tb)
if (err)
goto out_free_inst;

/* We access the data as u32s when xoring. */
alignmask = alg->cra_alignmask | (__alignof__(u32) - 1);
alignmask = alg->cra_alignmask;
inst->alg.base.cra_alignmask = alignmask;
inst->alg.base.cra_priority = alg->cra_priority;
inst->alg.base.cra_blocksize = alg->cra_blocksize;
Expand Down
2 changes: 1 addition & 1 deletion crypto/ctr.c
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ static struct crypto_instance *crypto_ctr_alloc(struct rtattr **tb)
inst->alg.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER;
inst->alg.cra_priority = alg->cra_priority;
inst->alg.cra_blocksize = 1;
inst->alg.cra_alignmask = alg->cra_alignmask | (__alignof__(u32) - 1);
inst->alg.cra_alignmask = alg->cra_alignmask;
inst->alg.cra_type = &crypto_blkcipher_type;

inst->alg.cra_blkcipher.ivsize = alg->cra_blocksize;
Expand Down
3 changes: 0 additions & 3 deletions crypto/cts.c
Original file line number Diff line number Diff line change
Expand Up @@ -374,9 +374,6 @@ static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb)
inst->alg.base.cra_blocksize = alg->base.cra_blocksize;
inst->alg.base.cra_alignmask = alg->base.cra_alignmask;

/* We access the data as u32s when xoring. */
inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;

inst->alg.ivsize = alg->base.cra_blocksize;
inst->alg.chunksize = crypto_skcipher_alg_chunksize(alg);
inst->alg.min_keysize = crypto_skcipher_alg_min_keysize(alg);
Expand Down
3 changes: 0 additions & 3 deletions crypto/pcbc.c
Original file line number Diff line number Diff line change
Expand Up @@ -260,9 +260,6 @@ static int crypto_pcbc_create(struct crypto_template *tmpl, struct rtattr **tb)
inst->alg.base.cra_blocksize = alg->cra_blocksize;
inst->alg.base.cra_alignmask = alg->cra_alignmask;

/* We access the data as u32s when xoring. */
inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;

inst->alg.ivsize = alg->cra_blocksize;
inst->alg.min_keysize = alg->cra_cipher.cia_min_keysize;
inst->alg.max_keysize = alg->cra_cipher.cia_max_keysize;
Expand Down
2 changes: 0 additions & 2 deletions crypto/seqiv.c
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,6 @@ static int seqiv_aead_create(struct crypto_template *tmpl, struct rtattr **tb)
if (IS_ERR(inst))
return PTR_ERR(inst);

inst->alg.base.cra_alignmask |= __alignof__(u32) - 1;

spawn = aead_instance_ctx(inst);
alg = crypto_spawn_aead_alg(spawn);

Expand Down
20 changes: 18 additions & 2 deletions include/crypto/algapi.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,9 +191,25 @@ static inline unsigned int crypto_queue_len(struct crypto_queue *queue)
return queue->qlen;
}

/* These functions require the input/output to be aligned as u32. */
void crypto_inc(u8 *a, unsigned int size);
void crypto_xor(u8 *dst, const u8 *src, unsigned int size);
void __crypto_xor(u8 *dst, const u8 *src, unsigned int size);

/*
 * crypto_xor - XOR @size bytes of @src into @dst, in place.
 *
 * When CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is enabled and @size is a
 * compile-time constant that is a whole number of unsigned longs, the XOR
 * is performed inline, one unsigned long at a time, without checking the
 * alignment of either pointer. Every other case is handed to the
 * out-of-line __crypto_xor().
 */
static inline void crypto_xor(u8 *dst, const u8 *src, unsigned int size)
{
	unsigned long *out = (unsigned long *)dst;
	const unsigned long *in = (const unsigned long *)src;
	unsigned int nwords;
	unsigned int i;

	/* Anything that does not qualify for the inline path goes out of line. */
	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
	    !__builtin_constant_p(size) ||
	    (size % sizeof(unsigned long)) != 0) {
		__crypto_xor(dst, src, size);
		return;
	}

	/* size is an exact multiple of the word size here, so no tail remains. */
	nwords = size / sizeof(unsigned long);
	for (i = 0; i < nwords; i++)
		out[i] ^= in[i];
}

int blkcipher_walk_done(struct blkcipher_desc *desc,
struct blkcipher_walk *walk, int err);
Expand Down

0 comments on commit db91af0

Please sign in to comment.