From 90246e79af062fcbb8c3728a5f29cb19b3468f59 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 4 Nov 2010 13:00:22 -0500 Subject: [PATCH 01/46] crypto: hash - Fix async import on shash algorithm The function shash_async_import did not initialise the descriptor correctly prior to calling the underlying shash import function. This patch adds the required initialisation. Reported-by: Miloslav Trmac Signed-off-by: Herbert Xu --- crypto/shash.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/crypto/shash.c b/crypto/shash.c index 22fd9433141f99..76f74b96315119 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -310,7 +310,13 @@ static int shash_async_export(struct ahash_request *req, void *out) static int shash_async_import(struct ahash_request *req, const void *in) { - return crypto_shash_import(ahash_request_ctx(req), in); + struct crypto_shash **ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); + struct shash_desc *desc = ahash_request_ctx(req); + + desc->tfm = *ctx; + desc->flags = req->base.flags; + + return crypto_shash_import(desc, in); } static void crypto_exit_shash_ops_async(struct crypto_tfm *tfm) From 895be15745d59cc7ede0e1c203e3432b0abdb71c Mon Sep 17 00:00:00 2001 From: Nicolas Kaiser Date: Thu, 4 Nov 2010 14:58:12 -0400 Subject: [PATCH 02/46] crypto: cast5 - simplify if-statements I noticed that by factoring out common rounds from the branches of the if-statements in the encryption and decryption functions, the executable file size goes down significantly, for crypto/cast5.ko from 26688 bytes to 24336 bytes (amd64). On my test system, I saw a slight speedup. This is the first time I'm doing such a benchmark - I found a similar one on the crypto mailing list, and I hope I did it right? Before: # cryptsetup create dm-test /dev/hda2 -c cast5-cbc-plain -s 128 Passsatz eingeben: # dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50 52428800 Bytes (52 MB) kopiert, 2,43484 s, 21,5 MB/s # dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50 52428800 Bytes (52 MB) kopiert, 2,4089 s, 21,8 MB/s # dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50 52428800 Bytes (52 MB) kopiert, 2,41091 s, 21,7 MB/s After: # cryptsetup create dm-test /dev/hda2 -c cast5-cbc-plain -s 128 Passsatz eingeben: # dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50 52428800 Bytes (52 MB) kopiert, 2,38128 s, 22,0 MB/s # dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50 52428800 Bytes (52 MB) kopiert, 2,29486 s, 22,8 MB/s # dd if=/dev/zero of=/dev/mapper/dm-test bs=1M count=50 52428800 Bytes (52 MB) kopiert, 2,37162 s, 22,1 MB/s Signed-off-by: Nicolas Kaiser Signed-off-by: Herbert Xu --- crypto/cast5.c | 74 ++++++++++++++++---------------------------------- 1 file changed, 24 insertions(+), 50 deletions(-) diff --git a/crypto/cast5.c b/crypto/cast5.c index a1d2294b50ad70..4a230ddec87741 100644 --- a/crypto/cast5.c +++ b/crypto/cast5.c @@ -604,36 +604,23 @@ static void cast5_encrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf) * Rounds 3, 6, 9, 12, and 15 use f function Type 3. */ + t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]); + t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]); + t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]); + t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]); + t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]); + t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]); + t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]); + t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]); + t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]); + t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]); + t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]); + t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]); if (!(c->rr)) { - t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]); - t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]); - t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]); - t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]); - t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]); - t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]); - t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]); - t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]); - t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]); - t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]); - t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]); - t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]); t = l; l = r; r = t ^ F1(r, Km[12], Kr[12]); t = l; l = r; r = t ^ F2(r, Km[13], Kr[13]); t = l; l = r; r = t ^ F3(r, Km[14], Kr[14]); t = l; l = r; r = t ^ F1(r, Km[15], Kr[15]); - } else { - t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]); - t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]); - t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]); - t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]); - t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]); - t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]); - t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]); - t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]); - t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]); - t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]); - t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]); - t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]); } /* c1...c64 <-- (R16,L16). (Exchange final blocks L16, R16 and @@ -663,32 +650,19 @@ static void cast5_decrypt(struct crypto_tfm *tfm, u8 *outbuf, const u8 *inbuf) t = l; l = r; r = t ^ F3(r, Km[14], Kr[14]); t = l; l = r; r = t ^ F2(r, Km[13], Kr[13]); t = l; l = r; r = t ^ F1(r, Km[12], Kr[12]); - t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]); - t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]); - t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]); - t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]); - t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]); - t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]); - t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]); - t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]); - t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]); - t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]); - t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]); - t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]); - } else { - t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]); - t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]); - t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]); - t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]); - t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]); - t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]); - t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]); - t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]); - t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]); - t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]); - t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]); - t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]); } + t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]); + t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]); + t = l; l = r; r = t ^ F1(r, Km[9], Kr[9]); + t = l; l = r; r = t ^ F3(r, Km[8], Kr[8]); + t = l; l = r; r = t ^ F2(r, Km[7], Kr[7]); + t = l; l = r; r = t ^ F1(r, Km[6], Kr[6]); + t = l; l = r; r = t ^ F3(r, Km[5], Kr[5]); + t = l; l = r; r = t ^ F2(r, Km[4], Kr[4]); + t = l; l = r; r = t ^ F1(r, Km[3], Kr[3]); + t = l; l = r; r = t ^ F3(r, Km[2], Kr[2]); + t = l; l = r; r = t ^ F2(r, Km[1], Kr[1]); + t = l; l = r; r = t ^ F1(r, Km[0], Kr[0]); dst[0] = cpu_to_be32(r); dst[1] = cpu_to_be32(l); From 0bd82f5f6355775fbaf7d3c664432ce1b862be1e Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Thu, 4 Nov 2010 15:00:45 -0400 Subject: [PATCH 03/46] crypto: aesni-intel - RFC4106 AES-GCM Driver Using Intel New Instructions This patch adds an optimized RFC4106 AES-GCM implementation for 64-bit kernels. It supports 128-bit AES key size. This leverages the crypto AEAD interface type to facilitate a combined AES & GCM operation to be implemented in assembly code. The assembly code leverages Intel(R) AES New Instructions and the PCLMULQDQ instruction. Signed-off-by: Adrian Hoban Signed-off-by: Tadeusz Struk Signed-off-by: Gabriele Paoloni Signed-off-by: Aidan O'Mahony Signed-off-by: Erdinc Ozturk Signed-off-by: James Guilford Signed-off-by: Wajdi Feghali Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 1192 ++++++++++++++++++++++++++++ arch/x86/crypto/aesni-intel_glue.c | 518 +++++++++++- 2 files changed, 1708 insertions(+), 2 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index ff16756a51c1a3..aafced54df645d 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -9,6 +9,17 @@ * Vinodh Gopal * Kahraman Akdemir * + * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD + * interface for 64-bit kernels. + * Authors: Erdinc Ozturk (erdinc.ozturk@intel.com) + * Aidan O'Mahony (aidan.o.mahony@intel.com) + * Adrian Hoban + * James Guilford (james.guilford@intel.com) + * Gabriele Paoloni + * Tadeusz Struk (tadeusz.struk@intel.com) + * Wajdi Feghali (wajdi.k.feghali@intel.com) + * Copyright (c) 2010, Intel Corporation. + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -18,8 +29,60 @@ #include #include +.data +POLY: .octa 0xC2000000000000000000000000000001 +TWOONE: .octa 0x00000001000000000000000000000001 + +# order of these constants should not change. +# more specifically, ALL_F should follow SHIFT_MASK, +# and ZERO should follow ALL_F + +SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F +MASK1: .octa 0x0000000000000000ffffffffffffffff +MASK2: .octa 0xffffffffffffffff0000000000000000 +SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 +ALL_F: .octa 0xffffffffffffffffffffffffffffffff +ZERO: .octa 0x00000000000000000000000000000000 +ONE: .octa 0x00000000000000000000000000000001 +F_MIN_MASK: .octa 0xf1f2f3f4f5f6f7f8f9fafbfcfdfeff0 +dec: .octa 0x1 +enc: .octa 0x2 + + .text + +#define STACK_OFFSET 8*3 +#define HashKey 16*0 // store HashKey <<1 mod poly here +#define HashKey_2 16*1 // store HashKey^2 <<1 mod poly here +#define HashKey_3 16*2 // store HashKey^3 <<1 mod poly here +#define HashKey_4 16*3 // store HashKey^4 <<1 mod poly here +#define HashKey_k 16*4 // store XOR of High 64 bits and Low 64 + // bits of HashKey <<1 mod poly here + //(for Karatsuba purposes) +#define HashKey_2_k 16*5 // store XOR of High 64 bits and Low 64 + // bits of HashKey^2 <<1 mod poly here + // (for Karatsuba purposes) +#define HashKey_3_k 16*6 // store XOR of High 64 bits and Low 64 + // bits of HashKey^3 <<1 mod poly here + // (for Karatsuba purposes) +#define HashKey_4_k 16*7 // store XOR of High 64 bits and Low 64 + // bits of HashKey^4 <<1 mod poly here + // (for Karatsuba purposes) +#define VARIABLE_OFFSET 16*8 + +#define arg1 rdi +#define arg2 rsi +#define arg3 rdx +#define arg4 rcx +#define arg5 r8 +#define arg6 r9 +#define arg7 STACK_OFFSET+8(%r14) +#define arg8 STACK_OFFSET+16(%r14) +#define arg9 STACK_OFFSET+24(%r14) +#define arg10 STACK_OFFSET+32(%r14) + + #define STATE1 %xmm0 #define STATE2 %xmm4 #define STATE3 %xmm5 @@ -47,6 +110,1135 @@ #define T2 %r11 #define TCTR_LOW T2 + +/* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) +* +* +* Input: A and B (128-bits each, bit-reflected) +* Output: C = A*B*x mod poly, (i.e. >>1 ) +* To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input +* GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly. +* +*/ +.macro GHASH_MUL GH HK TMP1 TMP2 TMP3 TMP4 TMP5 + movdqa \GH, \TMP1 + pshufd $78, \GH, \TMP2 + pshufd $78, \HK, \TMP3 + pxor \GH, \TMP2 # TMP2 = a1+a0 + pxor \HK, \TMP3 # TMP3 = b1+b0 + PCLMULQDQ 0x11, \HK, \TMP1 # TMP1 = a1*b1 + PCLMULQDQ 0x00, \HK, \GH # GH = a0*b0 + PCLMULQDQ 0x00, \TMP3, \TMP2 # TMP2 = (a0+a1)*(b1+b0) + pxor \GH, \TMP2 + pxor \TMP1, \TMP2 # TMP2 = (a0*b0)+(a1*b0) + movdqa \TMP2, \TMP3 + pslldq $8, \TMP3 # left shift TMP3 2 DWs + psrldq $8, \TMP2 # right shift TMP2 2 DWs + pxor \TMP3, \GH + pxor \TMP2, \TMP1 # TMP2:GH holds the result of GH*HK + + # first phase of the reduction + + movdqa \GH, \TMP2 + movdqa \GH, \TMP3 + movdqa \GH, \TMP4 # copy GH into TMP2,TMP3 and TMP4 + # in in order to perform + # independent shifts + pslld $31, \TMP2 # packed right shift <<31 + pslld $30, \TMP3 # packed right shift <<30 + pslld $25, \TMP4 # packed right shift <<25 + pxor \TMP3, \TMP2 # xor the shifted versions + pxor \TMP4, \TMP2 + movdqa \TMP2, \TMP5 + psrldq $4, \TMP5 # right shift TMP5 1 DW + pslldq $12, \TMP2 # left shift TMP2 3 DWs + pxor \TMP2, \GH + + # second phase of the reduction + + movdqa \GH,\TMP2 # copy GH into TMP2,TMP3 and TMP4 + # in in order to perform + # independent shifts + movdqa \GH,\TMP3 + movdqa \GH,\TMP4 + psrld $1,\TMP2 # packed left shift >>1 + psrld $2,\TMP3 # packed left shift >>2 + psrld $7,\TMP4 # packed left shift >>7 + pxor \TMP3,\TMP2 # xor the shifted versions + pxor \TMP4,\TMP2 + pxor \TMP5, \TMP2 + pxor \TMP2, \GH + pxor \TMP1, \GH # result is in TMP1 +.endm + +/* +* if a = number of total plaintext bytes +* b = floor(a/16) +* num_initial_blocks = b mod 4 +* encrypt the initial num_initial_blocks blocks and apply ghash on +* the ciphertext +* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers +* are clobbered +* arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified +*/ + +.macro INITIAL_BLOCKS num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ +XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation + + mov arg7, %r10 # %r10 = AAD + mov arg8, %r12 # %r12 = aadLen + mov %r12, %r11 + pxor %xmm\i, %xmm\i +_get_AAD_loop\num_initial_blocks\operation: + movd (%r10), \TMP1 + pslldq $12, \TMP1 + psrldq $4, %xmm\i + pxor \TMP1, %xmm\i + add $4, %r10 + sub $4, %r12 + jne _get_AAD_loop\num_initial_blocks\operation + cmp $16, %r11 + je _get_AAD_loop2_done\num_initial_blocks\operation + mov $16, %r12 +_get_AAD_loop2\num_initial_blocks\operation: + psrldq $4, %xmm\i + sub $4, %r12 + cmp %r11, %r12 + jne _get_AAD_loop2\num_initial_blocks\operation +_get_AAD_loop2_done\num_initial_blocks\operation: + pshufb SHUF_MASK(%rip), %xmm\i # byte-reflect the AAD data + xor %r11, %r11 # initialise the data pointer offset as zero + + # start AES for num_initial_blocks blocks + + mov %arg5, %rax # %rax = *Y0 + movdqu (%rax), \XMM0 # XMM0 = Y0 + pshufb SHUF_MASK(%rip), \XMM0 +.if \i_seq != 0 +.irpc index, \i_seq + paddd ONE(%rip), \XMM0 # INCR Y0 + movdqa \XMM0, %xmm\index + pshufb SHUF_MASK(%rip), %xmm\index # perform a 16 byte swap +.endr +.irpc index, \i_seq + pxor 16*0(%arg1), %xmm\index +.endr +.irpc index, \i_seq + movaps 0x10(%rdi), \TMP1 + AESENC \TMP1, %xmm\index # Round 1 +.endr +.irpc index, \i_seq + movaps 0x20(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x30(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x40(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x50(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x60(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x70(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x80(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x90(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0xa0(%arg1), \TMP1 + AESENCLAST \TMP1, %xmm\index # Round 10 +.endr +.irpc index, \i_seq + movdqu (%arg3 , %r11, 1), \TMP1 + pxor \TMP1, %xmm\index + movdqu %xmm\index, (%arg2 , %r11, 1) + # write back plaintext/ciphertext for num_initial_blocks + add $16, %r11 +.if \operation == dec + movdqa \TMP1, %xmm\index +.endif + pshufb SHUF_MASK(%rip), %xmm\index + # prepare plaintext/ciphertext for GHASH computation +.endr +.endif + GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + # apply GHASH on num_initial_blocks blocks + +.if \i == 5 + pxor %xmm5, %xmm6 + GHASH_MUL %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + pxor %xmm6, %xmm7 + GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + pxor %xmm7, %xmm8 + GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 +.elseif \i == 6 + pxor %xmm6, %xmm7 + GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + pxor %xmm7, %xmm8 + GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 +.elseif \i == 7 + pxor %xmm7, %xmm8 + GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 +.endif + cmp $64, %r13 + jl _initial_blocks_done\num_initial_blocks\operation + # no need for precomputed values +/* +* +* Precomputations for HashKey parallel with encryption of first 4 blocks. +* Haskey_i_k holds XORed values of the low and high parts of the Haskey_i +*/ + paddd ONE(%rip), \XMM0 # INCR Y0 + movdqa \XMM0, \XMM1 + pshufb SHUF_MASK(%rip), \XMM1 # perform a 16 byte swap + paddd ONE(%rip), \XMM0 # INCR Y0 + movdqa \XMM0, \XMM2 + pshufb SHUF_MASK(%rip), \XMM2 # perform a 16 byte swap + paddd ONE(%rip), \XMM0 # INCR Y0 + movdqa \XMM0, \XMM3 + pshufb SHUF_MASK(%rip), \XMM3 # perform a 16 byte swap + paddd ONE(%rip), \XMM0 # INCR Y0 + movdqa \XMM0, \XMM4 + pshufb SHUF_MASK(%rip), \XMM4 # perform a 16 byte swap + pxor 16*0(%arg1), \XMM1 + pxor 16*0(%arg1), \XMM2 + pxor 16*0(%arg1), \XMM3 + pxor 16*0(%arg1), \XMM4 + movdqa \TMP3, \TMP5 + pshufd $78, \TMP3, \TMP1 + pxor \TMP3, \TMP1 + movdqa \TMP1, HashKey_k(%rsp) + GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 +# TMP5 = HashKey^2<<1 (mod poly) + movdqa \TMP5, HashKey_2(%rsp) +# HashKey_2 = HashKey^2<<1 (mod poly) + pshufd $78, \TMP5, \TMP1 + pxor \TMP5, \TMP1 + movdqa \TMP1, HashKey_2_k(%rsp) +.irpc index, 1234 # do 4 rounds + movaps 0x10*\index(%arg1), \TMP1 + AESENC \TMP1, \XMM1 + AESENC \TMP1, \XMM2 + AESENC \TMP1, \XMM3 + AESENC \TMP1, \XMM4 +.endr + GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 +# TMP5 = HashKey^3<<1 (mod poly) + movdqa \TMP5, HashKey_3(%rsp) + pshufd $78, \TMP5, \TMP1 + pxor \TMP5, \TMP1 + movdqa \TMP1, HashKey_3_k(%rsp) +.irpc index, 56789 # do next 5 rounds + movaps 0x10*\index(%arg1), \TMP1 + AESENC \TMP1, \XMM1 + AESENC \TMP1, \XMM2 + AESENC \TMP1, \XMM3 + AESENC \TMP1, \XMM4 +.endr + GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 +# TMP5 = HashKey^3<<1 (mod poly) + movdqa \TMP5, HashKey_4(%rsp) + pshufd $78, \TMP5, \TMP1 + pxor \TMP5, \TMP1 + movdqa \TMP1, HashKey_4_k(%rsp) + movaps 0xa0(%arg1), \TMP2 + AESENCLAST \TMP2, \XMM1 + AESENCLAST \TMP2, \XMM2 + AESENCLAST \TMP2, \XMM3 + AESENCLAST \TMP2, \XMM4 + movdqu 16*0(%arg3 , %r11 , 1), \TMP1 + pxor \TMP1, \XMM1 +.if \operation == dec + movdqu \XMM1, 16*0(%arg2 , %r11 , 1) + movdqa \TMP1, \XMM1 +.endif + movdqu 16*1(%arg3 , %r11 , 1), \TMP1 + pxor \TMP1, \XMM2 +.if \operation == dec + movdqu \XMM2, 16*1(%arg2 , %r11 , 1) + movdqa \TMP1, \XMM2 +.endif + movdqu 16*2(%arg3 , %r11 , 1), \TMP1 + pxor \TMP1, \XMM3 +.if \operation == dec + movdqu \XMM3, 16*2(%arg2 , %r11 , 1) + movdqa \TMP1, \XMM3 +.endif + movdqu 16*3(%arg3 , %r11 , 1), \TMP1 + pxor \TMP1, \XMM4 +.if \operation == dec + movdqu \XMM4, 16*3(%arg2 , %r11 , 1) + movdqa \TMP1, \XMM4 +.else + movdqu \XMM1, 16*0(%arg2 , %r11 , 1) + movdqu \XMM2, 16*1(%arg2 , %r11 , 1) + movdqu \XMM3, 16*2(%arg2 , %r11 , 1) + movdqu \XMM4, 16*3(%arg2 , %r11 , 1) +.endif + add $64, %r11 + pshufb SHUF_MASK(%rip), \XMM1 # perform a 16 byte swap + pxor \XMMDst, \XMM1 +# combine GHASHed value with the corresponding ciphertext + pshufb SHUF_MASK(%rip), \XMM2 # perform a 16 byte swap + pshufb SHUF_MASK(%rip), \XMM3 # perform a 16 byte swap + pshufb SHUF_MASK(%rip), \XMM4 # perform a 16 byte swap +_initial_blocks_done\num_initial_blocks\operation: +.endm + +/* +* encrypt 4 blocks at a time +* ghash the 4 previously encrypted ciphertext blocks +* arg1, %arg2, %arg3 are used as pointers only, not modified +* %r11 is the data offset value +*/ +.macro GHASH_4_ENCRYPT_4_PARALLEL TMP1 TMP2 TMP3 TMP4 TMP5 \ +TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation + + movdqa \XMM1, \XMM5 + movdqa \XMM2, \XMM6 + movdqa \XMM3, \XMM7 + movdqa \XMM4, \XMM8 + + # multiply TMP5 * HashKey using karatsuba + + movdqa \XMM5, \TMP4 + pshufd $78, \XMM5, \TMP6 + pxor \XMM5, \TMP6 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqa HashKey_4(%rsp), \TMP5 + PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1 + movdqa \XMM0, \XMM1 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqa \XMM0, \XMM2 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqa \XMM0, \XMM3 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqa \XMM0, \XMM4 + pshufb SHUF_MASK(%rip), \XMM1 # perform a 16 byte swap + PCLMULQDQ 0x00, \TMP5, \XMM5 # XMM5 = a0*b0 + pshufb SHUF_MASK(%rip), \XMM2 # perform a 16 byte swap + pshufb SHUF_MASK(%rip), \XMM3 # perform a 16 byte swap + pshufb SHUF_MASK(%rip), \XMM4 # perform a 16 byte swap + pxor (%arg1), \XMM1 + pxor (%arg1), \XMM2 + pxor (%arg1), \XMM3 + pxor (%arg1), \XMM4 + movdqa HashKey_4_k(%rsp), \TMP5 + PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) + movaps 0x10(%arg1), \TMP1 + AESENC \TMP1, \XMM1 # Round 1 + AESENC \TMP1, \XMM2 + AESENC \TMP1, \XMM3 + AESENC \TMP1, \XMM4 + movaps 0x20(%arg1), \TMP1 + AESENC \TMP1, \XMM1 # Round 2 + AESENC \TMP1, \XMM2 + AESENC \TMP1, \XMM3 + AESENC \TMP1, \XMM4 + movdqa \XMM6, \TMP1 + pshufd $78, \XMM6, \TMP2 + pxor \XMM6, \TMP2 + movdqa HashKey_3(%rsp), \TMP5 + PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 + movaps 0x30(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 3 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + PCLMULQDQ 0x00, \TMP5, \XMM6 # XMM6 = a0*b0 + movaps 0x40(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 4 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + movdqa HashKey_3_k(%rsp), \TMP5 + PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + movaps 0x50(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 5 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + pxor \TMP1, \TMP4 +# accumulate the results in TMP4:XMM5, TMP6 holds the middle part + pxor \XMM6, \XMM5 + pxor \TMP2, \TMP6 + movdqa \XMM7, \TMP1 + pshufd $78, \XMM7, \TMP2 + pxor \XMM7, \TMP2 + movdqa HashKey_2(%rsp ), \TMP5 + + # Multiply TMP5 * HashKey using karatsuba + + PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 + movaps 0x60(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 6 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + PCLMULQDQ 0x00, \TMP5, \XMM7 # XMM7 = a0*b0 + movaps 0x70(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 7 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + movdqa HashKey_2_k(%rsp), \TMP5 + PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + movaps 0x80(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 8 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + pxor \TMP1, \TMP4 +# accumulate the results in TMP4:XMM5, TMP6 holds the middle part + pxor \XMM7, \XMM5 + pxor \TMP2, \TMP6 + + # Multiply XMM8 * HashKey + # XMM8 and TMP5 hold the values for the two operands + + movdqa \XMM8, \TMP1 + pshufd $78, \XMM8, \TMP2 + pxor \XMM8, \TMP2 + movdqa HashKey(%rsp), \TMP5 + PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 + movaps 0x90(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 9 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0 + movaps 0xa0(%arg1), \TMP3 + AESENCLAST \TMP3, \XMM1 # Round 10 + AESENCLAST \TMP3, \XMM2 + AESENCLAST \TMP3, \XMM3 + AESENCLAST \TMP3, \XMM4 + movdqa HashKey_k(%rsp), \TMP5 + PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + movdqu (%arg3,%r11,1), \TMP3 + pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK +.if \operation == dec + movdqu \XMM1, (%arg2,%r11,1) # Write to plaintext buffer + movdqa \TMP3, \XMM1 +.endif + movdqu 16(%arg3,%r11,1), \TMP3 + pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK +.if \operation == dec + movdqu \XMM2, 16(%arg2,%r11,1) # Write to plaintext buffer + movdqa \TMP3, \XMM2 +.endif + movdqu 32(%arg3,%r11,1), \TMP3 + pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK +.if \operation == dec + movdqu \XMM3, 32(%arg2,%r11,1) # Write to plaintext buffer + movdqa \TMP3, \XMM3 +.endif + movdqu 48(%arg3,%r11,1), \TMP3 + pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK +.if \operation == dec + movdqu \XMM4, 48(%arg2,%r11,1) # Write to plaintext buffer + movdqa \TMP3, \XMM4 +.else + movdqu \XMM1, (%arg2,%r11,1) # Write to the ciphertext buffer + movdqu \XMM2, 16(%arg2,%r11,1) # Write to the ciphertext buffer + movdqu \XMM3, 32(%arg2,%r11,1) # Write to the ciphertext buffer + movdqu \XMM4, 48(%arg2,%r11,1) # Write to the ciphertext buffer +.endif + pshufb SHUF_MASK(%rip), \XMM1 # perform a 16 byte swap + pshufb SHUF_MASK(%rip), \XMM2 # perform a 16 byte swap + pshufb SHUF_MASK(%rip), \XMM3 # perform a 16 byte swap + pshufb SHUF_MASK(%rip), \XMM4 # perform a 16 byte sway + + pxor \TMP4, \TMP1 + pxor \XMM8, \XMM5 + pxor \TMP6, \TMP2 + pxor \TMP1, \TMP2 + pxor \XMM5, \TMP2 + movdqa \TMP2, \TMP3 + pslldq $8, \TMP3 # left shift TMP3 2 DWs + psrldq $8, \TMP2 # right shift TMP2 2 DWs + pxor \TMP3, \XMM5 + pxor \TMP2, \TMP1 # accumulate the results in TMP1:XMM5 + + # first phase of reduction + + movdqa \XMM5, \TMP2 + movdqa \XMM5, \TMP3 + movdqa \XMM5, \TMP4 +# move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently + pslld $31, \TMP2 # packed right shift << 31 + pslld $30, \TMP3 # packed right shift << 30 + pslld $25, \TMP4 # packed right shift << 25 + pxor \TMP3, \TMP2 # xor the shifted versions + pxor \TMP4, \TMP2 + movdqa \TMP2, \TMP5 + psrldq $4, \TMP5 # right shift T5 1 DW + pslldq $12, \TMP2 # left shift T2 3 DWs + pxor \TMP2, \XMM5 + + # second phase of reduction + + movdqa \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4 + movdqa \XMM5,\TMP3 + movdqa \XMM5,\TMP4 + psrld $1, \TMP2 # packed left shift >>1 + psrld $2, \TMP3 # packed left shift >>2 + psrld $7, \TMP4 # packed left shift >>7 + pxor \TMP3,\TMP2 # xor the shifted versions + pxor \TMP4,\TMP2 + pxor \TMP5, \TMP2 + pxor \TMP2, \XMM5 + pxor \TMP1, \XMM5 # result is in TMP1 + + pxor \XMM5, \XMM1 +.endm + +/* GHASH the last 4 ciphertext blocks. */ +.macro GHASH_LAST_4 TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 \ +TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst + + # Multiply TMP6 * HashKey (using Karatsuba) + + movdqa \XMM1, \TMP6 + pshufd $78, \XMM1, \TMP2 + pxor \XMM1, \TMP2 + movdqa HashKey_4(%rsp), \TMP5 + PCLMULQDQ 0x11, \TMP5, \TMP6 # TMP6 = a1*b1 + PCLMULQDQ 0x00, \TMP5, \XMM1 # XMM1 = a0*b0 + movdqa HashKey_4_k(%rsp), \TMP4 + PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + movdqa \XMM1, \XMMDst + movdqa \TMP2, \XMM1 # result in TMP6, XMMDst, XMM1 + + # Multiply TMP1 * HashKey (using Karatsuba) + + movdqa \XMM2, \TMP1 + pshufd $78, \XMM2, \TMP2 + pxor \XMM2, \TMP2 + movdqa HashKey_3(%rsp), \TMP5 + PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 + PCLMULQDQ 0x00, \TMP5, \XMM2 # XMM2 = a0*b0 + movdqa HashKey_3_k(%rsp), \TMP4 + PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + pxor \TMP1, \TMP6 + pxor \XMM2, \XMMDst + pxor \TMP2, \XMM1 +# results accumulated in TMP6, XMMDst, XMM1 + + # Multiply TMP1 * HashKey (using Karatsuba) + + movdqa \XMM3, \TMP1 + pshufd $78, \XMM3, \TMP2 + pxor \XMM3, \TMP2 + movdqa HashKey_2(%rsp), \TMP5 + PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 + PCLMULQDQ 0x00, \TMP5, \XMM3 # XMM3 = a0*b0 + movdqa HashKey_2_k(%rsp), \TMP4 + PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + pxor \TMP1, \TMP6 + pxor \XMM3, \XMMDst + pxor \TMP2, \XMM1 # results accumulated in TMP6, XMMDst, XMM1 + + # Multiply TMP1 * HashKey (using Karatsuba) + movdqa \XMM4, \TMP1 + pshufd $78, \XMM4, \TMP2 + pxor \XMM4, \TMP2 + movdqa HashKey(%rsp), \TMP5 + PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 + PCLMULQDQ 0x00, \TMP5, \XMM4 # XMM4 = a0*b0 + movdqa HashKey_k(%rsp), \TMP4 + PCLMULQDQ 0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + pxor \TMP1, \TMP6 + pxor \XMM4, \XMMDst + pxor \XMM1, \TMP2 + pxor \TMP6, \TMP2 + pxor \XMMDst, \TMP2 + # middle section of the temp results combined as in karatsuba algorithm + movdqa \TMP2, \TMP4 + pslldq $8, \TMP4 # left shift TMP4 2 DWs + psrldq $8, \TMP2 # right shift TMP2 2 DWs + pxor \TMP4, \XMMDst + pxor \TMP2, \TMP6 +# TMP6:XMMDst holds the result of the accumulated carry-less multiplications + # first phase of the reduction + movdqa \XMMDst, \TMP2 + movdqa \XMMDst, \TMP3 + movdqa \XMMDst, \TMP4 +# move XMMDst into TMP2, TMP3, TMP4 in order to perform 3 shifts independently + pslld $31, \TMP2 # packed right shifting << 31 + pslld $30, \TMP3 # packed right shifting << 30 + pslld $25, \TMP4 # packed right shifting << 25 + pxor \TMP3, \TMP2 # xor the shifted versions + pxor \TMP4, \TMP2 + movdqa \TMP2, \TMP7 + psrldq $4, \TMP7 # right shift TMP7 1 DW + pslldq $12, \TMP2 # left shift TMP2 3 DWs + pxor \TMP2, \XMMDst + + # second phase of the reduction + movdqa \XMMDst, \TMP2 + # make 3 copies of XMMDst for doing 3 shift operations + movdqa \XMMDst, \TMP3 + movdqa \XMMDst, \TMP4 + psrld $1, \TMP2 # packed left shift >> 1 + psrld $2, \TMP3 # packed left shift >> 2 + psrld $7, \TMP4 # packed left shift >> 7 + pxor \TMP3, \TMP2 # xor the shifted versions + pxor \TMP4, \TMP2 + pxor \TMP7, \TMP2 + pxor \TMP2, \XMMDst + pxor \TMP6, \XMMDst # reduced result is in XMMDst +.endm + +/* Encryption of a single block done*/ +.macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1 + + pxor (%arg1), \XMM0 + movaps 16(%arg1), \TMP1 + AESENC \TMP1, \XMM0 + movaps 32(%arg1), \TMP1 + AESENC \TMP1, \XMM0 + movaps 48(%arg1), \TMP1 + AESENC \TMP1, \XMM0 + movaps 64(%arg1), \TMP1 + AESENC \TMP1, \XMM0 + movaps 80(%arg1), \TMP1 + AESENC \TMP1, \XMM0 + movaps 96(%arg1), \TMP1 + AESENC \TMP1, \XMM0 + movaps 112(%arg1), \TMP1 + AESENC \TMP1, \XMM0 + movaps 128(%arg1), \TMP1 + AESENC \TMP1, \XMM0 + movaps 144(%arg1), \TMP1 + AESENC \TMP1, \XMM0 + movaps 160(%arg1), \TMP1 + AESENCLAST \TMP1, \XMM0 +.endm + + +/***************************************************************************** +* void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. +* u8 *out, // Plaintext output. Encrypt in-place is allowed. +* const u8 *in, // Ciphertext input +* u64 plaintext_len, // Length of data in bytes for decryption. +* u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association) +* // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) +* // concatenated with 0x00000001. 16-byte aligned pointer. +* u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary. +* const u8 *aad, // Additional Authentication Data (AAD) +* u64 aad_len, // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes +* u8 *auth_tag, // Authenticated Tag output. The driver will compare this to the +* // given authentication tag and only return the plaintext if they match. +* u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 +* // (most likely), 12 or 8. +* +* Assumptions: +* +* keys: +* keys are pre-expanded and aligned to 16 bytes. we are using the first +* set of 11 keys in the data structure void *aes_ctx +* +* iv: +* 0 1 2 3 +* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | Salt (From the SA) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | Initialization Vector | +* | (This is the sequence number from IPSec header) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 0x1 | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* +* +* +* AAD: +* AAD padded to 128 bits with 0 +* for example, assume AAD is a u32 vector +* +* if AAD is 8 bytes: +* AAD[3] = {A0, A1}; +* padded AAD in xmm register = {A1 A0 0 0} +* +* 0 1 2 3 +* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | SPI (A1) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 32-bit Sequence Number (A0) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 0x0 | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* +* AAD Format with 32-bit Sequence Number +* +* if AAD is 12 bytes: +* AAD[3] = {A0, A1, A2}; +* padded AAD in xmm register = {A2 A1 A0 0} +* +* 0 1 2 3 +* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | SPI (A2) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 64-bit Extended Sequence Number {A1,A0} | +* | | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 0x0 | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* +* AAD Format with 64-bit Extended Sequence Number +* +* aadLen: +* from the definition of the spec, aadLen can only be 8 or 12 bytes. +* The code supports 16 too but for other sizes, the code will fail. +* +* TLen: +* from the definition of the spec, TLen can only be 8, 12 or 16 bytes. +* For other sizes, the code will fail. +* +* poly = x^128 + x^127 + x^126 + x^121 + 1 +* +*****************************************************************************/ + +ENTRY(aesni_gcm_dec) + push %r12 + push %r13 + push %r14 + mov %rsp, %r14 +/* +* states of %xmm registers %xmm6:%xmm15 not saved +* all %xmm registers are clobbered +*/ + sub $VARIABLE_OFFSET, %rsp + and $~63, %rsp # align rsp to 64 bytes + mov %arg6, %r12 + movdqu (%r12), %xmm13 # %xmm13 = HashKey + pshufb SHUF_MASK(%rip), %xmm13 + +# Precompute HashKey<<1 (mod poly) from the hash key (required for GHASH) + + movdqa %xmm13, %xmm2 + psllq $1, %xmm13 + psrlq $63, %xmm2 + movdqa %xmm2, %xmm1 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + por %xmm2, %xmm13 + + # Reduction + + pshufd $0x24, %xmm1, %xmm2 + pcmpeqd TWOONE(%rip), %xmm2 + pand POLY(%rip), %xmm2 + pxor %xmm2, %xmm13 # %xmm13 holds the HashKey<<1 (mod poly) + + + # Decrypt first few blocks + + movdqa %xmm13, HashKey(%rsp) # store HashKey<<1 (mod poly) + mov %arg4, %r13 # save the number of bytes of plaintext/ciphertext + and $-16, %r13 # %r13 = %r13 - (%r13 mod 16) + mov %r13, %r12 + and $(3<<4), %r12 + jz _initial_num_blocks_is_0_decrypt + cmp $(2<<4), %r12 + jb _initial_num_blocks_is_1_decrypt + je _initial_num_blocks_is_2_decrypt +_initial_num_blocks_is_3_decrypt: + INITIAL_BLOCKS 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, dec + sub $48, %r13 + jmp _initial_blocks_decrypted +_initial_num_blocks_is_2_decrypt: + INITIAL_BLOCKS 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, dec + sub $32, %r13 + jmp _initial_blocks_decrypted +_initial_num_blocks_is_1_decrypt: + INITIAL_BLOCKS 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, dec + sub $16, %r13 + jmp _initial_blocks_decrypted +_initial_num_blocks_is_0_decrypt: + INITIAL_BLOCKS 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, dec +_initial_blocks_decrypted: + cmp $0, %r13 + je _zero_cipher_left_decrypt + sub $64, %r13 + je _four_cipher_left_decrypt +_decrypt_by_4: + GHASH_4_ENCRYPT_4_PARALLEL %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \ +%xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, dec + add $64, %r11 + sub $64, %r13 + jne _decrypt_by_4 +_four_cipher_left_decrypt: + GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \ +%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8 +_zero_cipher_left_decrypt: + mov %arg4, %r13 + and $15, %r13 # %r13 = arg4 (mod 16) + je _multiple_of_16_bytes_decrypt + + # Handle the last <16 byte block seperately + + paddd ONE(%rip), %xmm0 # increment CNT to get Yn + pshufb SHUF_MASK(%rip), %xmm0 + ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Yn) + sub $16, %r11 + add %r13, %r11 + movdqu (%arg3,%r11,1), %xmm1 # recieve the last <16 byte block + lea SHIFT_MASK+16(%rip), %r12 + sub %r13, %r12 +# adjust the shuffle mask pointer to be able to shift 16-%r13 bytes +# (%r13 is the number of bytes in plaintext mod 16) + movdqu (%r12), %xmm2 # get the appropriate shuffle mask + pshufb %xmm2, %xmm1 # right shift 16-%r13 butes + movdqa %xmm1, %xmm2 + pxor %xmm1, %xmm0 # Ciphertext XOR E(K, Yn) + movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 + # get the appropriate mask to mask out top 16-%r13 bytes of %xmm0 + pand %xmm1, %xmm0 # mask out top 16-%r13 bytes of %xmm0 + pand %xmm1, %xmm2 + pshufb SHUF_MASK(%rip),%xmm2 + pxor %xmm2, %xmm8 + GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 + # GHASH computation for the last <16 byte block + sub %r13, %r11 + add $16, %r11 + + # output %r13 bytes + movq %xmm0, %rax + cmp $8, %r13 + jle _less_than_8_bytes_left_decrypt + mov %rax, (%arg2 , %r11, 1) + add $8, %r11 + psrldq $8, %xmm0 + movq %xmm0, %rax + sub $8, %r13 +_less_than_8_bytes_left_decrypt: + mov %al, (%arg2, %r11, 1) + add $1, %r11 + shr $8, %rax + sub $1, %r13 + jne _less_than_8_bytes_left_decrypt +_multiple_of_16_bytes_decrypt: + mov arg8, %r12 # %r13 = aadLen (number of bytes) + shl $3, %r12 # convert into number of bits + movd %r12d, %xmm15 # len(A) in %xmm15 + shl $3, %arg4 # len(C) in bits (*128) + movq %arg4, %xmm1 + pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 + pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) + pxor %xmm15, %xmm8 + GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 + # final GHASH computation + pshufb SHUF_MASK(%rip), %xmm8 + mov %arg5, %rax # %rax = *Y0 + movdqu (%rax), %xmm0 # %xmm0 = Y0 + ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0) + pxor %xmm8, %xmm0 +_return_T_decrypt: + mov arg9, %r10 # %r10 = authTag + mov arg10, %r11 # %r11 = auth_tag_len + cmp $16, %r11 + je _T_16_decrypt + cmp $12, %r11 + je _T_12_decrypt +_T_8_decrypt: + movq %xmm0, %rax + mov %rax, (%r10) + jmp _return_T_done_decrypt +_T_12_decrypt: + movq %xmm0, %rax + mov %rax, (%r10) + psrldq $8, %xmm0 + movd %xmm0, %eax + mov %eax, 8(%r10) + jmp _return_T_done_decrypt +_T_16_decrypt: + movdqu %xmm0, (%r10) +_return_T_done_decrypt: + mov %r14, %rsp + pop %r14 + pop %r13 + pop %r12 + ret + + +/***************************************************************************** +* void aesni_gcm_enc(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary. +* u8 *out, // Ciphertext output. Encrypt in-place is allowed. +* const u8 *in, // Plaintext input +* u64 plaintext_len, // Length of data in bytes for encryption. +* u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association) +* // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) +* // concatenated with 0x00000001. 16-byte aligned pointer. +* u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary. +* const u8 *aad, // Additional Authentication Data (AAD) +* u64 aad_len, // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes +* u8 *auth_tag, // Authenticated Tag output. +* u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 (most likely), +* // 12 or 8. +* +* Assumptions: +* +* keys: +* keys are pre-expanded and aligned to 16 bytes. we are using the +* first set of 11 keys in the data structure void *aes_ctx +* +* +* iv: +* 0 1 2 3 +* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | Salt (From the SA) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | Initialization Vector | +* | (This is the sequence number from IPSec header) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 0x1 | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* +* +* +* AAD: +* AAD padded to 128 bits with 0 +* for example, assume AAD is a u32 vector +* +* if AAD is 8 bytes: +* AAD[3] = {A0, A1}; +* padded AAD in xmm register = {A1 A0 0 0} +* +* 0 1 2 3 +* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | SPI (A1) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 32-bit Sequence Number (A0) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 0x0 | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* +* AAD Format with 32-bit Sequence Number +* +* if AAD is 12 bytes: +* AAD[3] = {A0, A1, A2}; +* padded AAD in xmm register = {A2 A1 A0 0} +* +* 0 1 2 3 +* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | SPI (A2) | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 64-bit Extended Sequence Number {A1,A0} | +* | | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* | 0x0 | +* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +* +* AAD Format with 64-bit Extended Sequence Number +* +* aadLen: +* from the definition of the spec, aadLen can only be 8 or 12 bytes. +* The code supports 16 too but for other sizes, the code will fail. +* +* TLen: +* from the definition of the spec, TLen can only be 8, 12 or 16 bytes. +* For other sizes, the code will fail. +* +* poly = x^128 + x^127 + x^126 + x^121 + 1 +***************************************************************************/ +ENTRY(aesni_gcm_enc) + push %r12 + push %r13 + push %r14 + mov %rsp, %r14 +# +# states of %xmm registers %xmm6:%xmm15 not saved +# all %xmm registers are clobbered +# + sub $VARIABLE_OFFSET, %rsp + and $~63, %rsp + mov %arg6, %r12 + movdqu (%r12), %xmm13 + pshufb SHUF_MASK(%rip), %xmm13 + +# precompute HashKey<<1 mod poly from the HashKey (required for GHASH) + + movdqa %xmm13, %xmm2 + psllq $1, %xmm13 + psrlq $63, %xmm2 + movdqa %xmm2, %xmm1 + pslldq $8, %xmm2 + psrldq $8, %xmm1 + por %xmm2, %xmm13 + + # reduce HashKey<<1 + + pshufd $0x24, %xmm1, %xmm2 + pcmpeqd TWOONE(%rip), %xmm2 + pand POLY(%rip), %xmm2 + pxor %xmm2, %xmm13 + movdqa %xmm13, HashKey(%rsp) + mov %arg4, %r13 # %xmm13 holds HashKey<<1 (mod poly) + and $-16, %r13 + mov %r13, %r12 + + # Encrypt first few blocks + + and $(3<<4), %r12 + jz _initial_num_blocks_is_0_encrypt + cmp $(2<<4), %r12 + jb _initial_num_blocks_is_1_encrypt + je _initial_num_blocks_is_2_encrypt +_initial_num_blocks_is_3_encrypt: + INITIAL_BLOCKS 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, enc + sub $48, %r13 + jmp _initial_blocks_encrypted +_initial_num_blocks_is_2_encrypt: + INITIAL_BLOCKS 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, enc + sub $32, %r13 + jmp _initial_blocks_encrypted +_initial_num_blocks_is_1_encrypt: + INITIAL_BLOCKS 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, enc + sub $16, %r13 + jmp _initial_blocks_encrypted +_initial_num_blocks_is_0_encrypt: + INITIAL_BLOCKS 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ +%xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, enc +_initial_blocks_encrypted: + + # Main loop - Encrypt remaining blocks + + cmp $0, %r13 + je _zero_cipher_left_encrypt + sub $64, %r13 + je _four_cipher_left_encrypt +_encrypt_by_4_encrypt: + GHASH_4_ENCRYPT_4_PARALLEL %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \ +%xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, enc + add $64, %r11 + sub $64, %r13 + jne _encrypt_by_4_encrypt +_four_cipher_left_encrypt: + GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \ +%xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8 +_zero_cipher_left_encrypt: + mov %arg4, %r13 + and $15, %r13 # %r13 = arg4 (mod 16) + je _multiple_of_16_bytes_encrypt + + # Handle the last <16 Byte block seperately + paddd ONE(%rip), %xmm0 # INCR CNT to get Yn + pshufb SHUF_MASK(%rip), %xmm0 + ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn) + sub $16, %r11 + add %r13, %r11 + movdqu (%arg3,%r11,1), %xmm1 # receive the last <16 byte blocks + lea SHIFT_MASK+16(%rip), %r12 + sub %r13, %r12 + # adjust the shuffle mask pointer to be able to shift 16-r13 bytes + # (%r13 is the number of bytes in plaintext mod 16) + movdqu (%r12), %xmm2 # get the appropriate shuffle mask + pshufb %xmm2, %xmm1 # shift right 16-r13 byte + pxor %xmm1, %xmm0 # Plaintext XOR Encrypt(K, Yn) + movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 + # get the appropriate mask to mask out top 16-r13 bytes of xmm0 + pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0 + + pshufb SHUF_MASK(%rip),%xmm0 + pxor %xmm0, %xmm8 + GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 + # GHASH computation for the last <16 byte block + sub %r13, %r11 + add $16, %r11 + pshufb SHUF_MASK(%rip), %xmm0 + # shuffle xmm0 back to output as ciphertext + + # Output %r13 bytes + movq %xmm0, %rax + cmp $8, %r13 + jle _less_than_8_bytes_left_encrypt + mov %rax, (%arg2 , %r11, 1) + add $8, %r11 + psrldq $8, %xmm0 + movq %xmm0, %rax + sub $8, %r13 +_less_than_8_bytes_left_encrypt: + mov %al, (%arg2, %r11, 1) + add $1, %r11 + shr $8, %rax + sub $1, %r13 + jne _less_than_8_bytes_left_encrypt +_multiple_of_16_bytes_encrypt: + mov arg8, %r12 # %r12 = addLen (number of bytes) + shl $3, %r12 + movd %r12d, %xmm15 # len(A) in %xmm15 + shl $3, %arg4 # len(C) in bits (*128) + movq %arg4, %xmm1 + pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 + pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) + pxor %xmm15, %xmm8 + GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 + # final GHASH computation + + pshufb SHUF_MASK(%rip), %xmm8 # perform a 16 byte swap + mov %arg5, %rax # %rax = *Y0 + movdqu (%rax), %xmm0 # %xmm0 = Y0 + ENCRYPT_SINGLE_BLOCK %xmm0, %xmm15 # Encrypt(K, Y0) + pxor %xmm8, %xmm0 +_return_T_encrypt: + mov arg9, %r10 # %r10 = authTag + mov arg10, %r11 # %r11 = auth_tag_len + cmp $16, %r11 + je _T_16_encrypt + cmp $12, %r11 + je _T_12_encrypt +_T_8_encrypt: + movq %xmm0, %rax + mov %rax, (%r10) + jmp _return_T_done_encrypt +_T_12_encrypt: + movq %xmm0, %rax + mov %rax, (%r10) + psrldq $8, %xmm0 + movd %xmm0, %eax + mov %eax, 8(%r10) + jmp _return_T_done_encrypt +_T_16_encrypt: + movdqu %xmm0, (%r10) +_return_T_done_encrypt: + mov %r14, %rsp + pop %r14 + pop %r13 + pop %r12 + ret + + + _key_expansion_128: _key_expansion_256a: pshufd $0b11111111, %xmm1, %xmm1 diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 2cb3dcc4490ae0..02d349d644238a 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -5,6 +5,14 @@ * Copyright (C) 2008, Intel Corp. * Author: Huang Ying * + * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD + * interface for 64-bit kernels. + * Authors: Adrian Hoban + * Gabriele Paoloni + * Tadeusz Struk (tadeusz.struk@intel.com) + * Aidan O'Mahony (aidan.o.mahony@intel.com) + * Copyright (c) 2010, Intel Corporation. + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -21,6 +29,10 @@ #include #include #include +#include +#include +#include +#include #if defined(CONFIG_CRYPTO_CTR) || defined(CONFIG_CRYPTO_CTR_MODULE) #define HAS_CTR @@ -42,8 +54,31 @@ struct async_aes_ctx { struct cryptd_ablkcipher *cryptd_tfm; }; -#define AESNI_ALIGN 16 +/* This data is stored at the end of the crypto_tfm struct. + * It's a type of per "session" data storage location. + * This needs to be 16 byte aligned. + */ +struct aesni_rfc4106_gcm_ctx { + u8 hash_subkey[16]; + struct crypto_aes_ctx aes_key_expanded; + u8 nonce[4]; + struct cryptd_aead *cryptd_tfm; +}; + +struct aesni_gcm_set_hash_subkey_result { + int err; + struct completion completion; +}; + +struct aesni_hash_subkey_req_data { + u8 iv[16]; + struct aesni_gcm_set_hash_subkey_result result; + struct scatterlist sg; +}; + +#define AESNI_ALIGN (16) #define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1)) +#define RFC4106_HASH_SUBKEY_SIZE 16 asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key, unsigned int key_len); @@ -62,6 +97,57 @@ asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); +/* asmlinkage void aesni_gcm_enc() + * void *ctx, AES Key schedule. Starts on a 16 byte boundary. + * u8 *out, Ciphertext output. Encrypt in-place is allowed. + * const u8 *in, Plaintext input + * unsigned long plaintext_len, Length of data in bytes for encryption. + * u8 *iv, Pre-counter block j0: 4 byte salt (from Security Association) + * concatenated with 8 byte Initialisation Vector (from IPSec ESP + * Payload) concatenated with 0x00000001. 16-byte aligned pointer. + * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. + * const u8 *aad, Additional Authentication Data (AAD) + * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this + * is going to be 8 or 12 bytes + * u8 *auth_tag, Authenticated Tag output. + * unsigned long auth_tag_len), Authenticated Tag Length in bytes. + * Valid values are 16 (most likely), 12 or 8. + */ +asmlinkage void aesni_gcm_enc(void *ctx, u8 *out, + const u8 *in, unsigned long plaintext_len, u8 *iv, + u8 *hash_subkey, const u8 *aad, unsigned long aad_len, + u8 *auth_tag, unsigned long auth_tag_len); + +/* asmlinkage void aesni_gcm_dec() + * void *ctx, AES Key schedule. Starts on a 16 byte boundary. + * u8 *out, Plaintext output. Decrypt in-place is allowed. + * const u8 *in, Ciphertext input + * unsigned long ciphertext_len, Length of data in bytes for decryption. + * u8 *iv, Pre-counter block j0: 4 byte salt (from Security Association) + * concatenated with 8 byte Initialisation Vector (from IPSec ESP + * Payload) concatenated with 0x00000001. 16-byte aligned pointer. + * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. + * const u8 *aad, Additional Authentication Data (AAD) + * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this is going + * to be 8 or 12 bytes + * u8 *auth_tag, Authenticated Tag output. + * unsigned long auth_tag_len) Authenticated Tag Length in bytes. + * Valid values are 16 (most likely), 12 or 8. + */ +asmlinkage void aesni_gcm_dec(void *ctx, u8 *out, + const u8 *in, unsigned long ciphertext_len, u8 *iv, + u8 *hash_subkey, const u8 *aad, unsigned long aad_len, + u8 *auth_tag, unsigned long auth_tag_len); + +static inline struct +aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm) +{ + return + (struct aesni_rfc4106_gcm_ctx *) + PTR_ALIGN((u8 *) + crypto_tfm_ctx(crypto_aead_tfm(tfm)), AESNI_ALIGN); +} + static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) { unsigned long addr = (unsigned long)raw_ctx; @@ -730,6 +816,422 @@ static struct crypto_alg ablk_xts_alg = { }; #endif +static int rfc4106_init(struct crypto_tfm *tfm) +{ + struct cryptd_aead *cryptd_tfm; + struct aesni_rfc4106_gcm_ctx *ctx = (struct aesni_rfc4106_gcm_ctx *) + PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN); + cryptd_tfm = cryptd_alloc_aead("__driver-gcm-aes-aesni", 0, 0); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + ctx->cryptd_tfm = cryptd_tfm; + tfm->crt_aead.reqsize = sizeof(struct aead_request) + + crypto_aead_reqsize(&cryptd_tfm->base); + return 0; +} + +static void rfc4106_exit(struct crypto_tfm *tfm) +{ + struct aesni_rfc4106_gcm_ctx *ctx = + (struct aesni_rfc4106_gcm_ctx *) + PTR_ALIGN((u8 *)crypto_tfm_ctx(tfm), AESNI_ALIGN); + if (!IS_ERR(ctx->cryptd_tfm)) + cryptd_free_aead(ctx->cryptd_tfm); + return; +} + +static void +rfc4106_set_hash_subkey_done(struct crypto_async_request *req, int err) +{ + struct aesni_gcm_set_hash_subkey_result *result = req->data; + + if (err == -EINPROGRESS) + return; + result->err = err; + complete(&result->completion); +} + +static int +rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len) +{ + struct crypto_ablkcipher *ctr_tfm; + struct ablkcipher_request *req; + int ret = -EINVAL; + struct aesni_hash_subkey_req_data *req_data; + + ctr_tfm = crypto_alloc_ablkcipher("ctr(aes)", 0, 0); + if (IS_ERR(ctr_tfm)) + return PTR_ERR(ctr_tfm); + + crypto_ablkcipher_clear_flags(ctr_tfm, ~0); + + ret = crypto_ablkcipher_setkey(ctr_tfm, key, key_len); + if (ret) { + crypto_free_ablkcipher(ctr_tfm); + return ret; + } + + req = ablkcipher_request_alloc(ctr_tfm, GFP_KERNEL); + if (!req) { + crypto_free_ablkcipher(ctr_tfm); + return -EINVAL; + } + + req_data = kmalloc(sizeof(*req_data), GFP_KERNEL); + if (!req_data) { + crypto_free_ablkcipher(ctr_tfm); + return -ENOMEM; + } + memset(req_data->iv, 0, sizeof(req_data->iv)); + + /* Clear the data in the hash sub key container to zero.*/ + /* We want to cipher all zeros to create the hash sub key. */ + memset(hash_subkey, 0, RFC4106_HASH_SUBKEY_SIZE); + + init_completion(&req_data->result.completion); + sg_init_one(&req_data->sg, hash_subkey, RFC4106_HASH_SUBKEY_SIZE); + ablkcipher_request_set_tfm(req, ctr_tfm); + ablkcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP | + CRYPTO_TFM_REQ_MAY_BACKLOG, + rfc4106_set_hash_subkey_done, + &req_data->result); + + ablkcipher_request_set_crypt(req, &req_data->sg, + &req_data->sg, RFC4106_HASH_SUBKEY_SIZE, req_data->iv); + + ret = crypto_ablkcipher_encrypt(req); + if (ret == -EINPROGRESS || ret == -EBUSY) { + ret = wait_for_completion_interruptible + (&req_data->result.completion); + if (!ret) + ret = req_data->result.err; + } + ablkcipher_request_free(req); + kfree(req_data); + crypto_free_ablkcipher(ctr_tfm); + return ret; +} + +static int rfc4106_set_key(struct crypto_aead *parent, const u8 *key, + unsigned int key_len) +{ + int ret = 0; + struct crypto_tfm *tfm = crypto_aead_tfm(parent); + struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent); + u8 *new_key_mem = NULL; + + if (key_len < 4) { + crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + /*Account for 4 byte nonce at the end.*/ + key_len -= 4; + if (key_len != AES_KEYSIZE_128) { + crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + memcpy(ctx->nonce, key + key_len, sizeof(ctx->nonce)); + /*This must be on a 16 byte boundary!*/ + if ((unsigned long)(&(ctx->aes_key_expanded.key_enc[0])) % AESNI_ALIGN) + return -EINVAL; + + if ((unsigned long)key % AESNI_ALIGN) { + /*key is not aligned: use an auxuliar aligned pointer*/ + new_key_mem = kmalloc(key_len+AESNI_ALIGN, GFP_KERNEL); + if (!new_key_mem) + return -ENOMEM; + + new_key_mem = PTR_ALIGN(new_key_mem, AESNI_ALIGN); + memcpy(new_key_mem, key, key_len); + key = new_key_mem; + } + + if (!irq_fpu_usable()) + ret = crypto_aes_expand_key(&(ctx->aes_key_expanded), + key, key_len); + else { + kernel_fpu_begin(); + ret = aesni_set_key(&(ctx->aes_key_expanded), key, key_len); + kernel_fpu_end(); + } + /*This must be on a 16 byte boundary!*/ + if ((unsigned long)(&(ctx->hash_subkey[0])) % AESNI_ALIGN) { + ret = -EINVAL; + goto exit; + } + ret = rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len); +exit: + kfree(new_key_mem); + return ret; +} + +/* This is the Integrity Check Value (aka the authentication tag length and can + * be 8, 12 or 16 bytes long. */ +static int rfc4106_set_authsize(struct crypto_aead *parent, + unsigned int authsize) +{ + struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(parent); + struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); + + switch (authsize) { + case 8: + case 12: + case 16: + break; + default: + return -EINVAL; + } + crypto_aead_crt(parent)->authsize = authsize; + crypto_aead_crt(cryptd_child)->authsize = authsize; + return 0; +} + +static int rfc4106_encrypt(struct aead_request *req) +{ + int ret; + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); + struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); + + if (!irq_fpu_usable()) { + struct aead_request *cryptd_req = + (struct aead_request *) aead_request_ctx(req); + memcpy(cryptd_req, req, sizeof(*req)); + aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); + return crypto_aead_encrypt(cryptd_req); + } else { + kernel_fpu_begin(); + ret = cryptd_child->base.crt_aead.encrypt(req); + kernel_fpu_end(); + return ret; + } +} + +static int rfc4106_decrypt(struct aead_request *req) +{ + int ret; + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); + struct crypto_aead *cryptd_child = cryptd_aead_child(ctx->cryptd_tfm); + + if (!irq_fpu_usable()) { + struct aead_request *cryptd_req = + (struct aead_request *) aead_request_ctx(req); + memcpy(cryptd_req, req, sizeof(*req)); + aead_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base); + return crypto_aead_decrypt(cryptd_req); + } else { + kernel_fpu_begin(); + ret = cryptd_child->base.crt_aead.decrypt(req); + kernel_fpu_end(); + return ret; + } +} + +static struct crypto_alg rfc4106_alg = { + .cra_name = "rfc4106(gcm(aes))", + .cra_driver_name = "rfc4106-gcm-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_TYPE_AEAD | CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + AESNI_ALIGN, + .cra_alignmask = 0, + .cra_type = &crypto_nivaead_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(rfc4106_alg.cra_list), + .cra_init = rfc4106_init, + .cra_exit = rfc4106_exit, + .cra_u = { + .aead = { + .setkey = rfc4106_set_key, + .setauthsize = rfc4106_set_authsize, + .encrypt = rfc4106_encrypt, + .decrypt = rfc4106_decrypt, + .geniv = "seqiv", + .ivsize = 8, + .maxauthsize = 16, + }, + }, +}; + +static int __driver_rfc4106_encrypt(struct aead_request *req) +{ + u8 one_entry_in_sg = 0; + u8 *src, *dst, *assoc; + __be32 counter = cpu_to_be32(1); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); + void *aes_ctx = &(ctx->aes_key_expanded); + unsigned long auth_tag_len = crypto_aead_authsize(tfm); + u8 iv_tab[16+AESNI_ALIGN]; + u8* iv = (u8 *) PTR_ALIGN((u8 *)iv_tab, AESNI_ALIGN); + struct scatter_walk src_sg_walk; + struct scatter_walk assoc_sg_walk; + struct scatter_walk dst_sg_walk; + unsigned int i; + + /* Assuming we are supporting rfc4106 64-bit extended */ + /* sequence numbers We need to have the AAD length equal */ + /* to 8 or 12 bytes */ + if (unlikely(req->assoclen != 8 && req->assoclen != 12)) + return -EINVAL; + /* IV below built */ + for (i = 0; i < 4; i++) + *(iv+i) = ctx->nonce[i]; + for (i = 0; i < 8; i++) + *(iv+4+i) = req->iv[i]; + *((__be32 *)(iv+12)) = counter; + + if ((sg_is_last(req->src)) && (sg_is_last(req->assoc))) { + one_entry_in_sg = 1; + scatterwalk_start(&src_sg_walk, req->src); + scatterwalk_start(&assoc_sg_walk, req->assoc); + src = scatterwalk_map(&src_sg_walk, 0); + assoc = scatterwalk_map(&assoc_sg_walk, 0); + dst = src; + if (unlikely(req->src != req->dst)) { + scatterwalk_start(&dst_sg_walk, req->dst); + dst = scatterwalk_map(&dst_sg_walk, 0); + } + + } else { + /* Allocate memory for src, dst, assoc */ + src = kmalloc(req->cryptlen + auth_tag_len + req->assoclen, + GFP_ATOMIC); + if (unlikely(!src)) + return -ENOMEM; + assoc = (src + req->cryptlen + auth_tag_len); + scatterwalk_map_and_copy(src, req->src, 0, req->cryptlen, 0); + scatterwalk_map_and_copy(assoc, req->assoc, 0, + req->assoclen, 0); + dst = src; + } + + aesni_gcm_enc(aes_ctx, dst, src, (unsigned long)req->cryptlen, iv, + ctx->hash_subkey, assoc, (unsigned long)req->assoclen, dst + + ((unsigned long)req->cryptlen), auth_tag_len); + + /* The authTag (aka the Integrity Check Value) needs to be written + * back to the packet. */ + if (one_entry_in_sg) { + if (unlikely(req->src != req->dst)) { + scatterwalk_unmap(dst, 0); + scatterwalk_done(&dst_sg_walk, 0, 0); + } + scatterwalk_unmap(src, 0); + scatterwalk_unmap(assoc, 0); + scatterwalk_done(&src_sg_walk, 0, 0); + scatterwalk_done(&assoc_sg_walk, 0, 0); + } else { + scatterwalk_map_and_copy(dst, req->dst, 0, + req->cryptlen + auth_tag_len, 1); + kfree(src); + } + return 0; +} + +static int __driver_rfc4106_decrypt(struct aead_request *req) +{ + u8 one_entry_in_sg = 0; + u8 *src, *dst, *assoc; + unsigned long tempCipherLen = 0; + __be32 counter = cpu_to_be32(1); + int retval = 0; + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); + void *aes_ctx = &(ctx->aes_key_expanded); + unsigned long auth_tag_len = crypto_aead_authsize(tfm); + u8 iv_and_authTag[32+AESNI_ALIGN]; + u8 *iv = (u8 *) PTR_ALIGN((u8 *)iv_and_authTag, AESNI_ALIGN); + u8 *authTag = iv + 16; + struct scatter_walk src_sg_walk; + struct scatter_walk assoc_sg_walk; + struct scatter_walk dst_sg_walk; + unsigned int i; + + if (unlikely((req->cryptlen < auth_tag_len) || + (req->assoclen != 8 && req->assoclen != 12))) + return -EINVAL; + /* Assuming we are supporting rfc4106 64-bit extended */ + /* sequence numbers We need to have the AAD length */ + /* equal to 8 or 12 bytes */ + + tempCipherLen = (unsigned long)(req->cryptlen - auth_tag_len); + /* IV below built */ + for (i = 0; i < 4; i++) + *(iv+i) = ctx->nonce[i]; + for (i = 0; i < 8; i++) + *(iv+4+i) = req->iv[i]; + *((__be32 *)(iv+12)) = counter; + + if ((sg_is_last(req->src)) && (sg_is_last(req->assoc))) { + one_entry_in_sg = 1; + scatterwalk_start(&src_sg_walk, req->src); + scatterwalk_start(&assoc_sg_walk, req->assoc); + src = scatterwalk_map(&src_sg_walk, 0); + assoc = scatterwalk_map(&assoc_sg_walk, 0); + dst = src; + if (unlikely(req->src != req->dst)) { + scatterwalk_start(&dst_sg_walk, req->dst); + dst = scatterwalk_map(&dst_sg_walk, 0); + } + + } else { + /* Allocate memory for src, dst, assoc */ + src = kmalloc(req->cryptlen + req->assoclen, GFP_ATOMIC); + if (!src) + return -ENOMEM; + assoc = (src + req->cryptlen + auth_tag_len); + scatterwalk_map_and_copy(src, req->src, 0, req->cryptlen, 0); + scatterwalk_map_and_copy(assoc, req->assoc, 0, + req->assoclen, 0); + dst = src; + } + + aesni_gcm_dec(aes_ctx, dst, src, tempCipherLen, iv, + ctx->hash_subkey, assoc, (unsigned long)req->assoclen, + authTag, auth_tag_len); + + /* Compare generated tag with passed in tag. */ + retval = memcmp(src + tempCipherLen, authTag, auth_tag_len) ? + -EBADMSG : 0; + + if (one_entry_in_sg) { + if (unlikely(req->src != req->dst)) { + scatterwalk_unmap(dst, 0); + scatterwalk_done(&dst_sg_walk, 0, 0); + } + scatterwalk_unmap(src, 0); + scatterwalk_unmap(assoc, 0); + scatterwalk_done(&src_sg_walk, 0, 0); + scatterwalk_done(&assoc_sg_walk, 0, 0); + } else { + scatterwalk_map_and_copy(dst, req->dst, 0, req->cryptlen, 1); + kfree(src); + } + return retval; +} + +static struct crypto_alg __rfc4106_alg = { + .cra_name = "__gcm-aes-aesni", + .cra_driver_name = "__driver-gcm-aes-aesni", + .cra_priority = 0, + .cra_flags = CRYPTO_ALG_TYPE_AEAD, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct aesni_rfc4106_gcm_ctx) + AESNI_ALIGN, + .cra_alignmask = 0, + .cra_type = &crypto_aead_type, + .cra_module = THIS_MODULE, + .cra_list = LIST_HEAD_INIT(__rfc4106_alg.cra_list), + .cra_u = { + .aead = { + .encrypt = __driver_rfc4106_encrypt, + .decrypt = __driver_rfc4106_decrypt, + }, + }, +}; + static int __init aesni_init(void) { int err; @@ -738,6 +1240,7 @@ static int __init aesni_init(void) printk(KERN_INFO "Intel AES-NI instructions are not detected.\n"); return -ENODEV; } + if ((err = crypto_register_alg(&aesni_alg))) goto aes_err; if ((err = crypto_register_alg(&__aesni_alg))) @@ -770,10 +1273,19 @@ static int __init aesni_init(void) if ((err = crypto_register_alg(&ablk_xts_alg))) goto ablk_xts_err; #endif - + err = crypto_register_alg(&__rfc4106_alg); + if (err) + goto __aead_gcm_err; + err = crypto_register_alg(&rfc4106_alg); + if (err) + goto aead_gcm_err; return err; +aead_gcm_err: + crypto_unregister_alg(&__rfc4106_alg); +__aead_gcm_err: #ifdef HAS_XTS + crypto_unregister_alg(&ablk_xts_alg); ablk_xts_err: #endif #ifdef HAS_PCBC @@ -809,6 +1321,8 @@ static int __init aesni_init(void) static void __exit aesni_exit(void) { + crypto_unregister_alg(&__rfc4106_alg); + crypto_unregister_alg(&rfc4106_alg); #ifdef HAS_XTS crypto_unregister_alg(&ablk_xts_alg); #endif From 69435b94d01f49197b287eb5902fb8c5cee8fe1d Mon Sep 17 00:00:00 2001 From: Adrian Hoban Date: Thu, 4 Nov 2010 15:02:04 -0400 Subject: [PATCH 04/46] crypto: rfc4106 - Extending the RC4106 AES-GCM test vectors Updated RFC4106 AES-GCM testing. Some test vectors were taken from http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/proposedmodes/ gcm/gcm-test-vectors.tar.gz Signed-off-by: Adrian Hoban Signed-off-by: Tadeusz Struk Signed-off-by: Gabriele Paoloni Signed-off-by: Aidan O'Mahony Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 11 ++ crypto/testmgr.c | 24 ++++ crypto/testmgr.h | 361 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 396 insertions(+) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 3ca68f9fc14d84..9aac5e58be9406 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -8,6 +8,13 @@ * Copyright (c) 2002 Jean-Francois Dive * Copyright (c) 2007 Nokia Siemens Networks * + * Updated RFC4106 AES-GCM testing. + * Authors: Aidan O'Mahony (aidan.o.mahony@intel.com) + * Adrian Hoban + * Gabriele Paoloni + * Tadeusz Struk (tadeusz.struk@intel.com) + * Copyright (c) 2010, Intel Corporation. + * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) @@ -980,6 +987,10 @@ static int do_test(int m) ret += tcrypt_test("ansi_cprng"); break; + case 151: + ret += tcrypt_test("rfc4106(gcm(aes))"); + break; + case 200: test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0, speed_template_16_24_32); diff --git a/crypto/testmgr.c b/crypto/testmgr.c index fa8c8f78c8d4c1..27ea9fe9476fb9 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -6,6 +6,13 @@ * Copyright (c) 2007 Nokia Siemens Networks * Copyright (c) 2008 Herbert Xu * + * Updated RFC4106 AES-GCM testing. + * Authors: Aidan O'Mahony (aidan.o.mahony@intel.com) + * Adrian Hoban + * Gabriele Paoloni + * Tadeusz Struk (tadeusz.struk@intel.com) + * Copyright (c) 2010, Intel Corporation. + * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) @@ -2242,6 +2249,23 @@ static const struct alg_test_desc alg_test_descs[] = { } } }, { + .alg = "rfc4106(gcm(aes))", + .test = alg_test_aead, + .suite = { + .aead = { + .enc = { + .vecs = aes_gcm_rfc4106_enc_tv_template, + .count = AES_GCM_4106_ENC_TEST_VECTORS + }, + .dec = { + .vecs = aes_gcm_rfc4106_dec_tv_template, + .count = AES_GCM_4106_DEC_TEST_VECTORS + } + } + } + }, { + + .alg = "rfc4309(ccm(aes))", .test = alg_test_aead, .fips_allowed = 1, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 74e35377fd30d3..834af7f2adeec5 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -6,6 +6,15 @@ * Copyright (c) 2007 Nokia Siemens Networks * Copyright (c) 2008 Herbert Xu * + * Updated RFC4106 AES-GCM testing. Some test vectors were taken from + * http://csrc.nist.gov/groups/ST/toolkit/BCM/documents/proposedmodes/ + * gcm/gcm-test-vectors.tar.gz + * Authors: Aidan O'Mahony (aidan.o.mahony@intel.com) + * Adrian Hoban + * Gabriele Paoloni + * Tadeusz Struk (tadeusz.struk@intel.com) + * Copyright (c) 2010, Intel Corporation. + * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) @@ -2947,6 +2956,8 @@ static struct cipher_testvec cast6_dec_tv_template[] = { #define AES_CTR_3686_DEC_TEST_VECTORS 6 #define AES_GCM_ENC_TEST_VECTORS 9 #define AES_GCM_DEC_TEST_VECTORS 8 +#define AES_GCM_4106_ENC_TEST_VECTORS 7 +#define AES_GCM_4106_DEC_TEST_VECTORS 7 #define AES_CCM_ENC_TEST_VECTORS 7 #define AES_CCM_DEC_TEST_VECTORS 7 #define AES_CCM_4309_ENC_TEST_VECTORS 7 @@ -5829,6 +5840,356 @@ static struct aead_testvec aes_gcm_dec_tv_template[] = { } }; +static struct aead_testvec aes_gcm_rfc4106_enc_tv_template[] = { + { /* Generated using Crypto++ */ + .key = zeroed_string, + .klen = 20, + .iv = zeroed_string, + .input = zeroed_string, + .ilen = 16, + .assoc = zeroed_string, + .alen = 8, + .result = "\x03\x88\xDA\xCE\x60\xB6\xA3\x92" + "\xF3\x28\xC2\xB9\x71\xB2\xFE\x78" + "\x97\xFE\x4C\x23\x37\x42\x01\xE0" + "\x81\x9F\x8D\xC5\xD7\x41\xA0\x1B", + .rlen = 32, + },{ + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08" + "\x00\x00\x00\x00", + .klen = 20, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x01" + "\x00\x00\x00\x00", + .input = zeroed_string, + .ilen = 16, + .assoc = zeroed_string, + .alen = 8, + .result = "\xC0\x0D\x8B\x42\x0F\x8F\x34\x18" + "\x88\xB1\xC5\xBC\xC5\xB6\xD6\x28" + "\x6A\x9D\xDF\x11\x5E\xFE\x5E\x9D" + "\x2F\x70\x44\x92\xF7\xF2\xE3\xEF", + .rlen = 32, + + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08" + "\x00\x00\x00\x00", + .klen = 20, + .iv = zeroed_string, + .input = "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01", + .ilen = 16, + .assoc = zeroed_string, + .alen = 8, + .result = "\x4B\xB1\xB5\xE3\x25\x71\x70\xDE" + "\x7F\xC9\x9C\xA5\x14\x19\xF2\xAC" + "\x0B\x8F\x88\x69\x17\xE6\xB4\x3C" + "\xB1\x68\xFD\x14\x52\x64\x61\xB2", + .rlen = 32, + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08" + "\x00\x00\x00\x00", + .klen = 20, + .iv = zeroed_string, + .input = "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01", + .ilen = 16, + .assoc = "\x01\x01\x01\x01\x01\x01\x01\x01", + .alen = 8, + .result = "\x4B\xB1\xB5\xE3\x25\x71\x70\xDE" + "\x7F\xC9\x9C\xA5\x14\x19\xF2\xAC" + "\x90\x92\xB7\xE3\x5F\xA3\x9A\x63" + "\x7E\xD7\x1F\xD8\xD3\x7C\x4B\xF5", + .rlen = 32, + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08" + "\x00\x00\x00\x00", + .klen = 20, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x01" + "\x00\x00\x00\x00", + .input = "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01", + .ilen = 16, + .assoc = "\x01\x01\x01\x01\x01\x01\x01\x01", + .alen = 8, + .result = "\xC1\x0C\x8A\x43\x0E\x8E\x35\x19" + "\x89\xB0\xC4\xBD\xC4\xB7\xD7\x29" + "\x64\x50\xF9\x32\x13\xFB\x74\x61" + "\xF4\xED\x52\xD3\xC5\x10\x55\x3C", + .rlen = 32, + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08" + "\x00\x00\x00\x00", + .klen = 20, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x01" + "\x00\x00\x00\x00", + .input = "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01", + .ilen = 64, + .assoc = "\x01\x01\x01\x01\x01\x01\x01\x01", + .alen = 8, + .result = "\xC1\x0C\x8A\x43\x0E\x8E\x35\x19" + "\x89\xB0\xC4\xBD\xC4\xB7\xD7\x29" + "\x98\x14\xA1\x42\x37\x80\xFD\x90" + "\x68\x12\x01\xA8\x91\x89\xB9\x83" + "\x5B\x11\x77\x12\x9B\xFF\x24\x89" + "\x94\x5F\x18\x12\xBA\x27\x09\x39" + "\x99\x96\x76\x42\x15\x1C\xCD\xCB" + "\xDC\xD3\xDA\x65\x73\xAF\x80\xCD" + "\xD2\xB6\xC2\x4A\x76\xC2\x92\x85" + "\xBD\xCF\x62\x98\x58\x14\xE5\xBD", + .rlen = 80, + }, { + .key = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x00\x00\x00\x00", + .klen = 20, + .iv = "\x00\x00\x45\x67\x89\xab\xcd\xef" + "\x00\x00\x00\x00", + .input = "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff", + .ilen = 192, + .assoc = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa", + .alen = 12, + .result = "\xC1\x76\x33\x85\xE2\x9B\x5F\xDE" + "\xDE\x89\x3D\x42\xE7\xC9\x69\x8A" + "\x44\x6D\xC3\x88\x46\x2E\xC2\x01" + "\x5E\xF6\x0C\x39\xF0\xC4\xA5\x82" + "\xCD\xE8\x31\xCC\x0A\x4C\xE4\x44" + "\x41\xA9\x82\x6F\x22\xA1\x23\x1A" + "\xA8\xE3\x16\xFD\x31\x5C\x27\x31" + "\xF1\x7F\x01\x63\xA3\xAF\x70\xA1" + "\xCF\x07\x57\x41\x67\xD0\xC4\x42" + "\xDB\x18\xC6\x4C\x4C\xE0\x3D\x9F" + "\x05\x07\xFB\x13\x7D\x4A\xCA\x5B" + "\xF0\xBF\x64\x7E\x05\xB1\x72\xEE" + "\x7C\x3B\xD4\xCD\x14\x03\xB2\x2C" + "\xD3\xA9\xEE\xFA\x17\xFC\x9C\xDF" + "\xC7\x75\x40\xFF\xAE\xAD\x1E\x59" + "\x2F\x30\x24\xFB\xAD\x6B\x10\xFA" + "\x6C\x9F\x5B\xE7\x25\xD5\xD0\x25" + "\xAC\x4A\x4B\xDA\xFC\x7A\x85\x1B" + "\x7E\x13\x06\x82\x08\x17\xA4\x35" + "\xEC\xC5\x8D\x63\x96\x81\x0A\x8F" + "\xA3\x05\x38\x95\x20\x1A\x47\x04" + "\x6F\x6D\xDA\x8F\xEF\xC1\x76\x35" + "\x6B\xC7\x4D\x0F\x94\x12\xCA\x3E" + "\x2E\xD5\x03\x2E\x86\x7E\xAA\x3B" + "\x37\x08\x1C\xCF\xBA\x5D\x71\x46" + "\x80\x72\xB0\x4C\x82\x0D\x60\x3C", + .rlen = 208, + } +}; + +static struct aead_testvec aes_gcm_rfc4106_dec_tv_template[] = { + { /* Generated using Crypto++ */ + .key = zeroed_string, + .klen = 20, + .iv = zeroed_string, + .input = "\x03\x88\xDA\xCE\x60\xB6\xA3\x92" + "\xF3\x28\xC2\xB9\x71\xB2\xFE\x78" + "\x97\xFE\x4C\x23\x37\x42\x01\xE0" + "\x81\x9F\x8D\xC5\xD7\x41\xA0\x1B", + .ilen = 32, + .assoc = zeroed_string, + .alen = 8, + .result = zeroed_string, + .rlen = 16, + + },{ + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08" + "\x00\x00\x00\x00", + .klen = 20, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x01" + "\x00\x00\x00\x00", + .input = "\xC0\x0D\x8B\x42\x0F\x8F\x34\x18" + "\x88\xB1\xC5\xBC\xC5\xB6\xD6\x28" + "\x6A\x9D\xDF\x11\x5E\xFE\x5E\x9D" + "\x2F\x70\x44\x92\xF7\xF2\xE3\xEF", + .ilen = 32, + .assoc = zeroed_string, + .alen = 8, + .result = zeroed_string, + .rlen = 16, + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08" + "\x00\x00\x00\x00", + .klen = 20, + .iv = zeroed_string, + .input = "\x4B\xB1\xB5\xE3\x25\x71\x70\xDE" + "\x7F\xC9\x9C\xA5\x14\x19\xF2\xAC" + "\x0B\x8F\x88\x69\x17\xE6\xB4\x3C" + "\xB1\x68\xFD\x14\x52\x64\x61\xB2", + .ilen = 32, + .assoc = zeroed_string, + .alen = 8, + .result = "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01", + .rlen = 16, + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08" + "\x00\x00\x00\x00", + .klen = 20, + .iv = zeroed_string, + .input = "\x4B\xB1\xB5\xE3\x25\x71\x70\xDE" + "\x7F\xC9\x9C\xA5\x14\x19\xF2\xAC" + "\x90\x92\xB7\xE3\x5F\xA3\x9A\x63" + "\x7E\xD7\x1F\xD8\xD3\x7C\x4B\xF5", + .ilen = 32, + .assoc = "\x01\x01\x01\x01\x01\x01\x01\x01", + .alen = 8, + .result = "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01", + .rlen = 16, + + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08" + "\x00\x00\x00\x00", + .klen = 20, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x01" + "\x00\x00\x00\x00", + .input = "\xC1\x0C\x8A\x43\x0E\x8E\x35\x19" + "\x89\xB0\xC4\xBD\xC4\xB7\xD7\x29" + "\x64\x50\xF9\x32\x13\xFB\x74\x61" + "\xF4\xED\x52\xD3\xC5\x10\x55\x3C", + .ilen = 32, + .assoc = "\x01\x01\x01\x01\x01\x01\x01\x01", + .alen = 8, + .result = "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01", + .rlen = 16, + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08" + "\x00\x00\x00\x00", + .klen = 20, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x01" + "\x00\x00\x00\x00", + .input = "\xC1\x0C\x8A\x43\x0E\x8E\x35\x19" + "\x89\xB0\xC4\xBD\xC4\xB7\xD7\x29" + "\x98\x14\xA1\x42\x37\x80\xFD\x90" + "\x68\x12\x01\xA8\x91\x89\xB9\x83" + "\x5B\x11\x77\x12\x9B\xFF\x24\x89" + "\x94\x5F\x18\x12\xBA\x27\x09\x39" + "\x99\x96\x76\x42\x15\x1C\xCD\xCB" + "\xDC\xD3\xDA\x65\x73\xAF\x80\xCD" + "\xD2\xB6\xC2\x4A\x76\xC2\x92\x85" + "\xBD\xCF\x62\x98\x58\x14\xE5\xBD", + .ilen = 80, + .assoc = "\x01\x01\x01\x01\x01\x01\x01\x01", + .alen = 8, + .result = "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01" + "\x01\x01\x01\x01\x01\x01\x01\x01", + .rlen = 64, + }, { + .key = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x00\x00\x00\x00", + .klen = 20, + .iv = "\x00\x00\x45\x67\x89\xab\xcd\xef" + "\x00\x00\x00\x00", + .input = "\xC1\x76\x33\x85\xE2\x9B\x5F\xDE" + "\xDE\x89\x3D\x42\xE7\xC9\x69\x8A" + "\x44\x6D\xC3\x88\x46\x2E\xC2\x01" + "\x5E\xF6\x0C\x39\xF0\xC4\xA5\x82" + "\xCD\xE8\x31\xCC\x0A\x4C\xE4\x44" + "\x41\xA9\x82\x6F\x22\xA1\x23\x1A" + "\xA8\xE3\x16\xFD\x31\x5C\x27\x31" + "\xF1\x7F\x01\x63\xA3\xAF\x70\xA1" + "\xCF\x07\x57\x41\x67\xD0\xC4\x42" + "\xDB\x18\xC6\x4C\x4C\xE0\x3D\x9F" + "\x05\x07\xFB\x13\x7D\x4A\xCA\x5B" + "\xF0\xBF\x64\x7E\x05\xB1\x72\xEE" + "\x7C\x3B\xD4\xCD\x14\x03\xB2\x2C" + "\xD3\xA9\xEE\xFA\x17\xFC\x9C\xDF" + "\xC7\x75\x40\xFF\xAE\xAD\x1E\x59" + "\x2F\x30\x24\xFB\xAD\x6B\x10\xFA" + "\x6C\x9F\x5B\xE7\x25\xD5\xD0\x25" + "\xAC\x4A\x4B\xDA\xFC\x7A\x85\x1B" + "\x7E\x13\x06\x82\x08\x17\xA4\x35" + "\xEC\xC5\x8D\x63\x96\x81\x0A\x8F" + "\xA3\x05\x38\x95\x20\x1A\x47\x04" + "\x6F\x6D\xDA\x8F\xEF\xC1\x76\x35" + "\x6B\xC7\x4D\x0F\x94\x12\xCA\x3E" + "\x2E\xD5\x03\x2E\x86\x7E\xAA\x3B" + "\x37\x08\x1C\xCF\xBA\x5D\x71\x46" + "\x80\x72\xB0\x4C\x82\x0D\x60\x3C", + .ilen = 208, + .assoc = "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa", + .alen = 12, + .result = "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff", + .rlen = 192, + + } +}; + static struct aead_testvec aes_ccm_enc_tv_template[] = { { /* From RFC 3610 */ .key = "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" From c2f9bff5ace07fbea03a53c6c3253f6c3a81e9f9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 19 Oct 2010 21:04:42 +0800 Subject: [PATCH 05/46] net - Add AF_ALG macros This patch adds the socket family/level macros for the yet-to-be-born AF_ALG family. The AF_ALG family provides the user-space interface for the kernel crypto API. Signed-off-by: Herbert Xu Acked-by: David S. Miller --- include/linux/socket.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/socket.h b/include/linux/socket.h index 5146b50202cefa..ebc081b18da60f 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -193,7 +193,8 @@ struct ucred { #define AF_PHONET 35 /* Phonet sockets */ #define AF_IEEE802154 36 /* IEEE802154 sockets */ #define AF_CAIF 37 /* CAIF sockets */ -#define AF_MAX 38 /* For now.. */ +#define AF_ALG 38 /* Algorithm sockets */ +#define AF_MAX 39 /* For now.. */ /* Protocol families, same as address families. */ #define PF_UNSPEC AF_UNSPEC @@ -234,6 +235,7 @@ struct ucred { #define PF_PHONET AF_PHONET #define PF_IEEE802154 AF_IEEE802154 #define PF_CAIF AF_CAIF +#define PF_ALG AF_ALG #define PF_MAX AF_MAX /* Maximum queue length specifiable by listen. */ @@ -307,6 +309,7 @@ struct ucred { #define SOL_RDS 276 #define SOL_IUCV 277 #define SOL_CAIF 278 +#define SOL_ALG 279 /* IPX options */ #define IPX_TYPE 1 From 03c8efc1ffeb6b82a22c1af8dd908af349563314 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 19 Oct 2010 21:12:39 +0800 Subject: [PATCH 06/46] crypto: af_alg - User-space interface for Crypto API This patch creates the backbone of the user-space interface for the Crypto API, through a new socket family AF_ALG. Each session corresponds to one or more connections obtained from that socket. The number depends on the number of inputs/outputs of that particular type of operation. For most types there will be a s ingle connection/file descriptor that is used for both input and output. AEAD is one of the few that require two inputs. Each algorithm type will provide its own implementation that plugs into af_alg. They're keyed using a string such as "skcipher" or "hash". IOW this patch only contains the boring bits that is required to hold everything together. Thakns to Miloslav Trmac for reviewing this and contributing fixes and improvements. Signed-off-by: Herbert Xu Acked-by: David S. Miller Tested-by: Martin Willi --- crypto/Kconfig | 3 + crypto/Makefile | 1 + crypto/af_alg.c | 482 ++++++++++++++++++++++++++++++++++++++++ include/crypto/if_alg.h | 92 ++++++++ include/linux/if_alg.h | 40 ++++ 5 files changed, 618 insertions(+) create mode 100644 crypto/af_alg.c create mode 100644 include/crypto/if_alg.h create mode 100644 include/linux/if_alg.h diff --git a/crypto/Kconfig b/crypto/Kconfig index e4bac29a32e7c4..357e3caf4cbe23 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -841,6 +841,9 @@ config CRYPTO_ANSI_CPRNG ANSI X9.31 A.2.4. Note that this option must be enabled if CRYPTO_FIPS is selected +config CRYPTO_USER_API + tristate + source "drivers/crypto/Kconfig" endif # if CRYPTO diff --git a/crypto/Makefile b/crypto/Makefile index 423b7de61f93e3..0b1319734bc09f 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -85,6 +85,7 @@ obj-$(CONFIG_CRYPTO_RNG2) += krng.o obj-$(CONFIG_CRYPTO_ANSI_CPRNG) += ansi_cprng.o obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o obj-$(CONFIG_CRYPTO_GHASH) += ghash-generic.o +obj-$(CONFIG_CRYPTO_USER_API) += af_alg.o # # generic algorithms and the async_tx api diff --git a/crypto/af_alg.c b/crypto/af_alg.c new file mode 100644 index 00000000000000..cabed0e9c9d810 --- /dev/null +++ b/crypto/af_alg.c @@ -0,0 +1,482 @@ +/* + * af_alg: User-space algorithm interface + * + * This file provides the user-space API for algorithms. + * + * Copyright (c) 2010 Herbert Xu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct alg_type_list { + const struct af_alg_type *type; + struct list_head list; +}; + +static atomic_t alg_memory_allocated; + +static struct proto alg_proto = { + .name = "ALG", + .owner = THIS_MODULE, + .memory_allocated = &alg_memory_allocated, + .obj_size = sizeof(struct alg_sock), +}; + +static LIST_HEAD(alg_types); +static DECLARE_RWSEM(alg_types_sem); + +static const struct af_alg_type *alg_get_type(const char *name) +{ + const struct af_alg_type *type = ERR_PTR(-ENOENT); + struct alg_type_list *node; + + down_read(&alg_types_sem); + list_for_each_entry(node, &alg_types, list) { + if (strcmp(node->type->name, name)) + continue; + + if (try_module_get(node->type->owner)) + type = node->type; + break; + } + up_read(&alg_types_sem); + + return type; +} + +int af_alg_register_type(const struct af_alg_type *type) +{ + struct alg_type_list *node; + int err = -EEXIST; + + down_write(&alg_types_sem); + list_for_each_entry(node, &alg_types, list) { + if (!strcmp(node->type->name, type->name)) + goto unlock; + } + + node = kmalloc(sizeof(*node), GFP_KERNEL); + err = -ENOMEM; + if (!node) + goto unlock; + + type->ops->owner = THIS_MODULE; + node->type = type; + list_add(&node->list, &alg_types); + err = 0; + +unlock: + up_write(&alg_types_sem); + + return err; +} +EXPORT_SYMBOL_GPL(af_alg_register_type); + +int af_alg_unregister_type(const struct af_alg_type *type) +{ + struct alg_type_list *node; + int err = -ENOENT; + + down_write(&alg_types_sem); + list_for_each_entry(node, &alg_types, list) { + if (strcmp(node->type->name, type->name)) + continue; + + list_del(&node->list); + kfree(node); + err = 0; + break; + } + up_write(&alg_types_sem); + + return err; +} +EXPORT_SYMBOL_GPL(af_alg_unregister_type); + +static void alg_do_release(const struct af_alg_type *type, void *private) +{ + if (!type) + return; + + type->release(private); + module_put(type->owner); +} + +int af_alg_release(struct socket *sock) +{ + if (sock->sk) + sock_put(sock->sk); + return 0; +} +EXPORT_SYMBOL_GPL(af_alg_release); + +static int alg_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) +{ + struct sock *sk = sock->sk; + struct alg_sock *ask = alg_sk(sk); + struct sockaddr_alg *sa = (void *)uaddr; + const struct af_alg_type *type; + void *private; + + if (sock->state == SS_CONNECTED) + return -EINVAL; + + if (addr_len != sizeof(*sa)) + return -EINVAL; + + sa->salg_type[sizeof(sa->salg_type) - 1] = 0; + sa->salg_name[sizeof(sa->salg_name) - 1] = 0; + + type = alg_get_type(sa->salg_type); + if (IS_ERR(type) && PTR_ERR(type) == -ENOENT) { + request_module("algif-%s", sa->salg_type); + type = alg_get_type(sa->salg_type); + } + + if (IS_ERR(type)) + return PTR_ERR(type); + + private = type->bind(sa->salg_name, sa->salg_feat, sa->salg_mask); + if (IS_ERR(private)) { + module_put(type->owner); + return PTR_ERR(private); + } + + lock_sock(sk); + + swap(ask->type, type); + swap(ask->private, private); + + release_sock(sk); + + alg_do_release(type, private); + + return 0; +} + +static int alg_setkey(struct sock *sk, char __user *ukey, + unsigned int keylen) +{ + struct alg_sock *ask = alg_sk(sk); + const struct af_alg_type *type = ask->type; + u8 *key; + int err; + + key = sock_kmalloc(sk, keylen, GFP_KERNEL); + if (!key) + return -ENOMEM; + + err = -EFAULT; + if (copy_from_user(key, ukey, keylen)) + goto out; + + err = type->setkey(ask->private, key, keylen); + +out: + sock_kfree_s(sk, key, keylen); + + return err; +} + +static int alg_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, unsigned int optlen) +{ + struct sock *sk = sock->sk; + struct alg_sock *ask = alg_sk(sk); + const struct af_alg_type *type; + int err = -ENOPROTOOPT; + + lock_sock(sk); + type = ask->type; + + if (level != SOL_ALG || !type) + goto unlock; + + switch (optname) { + case ALG_SET_KEY: + if (sock->state == SS_CONNECTED) + goto unlock; + if (!type->setkey) + goto unlock; + + err = alg_setkey(sk, optval, optlen); + } + +unlock: + release_sock(sk); + + return err; +} + +int af_alg_accept(struct sock *sk, struct socket *newsock) +{ + struct alg_sock *ask = alg_sk(sk); + const struct af_alg_type *type; + struct sock *sk2; + int err; + + lock_sock(sk); + type = ask->type; + + err = -EINVAL; + if (!type) + goto unlock; + + sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto); + err = -ENOMEM; + if (!sk2) + goto unlock; + + sock_init_data(newsock, sk2); + + err = type->accept(ask->private, sk2); + if (err) { + sk_free(sk2); + goto unlock; + } + + sk2->sk_family = PF_ALG; + + sock_hold(sk); + alg_sk(sk2)->parent = sk; + alg_sk(sk2)->type = type; + + newsock->ops = type->ops; + newsock->state = SS_CONNECTED; + + err = 0; + +unlock: + release_sock(sk); + + return err; +} +EXPORT_SYMBOL_GPL(af_alg_accept); + +static int alg_accept(struct socket *sock, struct socket *newsock, int flags) +{ + return af_alg_accept(sock->sk, newsock); +} + +static const struct proto_ops alg_proto_ops = { + .family = PF_ALG, + .owner = THIS_MODULE, + + .connect = sock_no_connect, + .socketpair = sock_no_socketpair, + .getname = sock_no_getname, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .getsockopt = sock_no_getsockopt, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, + .sendmsg = sock_no_sendmsg, + .recvmsg = sock_no_recvmsg, + .poll = sock_no_poll, + + .bind = alg_bind, + .release = af_alg_release, + .setsockopt = alg_setsockopt, + .accept = alg_accept, +}; + +static void alg_sock_destruct(struct sock *sk) +{ + struct alg_sock *ask = alg_sk(sk); + + alg_do_release(ask->type, ask->private); +} + +static int alg_create(struct net *net, struct socket *sock, int protocol, + int kern) +{ + struct sock *sk; + int err; + + if (sock->type != SOCK_SEQPACKET) + return -ESOCKTNOSUPPORT; + if (protocol != 0) + return -EPROTONOSUPPORT; + + err = -ENOMEM; + sk = sk_alloc(net, PF_ALG, GFP_KERNEL, &alg_proto); + if (!sk) + goto out; + + sock->ops = &alg_proto_ops; + sock_init_data(sock, sk); + + sk->sk_family = PF_ALG; + sk->sk_destruct = alg_sock_destruct; + + return 0; +out: + return err; +} + +static const struct net_proto_family alg_family = { + .family = PF_ALG, + .create = alg_create, + .owner = THIS_MODULE, +}; + +int af_alg_make_sg(struct af_alg_sgl *sgl, void __user *addr, int len, + int write) +{ + unsigned long from = (unsigned long)addr; + unsigned long npages; + unsigned off; + int err; + int i; + + err = -EFAULT; + if (!access_ok(write ? VERIFY_READ : VERIFY_WRITE, addr, len)) + goto out; + + off = from & ~PAGE_MASK; + npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; + if (npages > ALG_MAX_PAGES) + npages = ALG_MAX_PAGES; + + err = get_user_pages_fast(from, npages, write, sgl->pages); + if (err < 0) + goto out; + + npages = err; + err = -EINVAL; + if (WARN_ON(npages == 0)) + goto out; + + err = 0; + + sg_init_table(sgl->sg, npages); + + for (i = 0; i < npages; i++) { + int plen = min_t(int, len, PAGE_SIZE - off); + + sg_set_page(sgl->sg + i, sgl->pages[i], plen, off); + + off = 0; + len -= plen; + err += plen; + } + +out: + return err; +} +EXPORT_SYMBOL_GPL(af_alg_make_sg); + +void af_alg_free_sg(struct af_alg_sgl *sgl) +{ + int i; + + i = 0; + do { + put_page(sgl->pages[i]); + } while (!sg_is_last(sgl->sg + (i++))); +} +EXPORT_SYMBOL_GPL(af_alg_free_sg); + +int af_alg_cmsg_send(struct msghdr *msg, struct af_alg_control *con) +{ + struct cmsghdr *cmsg; + + for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { + if (!CMSG_OK(msg, cmsg)) + return -EINVAL; + if (cmsg->cmsg_level != SOL_ALG) + continue; + + switch(cmsg->cmsg_type) { + case ALG_SET_IV: + if (cmsg->cmsg_len < CMSG_LEN(sizeof(*con->iv))) + return -EINVAL; + con->iv = (void *)CMSG_DATA(cmsg); + if (cmsg->cmsg_len < CMSG_LEN(con->iv->ivlen + + sizeof(*con->iv))) + return -EINVAL; + break; + + case ALG_SET_OP: + if (cmsg->cmsg_len < CMSG_LEN(sizeof(u32))) + return -EINVAL; + con->op = *(u32 *)CMSG_DATA(cmsg); + break; + + default: + return -EINVAL; + } + } + + return 0; +} +EXPORT_SYMBOL_GPL(af_alg_cmsg_send); + +int af_alg_wait_for_completion(int err, struct af_alg_completion *completion) +{ + switch (err) { + case -EINPROGRESS: + case -EBUSY: + wait_for_completion(&completion->completion); + INIT_COMPLETION(completion->completion); + err = completion->err; + break; + }; + + return err; +} +EXPORT_SYMBOL_GPL(af_alg_wait_for_completion); + +void af_alg_complete(struct crypto_async_request *req, int err) +{ + struct af_alg_completion *completion = req->data; + + completion->err = err; + complete(&completion->completion); +} +EXPORT_SYMBOL_GPL(af_alg_complete); + +static int __init af_alg_init(void) +{ + int err = proto_register(&alg_proto, 0); + + if (err) + goto out; + + err = sock_register(&alg_family); + if (err != 0) + goto out_unregister_proto; + +out: + return err; + +out_unregister_proto: + proto_unregister(&alg_proto); + goto out; +} + +static void __exit af_alg_exit(void) +{ + sock_unregister(PF_ALG); + proto_unregister(&alg_proto); +} + +module_init(af_alg_init); +module_exit(af_alg_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NETPROTO(AF_ALG); diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h new file mode 100644 index 00000000000000..c5813c87de061e --- /dev/null +++ b/include/crypto/if_alg.h @@ -0,0 +1,92 @@ +/* + * if_alg: User-space algorithm interface + * + * Copyright (c) 2010 Herbert Xu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#ifndef _CRYPTO_IF_ALG_H +#define _CRYPTO_IF_ALG_H + +#include +#include +#include +#include +#include + +#define ALG_MAX_PAGES 16 + +struct crypto_async_request; + +struct alg_sock { + /* struct sock must be the first member of struct alg_sock */ + struct sock sk; + + struct sock *parent; + + const struct af_alg_type *type; + void *private; +}; + +struct af_alg_completion { + struct completion completion; + int err; +}; + +struct af_alg_control { + struct af_alg_iv *iv; + int op; +}; + +struct af_alg_type { + void *(*bind)(const char *name, u32 type, u32 mask); + void (*release)(void *private); + int (*setkey)(void *private, const u8 *key, unsigned int keylen); + int (*accept)(void *private, struct sock *sk); + + struct proto_ops *ops; + struct module *owner; + char name[14]; +}; + +struct af_alg_sgl { + struct scatterlist sg[ALG_MAX_PAGES]; + struct page *pages[ALG_MAX_PAGES]; +}; + +int af_alg_register_type(const struct af_alg_type *type); +int af_alg_unregister_type(const struct af_alg_type *type); + +int af_alg_release(struct socket *sock); +int af_alg_accept(struct sock *sk, struct socket *newsock); + +int af_alg_make_sg(struct af_alg_sgl *sgl, void __user *addr, int len, + int write); +void af_alg_free_sg(struct af_alg_sgl *sgl); + +int af_alg_cmsg_send(struct msghdr *msg, struct af_alg_control *con); + +int af_alg_wait_for_completion(int err, struct af_alg_completion *completion); +void af_alg_complete(struct crypto_async_request *req, int err); + +static inline struct alg_sock *alg_sk(struct sock *sk) +{ + return (struct alg_sock *)sk; +} + +static inline void af_alg_release_parent(struct sock *sk) +{ + sock_put(alg_sk(sk)->parent); +} + +static inline void af_alg_init_completion(struct af_alg_completion *completion) +{ + init_completion(&completion->completion); +} + +#endif /* _CRYPTO_IF_ALG_H */ diff --git a/include/linux/if_alg.h b/include/linux/if_alg.h new file mode 100644 index 00000000000000..0f9acce5b1ff19 --- /dev/null +++ b/include/linux/if_alg.h @@ -0,0 +1,40 @@ +/* + * if_alg: User-space algorithm interface + * + * Copyright (c) 2010 Herbert Xu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#ifndef _LINUX_IF_ALG_H +#define _LINUX_IF_ALG_H + +#include + +struct sockaddr_alg { + __u16 salg_family; + __u8 salg_type[14]; + __u32 salg_feat; + __u32 salg_mask; + __u8 salg_name[64]; +}; + +struct af_alg_iv { + __u32 ivlen; + __u8 iv[0]; +}; + +/* Socket options */ +#define ALG_SET_KEY 1 +#define ALG_SET_IV 2 +#define ALG_SET_OP 3 + +/* Operations */ +#define ALG_OP_DECRYPT 0 +#define ALG_OP_ENCRYPT 1 + +#endif /* _LINUX_IF_ALG_H */ From fe869cdb89c95d060c77eea20204d6c91f233b53 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 19 Oct 2010 21:23:00 +0800 Subject: [PATCH 07/46] crypto: algif_hash - User-space interface for hash operations This patch adds the af_alg plugin for hash, corresponding to the ahash kernel operation type. Keys can optionally be set through the setsockopt interface. Each sendmsg call will finalise the hash unless sent with a MSG_MORE flag. Partial hash states can be cloned using accept(2). The interface is completely synchronous, all operations will complete prior to the system call returning. Both sendmsg(2) and splice(2) support reading the user-space data directly without copying (except that the Crypto API itself may copy the data if alignment is off). For now only the splice(2) interface supports performing digest instead of init/update/final. In future the sendmsg(2) interface will also be modified to use digest/finup where possible so that hardware that cannot return a partial hash state can still benefit from this interface. Thakns to Miloslav Trmac for reviewing this and contributing fixes and improvements. Signed-off-by: Herbert Xu Acked-by: David S. Miller Tested-by: Martin Willi --- crypto/Kconfig | 8 ++ crypto/Makefile | 1 + crypto/algif_hash.c | 319 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 328 insertions(+) create mode 100644 crypto/algif_hash.c diff --git a/crypto/Kconfig b/crypto/Kconfig index 357e3caf4cbe23..6db27d7ff8b3af 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -844,6 +844,14 @@ config CRYPTO_ANSI_CPRNG config CRYPTO_USER_API tristate +config CRYPTO_USER_API_HASH + tristate "User-space interface for hash algorithms" + select CRYPTO_HASH + select CRYPTO_USER_API + help + This option enables the user-spaces interface for hash + algorithms. + source "drivers/crypto/Kconfig" endif # if CRYPTO diff --git a/crypto/Makefile b/crypto/Makefile index 0b1319734bc09f..14ab4052a9c8ba 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -86,6 +86,7 @@ obj-$(CONFIG_CRYPTO_ANSI_CPRNG) += ansi_cprng.o obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o obj-$(CONFIG_CRYPTO_GHASH) += ghash-generic.o obj-$(CONFIG_CRYPTO_USER_API) += af_alg.o +obj-$(CONFIG_CRYPTO_USER_API_HASH) += algif_hash.o # # generic algorithms and the async_tx api diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c new file mode 100644 index 00000000000000..62122a1a2f7a64 --- /dev/null +++ b/crypto/algif_hash.c @@ -0,0 +1,319 @@ +/* + * algif_hash: User-space interface for hash algorithms + * + * This file provides the user-space API for hash algorithms. + * + * Copyright (c) 2010 Herbert Xu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct hash_ctx { + struct af_alg_sgl sgl; + + u8 *result; + + struct af_alg_completion completion; + + unsigned int len; + bool more; + + struct ahash_request req; +}; + +static int hash_sendmsg(struct kiocb *unused, struct socket *sock, + struct msghdr *msg, size_t ignored) +{ + int limit = ALG_MAX_PAGES * PAGE_SIZE; + struct sock *sk = sock->sk; + struct alg_sock *ask = alg_sk(sk); + struct hash_ctx *ctx = ask->private; + unsigned long iovlen; + struct iovec *iov; + long copied = 0; + int err; + + if (limit > sk->sk_sndbuf) + limit = sk->sk_sndbuf; + + lock_sock(sk); + if (!ctx->more) { + err = crypto_ahash_init(&ctx->req); + if (err) + goto unlock; + } + + ctx->more = 0; + + for (iov = msg->msg_iov, iovlen = msg->msg_iovlen; iovlen > 0; + iovlen--, iov++) { + unsigned long seglen = iov->iov_len; + char __user *from = iov->iov_base; + + while (seglen) { + int len = min_t(unsigned long, seglen, limit); + int newlen; + + newlen = af_alg_make_sg(&ctx->sgl, from, len, 0); + if (newlen < 0) + goto unlock; + + ahash_request_set_crypt(&ctx->req, ctx->sgl.sg, NULL, + newlen); + + err = af_alg_wait_for_completion( + crypto_ahash_update(&ctx->req), + &ctx->completion); + + af_alg_free_sg(&ctx->sgl); + + if (err) + goto unlock; + + seglen -= newlen; + from += newlen; + copied += newlen; + } + } + + err = 0; + + ctx->more = msg->msg_flags & MSG_MORE; + if (!ctx->more) { + ahash_request_set_crypt(&ctx->req, NULL, ctx->result, 0); + err = af_alg_wait_for_completion(crypto_ahash_final(&ctx->req), + &ctx->completion); + } + +unlock: + release_sock(sk); + + return err ?: copied; +} + +static ssize_t hash_sendpage(struct socket *sock, struct page *page, + int offset, size_t size, int flags) +{ + struct sock *sk = sock->sk; + struct alg_sock *ask = alg_sk(sk); + struct hash_ctx *ctx = ask->private; + int err; + + lock_sock(sk); + sg_init_table(ctx->sgl.sg, 1); + sg_set_page(ctx->sgl.sg, page, size, offset); + + ahash_request_set_crypt(&ctx->req, ctx->sgl.sg, ctx->result, size); + + if (!(flags & MSG_MORE)) { + if (ctx->more) + err = crypto_ahash_finup(&ctx->req); + else + err = crypto_ahash_digest(&ctx->req); + } else { + if (!ctx->more) { + err = crypto_ahash_init(&ctx->req); + if (err) + goto unlock; + } + + err = crypto_ahash_update(&ctx->req); + } + + err = af_alg_wait_for_completion(err, &ctx->completion); + if (err) + goto unlock; + + ctx->more = flags & MSG_MORE; + +unlock: + release_sock(sk); + + return err ?: size; +} + +static int hash_recvmsg(struct kiocb *unused, struct socket *sock, + struct msghdr *msg, size_t len, int flags) +{ + struct sock *sk = sock->sk; + struct alg_sock *ask = alg_sk(sk); + struct hash_ctx *ctx = ask->private; + unsigned ds = crypto_ahash_digestsize(crypto_ahash_reqtfm(&ctx->req)); + int err; + + if (len > ds) + len = ds; + else if (len < ds) + msg->msg_flags |= MSG_TRUNC; + + lock_sock(sk); + if (ctx->more) { + ctx->more = 0; + ahash_request_set_crypt(&ctx->req, NULL, ctx->result, 0); + err = af_alg_wait_for_completion(crypto_ahash_final(&ctx->req), + &ctx->completion); + if (err) + goto unlock; + } + + err = memcpy_toiovec(msg->msg_iov, ctx->result, len); + +unlock: + release_sock(sk); + + return err ?: len; +} + +static int hash_accept(struct socket *sock, struct socket *newsock, int flags) +{ + struct sock *sk = sock->sk; + struct alg_sock *ask = alg_sk(sk); + struct hash_ctx *ctx = ask->private; + struct ahash_request *req = &ctx->req; + char state[crypto_ahash_statesize(crypto_ahash_reqtfm(req))]; + struct sock *sk2; + struct alg_sock *ask2; + struct hash_ctx *ctx2; + int err; + + err = crypto_ahash_export(req, state); + if (err) + return err; + + err = af_alg_accept(ask->parent, newsock); + if (err) + return err; + + sk2 = newsock->sk; + ask2 = alg_sk(sk2); + ctx2 = ask2->private; + ctx2->more = 1; + + err = crypto_ahash_import(&ctx2->req, state); + if (err) { + sock_orphan(sk2); + sock_put(sk2); + } + + return err; +} + +static struct proto_ops algif_hash_ops = { + .family = PF_ALG, + + .connect = sock_no_connect, + .socketpair = sock_no_socketpair, + .getname = sock_no_getname, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .getsockopt = sock_no_getsockopt, + .mmap = sock_no_mmap, + .bind = sock_no_bind, + .setsockopt = sock_no_setsockopt, + .poll = sock_no_poll, + + .release = af_alg_release, + .sendmsg = hash_sendmsg, + .sendpage = hash_sendpage, + .recvmsg = hash_recvmsg, + .accept = hash_accept, +}; + +static void *hash_bind(const char *name, u32 type, u32 mask) +{ + return crypto_alloc_ahash(name, type, mask); +} + +static void hash_release(void *private) +{ + crypto_free_ahash(private); +} + +static int hash_setkey(void *private, const u8 *key, unsigned int keylen) +{ + return crypto_ahash_setkey(private, key, keylen); +} + +static void hash_sock_destruct(struct sock *sk) +{ + struct alg_sock *ask = alg_sk(sk); + struct hash_ctx *ctx = ask->private; + + sock_kfree_s(sk, ctx->result, + crypto_ahash_digestsize(crypto_ahash_reqtfm(&ctx->req))); + sock_kfree_s(sk, ctx, ctx->len); + af_alg_release_parent(sk); +} + +static int hash_accept_parent(void *private, struct sock *sk) +{ + struct hash_ctx *ctx; + struct alg_sock *ask = alg_sk(sk); + unsigned len = sizeof(*ctx) + crypto_ahash_reqsize(private); + unsigned ds = crypto_ahash_digestsize(private); + + ctx = sock_kmalloc(sk, len, GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->result = sock_kmalloc(sk, ds, GFP_KERNEL); + if (!ctx->result) { + sock_kfree_s(sk, ctx, len); + return -ENOMEM; + } + + memset(ctx->result, 0, ds); + + ctx->len = len; + ctx->more = 0; + af_alg_init_completion(&ctx->completion); + + ask->private = ctx; + + ahash_request_set_tfm(&ctx->req, private); + ahash_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG, + af_alg_complete, &ctx->completion); + + sk->sk_destruct = hash_sock_destruct; + + return 0; +} + +static const struct af_alg_type algif_type_hash = { + .bind = hash_bind, + .release = hash_release, + .setkey = hash_setkey, + .accept = hash_accept_parent, + .ops = &algif_hash_ops, + .name = "hash", + .owner = THIS_MODULE +}; + +static int __init algif_hash_init(void) +{ + return af_alg_register_type(&algif_type_hash); +} + +static void __exit algif_hash_exit(void) +{ + int err = af_alg_unregister_type(&algif_type_hash); + BUG_ON(err); +} + +module_init(algif_hash_init); +module_exit(algif_hash_exit); +MODULE_LICENSE("GPL"); From 8ff590903d5fc7f5a0a988c38267a3d08e6393a2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 19 Oct 2010 21:31:55 +0800 Subject: [PATCH 08/46] crypto: algif_skcipher - User-space interface for skcipher operations This patch adds the af_alg plugin for symmetric key ciphers, corresponding to the ablkcipher kernel operation type. Keys can optionally be set through the setsockopt interface. Once a sendmsg call occurs without MSG_MORE no further writes may be made to the socket until all previous data has been read. IVs and and whether encryption/decryption is performed can be set through the setsockopt interface or as a control message to sendmsg. The interface is completely synchronous, all operations are carried out in recvmsg(2) and will complete prior to the system call returning. The splice(2) interface support reading the user-space data directly without copying (except that the Crypto API itself may copy the data if alignment is off). The recvmsg(2) interface supports directly writing to user-space without additional copying, i.e., the kernel crypto interface will receive the user-space address as its output SG list. Thakns to Miloslav Trmac for reviewing this and contributing fixes and improvements. Signed-off-by: Herbert Xu Acked-by: David S. Miller --- crypto/Kconfig | 8 + crypto/Makefile | 1 + crypto/algif_skcipher.c | 640 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 649 insertions(+) create mode 100644 crypto/algif_skcipher.c diff --git a/crypto/Kconfig b/crypto/Kconfig index 6db27d7ff8b3af..69437e21217f31 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -852,6 +852,14 @@ config CRYPTO_USER_API_HASH This option enables the user-spaces interface for hash algorithms. +config CRYPTO_USER_API_SKCIPHER + tristate "User-space interface for symmetric key cipher algorithms" + select CRYPTO_BLKCIPHER + select CRYPTO_USER_API + help + This option enables the user-spaces interface for symmetric + key cipher algorithms. + source "drivers/crypto/Kconfig" endif # if CRYPTO diff --git a/crypto/Makefile b/crypto/Makefile index 14ab4052a9c8ba..efc0f18dbb372b 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -87,6 +87,7 @@ obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o obj-$(CONFIG_CRYPTO_GHASH) += ghash-generic.o obj-$(CONFIG_CRYPTO_USER_API) += af_alg.o obj-$(CONFIG_CRYPTO_USER_API_HASH) += algif_hash.o +obj-$(CONFIG_CRYPTO_USER_API_SKCIPHER) += algif_skcipher.o # # generic algorithms and the async_tx api diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c new file mode 100644 index 00000000000000..211c956952ca46 --- /dev/null +++ b/crypto/algif_skcipher.c @@ -0,0 +1,640 @@ +/* + * algif_skcipher: User-space interface for skcipher algorithms + * + * This file provides the user-space API for symmetric key ciphers. + * + * Copyright (c) 2010 Herbert Xu + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct skcipher_sg_list { + struct list_head list; + + int cur; + + struct scatterlist sg[0]; +}; + +struct skcipher_ctx { + struct list_head tsgl; + struct af_alg_sgl rsgl; + + void *iv; + + struct af_alg_completion completion; + + unsigned used; + + unsigned int len; + bool more; + bool merge; + bool enc; + + struct ablkcipher_request req; +}; + +#define MAX_SGL_ENTS ((PAGE_SIZE - sizeof(struct skcipher_sg_list)) / \ + sizeof(struct scatterlist) - 1) + +static inline bool skcipher_writable(struct sock *sk) +{ + struct alg_sock *ask = alg_sk(sk); + struct skcipher_ctx *ctx = ask->private; + + return ctx->used + PAGE_SIZE <= max_t(int, sk->sk_sndbuf, PAGE_SIZE); +} + +static int skcipher_alloc_sgl(struct sock *sk) +{ + struct alg_sock *ask = alg_sk(sk); + struct skcipher_ctx *ctx = ask->private; + struct skcipher_sg_list *sgl; + struct scatterlist *sg = NULL; + + sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list); + if (!list_empty(&ctx->tsgl)) + sg = sgl->sg; + + if (!sg || sgl->cur >= MAX_SGL_ENTS) { + sgl = sock_kmalloc(sk, sizeof(*sgl) + + sizeof(sgl->sg[0]) * (MAX_SGL_ENTS + 1), + GFP_KERNEL); + if (!sgl) + return -ENOMEM; + + sg_init_table(sgl->sg, MAX_SGL_ENTS + 1); + sgl->cur = 0; + + if (sg) + scatterwalk_sg_chain(sg, MAX_SGL_ENTS + 1, sgl->sg); + + list_add_tail(&sgl->list, &ctx->tsgl); + } + + return 0; +} + +static void skcipher_pull_sgl(struct sock *sk, int used) +{ + struct alg_sock *ask = alg_sk(sk); + struct skcipher_ctx *ctx = ask->private; + struct skcipher_sg_list *sgl; + struct scatterlist *sg; + int i; + + while (!list_empty(&ctx->tsgl)) { + sgl = list_first_entry(&ctx->tsgl, struct skcipher_sg_list, + list); + sg = sgl->sg; + + for (i = 0; i < sgl->cur; i++) { + int plen = min_t(int, used, sg[i].length); + + if (!sg_page(sg + i)) + continue; + + sg[i].length -= plen; + sg[i].offset += plen; + + used -= plen; + ctx->used -= plen; + + if (sg[i].length) + return; + + put_page(sg_page(sg + i)); + sg_assign_page(sg + i, NULL); + } + + list_del(&sgl->list); + sock_kfree_s(sk, sgl, + sizeof(*sgl) + sizeof(sgl->sg[0]) * + (MAX_SGL_ENTS + 1)); + } + + if (!ctx->used) + ctx->merge = 0; +} + +static void skcipher_free_sgl(struct sock *sk) +{ + struct alg_sock *ask = alg_sk(sk); + struct skcipher_ctx *ctx = ask->private; + + skcipher_pull_sgl(sk, ctx->used); +} + +static int skcipher_wait_for_wmem(struct sock *sk, unsigned flags) +{ + long timeout; + DEFINE_WAIT(wait); + int err = -ERESTARTSYS; + + if (flags & MSG_DONTWAIT) + return -EAGAIN; + + set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); + + for (;;) { + if (signal_pending(current)) + break; + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + timeout = MAX_SCHEDULE_TIMEOUT; + if (sk_wait_event(sk, &timeout, skcipher_writable(sk))) { + err = 0; + break; + } + } + finish_wait(sk_sleep(sk), &wait); + + return err; +} + +static void skcipher_wmem_wakeup(struct sock *sk) +{ + struct socket_wq *wq; + + if (!skcipher_writable(sk)) + return; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_sync_poll(&wq->wait, POLLIN | + POLLRDNORM | + POLLRDBAND); + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); + rcu_read_unlock(); +} + +static int skcipher_wait_for_data(struct sock *sk, unsigned flags) +{ + struct alg_sock *ask = alg_sk(sk); + struct skcipher_ctx *ctx = ask->private; + long timeout; + DEFINE_WAIT(wait); + int err = -ERESTARTSYS; + + if (flags & MSG_DONTWAIT) { + return -EAGAIN; + } + + set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + + for (;;) { + if (signal_pending(current)) + break; + prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + timeout = MAX_SCHEDULE_TIMEOUT; + if (sk_wait_event(sk, &timeout, ctx->used)) { + err = 0; + break; + } + } + finish_wait(sk_sleep(sk), &wait); + + clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + + return err; +} + +static void skcipher_data_wakeup(struct sock *sk) +{ + struct alg_sock *ask = alg_sk(sk); + struct skcipher_ctx *ctx = ask->private; + struct socket_wq *wq; + + if (!ctx->used) + return; + + rcu_read_lock(); + wq = rcu_dereference(sk->sk_wq); + if (wq_has_sleeper(wq)) + wake_up_interruptible_sync_poll(&wq->wait, POLLOUT | + POLLRDNORM | + POLLRDBAND); + sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); + rcu_read_unlock(); +} + +static int skcipher_sendmsg(struct kiocb *unused, struct socket *sock, + struct msghdr *msg, size_t size) +{ + struct sock *sk = sock->sk; + struct alg_sock *ask = alg_sk(sk); + struct skcipher_ctx *ctx = ask->private; + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(&ctx->req); + unsigned ivsize = crypto_ablkcipher_ivsize(tfm); + struct skcipher_sg_list *sgl; + struct af_alg_control con = {}; + long copied = 0; + bool enc = 0; + int limit; + int err; + int i; + + if (msg->msg_controllen) { + err = af_alg_cmsg_send(msg, &con); + if (err) + return err; + + switch (con.op) { + case ALG_OP_ENCRYPT: + enc = 1; + break; + case ALG_OP_DECRYPT: + enc = 0; + break; + default: + return -EINVAL; + } + + if (con.iv && con.iv->ivlen != ivsize) + return -EINVAL; + } + + err = -EINVAL; + + lock_sock(sk); + if (!ctx->more && ctx->used) + goto unlock; + + if (!ctx->used) { + ctx->enc = enc; + if (con.iv) + memcpy(ctx->iv, con.iv->iv, ivsize); + } + + limit = max_t(int, sk->sk_sndbuf, PAGE_SIZE); + limit -= ctx->used; + + while (size) { + struct scatterlist *sg; + unsigned long len = size; + int plen; + + if (ctx->merge) { + sgl = list_entry(ctx->tsgl.prev, + struct skcipher_sg_list, list); + sg = sgl->sg + sgl->cur - 1; + len = min_t(unsigned long, len, + PAGE_SIZE - sg->offset - sg->length); + + err = memcpy_fromiovec(page_address(sg_page(sg)) + + sg->offset + sg->length, + msg->msg_iov, len); + if (err) + goto unlock; + + sg->length += len; + ctx->merge = (sg->offset + sg->length) & + (PAGE_SIZE - 1); + + ctx->used += len; + copied += len; + size -= len; + limit -= len; + continue; + } + + if (limit < PAGE_SIZE) { + err = skcipher_wait_for_wmem(sk, msg->msg_flags); + if (err) + goto unlock; + + limit = max_t(int, sk->sk_sndbuf, PAGE_SIZE); + limit -= ctx->used; + } + + len = min_t(unsigned long, len, limit); + + err = skcipher_alloc_sgl(sk); + if (err) + goto unlock; + + sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list); + sg = sgl->sg; + do { + i = sgl->cur; + plen = min_t(int, len, PAGE_SIZE); + + sg_assign_page(sg + i, alloc_page(GFP_KERNEL)); + err = -ENOMEM; + if (!sg_page(sg + i)) + goto unlock; + + err = memcpy_fromiovec(page_address(sg_page(sg + i)), + msg->msg_iov, plen); + if (err) { + __free_page(sg_page(sg + i)); + sg_assign_page(sg + i, NULL); + goto unlock; + } + + sg[i].length = plen; + len -= plen; + ctx->used += plen; + copied += plen; + size -= plen; + limit -= plen; + sgl->cur++; + } while (len && sgl->cur < MAX_SGL_ENTS); + + ctx->merge = plen & (PAGE_SIZE - 1); + } + + err = 0; + + ctx->more = msg->msg_flags & MSG_MORE; + if (!ctx->more && !list_empty(&ctx->tsgl)) + sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list); + +unlock: + skcipher_data_wakeup(sk); + release_sock(sk); + + return copied ?: err; +} + +static ssize_t skcipher_sendpage(struct socket *sock, struct page *page, + int offset, size_t size, int flags) +{ + struct sock *sk = sock->sk; + struct alg_sock *ask = alg_sk(sk); + struct skcipher_ctx *ctx = ask->private; + struct skcipher_sg_list *sgl; + int err = -EINVAL; + int limit; + + lock_sock(sk); + if (!ctx->more && ctx->used) + goto unlock; + + if (!size) + goto done; + + limit = max_t(int, sk->sk_sndbuf, PAGE_SIZE); + limit -= ctx->used; + + if (limit < PAGE_SIZE) { + err = skcipher_wait_for_wmem(sk, flags); + if (err) + goto unlock; + + limit = max_t(int, sk->sk_sndbuf, PAGE_SIZE); + limit -= ctx->used; + } + + err = skcipher_alloc_sgl(sk); + if (err) + goto unlock; + + ctx->merge = 0; + sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list); + + get_page(page); + sg_set_page(sgl->sg + sgl->cur, page, size, offset); + sgl->cur++; + ctx->used += size; + +done: + ctx->more = flags & MSG_MORE; + if (!ctx->more && !list_empty(&ctx->tsgl)) + sgl = list_entry(ctx->tsgl.prev, struct skcipher_sg_list, list); + +unlock: + skcipher_data_wakeup(sk); + release_sock(sk); + + return err ?: size; +} + +static int skcipher_recvmsg(struct kiocb *unused, struct socket *sock, + struct msghdr *msg, size_t ignored, int flags) +{ + struct sock *sk = sock->sk; + struct alg_sock *ask = alg_sk(sk); + struct skcipher_ctx *ctx = ask->private; + unsigned bs = crypto_ablkcipher_blocksize(crypto_ablkcipher_reqtfm( + &ctx->req)); + struct skcipher_sg_list *sgl; + struct scatterlist *sg; + unsigned long iovlen; + struct iovec *iov; + int err = -EAGAIN; + int used; + long copied = 0; + + lock_sock(sk); + for (iov = msg->msg_iov, iovlen = msg->msg_iovlen; iovlen > 0; + iovlen--, iov++) { + unsigned long seglen = iov->iov_len; + char __user *from = iov->iov_base; + + while (seglen) { + sgl = list_first_entry(&ctx->tsgl, + struct skcipher_sg_list, list); + sg = sgl->sg; + + while (!sg->length) + sg++; + + used = ctx->used; + if (!used) { + err = skcipher_wait_for_data(sk, flags); + if (err) + goto unlock; + } + + used = min_t(unsigned long, used, seglen); + + if (ctx->more || used < ctx->used) + used -= used % bs; + + err = -EINVAL; + if (!used) + goto unlock; + + used = af_alg_make_sg(&ctx->rsgl, from, used, 1); + if (used < 0) + goto unlock; + + ablkcipher_request_set_crypt(&ctx->req, sg, + ctx->rsgl.sg, used, + ctx->iv); + + err = af_alg_wait_for_completion( + ctx->enc ? + crypto_ablkcipher_encrypt(&ctx->req) : + crypto_ablkcipher_decrypt(&ctx->req), + &ctx->completion); + + af_alg_free_sg(&ctx->rsgl); + + if (err) + goto unlock; + + copied += used; + from += used; + seglen -= used; + skcipher_pull_sgl(sk, used); + } + } + + err = 0; + +unlock: + skcipher_wmem_wakeup(sk); + release_sock(sk); + + return copied ?: err; +} + + +static unsigned int skcipher_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + struct sock *sk = sock->sk; + struct alg_sock *ask = alg_sk(sk); + struct skcipher_ctx *ctx = ask->private; + unsigned int mask; + + sock_poll_wait(file, sk_sleep(sk), wait); + mask = 0; + + if (ctx->used) + mask |= POLLIN | POLLRDNORM; + + if (skcipher_writable(sk)) + mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + + return mask; +} + +static struct proto_ops algif_skcipher_ops = { + .family = PF_ALG, + + .connect = sock_no_connect, + .socketpair = sock_no_socketpair, + .getname = sock_no_getname, + .ioctl = sock_no_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .getsockopt = sock_no_getsockopt, + .mmap = sock_no_mmap, + .bind = sock_no_bind, + .accept = sock_no_accept, + .setsockopt = sock_no_setsockopt, + + .release = af_alg_release, + .sendmsg = skcipher_sendmsg, + .sendpage = skcipher_sendpage, + .recvmsg = skcipher_recvmsg, + .poll = skcipher_poll, +}; + +static void *skcipher_bind(const char *name, u32 type, u32 mask) +{ + return crypto_alloc_ablkcipher(name, type, mask); +} + +static void skcipher_release(void *private) +{ + crypto_free_ablkcipher(private); +} + +static int skcipher_setkey(void *private, const u8 *key, unsigned int keylen) +{ + return crypto_ablkcipher_setkey(private, key, keylen); +} + +static void skcipher_sock_destruct(struct sock *sk) +{ + struct alg_sock *ask = alg_sk(sk); + struct skcipher_ctx *ctx = ask->private; + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(&ctx->req); + + skcipher_free_sgl(sk); + sock_kfree_s(sk, ctx->iv, crypto_ablkcipher_ivsize(tfm)); + sock_kfree_s(sk, ctx, ctx->len); + af_alg_release_parent(sk); +} + +static int skcipher_accept_parent(void *private, struct sock *sk) +{ + struct skcipher_ctx *ctx; + struct alg_sock *ask = alg_sk(sk); + unsigned int len = sizeof(*ctx) + crypto_ablkcipher_reqsize(private); + + ctx = sock_kmalloc(sk, len, GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + ctx->iv = sock_kmalloc(sk, crypto_ablkcipher_ivsize(private), + GFP_KERNEL); + if (!ctx->iv) { + sock_kfree_s(sk, ctx, len); + return -ENOMEM; + } + + memset(ctx->iv, 0, crypto_ablkcipher_ivsize(private)); + + INIT_LIST_HEAD(&ctx->tsgl); + ctx->len = len; + ctx->used = 0; + ctx->more = 0; + ctx->merge = 0; + ctx->enc = 0; + af_alg_init_completion(&ctx->completion); + + ask->private = ctx; + + ablkcipher_request_set_tfm(&ctx->req, private); + ablkcipher_request_set_callback(&ctx->req, CRYPTO_TFM_REQ_MAY_BACKLOG, + af_alg_complete, &ctx->completion); + + sk->sk_destruct = skcipher_sock_destruct; + + return 0; +} + +static const struct af_alg_type algif_type_skcipher = { + .bind = skcipher_bind, + .release = skcipher_release, + .setkey = skcipher_setkey, + .accept = skcipher_accept_parent, + .ops = &algif_skcipher_ops, + .name = "skcipher", + .owner = THIS_MODULE +}; + +static int __init algif_skcipher_init(void) +{ + return af_alg_register_type(&algif_type_skcipher); +} + +static void __exit algif_skcipher_exit(void) +{ + int err = af_alg_unregister_type(&algif_type_skcipher); + BUG_ON(err); +} + +module_init(algif_skcipher_init); +module_exit(algif_skcipher_exit); +MODULE_LICENSE("GPL"); From c8484594aeafe889269bd01232049b184140de3f Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Sat, 27 Nov 2010 16:30:39 +0800 Subject: [PATCH 09/46] crypto: Use vzalloc Signed-off-by: Joe Perches Signed-off-by: Herbert Xu --- crypto/deflate.c | 3 +-- crypto/zlib.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/crypto/deflate.c b/crypto/deflate.c index 463dc859aa059d..cbc7a33a9600c2 100644 --- a/crypto/deflate.c +++ b/crypto/deflate.c @@ -48,12 +48,11 @@ static int deflate_comp_init(struct deflate_ctx *ctx) int ret = 0; struct z_stream_s *stream = &ctx->comp_stream; - stream->workspace = vmalloc(zlib_deflate_workspacesize()); + stream->workspace = vzalloc(zlib_deflate_workspacesize()); if (!stream->workspace) { ret = -ENOMEM; goto out; } - memset(stream->workspace, 0, zlib_deflate_workspacesize()); ret = zlib_deflateInit2(stream, DEFLATE_DEF_LEVEL, Z_DEFLATED, -DEFLATE_DEF_WINBITS, DEFLATE_DEF_MEMLEVEL, Z_DEFAULT_STRATEGY); diff --git a/crypto/zlib.c b/crypto/zlib.c index c3015733c9909c..739b8fca4cea51 100644 --- a/crypto/zlib.c +++ b/crypto/zlib.c @@ -95,11 +95,10 @@ static int zlib_compress_setup(struct crypto_pcomp *tfm, void *params, zlib_comp_exit(ctx); workspacesize = zlib_deflate_workspacesize(); - stream->workspace = vmalloc(workspacesize); + stream->workspace = vzalloc(workspacesize); if (!stream->workspace) return -ENOMEM; - memset(stream->workspace, 0, workspacesize); ret = zlib_deflateInit2(stream, tb[ZLIB_COMP_LEVEL] ? nla_get_u32(tb[ZLIB_COMP_LEVEL]) From 21ea28abcf825729f9698afd7357dfbf7040d4f8 Mon Sep 17 00:00:00 2001 From: Tracey Dent Date: Sat, 27 Nov 2010 16:32:57 +0800 Subject: [PATCH 10/46] crypto: Makefile clean up Changed Makefile to use -y instead of -objs. Signed-off-by: Tracey Dent Signed-off-by: Herbert Xu --- crypto/Makefile | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/crypto/Makefile b/crypto/Makefile index efc0f18dbb372b..e9a399ca69db06 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -3,32 +3,32 @@ # obj-$(CONFIG_CRYPTO) += crypto.o -crypto-objs := api.o cipher.o compress.o +crypto-y := api.o cipher.o compress.o obj-$(CONFIG_CRYPTO_WORKQUEUE) += crypto_wq.o obj-$(CONFIG_CRYPTO_FIPS) += fips.o crypto_algapi-$(CONFIG_PROC_FS) += proc.o -crypto_algapi-objs := algapi.o scatterwalk.o $(crypto_algapi-y) +crypto_algapi-y := algapi.o scatterwalk.o $(crypto_algapi-y) obj-$(CONFIG_CRYPTO_ALGAPI2) += crypto_algapi.o obj-$(CONFIG_CRYPTO_AEAD2) += aead.o -crypto_blkcipher-objs := ablkcipher.o -crypto_blkcipher-objs += blkcipher.o +crypto_blkcipher-y := ablkcipher.o +crypto_blkcipher-y += blkcipher.o obj-$(CONFIG_CRYPTO_BLKCIPHER2) += crypto_blkcipher.o obj-$(CONFIG_CRYPTO_BLKCIPHER2) += chainiv.o obj-$(CONFIG_CRYPTO_BLKCIPHER2) += eseqiv.o obj-$(CONFIG_CRYPTO_SEQIV) += seqiv.o -crypto_hash-objs += ahash.o -crypto_hash-objs += shash.o +crypto_hash-y += ahash.o +crypto_hash-y += shash.o obj-$(CONFIG_CRYPTO_HASH2) += crypto_hash.o obj-$(CONFIG_CRYPTO_PCOMP2) += pcompress.o -cryptomgr-objs := algboss.o testmgr.o +cryptomgr-y := algboss.o testmgr.o obj-$(CONFIG_CRYPTO_MANAGER2) += cryptomgr.o obj-$(CONFIG_CRYPTO_HMAC) += hmac.o From 0d258efb6a58fe047197c3b9cff8746bb176d58a Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sat, 27 Nov 2010 16:34:46 +0800 Subject: [PATCH 11/46] crypto: aesni-intel - Ported implementation to x86-32 The AES-NI instructions are also available in legacy mode so the 32-bit architecture may profit from those, too. To illustrate the performance gain here's a short summary of a dm-crypt speed test on a Core i7 M620 running at 2.67GHz comparing both assembler implementations: x86: i568 aes-ni delta ECB, 256 bit: 93.8 MB/s 123.3 MB/s +31.4% CBC, 256 bit: 84.8 MB/s 262.3 MB/s +209.3% LRW, 256 bit: 108.6 MB/s 222.1 MB/s +104.5% XTS, 256 bit: 105.0 MB/s 205.5 MB/s +95.7% Additionally, due to some minor optimizations, the 64-bit version also got a minor performance gain as seen below: x86-64: old impl. new impl. delta ECB, 256 bit: 121.1 MB/s 123.0 MB/s +1.5% CBC, 256 bit: 285.3 MB/s 290.8 MB/s +1.9% LRW, 256 bit: 263.7 MB/s 265.3 MB/s +0.6% XTS, 256 bit: 251.1 MB/s 255.3 MB/s +1.7% Signed-off-by: Mathias Krause Reviewed-by: Huang Ying Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 197 ++++++++++++++++++++++++----- arch/x86/crypto/aesni-intel_glue.c | 22 +++- crypto/Kconfig | 12 +- 3 files changed, 191 insertions(+), 40 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index aafced54df645d..f592e03dc375e4 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -20,6 +20,9 @@ * Wajdi Feghali (wajdi.k.feghali@intel.com) * Copyright (c) 2010, Intel Corporation. * + * Ported x86_64 version to x86: + * Author: Mathias Krause + * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or @@ -95,12 +98,16 @@ enc: .octa 0x2 #define IN IN1 #define KEY %xmm2 #define IV %xmm3 + #define BSWAP_MASK %xmm10 #define CTR %xmm11 #define INC %xmm12 +#ifdef __x86_64__ +#define AREG %rax #define KEYP %rdi #define OUTP %rsi +#define UKEYP OUTP #define INP %rdx #define LEN %rcx #define IVP %r8 @@ -109,6 +116,18 @@ enc: .octa 0x2 #define TKEYP T1 #define T2 %r11 #define TCTR_LOW T2 +#else +#define AREG %eax +#define KEYP %edi +#define OUTP AREG +#define UKEYP OUTP +#define INP %edx +#define LEN %esi +#define IVP %ebp +#define KLEN %ebx +#define T1 %ecx +#define TKEYP T1 +#endif /* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) @@ -1247,10 +1266,11 @@ _key_expansion_256a: shufps $0b10001100, %xmm0, %xmm4 pxor %xmm4, %xmm0 pxor %xmm1, %xmm0 - movaps %xmm0, (%rcx) - add $0x10, %rcx + movaps %xmm0, (TKEYP) + add $0x10, TKEYP ret +.align 4 _key_expansion_192a: pshufd $0b01010101, %xmm1, %xmm1 shufps $0b00010000, %xmm0, %xmm4 @@ -1268,12 +1288,13 @@ _key_expansion_192a: movaps %xmm0, %xmm1 shufps $0b01000100, %xmm0, %xmm6 - movaps %xmm6, (%rcx) + movaps %xmm6, (TKEYP) shufps $0b01001110, %xmm2, %xmm1 - movaps %xmm1, 16(%rcx) - add $0x20, %rcx + movaps %xmm1, 0x10(TKEYP) + add $0x20, TKEYP ret +.align 4 _key_expansion_192b: pshufd $0b01010101, %xmm1, %xmm1 shufps $0b00010000, %xmm0, %xmm4 @@ -1288,10 +1309,11 @@ _key_expansion_192b: pxor %xmm3, %xmm2 pxor %xmm5, %xmm2 - movaps %xmm0, (%rcx) - add $0x10, %rcx + movaps %xmm0, (TKEYP) + add $0x10, TKEYP ret +.align 4 _key_expansion_256b: pshufd $0b10101010, %xmm1, %xmm1 shufps $0b00010000, %xmm2, %xmm4 @@ -1299,8 +1321,8 @@ _key_expansion_256b: shufps $0b10001100, %xmm2, %xmm4 pxor %xmm4, %xmm2 pxor %xmm1, %xmm2 - movaps %xmm2, (%rcx) - add $0x10, %rcx + movaps %xmm2, (TKEYP) + add $0x10, TKEYP ret /* @@ -1308,17 +1330,23 @@ _key_expansion_256b: * unsigned int key_len) */ ENTRY(aesni_set_key) - movups (%rsi), %xmm0 # user key (first 16 bytes) - movaps %xmm0, (%rdi) - lea 0x10(%rdi), %rcx # key addr - movl %edx, 480(%rdi) +#ifndef __x86_64__ + pushl KEYP + movl 8(%esp), KEYP # ctx + movl 12(%esp), UKEYP # in_key + movl 16(%esp), %edx # key_len +#endif + movups (UKEYP), %xmm0 # user key (first 16 bytes) + movaps %xmm0, (KEYP) + lea 0x10(KEYP), TKEYP # key addr + movl %edx, 480(KEYP) pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x cmp $24, %dl jb .Lenc_key128 je .Lenc_key192 - movups 0x10(%rsi), %xmm2 # other user key - movaps %xmm2, (%rcx) - add $0x10, %rcx + movups 0x10(UKEYP), %xmm2 # other user key + movaps %xmm2, (TKEYP) + add $0x10, TKEYP AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 call _key_expansion_256a AESKEYGENASSIST 0x1 %xmm0 %xmm1 @@ -1347,7 +1375,7 @@ ENTRY(aesni_set_key) call _key_expansion_256a jmp .Ldec_key .Lenc_key192: - movq 0x10(%rsi), %xmm2 # other user key + movq 0x10(UKEYP), %xmm2 # other user key AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1 call _key_expansion_192a AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2 @@ -1387,33 +1415,47 @@ ENTRY(aesni_set_key) AESKEYGENASSIST 0x36 %xmm0 %xmm1 # round 10 call _key_expansion_128 .Ldec_key: - sub $0x10, %rcx - movaps (%rdi), %xmm0 - movaps (%rcx), %xmm1 - movaps %xmm0, 240(%rcx) - movaps %xmm1, 240(%rdi) - add $0x10, %rdi - lea 240-16(%rcx), %rsi + sub $0x10, TKEYP + movaps (KEYP), %xmm0 + movaps (TKEYP), %xmm1 + movaps %xmm0, 240(TKEYP) + movaps %xmm1, 240(KEYP) + add $0x10, KEYP + lea 240-16(TKEYP), UKEYP .align 4 .Ldec_key_loop: - movaps (%rdi), %xmm0 + movaps (KEYP), %xmm0 AESIMC %xmm0 %xmm1 - movaps %xmm1, (%rsi) - add $0x10, %rdi - sub $0x10, %rsi - cmp %rcx, %rdi + movaps %xmm1, (UKEYP) + add $0x10, KEYP + sub $0x10, UKEYP + cmp TKEYP, KEYP jb .Ldec_key_loop - xor %rax, %rax + xor AREG, AREG +#ifndef __x86_64__ + popl KEYP +#endif ret /* * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) */ ENTRY(aesni_enc) +#ifndef __x86_64__ + pushl KEYP + pushl KLEN + movl 12(%esp), KEYP + movl 16(%esp), OUTP + movl 20(%esp), INP +#endif movl 480(KEYP), KLEN # key length movups (INP), STATE # input call _aesni_enc1 movups STATE, (OUTP) # output +#ifndef __x86_64__ + popl KLEN + popl KEYP +#endif ret /* @@ -1428,6 +1470,7 @@ ENTRY(aesni_enc) * KEY * TKEYP (T1) */ +.align 4 _aesni_enc1: movaps (KEYP), KEY # key mov KEYP, TKEYP @@ -1490,6 +1533,7 @@ _aesni_enc1: * KEY * TKEYP (T1) */ +.align 4 _aesni_enc4: movaps (KEYP), KEY # key mov KEYP, TKEYP @@ -1583,11 +1627,22 @@ _aesni_enc4: * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src) */ ENTRY(aesni_dec) +#ifndef __x86_64__ + pushl KEYP + pushl KLEN + movl 12(%esp), KEYP + movl 16(%esp), OUTP + movl 20(%esp), INP +#endif mov 480(KEYP), KLEN # key length add $240, KEYP movups (INP), STATE # input call _aesni_dec1 movups STATE, (OUTP) #output +#ifndef __x86_64__ + popl KLEN + popl KEYP +#endif ret /* @@ -1602,6 +1657,7 @@ ENTRY(aesni_dec) * KEY * TKEYP (T1) */ +.align 4 _aesni_dec1: movaps (KEYP), KEY # key mov KEYP, TKEYP @@ -1664,6 +1720,7 @@ _aesni_dec1: * KEY * TKEYP (T1) */ +.align 4 _aesni_dec4: movaps (KEYP), KEY # key mov KEYP, TKEYP @@ -1758,6 +1815,15 @@ _aesni_dec4: * size_t len) */ ENTRY(aesni_ecb_enc) +#ifndef __x86_64__ + pushl LEN + pushl KEYP + pushl KLEN + movl 16(%esp), KEYP + movl 20(%esp), OUTP + movl 24(%esp), INP + movl 28(%esp), LEN +#endif test LEN, LEN # check length jz .Lecb_enc_ret mov 480(KEYP), KLEN @@ -1794,6 +1860,11 @@ ENTRY(aesni_ecb_enc) cmp $16, LEN jge .Lecb_enc_loop1 .Lecb_enc_ret: +#ifndef __x86_64__ + popl KLEN + popl KEYP + popl LEN +#endif ret /* @@ -1801,6 +1872,15 @@ ENTRY(aesni_ecb_enc) * size_t len); */ ENTRY(aesni_ecb_dec) +#ifndef __x86_64__ + pushl LEN + pushl KEYP + pushl KLEN + movl 16(%esp), KEYP + movl 20(%esp), OUTP + movl 24(%esp), INP + movl 28(%esp), LEN +#endif test LEN, LEN jz .Lecb_dec_ret mov 480(KEYP), KLEN @@ -1838,6 +1918,11 @@ ENTRY(aesni_ecb_dec) cmp $16, LEN jge .Lecb_dec_loop1 .Lecb_dec_ret: +#ifndef __x86_64__ + popl KLEN + popl KEYP + popl LEN +#endif ret /* @@ -1845,6 +1930,17 @@ ENTRY(aesni_ecb_dec) * size_t len, u8 *iv) */ ENTRY(aesni_cbc_enc) +#ifndef __x86_64__ + pushl IVP + pushl LEN + pushl KEYP + pushl KLEN + movl 20(%esp), KEYP + movl 24(%esp), OUTP + movl 28(%esp), INP + movl 32(%esp), LEN + movl 36(%esp), IVP +#endif cmp $16, LEN jb .Lcbc_enc_ret mov 480(KEYP), KLEN @@ -1862,6 +1958,12 @@ ENTRY(aesni_cbc_enc) jge .Lcbc_enc_loop movups STATE, (IVP) .Lcbc_enc_ret: +#ifndef __x86_64__ + popl KLEN + popl KEYP + popl LEN + popl IVP +#endif ret /* @@ -1869,6 +1971,17 @@ ENTRY(aesni_cbc_enc) * size_t len, u8 *iv) */ ENTRY(aesni_cbc_dec) +#ifndef __x86_64__ + pushl IVP + pushl LEN + pushl KEYP + pushl KLEN + movl 20(%esp), KEYP + movl 24(%esp), OUTP + movl 28(%esp), INP + movl 32(%esp), LEN + movl 36(%esp), IVP +#endif cmp $16, LEN jb .Lcbc_dec_just_ret mov 480(KEYP), KLEN @@ -1882,16 +1995,30 @@ ENTRY(aesni_cbc_dec) movaps IN1, STATE1 movups 0x10(INP), IN2 movaps IN2, STATE2 +#ifdef __x86_64__ movups 0x20(INP), IN3 movaps IN3, STATE3 movups 0x30(INP), IN4 movaps IN4, STATE4 +#else + movups 0x20(INP), IN1 + movaps IN1, STATE3 + movups 0x30(INP), IN2 + movaps IN2, STATE4 +#endif call _aesni_dec4 pxor IV, STATE1 +#ifdef __x86_64__ pxor IN1, STATE2 pxor IN2, STATE3 pxor IN3, STATE4 movaps IN4, IV +#else + pxor (INP), STATE2 + pxor 0x10(INP), STATE3 + pxor IN1, STATE4 + movaps IN2, IV +#endif movups STATE1, (OUTP) movups STATE2, 0x10(OUTP) movups STATE3, 0x20(OUTP) @@ -1919,8 +2046,15 @@ ENTRY(aesni_cbc_dec) .Lcbc_dec_ret: movups IV, (IVP) .Lcbc_dec_just_ret: +#ifndef __x86_64__ + popl KLEN + popl KEYP + popl LEN + popl IVP +#endif ret +#ifdef __x86_64__ .align 16 .Lbswap_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 @@ -1936,6 +2070,7 @@ ENTRY(aesni_cbc_dec) * INC: == 1, in little endian * BSWAP_MASK == endian swapping mask */ +.align 4 _aesni_inc_init: movaps .Lbswap_mask, BSWAP_MASK movaps IV, CTR @@ -1960,6 +2095,7 @@ _aesni_inc_init: * CTR: == output IV, in little endian * TCTR_LOW: == lower qword of CTR */ +.align 4 _aesni_inc: paddq INC, CTR add $1, TCTR_LOW @@ -2031,3 +2167,4 @@ ENTRY(aesni_ctr_enc) movups IV, (IVP) .Lctr_enc_just_ret: ret +#endif diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 02d349d644238a..8a3b80075216ee 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -94,8 +94,10 @@ asmlinkage void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); +#ifdef CONFIG_X86_64 asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); +#endif /* asmlinkage void aesni_gcm_enc() * void *ctx, AES Key schedule. Starts on a 16 byte boundary. @@ -410,6 +412,7 @@ static struct crypto_alg blk_cbc_alg = { }, }; +#ifdef CONFIG_X86_64 static void ctr_crypt_final(struct crypto_aes_ctx *ctx, struct blkcipher_walk *walk) { @@ -475,6 +478,7 @@ static struct crypto_alg blk_ctr_alg = { }, }, }; +#endif static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, unsigned int key_len) @@ -622,6 +626,7 @@ static struct crypto_alg ablk_cbc_alg = { }, }; +#ifdef CONFIG_X86_64 static int ablk_ctr_init(struct crypto_tfm *tfm) { struct cryptd_ablkcipher *cryptd_tfm; @@ -698,6 +703,7 @@ static struct crypto_alg ablk_rfc3686_ctr_alg = { }, }; #endif +#endif #ifdef HAS_LRW static int ablk_lrw_init(struct crypto_tfm *tfm) @@ -1249,18 +1255,20 @@ static int __init aesni_init(void) goto blk_ecb_err; if ((err = crypto_register_alg(&blk_cbc_alg))) goto blk_cbc_err; - if ((err = crypto_register_alg(&blk_ctr_alg))) - goto blk_ctr_err; if ((err = crypto_register_alg(&ablk_ecb_alg))) goto ablk_ecb_err; if ((err = crypto_register_alg(&ablk_cbc_alg))) goto ablk_cbc_err; +#ifdef CONFIG_X86_64 + if ((err = crypto_register_alg(&blk_ctr_alg))) + goto blk_ctr_err; if ((err = crypto_register_alg(&ablk_ctr_alg))) goto ablk_ctr_err; #ifdef HAS_CTR if ((err = crypto_register_alg(&ablk_rfc3686_ctr_alg))) goto ablk_rfc3686_ctr_err; #endif +#endif #ifdef HAS_LRW if ((err = crypto_register_alg(&ablk_lrw_alg))) goto ablk_lrw_err; @@ -1296,18 +1304,20 @@ static int __init aesni_init(void) crypto_unregister_alg(&ablk_lrw_alg); ablk_lrw_err: #endif +#ifdef CONFIG_X86_64 #ifdef HAS_CTR crypto_unregister_alg(&ablk_rfc3686_ctr_alg); ablk_rfc3686_ctr_err: #endif crypto_unregister_alg(&ablk_ctr_alg); ablk_ctr_err: + crypto_unregister_alg(&blk_ctr_alg); +blk_ctr_err: +#endif crypto_unregister_alg(&ablk_cbc_alg); ablk_cbc_err: crypto_unregister_alg(&ablk_ecb_alg); ablk_ecb_err: - crypto_unregister_alg(&blk_ctr_alg); -blk_ctr_err: crypto_unregister_alg(&blk_cbc_alg); blk_cbc_err: crypto_unregister_alg(&blk_ecb_alg); @@ -1332,13 +1342,15 @@ static void __exit aesni_exit(void) #ifdef HAS_LRW crypto_unregister_alg(&ablk_lrw_alg); #endif +#ifdef CONFIG_X86_64 #ifdef HAS_CTR crypto_unregister_alg(&ablk_rfc3686_ctr_alg); #endif crypto_unregister_alg(&ablk_ctr_alg); + crypto_unregister_alg(&blk_ctr_alg); +#endif crypto_unregister_alg(&ablk_cbc_alg); crypto_unregister_alg(&ablk_ecb_alg); - crypto_unregister_alg(&blk_ctr_alg); crypto_unregister_alg(&blk_cbc_alg); crypto_unregister_alg(&blk_ecb_alg); crypto_unregister_alg(&__aesni_alg); diff --git a/crypto/Kconfig b/crypto/Kconfig index 69437e21217f31..467491df3e3ae6 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -539,8 +539,9 @@ config CRYPTO_AES_X86_64 config CRYPTO_AES_NI_INTEL tristate "AES cipher algorithms (AES-NI)" - depends on (X86 || UML_X86) && 64BIT - select CRYPTO_AES_X86_64 + depends on (X86 || UML_X86) + select CRYPTO_AES_X86_64 if 64BIT + select CRYPTO_AES_586 if !64BIT select CRYPTO_CRYPTD select CRYPTO_ALGAPI select CRYPTO_FPU @@ -563,9 +564,10 @@ config CRYPTO_AES_NI_INTEL See for more information. - In addition to AES cipher algorithm support, the - acceleration for some popular block cipher mode is supported - too, including ECB, CBC, CTR, LRW, PCBC, XTS. + In addition to AES cipher algorithm support, the acceleration + for some popular block cipher mode is supported too, including + ECB, CBC, LRW, PCBC, XTS. The 64 bit version has additional + acceleration for CTR. config CRYPTO_ANUBIS tristate "Anubis cipher algorithm" From 0c3cf4cc9abd854dcc6488719348a75b8f328c54 Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Fri, 19 Nov 2010 16:04:22 +0200 Subject: [PATCH 12/46] crypto: omap-sham - uses digest buffer in request context Currently driver storred digest results in req->results provided by the client. But some clients do not set it until final() call. It leads to crash. Changed to use internal buffer to store temporary digest results. Signed-off-by: Dmitry Kasatkin Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index a081c7c7d03f88..2222370140077a 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -97,6 +97,7 @@ struct omap_sham_reqctx { unsigned long flags; unsigned long op; + u8 digest[SHA1_DIGEST_SIZE]; size_t digcnt; u8 *buffer; size_t bufcnt; @@ -194,7 +195,7 @@ static inline int omap_sham_wait(struct omap_sham_dev *dd, u32 offset, u32 bit) static void omap_sham_copy_hash(struct ahash_request *req, int out) { struct omap_sham_reqctx *ctx = ahash_request_ctx(req); - u32 *hash = (u32 *)req->result; + u32 *hash = (u32 *)ctx->digest; int i; if (likely(ctx->flags & FLAGS_SHA1)) { @@ -453,8 +454,11 @@ static void omap_sham_cleanup(struct ahash_request *req) ctx->flags |= FLAGS_CLEAN; spin_unlock_irqrestore(&dd->lock, flags); - if (ctx->digcnt) + if (ctx->digcnt) { clk_disable(dd->iclk); + memcpy(req->result, ctx->digest, (ctx->flags & FLAGS_SHA1) ? + SHA1_DIGEST_SIZE : MD5_DIGEST_SIZE); + } if (ctx->dma_addr) dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen, @@ -576,6 +580,7 @@ static int omap_sham_final_req(struct omap_sham_dev *dd) static int omap_sham_finish_req_hmac(struct ahash_request *req) { + struct omap_sham_reqctx *ctx = ahash_request_ctx(req); struct omap_sham_ctx *tctx = crypto_tfm_ctx(req->base.tfm); struct omap_sham_hmac_ctx *bctx = tctx->base; int bs = crypto_shash_blocksize(bctx->shash); @@ -590,7 +595,7 @@ static int omap_sham_finish_req_hmac(struct ahash_request *req) return crypto_shash_init(&desc.shash) ?: crypto_shash_update(&desc.shash, bctx->opad, bs) ?: - crypto_shash_finup(&desc.shash, req->result, ds, req->result); + crypto_shash_finup(&desc.shash, ctx->digest, ds, ctx->digest); } static void omap_sham_finish_req(struct ahash_request *req, int err) From c8eb54041acc4dedeaf0d9da34b91b8658386751 Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Fri, 19 Nov 2010 16:04:23 +0200 Subject: [PATCH 13/46] crypto: omap-sham - DMA initialization fixes for off mode DMA parameters for constant data were initialized during driver probe(). It seems that those settings sometimes are lost when devices goes to off mode. This patch makes DMA initialization just before use. It solves off mode problems. Fixes: NB#202786 - Aegis & SHA1 block off mode changes Signed-off-by: Dmitry Kasatkin Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 2222370140077a..9dfbc4ab771136 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -318,6 +318,16 @@ static int omap_sham_xmit_dma(struct omap_sham_dev *dd, dma_addr_t dma_addr, omap_set_dma_src_params(dd->dma_lch, 0, OMAP_DMA_AMODE_POST_INC, dma_addr, 0, 0); + omap_set_dma_dest_params(dd->dma_lch, 0, + OMAP_DMA_AMODE_CONSTANT, + dd->phys_base + SHA_REG_DIN(0), 0, 16); + + omap_set_dma_dest_burst_mode(dd->dma_lch, + OMAP_DMA_DATA_BURST_16); + + omap_set_dma_src_burst_mode(dd->dma_lch, + OMAP_DMA_DATA_BURST_4); + err = omap_sham_write_ctrl(dd, length, final, 1); if (err) return err; @@ -1071,15 +1081,6 @@ static int omap_sham_dma_init(struct omap_sham_dev *dd) dev_err(dd->dev, "Unable to request DMA channel\n"); return err; } - omap_set_dma_dest_params(dd->dma_lch, 0, - OMAP_DMA_AMODE_CONSTANT, - dd->phys_base + SHA_REG_DIN(0), 0, 16); - - omap_set_dma_dest_burst_mode(dd->dma_lch, - OMAP_DMA_DATA_BURST_16); - - omap_set_dma_src_burst_mode(dd->dma_lch, - OMAP_DMA_DATA_BURST_4); return 0; } From 3e133c8bf6a6723377d94bc97aa52596e49a4090 Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Fri, 19 Nov 2010 16:04:24 +0200 Subject: [PATCH 14/46] crypto: omap-sham - error handling improved Introduces DMA error handling. DMA error is returned as a result code of the hash request. Clients needs to handle error codes and may repeat hash calculation attempt. Also in the case of DMA error, SHAM module is set to be re-initialized again. It significantly improves stability against possible HW failures. Signed-off-by: Dmitry Kasatkin Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 67 +++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 23 deletions(-) diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 9dfbc4ab771136..db206284835acc 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -83,6 +83,7 @@ #define FLAGS_INIT 0x0100 #define FLAGS_CPU 0x0200 #define FLAGS_HMAC 0x0400 +#define FLAGS_ERROR 0x0800 /* 3rd byte */ #define FLAGS_BUSY 16 @@ -137,6 +138,7 @@ struct omap_sham_dev { int irq; struct clk *iclk; spinlock_t lock; + int err; int dma; int dma_lch; struct tasklet_struct done_task; @@ -234,10 +236,12 @@ static int omap_sham_write_ctrl(struct omap_sham_dev *dd, size_t length, SHA_REG_MASK_SOFTRESET, SHA_REG_MASK_SOFTRESET); if (omap_sham_wait(dd, SHA_REG_SYSSTATUS, - SHA_REG_SYSSTATUS_RESETDONE)) + SHA_REG_SYSSTATUS_RESETDONE)) { + clk_disable(dd->iclk); return -ETIMEDOUT; - + } dd->flags |= FLAGS_INIT; + dd->err = 0; } } else { omap_sham_write(dd, SHA_REG_DIGCNT, ctx->digcnt); @@ -279,11 +283,12 @@ static int omap_sham_xmit_cpu(struct omap_sham_dev *dd, const u8 *buf, if (err) return err; + /* should be non-zero before next lines to disable clocks later */ + ctx->digcnt += length; + if (omap_sham_wait(dd, SHA_REG_CTRL, SHA_REG_CTRL_INPUT_READY)) return -ETIMEDOUT; - ctx->digcnt += length; - if (final) ctx->flags |= FLAGS_FINAL; /* catch last interrupt */ @@ -303,7 +308,6 @@ static int omap_sham_xmit_dma(struct omap_sham_dev *dd, dma_addr_t dma_addr, dev_dbg(dd->dev, "xmit_dma: digcnt: %d, length: %d, final: %d\n", ctx->digcnt, length, final); - /* flush cache entries related to our page */ if (dma_addr == ctx->dma_addr) dma_sync_single_for_device(dd->dev, dma_addr, length, @@ -411,6 +415,7 @@ static int omap_sham_update_dma_fast(struct omap_sham_dev *dd) { struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); unsigned int length; + int err; ctx->flags |= FLAGS_FAST; @@ -424,7 +429,11 @@ static int omap_sham_update_dma_fast(struct omap_sham_dev *dd) ctx->total -= length; - return omap_sham_xmit_dma(dd, sg_dma_address(ctx->sg), length, 1); + err = omap_sham_xmit_dma(dd, sg_dma_address(ctx->sg), length, 1); + if (err != -EINPROGRESS) + dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE); + + return err; } static int omap_sham_update_cpu(struct omap_sham_dev *dd) @@ -580,9 +589,6 @@ static int omap_sham_final_req(struct omap_sham_dev *dd) ctx->bufcnt = 0; - if (err != -EINPROGRESS) - omap_sham_cleanup(req); - dev_dbg(dd->dev, "final_req: err: %d\n", err); return err; @@ -616,9 +622,11 @@ static void omap_sham_finish_req(struct ahash_request *req, int err) omap_sham_copy_hash(ctx->dd->req, 1); if (ctx->flags & FLAGS_HMAC) err = omap_sham_finish_req_hmac(req); + } else { + ctx->flags |= FLAGS_ERROR; } - if (ctx->flags & FLAGS_FINAL) + if ((ctx->flags & FLAGS_FINAL) || err) omap_sham_cleanup(req); clear_bit(FLAGS_BUSY, &ctx->dd->flags); @@ -776,12 +784,14 @@ static int omap_sham_final(struct ahash_request *req) ctx->flags |= FLAGS_FINUP; - /* OMAP HW accel works only with buffers >= 9 */ - /* HMAC is always >= 9 because of ipad */ - if ((ctx->digcnt + ctx->bufcnt) < 9) - err = omap_sham_final_shash(req); - else if (ctx->bufcnt) - return omap_sham_enqueue(req, OP_FINAL); + if (!(ctx->flags & FLAGS_ERROR)) { + /* OMAP HW accel works only with buffers >= 9 */ + /* HMAC is always >= 9 because of ipad */ + if ((ctx->digcnt + ctx->bufcnt) < 9) + err = omap_sham_final_shash(req); + else if (ctx->bufcnt) + return omap_sham_enqueue(req, OP_FINAL); + } omap_sham_cleanup(req); @@ -851,6 +861,8 @@ static int omap_sham_cra_init_alg(struct crypto_tfm *tfm, const char *alg_base) struct omap_sham_ctx *tctx = crypto_tfm_ctx(tfm); const char *alg_name = crypto_tfm_alg_name(tfm); + pr_info("enter\n"); + /* Allocate a fallback and abort if it failed. */ tctx->fallback = crypto_alloc_shash(alg_name, 0, CRYPTO_ALG_NEED_FALLBACK); @@ -1008,7 +1020,7 @@ static void omap_sham_done_task(unsigned long data) struct omap_sham_dev *dd = (struct omap_sham_dev *)data; struct ahash_request *req = dd->req; struct omap_sham_reqctx *ctx = ahash_request_ctx(req); - int ready = 1; + int ready = 0, err = 0; if (ctx->flags & FLAGS_OUTPUT_READY) { ctx->flags &= ~FLAGS_OUTPUT_READY; @@ -1018,13 +1030,16 @@ static void omap_sham_done_task(unsigned long data) if (dd->flags & FLAGS_DMA_ACTIVE) { dd->flags &= ~FLAGS_DMA_ACTIVE; omap_sham_update_dma_stop(dd); - omap_sham_update_dma_slow(dd); + if (!dd->err) + err = omap_sham_update_dma_slow(dd); } - if (ready && !(dd->flags & FLAGS_DMA_ACTIVE)) { - dev_dbg(dd->dev, "update done\n"); + err = dd->err ? : err; + + if (err != -EINPROGRESS && (ready || err)) { + dev_dbg(dd->dev, "update done: err: %d\n", err); /* finish curent request */ - omap_sham_finish_req(req, 0); + omap_sham_finish_req(req, err); /* start new request */ omap_sham_handle_queue(dd); } @@ -1056,6 +1071,7 @@ static irqreturn_t omap_sham_irq(int irq, void *dev_id) omap_sham_read(dd, SHA_REG_CTRL); ctx->flags |= FLAGS_OUTPUT_READY; + dd->err = 0; tasklet_schedule(&dd->done_task); return IRQ_HANDLED; @@ -1065,8 +1081,13 @@ static void omap_sham_dma_callback(int lch, u16 ch_status, void *data) { struct omap_sham_dev *dd = data; - if (likely(lch == dd->dma_lch)) - tasklet_schedule(&dd->done_task); + if (ch_status != OMAP_DMA_BLOCK_IRQ) { + pr_err("omap-sham DMA error status: 0x%hx\n", ch_status); + dd->err = -EIO; + dd->flags &= ~FLAGS_INIT; /* request to re-initialize */ + } + + tasklet_schedule(&dd->done_task); } static int omap_sham_dma_init(struct omap_sham_dev *dd) From a5d87237bb15eed8449e5a30c0bbe626e0e7f43d Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Fri, 19 Nov 2010 16:04:25 +0200 Subject: [PATCH 15/46] crypto: omap-sham - removed redundunt locking Locking for queuing and dequeuing is combined. test_and_set_bit() is also replaced with checking under dd->lock. Signed-off-by: Dmitry Kasatkin Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 47 +++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index db206284835acc..6340c5ef4712c8 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -84,9 +84,7 @@ #define FLAGS_CPU 0x0200 #define FLAGS_HMAC 0x0400 #define FLAGS_ERROR 0x0800 - -/* 3rd byte */ -#define FLAGS_BUSY 16 +#define FLAGS_BUSY 0x1000 #define OP_UPDATE 1 #define OP_FINAL 2 @@ -629,32 +627,37 @@ static void omap_sham_finish_req(struct ahash_request *req, int err) if ((ctx->flags & FLAGS_FINAL) || err) omap_sham_cleanup(req); - clear_bit(FLAGS_BUSY, &ctx->dd->flags); + ctx->dd->flags &= ~FLAGS_BUSY; if (req->base.complete) req->base.complete(&req->base, err); } -static int omap_sham_handle_queue(struct omap_sham_dev *dd) +static int omap_sham_handle_queue(struct omap_sham_dev *dd, + struct ahash_request *req) { struct crypto_async_request *async_req, *backlog; struct omap_sham_reqctx *ctx; - struct ahash_request *req, *prev_req; + struct ahash_request *prev_req; unsigned long flags; - int err = 0; - - if (test_and_set_bit(FLAGS_BUSY, &dd->flags)) - return 0; + int err = 0, ret = 0; spin_lock_irqsave(&dd->lock, flags); - backlog = crypto_get_backlog(&dd->queue); + if (req) + ret = ahash_enqueue_request(&dd->queue, req); + if (dd->flags & FLAGS_BUSY) { + spin_unlock_irqrestore(&dd->lock, flags); + return ret; + } async_req = crypto_dequeue_request(&dd->queue); - if (!async_req) - clear_bit(FLAGS_BUSY, &dd->flags); + if (async_req) { + dd->flags |= FLAGS_BUSY; + backlog = crypto_get_backlog(&dd->queue); + } spin_unlock_irqrestore(&dd->lock, flags); if (!async_req) - return 0; + return ret; if (backlog) backlog->complete(backlog, -EINPROGRESS); @@ -690,7 +693,7 @@ static int omap_sham_handle_queue(struct omap_sham_dev *dd) dev_dbg(dd->dev, "exit, err: %d\n", err); - return err; + return ret; } static int omap_sham_enqueue(struct ahash_request *req, unsigned int op) @@ -698,18 +701,10 @@ static int omap_sham_enqueue(struct ahash_request *req, unsigned int op) struct omap_sham_reqctx *ctx = ahash_request_ctx(req); struct omap_sham_ctx *tctx = crypto_tfm_ctx(req->base.tfm); struct omap_sham_dev *dd = tctx->dd; - unsigned long flags; - int err; ctx->op = op; - spin_lock_irqsave(&dd->lock, flags); - err = ahash_enqueue_request(&dd->queue, req); - spin_unlock_irqrestore(&dd->lock, flags); - - omap_sham_handle_queue(dd); - - return err; + return omap_sham_handle_queue(dd, req); } static int omap_sham_update(struct ahash_request *req) @@ -1041,7 +1036,7 @@ static void omap_sham_done_task(unsigned long data) /* finish curent request */ omap_sham_finish_req(req, err); /* start new request */ - omap_sham_handle_queue(dd); + omap_sham_handle_queue(dd, NULL); } } @@ -1049,7 +1044,7 @@ static void omap_sham_queue_task(unsigned long data) { struct omap_sham_dev *dd = (struct omap_sham_dev *)data; - omap_sham_handle_queue(dd); + omap_sham_handle_queue(dd, NULL); } static irqreturn_t omap_sham_irq(int irq, void *dev_id) From 798eed5d9204b01862985ba0643ce5cf84114072 Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Fri, 19 Nov 2010 16:04:26 +0200 Subject: [PATCH 16/46] crypto: omap-sham - crypto_ahash_final() now not need to be called. According to the Herbert Xu, client may not always call crypto_ahash_final(). In the case of error in hash calculation resources will be automatically cleaned up. But if no hash calculation error happens and client will not call crypto_ahash_final() at all, then internal buffer will not be freed, and clocks will not be disabled. This patch provides support for atomic crypto_ahash_update() call. Clocks are now enabled and disabled per update request. Data buffer is now allocated as a part of request context. Client is obligated to free it with crypto_free_ahash(). Signed-off-by: Dmitry Kasatkin Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 168 ++++++++++++++++++------------------- 1 file changed, 82 insertions(+), 86 deletions(-) diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 6340c5ef4712c8..85d62777453814 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -89,6 +89,11 @@ #define OP_UPDATE 1 #define OP_FINAL 2 +#define OMAP_ALIGN_MASK (sizeof(u32)-1) +#define OMAP_ALIGNED __attribute__((aligned(sizeof(u32)))) + +#define BUFLEN PAGE_SIZE + struct omap_sham_dev; struct omap_sham_reqctx { @@ -96,9 +101,8 @@ struct omap_sham_reqctx { unsigned long flags; unsigned long op; - u8 digest[SHA1_DIGEST_SIZE]; + u8 digest[SHA1_DIGEST_SIZE] OMAP_ALIGNED; size_t digcnt; - u8 *buffer; size_t bufcnt; size_t buflen; dma_addr_t dma_addr; @@ -107,6 +111,8 @@ struct omap_sham_reqctx { struct scatterlist *sg; unsigned int offset; /* offset in current sg */ unsigned int total; /* total request */ + + u8 buffer[0] OMAP_ALIGNED; }; struct omap_sham_hmac_ctx { @@ -219,31 +225,33 @@ static void omap_sham_copy_hash(struct ahash_request *req, int out) } } -static int omap_sham_write_ctrl(struct omap_sham_dev *dd, size_t length, - int final, int dma) +static int omap_sham_hw_init(struct omap_sham_dev *dd) { - struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); - u32 val = length << 5, mask; + clk_enable(dd->iclk); - if (unlikely(!ctx->digcnt)) { + if (!(dd->flags & FLAGS_INIT)) { + omap_sham_write_mask(dd, SHA_REG_MASK, + SHA_REG_MASK_SOFTRESET, SHA_REG_MASK_SOFTRESET); - clk_enable(dd->iclk); + if (omap_sham_wait(dd, SHA_REG_SYSSTATUS, + SHA_REG_SYSSTATUS_RESETDONE)) + return -ETIMEDOUT; - if (!(dd->flags & FLAGS_INIT)) { - omap_sham_write_mask(dd, SHA_REG_MASK, - SHA_REG_MASK_SOFTRESET, SHA_REG_MASK_SOFTRESET); + dd->flags |= FLAGS_INIT; + dd->err = 0; + } - if (omap_sham_wait(dd, SHA_REG_SYSSTATUS, - SHA_REG_SYSSTATUS_RESETDONE)) { - clk_disable(dd->iclk); - return -ETIMEDOUT; - } - dd->flags |= FLAGS_INIT; - dd->err = 0; - } - } else { + return 0; +} + +static void omap_sham_write_ctrl(struct omap_sham_dev *dd, size_t length, + int final, int dma) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); + u32 val = length << 5, mask; + + if (likely(ctx->digcnt)) omap_sham_write(dd, SHA_REG_DIGCNT, ctx->digcnt); - } omap_sham_write_mask(dd, SHA_REG_MASK, SHA_REG_MASK_IT_EN | (dma ? SHA_REG_MASK_DMA_EN : 0), @@ -263,23 +271,19 @@ static int omap_sham_write_ctrl(struct omap_sham_dev *dd, size_t length, SHA_REG_CTRL_ALGO | SHA_REG_CTRL_LENGTH; omap_sham_write_mask(dd, SHA_REG_CTRL, val, mask); - - return 0; } static int omap_sham_xmit_cpu(struct omap_sham_dev *dd, const u8 *buf, size_t length, int final) { struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); - int err, count, len32; + int count, len32; const u32 *buffer = (const u32 *)buf; dev_dbg(dd->dev, "xmit_cpu: digcnt: %d, length: %d, final: %d\n", ctx->digcnt, length, final); - err = omap_sham_write_ctrl(dd, length, final, 0); - if (err) - return err; + omap_sham_write_ctrl(dd, length, final, 0); /* should be non-zero before next lines to disable clocks later */ ctx->digcnt += length; @@ -302,14 +306,10 @@ static int omap_sham_xmit_dma(struct omap_sham_dev *dd, dma_addr_t dma_addr, size_t length, int final) { struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); - int err, len32; + int len32; dev_dbg(dd->dev, "xmit_dma: digcnt: %d, length: %d, final: %d\n", ctx->digcnt, length, final); - /* flush cache entries related to our page */ - if (dma_addr == ctx->dma_addr) - dma_sync_single_for_device(dd->dev, dma_addr, length, - DMA_TO_DEVICE); len32 = DIV_ROUND_UP(length, sizeof(u32)); @@ -320,19 +320,7 @@ static int omap_sham_xmit_dma(struct omap_sham_dev *dd, dma_addr_t dma_addr, omap_set_dma_src_params(dd->dma_lch, 0, OMAP_DMA_AMODE_POST_INC, dma_addr, 0, 0); - omap_set_dma_dest_params(dd->dma_lch, 0, - OMAP_DMA_AMODE_CONSTANT, - dd->phys_base + SHA_REG_DIN(0), 0, 16); - - omap_set_dma_dest_burst_mode(dd->dma_lch, - OMAP_DMA_DATA_BURST_16); - - omap_set_dma_src_burst_mode(dd->dma_lch, - OMAP_DMA_DATA_BURST_4); - - err = omap_sham_write_ctrl(dd, length, final, 1); - if (err) - return err; + omap_sham_write_ctrl(dd, length, final, 1); ctx->digcnt += length; @@ -384,6 +372,21 @@ static size_t omap_sham_append_sg(struct omap_sham_reqctx *ctx) return 0; } +static int omap_sham_xmit_dma_map(struct omap_sham_dev *dd, + struct omap_sham_reqctx *ctx, + size_t length, int final) +{ + ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer, ctx->buflen, + DMA_TO_DEVICE); + if (dma_mapping_error(dd->dev, ctx->dma_addr)) { + dev_err(dd->dev, "dma %u bytes error\n", ctx->buflen); + return -EINVAL; + } + + /* next call does not fail... so no unmap in the case of error */ + return omap_sham_xmit_dma(dd, ctx->dma_addr, length, final); +} + static int omap_sham_update_dma_slow(struct omap_sham_dev *dd) { struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); @@ -403,7 +406,7 @@ static int omap_sham_update_dma_slow(struct omap_sham_dev *dd) if (final || (ctx->bufcnt == ctx->buflen && ctx->total)) { count = ctx->bufcnt; ctx->bufcnt = 0; - return omap_sham_xmit_dma(dd, ctx->dma_addr, count, final); + return omap_sham_xmit_dma_map(dd, ctx, count, final); } return 0; @@ -413,7 +416,6 @@ static int omap_sham_update_dma_fast(struct omap_sham_dev *dd) { struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); unsigned int length; - int err; ctx->flags |= FLAGS_FAST; @@ -427,11 +429,8 @@ static int omap_sham_update_dma_fast(struct omap_sham_dev *dd) ctx->total -= length; - err = omap_sham_xmit_dma(dd, sg_dma_address(ctx->sg), length, 1); - if (err != -EINPROGRESS) - dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE); - - return err; + /* next call does not fail... so no unmap in the case of error */ + return omap_sham_xmit_dma(dd, sg_dma_address(ctx->sg), length, 1); } static int omap_sham_update_cpu(struct omap_sham_dev *dd) @@ -453,6 +452,9 @@ static int omap_sham_update_dma_stop(struct omap_sham_dev *dd) omap_stop_dma(dd->dma_lch); if (ctx->flags & FLAGS_FAST) dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE); + else + dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen, + DMA_TO_DEVICE); return 0; } @@ -471,18 +473,9 @@ static void omap_sham_cleanup(struct ahash_request *req) ctx->flags |= FLAGS_CLEAN; spin_unlock_irqrestore(&dd->lock, flags); - if (ctx->digcnt) { - clk_disable(dd->iclk); + if (ctx->digcnt) memcpy(req->result, ctx->digest, (ctx->flags & FLAGS_SHA1) ? SHA1_DIGEST_SIZE : MD5_DIGEST_SIZE); - } - - if (ctx->dma_addr) - dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen, - DMA_TO_DEVICE); - - if (ctx->buffer) - free_page((unsigned long)ctx->buffer); dev_dbg(dd->dev, "digcnt: %d, bufcnt: %d\n", ctx->digcnt, ctx->bufcnt); } @@ -520,21 +513,7 @@ static int omap_sham_init(struct ahash_request *req) ctx->bufcnt = 0; ctx->digcnt = 0; - - ctx->buflen = PAGE_SIZE; - ctx->buffer = (void *)__get_free_page( - (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC); - if (!ctx->buffer) - return -ENOMEM; - - ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer, ctx->buflen, - DMA_TO_DEVICE); - if (dma_mapping_error(dd->dev, ctx->dma_addr)) { - dev_err(dd->dev, "dma %u bytes error\n", ctx->buflen); - free_page((unsigned long)ctx->buffer); - return -EINVAL; - } + ctx->buflen = BUFLEN; if (tctx->flags & FLAGS_HMAC) { struct omap_sham_hmac_ctx *bctx = tctx->base; @@ -581,7 +560,7 @@ static int omap_sham_final_req(struct omap_sham_dev *dd) use_dma = 0; if (use_dma) - err = omap_sham_xmit_dma(dd, ctx->dma_addr, ctx->bufcnt, 1); + err = omap_sham_xmit_dma_map(dd, ctx, ctx->bufcnt, 1); else err = omap_sham_xmit_cpu(dd, ctx->buffer, ctx->bufcnt, 1); @@ -615,6 +594,7 @@ static int omap_sham_finish_req_hmac(struct ahash_request *req) static void omap_sham_finish_req(struct ahash_request *req, int err) { struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + struct omap_sham_dev *dd = ctx->dd; if (!err) { omap_sham_copy_hash(ctx->dd->req, 1); @@ -627,7 +607,8 @@ static void omap_sham_finish_req(struct ahash_request *req, int err) if ((ctx->flags & FLAGS_FINAL) || err) omap_sham_cleanup(req); - ctx->dd->flags &= ~FLAGS_BUSY; + clk_disable(dd->iclk); + dd->flags &= ~FLAGS_BUSY; if (req->base.complete) req->base.complete(&req->base, err); @@ -636,7 +617,7 @@ static void omap_sham_finish_req(struct ahash_request *req, int err) static int omap_sham_handle_queue(struct omap_sham_dev *dd, struct ahash_request *req) { - struct crypto_async_request *async_req, *backlog; + struct crypto_async_request *async_req, *backlog = 0; struct omap_sham_reqctx *ctx; struct ahash_request *prev_req; unsigned long flags; @@ -672,7 +653,22 @@ static int omap_sham_handle_queue(struct omap_sham_dev *dd, dev_dbg(dd->dev, "handling new req, op: %lu, nbytes: %d\n", ctx->op, req->nbytes); - if (req != prev_req && ctx->digcnt) + + err = omap_sham_hw_init(dd); + if (err) + goto err1; + + omap_set_dma_dest_params(dd->dma_lch, 0, + OMAP_DMA_AMODE_CONSTANT, + dd->phys_base + SHA_REG_DIN(0), 0, 16); + + omap_set_dma_dest_burst_mode(dd->dma_lch, + OMAP_DMA_DATA_BURST_16); + + omap_set_dma_src_burst_mode(dd->dma_lch, + OMAP_DMA_DATA_BURST_4); + + if (ctx->digcnt) /* request has changed - restore hash */ omap_sham_copy_hash(req, 0); @@ -684,7 +680,7 @@ static int omap_sham_handle_queue(struct omap_sham_dev *dd, } else if (ctx->op == OP_FINAL) { err = omap_sham_final_req(dd); } - +err1: if (err != -EINPROGRESS) { /* done_task will not finish it, so do it here */ omap_sham_finish_req(req, err); @@ -868,7 +864,7 @@ static int omap_sham_cra_init_alg(struct crypto_tfm *tfm, const char *alg_base) } crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), - sizeof(struct omap_sham_reqctx)); + sizeof(struct omap_sham_reqctx) + BUFLEN); if (alg_base) { struct omap_sham_hmac_ctx *bctx = tctx->base; @@ -954,7 +950,7 @@ static struct ahash_alg algs[] = { CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = SHA1_BLOCK_SIZE, .cra_ctxsize = sizeof(struct omap_sham_ctx), - .cra_alignmask = 0, + .cra_alignmask = OMAP_ALIGN_MASK, .cra_module = THIS_MODULE, .cra_init = omap_sham_cra_init, .cra_exit = omap_sham_cra_exit, @@ -978,7 +974,7 @@ static struct ahash_alg algs[] = { .cra_blocksize = SHA1_BLOCK_SIZE, .cra_ctxsize = sizeof(struct omap_sham_ctx) + sizeof(struct omap_sham_hmac_ctx), - .cra_alignmask = 0, + .cra_alignmask = OMAP_ALIGN_MASK, .cra_module = THIS_MODULE, .cra_init = omap_sham_cra_sha1_init, .cra_exit = omap_sham_cra_exit, @@ -1002,7 +998,7 @@ static struct ahash_alg algs[] = { .cra_blocksize = SHA1_BLOCK_SIZE, .cra_ctxsize = sizeof(struct omap_sham_ctx) + sizeof(struct omap_sham_hmac_ctx), - .cra_alignmask = 0, + .cra_alignmask = OMAP_ALIGN_MASK, .cra_module = THIS_MODULE, .cra_init = omap_sham_cra_md5_init, .cra_exit = omap_sham_cra_exit, From 3c8d758ab528317ecd6d91f8651170ffd2331899 Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Fri, 19 Nov 2010 16:04:27 +0200 Subject: [PATCH 17/46] crypto: omap-sham - hash-in-progress is stored in hw format Hash-in-progress is now stored in hw format. Only on final call, hash is converted to correct format. Speedup copy procedure and will allow to use OMAP burst mode. Signed-off-by: Dmitry Kasatkin Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 85d62777453814..c8d30eb4794a75 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -204,24 +204,35 @@ static void omap_sham_copy_hash(struct ahash_request *req, int out) u32 *hash = (u32 *)ctx->digest; int i; + /* MD5 is almost unused. So copy sha1 size to reduce code */ + for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++) { + if (out) + hash[i] = omap_sham_read(ctx->dd, + SHA_REG_DIGEST(i)); + else + omap_sham_write(ctx->dd, + SHA_REG_DIGEST(i), hash[i]); + } +} + +static void omap_sham_copy_ready_hash(struct ahash_request *req) +{ + struct omap_sham_reqctx *ctx = ahash_request_ctx(req); + u32 *in = (u32 *)ctx->digest; + u32 *hash = (u32 *)req->result; + int i; + + if (!hash) + return; + if (likely(ctx->flags & FLAGS_SHA1)) { /* SHA1 results are in big endian */ for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++) - if (out) - hash[i] = be32_to_cpu(omap_sham_read(ctx->dd, - SHA_REG_DIGEST(i))); - else - omap_sham_write(ctx->dd, SHA_REG_DIGEST(i), - cpu_to_be32(hash[i])); + hash[i] = be32_to_cpu(in[i]); } else { /* MD5 results are in little endian */ for (i = 0; i < MD5_DIGEST_SIZE / sizeof(u32); i++) - if (out) - hash[i] = le32_to_cpu(omap_sham_read(ctx->dd, - SHA_REG_DIGEST(i))); - else - omap_sham_write(ctx->dd, SHA_REG_DIGEST(i), - cpu_to_le32(hash[i])); + hash[i] = le32_to_cpu(in[i]); } } @@ -474,8 +485,7 @@ static void omap_sham_cleanup(struct ahash_request *req) spin_unlock_irqrestore(&dd->lock, flags); if (ctx->digcnt) - memcpy(req->result, ctx->digest, (ctx->flags & FLAGS_SHA1) ? - SHA1_DIGEST_SIZE : MD5_DIGEST_SIZE); + omap_sham_copy_ready_hash(req); dev_dbg(dd->dev, "digcnt: %d, bufcnt: %d\n", ctx->digcnt, ctx->bufcnt); } From a55b290b0e41e02d1969589c5d77d966ac2b7ec8 Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Fri, 19 Nov 2010 16:04:28 +0200 Subject: [PATCH 18/46] crypto: omap-sham - FLAGS_FIRST is redundant and removed bufcnt is 0 if it was no update requests before, which is exact meaning of FLAGS_FIRST. Signed-off-by: Dmitry Kasatkin Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index c8d30eb4794a75..d88d7ebfffa771 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -72,7 +72,6 @@ #define DEFAULT_TIMEOUT_INTERVAL HZ -#define FLAGS_FIRST 0x0001 #define FLAGS_FINUP 0x0002 #define FLAGS_FINAL 0x0004 #define FLAGS_FAST 0x0008 @@ -513,8 +512,6 @@ static int omap_sham_init(struct ahash_request *req) ctx->flags = 0; - ctx->flags |= FLAGS_FIRST; - dev_dbg(dd->dev, "init: digest size: %d\n", crypto_ahash_digestsize(tfm)); @@ -739,12 +736,9 @@ static int omap_sham_update(struct ahash_request *req) /* may be can use faster functions */ int aligned = IS_ALIGNED((u32)ctx->sg->offset, sizeof(u32)); - - if (aligned && (ctx->flags & FLAGS_FIRST)) + if (aligned) /* digest: first and final */ ctx->flags |= FLAGS_FAST; - - ctx->flags &= ~FLAGS_FIRST; } } else if (ctx->bufcnt + ctx->total <= ctx->buflen) { /* if not finaup -> not fast */ From 887c883eea9867535059f3c8414c8cfc952ccff1 Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Fri, 19 Nov 2010 16:04:29 +0200 Subject: [PATCH 19/46] crypto: omap-sham - zero-copy scatterlist handling If scatterlist have more than one entry, current driver uses aligned buffer to copy data to to accelerator to tackle possible issues with DMA and SHA buffer alignment. This commit adds more intelligence to verify SG alignment and possibility to use DMA directly on the data without using copy buffer. Signed-off-by: Dmitry Kasatkin Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 87 ++++++++++++++++++++++++++------------ 1 file changed, 61 insertions(+), 26 deletions(-) diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index d88d7ebfffa771..eb988e7a2fd9be 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -74,7 +74,7 @@ #define FLAGS_FINUP 0x0002 #define FLAGS_FINAL 0x0004 -#define FLAGS_FAST 0x0008 +#define FLAGS_SG 0x0008 #define FLAGS_SHA1 0x0010 #define FLAGS_DMA_ACTIVE 0x0020 #define FLAGS_OUTPUT_READY 0x0040 @@ -393,6 +393,8 @@ static int omap_sham_xmit_dma_map(struct omap_sham_dev *dd, return -EINVAL; } + ctx->flags &= ~FLAGS_SG; + /* next call does not fail... so no unmap in the case of error */ return omap_sham_xmit_dma(dd, ctx->dma_addr, length, final); } @@ -403,9 +405,6 @@ static int omap_sham_update_dma_slow(struct omap_sham_dev *dd) unsigned int final; size_t count; - if (!ctx->total) - return 0; - omap_sham_append_sg(ctx); final = (ctx->flags & FLAGS_FINUP) && !ctx->total; @@ -422,25 +421,62 @@ static int omap_sham_update_dma_slow(struct omap_sham_dev *dd) return 0; } -static int omap_sham_update_dma_fast(struct omap_sham_dev *dd) +/* Start address alignment */ +#define SG_AA(sg) (IS_ALIGNED(sg->offset, sizeof(u32))) +/* SHA1 block size alignment */ +#define SG_SA(sg) (IS_ALIGNED(sg->length, SHA1_MD5_BLOCK_SIZE)) + +static int omap_sham_update_dma_start(struct omap_sham_dev *dd) { struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); - unsigned int length; + unsigned int length, final, tail; + struct scatterlist *sg; + + if (!ctx->total) + return 0; + + if (ctx->bufcnt || ctx->offset) + return omap_sham_update_dma_slow(dd); + + dev_dbg(dd->dev, "fast: digcnt: %d, bufcnt: %u, total: %u\n", + ctx->digcnt, ctx->bufcnt, ctx->total); + + sg = ctx->sg; - ctx->flags |= FLAGS_FAST; + if (!SG_AA(sg)) + return omap_sham_update_dma_slow(dd); - length = min(ctx->total, sg_dma_len(ctx->sg)); - ctx->total = length; + if (!sg_is_last(sg) && !SG_SA(sg)) + /* size is not SHA1_BLOCK_SIZE aligned */ + return omap_sham_update_dma_slow(dd); + + length = min(ctx->total, sg->length); + + if (sg_is_last(sg)) { + if (!(ctx->flags & FLAGS_FINUP)) { + /* not last sg must be SHA1_MD5_BLOCK_SIZE aligned */ + tail = length & (SHA1_MD5_BLOCK_SIZE - 1); + /* without finup() we need one block to close hash */ + if (!tail) + tail = SHA1_MD5_BLOCK_SIZE; + length -= tail; + } + } if (!dma_map_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE)) { dev_err(dd->dev, "dma_map_sg error\n"); return -EINVAL; } + ctx->flags |= FLAGS_SG; + ctx->total -= length; + ctx->offset = length; /* offset where to start slow */ + + final = (ctx->flags & FLAGS_FINUP) && !ctx->total; /* next call does not fail... so no unmap in the case of error */ - return omap_sham_xmit_dma(dd, sg_dma_address(ctx->sg), length, 1); + return omap_sham_xmit_dma(dd, sg_dma_address(ctx->sg), length, final); } static int omap_sham_update_cpu(struct omap_sham_dev *dd) @@ -460,11 +496,17 @@ static int omap_sham_update_dma_stop(struct omap_sham_dev *dd) struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); omap_stop_dma(dd->dma_lch); - if (ctx->flags & FLAGS_FAST) + if (ctx->flags & FLAGS_SG) { dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE); - else + if (ctx->sg->length == ctx->offset) { + ctx->sg = sg_next(ctx->sg); + if (ctx->sg) + ctx->offset = 0; + } + } else { dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen, DMA_TO_DEVICE); + } return 0; } @@ -545,10 +587,8 @@ static int omap_sham_update_req(struct omap_sham_dev *dd) if (ctx->flags & FLAGS_CPU) err = omap_sham_update_cpu(dd); - else if (ctx->flags & FLAGS_FAST) - err = omap_sham_update_dma_fast(dd); else - err = omap_sham_update_dma_slow(dd); + err = omap_sham_update_dma_start(dd); /* wait for dma completion before can take more data */ dev_dbg(dd->dev, "update: err: %d, digcnt: %d\n", err, ctx->digcnt); @@ -730,18 +770,13 @@ static int omap_sham_update(struct ahash_request *req) */ omap_sham_append_sg(ctx); return 0; - } else if (ctx->bufcnt + ctx->total <= 64) { + } else if (ctx->bufcnt + ctx->total <= SHA1_MD5_BLOCK_SIZE) { + /* + * faster to use CPU for short transfers + */ ctx->flags |= FLAGS_CPU; - } else if (!ctx->bufcnt && sg_is_last(ctx->sg)) { - /* may be can use faster functions */ - int aligned = IS_ALIGNED((u32)ctx->sg->offset, - sizeof(u32)); - if (aligned) - /* digest: first and final */ - ctx->flags |= FLAGS_FAST; } - } else if (ctx->bufcnt + ctx->total <= ctx->buflen) { - /* if not finaup -> not fast */ + } else if (ctx->bufcnt + ctx->total < ctx->buflen) { omap_sham_append_sg(ctx); return 0; } @@ -1026,7 +1061,7 @@ static void omap_sham_done_task(unsigned long data) dd->flags &= ~FLAGS_DMA_ACTIVE; omap_sham_update_dma_stop(dd); if (!dd->err) - err = omap_sham_update_dma_slow(dd); + err = omap_sham_update_dma_start(dd); } err = dd->err ? : err; From c762be637503b833012457087133c1292fd6056d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 28 Nov 2010 16:28:01 +0800 Subject: [PATCH 20/46] crypto: algif_skcipher - Pass on error from af_alg_make_sg The error returned from af_alg_make_sg is currently lost and we always pass on -EINVAL. This patch pases on the underlying error. Signed-off-by: Herbert Xu --- crypto/algif_skcipher.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 211c956952ca46..9b2f440e88a62a 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -472,7 +472,8 @@ static int skcipher_recvmsg(struct kiocb *unused, struct socket *sock, goto unlock; used = af_alg_make_sg(&ctx->rsgl, from, used, 1); - if (used < 0) + err = used; + if (err < 0) goto unlock; ablkcipher_request_set_crypt(&ctx->req, sg, From 559ad0ff1368baea14dbc3207d55b02bd69bda4b Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Mon, 29 Nov 2010 08:35:39 +0800 Subject: [PATCH 21/46] crypto: aesni-intel - Fixed build error on x86-32 Exclude AES-GCM code for x86-32 due to heavy usage of 64-bit registers not available on x86-32. While at it, fixed unregister order in aesni_exit(). Signed-off-by: Mathias Krause Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 5 ++++- arch/x86/crypto/aesni-intel_glue.c | 26 +++++++++++++------------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index f592e03dc375e4..d528fde219d2d2 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -32,6 +32,7 @@ #include #include +#ifdef __x86_64__ .data POLY: .octa 0xC2000000000000000000000000000001 TWOONE: .octa 0x00000001000000000000000000000001 @@ -84,6 +85,7 @@ enc: .octa 0x2 #define arg8 STACK_OFFSET+16(%r14) #define arg9 STACK_OFFSET+24(%r14) #define arg10 STACK_OFFSET+32(%r14) +#endif #define STATE1 %xmm0 @@ -130,6 +132,7 @@ enc: .octa 0x2 #endif +#ifdef __x86_64__ /* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) * * @@ -1255,7 +1258,7 @@ _return_T_done_encrypt: pop %r13 pop %r12 ret - +#endif _key_expansion_128: diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 8a3b80075216ee..e1e60c7d5813c8 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -97,7 +97,6 @@ asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, #ifdef CONFIG_X86_64 asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in, unsigned int len, u8 *iv); -#endif /* asmlinkage void aesni_gcm_enc() * void *ctx, AES Key schedule. Starts on a 16 byte boundary. @@ -149,6 +148,7 @@ aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm) PTR_ALIGN((u8 *) crypto_tfm_ctx(crypto_aead_tfm(tfm)), AESNI_ALIGN); } +#endif static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) { @@ -822,6 +822,7 @@ static struct crypto_alg ablk_xts_alg = { }; #endif +#ifdef CONFIG_X86_64 static int rfc4106_init(struct crypto_tfm *tfm) { struct cryptd_aead *cryptd_tfm; @@ -1237,6 +1238,7 @@ static struct crypto_alg __rfc4106_alg = { }, }, }; +#endif static int __init aesni_init(void) { @@ -1264,6 +1266,10 @@ static int __init aesni_init(void) goto blk_ctr_err; if ((err = crypto_register_alg(&ablk_ctr_alg))) goto ablk_ctr_err; + if ((err = crypto_register_alg(&__rfc4106_alg))) + goto __aead_gcm_err; + if ((err = crypto_register_alg(&rfc4106_alg))) + goto aead_gcm_err; #ifdef HAS_CTR if ((err = crypto_register_alg(&ablk_rfc3686_ctr_alg))) goto ablk_rfc3686_ctr_err; @@ -1281,19 +1287,9 @@ static int __init aesni_init(void) if ((err = crypto_register_alg(&ablk_xts_alg))) goto ablk_xts_err; #endif - err = crypto_register_alg(&__rfc4106_alg); - if (err) - goto __aead_gcm_err; - err = crypto_register_alg(&rfc4106_alg); - if (err) - goto aead_gcm_err; return err; -aead_gcm_err: - crypto_unregister_alg(&__rfc4106_alg); -__aead_gcm_err: #ifdef HAS_XTS - crypto_unregister_alg(&ablk_xts_alg); ablk_xts_err: #endif #ifdef HAS_PCBC @@ -1309,6 +1305,10 @@ static int __init aesni_init(void) crypto_unregister_alg(&ablk_rfc3686_ctr_alg); ablk_rfc3686_ctr_err: #endif + crypto_unregister_alg(&rfc4106_alg); +aead_gcm_err: + crypto_unregister_alg(&__rfc4106_alg); +__aead_gcm_err: crypto_unregister_alg(&ablk_ctr_alg); ablk_ctr_err: crypto_unregister_alg(&blk_ctr_alg); @@ -1331,8 +1331,6 @@ static int __init aesni_init(void) static void __exit aesni_exit(void) { - crypto_unregister_alg(&__rfc4106_alg); - crypto_unregister_alg(&rfc4106_alg); #ifdef HAS_XTS crypto_unregister_alg(&ablk_xts_alg); #endif @@ -1346,6 +1344,8 @@ static void __exit aesni_exit(void) #ifdef HAS_CTR crypto_unregister_alg(&ablk_rfc3686_ctr_alg); #endif + crypto_unregister_alg(&rfc4106_alg); + crypto_unregister_alg(&__rfc4106_alg); crypto_unregister_alg(&ablk_ctr_alg); crypto_unregister_alg(&blk_ctr_alg); #endif From 7451708f39db19a8303bb7fb95f00aca9f673cb5 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 29 Nov 2010 22:56:03 +0800 Subject: [PATCH 22/46] crypto: af_alg - Add dependency on NET Add missing dependency on NET since we require sockets for our interface. Should really be a select but kconfig doesn't like that: net/Kconfig:6:error: found recursive dependency: NET -> NETWORK_FILESYSTEMS -> AFS_FS -> AF_RXRPC -> CRYPTO -> CRYPTO_USER_API_HASH -> CRYPTO_USER_API -> NET Reported-by: Zimny Lech Signed-off-by: Herbert Xu --- crypto/Kconfig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/crypto/Kconfig b/crypto/Kconfig index 467491df3e3ae6..96b0e5542121b3 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -848,6 +848,7 @@ config CRYPTO_USER_API config CRYPTO_USER_API_HASH tristate "User-space interface for hash algorithms" + depends on NET select CRYPTO_HASH select CRYPTO_USER_API help @@ -856,6 +857,7 @@ config CRYPTO_USER_API_HASH config CRYPTO_USER_API_SKCIPHER tristate "User-space interface for symmetric key cipher algorithms" + depends on NET select CRYPTO_BLKCIPHER select CRYPTO_USER_API help From 0f6bb83cb12e4617e696ffa566f3fc6c092686e2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 30 Nov 2010 16:49:02 +0800 Subject: [PATCH 23/46] crypto: algif_skcipher - Fixed overflow when sndbuf is page aligned When sk_sndbuf is not a multiple of PAGE_SIZE, the limit tests in sendmsg fail as the limit variable becomes negative and we're using an unsigned comparison. The same thing can happen if sk_sndbuf is lowered after a sendmsg call. This patch fixes this by always taking the signed maximum of limit and 0 before we perform the comparison. It also rounds the value of sk_sndbuf down to a multiple of PAGE_SIZE so that we don't end up allocating a page only to use a small number of bytes in it because we're bound by sk_sndbuf. Signed-off-by: Herbert Xu --- crypto/algif_skcipher.c | 32 +++++++++++--------------------- 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 9b2f440e88a62a..1f33480e32605a 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -52,12 +52,18 @@ struct skcipher_ctx { #define MAX_SGL_ENTS ((PAGE_SIZE - sizeof(struct skcipher_sg_list)) / \ sizeof(struct scatterlist) - 1) -static inline bool skcipher_writable(struct sock *sk) +static inline int skcipher_sndbuf(struct sock *sk) { struct alg_sock *ask = alg_sk(sk); struct skcipher_ctx *ctx = ask->private; - return ctx->used + PAGE_SIZE <= max_t(int, sk->sk_sndbuf, PAGE_SIZE); + return max_t(int, max_t(int, sk->sk_sndbuf & PAGE_MASK, PAGE_SIZE) - + ctx->used, 0); +} + +static inline bool skcipher_writable(struct sock *sk) +{ + return PAGE_SIZE <= skcipher_sndbuf(sk); } static int skcipher_alloc_sgl(struct sock *sk) @@ -245,7 +251,6 @@ static int skcipher_sendmsg(struct kiocb *unused, struct socket *sock, struct af_alg_control con = {}; long copied = 0; bool enc = 0; - int limit; int err; int i; @@ -281,9 +286,6 @@ static int skcipher_sendmsg(struct kiocb *unused, struct socket *sock, memcpy(ctx->iv, con.iv->iv, ivsize); } - limit = max_t(int, sk->sk_sndbuf, PAGE_SIZE); - limit -= ctx->used; - while (size) { struct scatterlist *sg; unsigned long len = size; @@ -309,20 +311,16 @@ static int skcipher_sendmsg(struct kiocb *unused, struct socket *sock, ctx->used += len; copied += len; size -= len; - limit -= len; continue; } - if (limit < PAGE_SIZE) { + if (!skcipher_writable(sk)) { err = skcipher_wait_for_wmem(sk, msg->msg_flags); if (err) goto unlock; - - limit = max_t(int, sk->sk_sndbuf, PAGE_SIZE); - limit -= ctx->used; } - len = min_t(unsigned long, len, limit); + len = min_t(unsigned long, len, skcipher_sndbuf(sk)); err = skcipher_alloc_sgl(sk); if (err) @@ -352,7 +350,6 @@ static int skcipher_sendmsg(struct kiocb *unused, struct socket *sock, ctx->used += plen; copied += plen; size -= plen; - limit -= plen; sgl->cur++; } while (len && sgl->cur < MAX_SGL_ENTS); @@ -380,7 +377,6 @@ static ssize_t skcipher_sendpage(struct socket *sock, struct page *page, struct skcipher_ctx *ctx = ask->private; struct skcipher_sg_list *sgl; int err = -EINVAL; - int limit; lock_sock(sk); if (!ctx->more && ctx->used) @@ -389,16 +385,10 @@ static ssize_t skcipher_sendpage(struct socket *sock, struct page *page, if (!size) goto done; - limit = max_t(int, sk->sk_sndbuf, PAGE_SIZE); - limit -= ctx->used; - - if (limit < PAGE_SIZE) { + if (!skcipher_writable(sk)) { err = skcipher_wait_for_wmem(sk, flags); if (err) goto unlock; - - limit = max_t(int, sk->sk_sndbuf, PAGE_SIZE); - limit -= ctx->used; } err = skcipher_alloc_sgl(sk); From bc97e57eb21f8db55bf0e1f182d384e75b2e3c99 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 30 Nov 2010 17:04:31 +0800 Subject: [PATCH 24/46] crypto: algif_skcipher - Handle unaligned receive buffer As it is if user-space passes through a receive buffer that's not aligned to to the cipher block size, we'll end up encrypting or decrypting a partial block which causes a spurious EINVAL to be returned. This patch fixes this by moving the partial block test after the af_alg_make_sg call. Signed-off-by: Herbert Xu --- crypto/algif_skcipher.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 1f33480e32605a..6a6dfc062d2a47 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -454,17 +454,17 @@ static int skcipher_recvmsg(struct kiocb *unused, struct socket *sock, used = min_t(unsigned long, used, seglen); + used = af_alg_make_sg(&ctx->rsgl, from, used, 1); + err = used; + if (err < 0) + goto unlock; + if (ctx->more || used < ctx->used) used -= used % bs; err = -EINVAL; if (!used) - goto unlock; - - used = af_alg_make_sg(&ctx->rsgl, from, used, 1); - err = used; - if (err < 0) - goto unlock; + goto free; ablkcipher_request_set_crypt(&ctx->req, sg, ctx->rsgl.sg, used, @@ -476,6 +476,7 @@ static int skcipher_recvmsg(struct kiocb *unused, struct socket *sock, crypto_ablkcipher_decrypt(&ctx->req), &ctx->completion); +free: af_alg_free_sg(&ctx->rsgl); if (err) From 079f2f7485648c1397a35575fae45908a0db5ba6 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 22 Nov 2010 11:25:50 +0100 Subject: [PATCH 25/46] crypto: scatterwalk - Add scatterwalk_crypto_chain helper A lot of crypto algorithms implement their own chaining function. So add a generic one that can be used from all the algorithms that need scatterlist chaining. Signed-off-by: Steffen Klassert Signed-off-by: Herbert Xu --- include/crypto/scatterwalk.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/include/crypto/scatterwalk.h b/include/crypto/scatterwalk.h index 833d208c25d641..4fd95a323beb29 100644 --- a/include/crypto/scatterwalk.h +++ b/include/crypto/scatterwalk.h @@ -68,6 +68,21 @@ static inline struct scatterlist *scatterwalk_sg_next(struct scatterlist *sg) return (++sg)->length ? sg : (void *)sg_page(sg); } +static inline void scatterwalk_crypto_chain(struct scatterlist *head, + struct scatterlist *sg, + int chain, int num) +{ + if (chain) { + head->length += sg->length; + sg = scatterwalk_sg_next(sg); + } + + if (sg) + scatterwalk_sg_chain(head, num, sg); + else + sg_mark_end(head); +} + static inline unsigned long scatterwalk_samebuf(struct scatter_walk *walk_in, struct scatter_walk *walk_out) { From c920fa6051c1e7eb3733eaefd01e5bcdddb3d4c8 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Mon, 22 Nov 2010 11:26:54 +0100 Subject: [PATCH 26/46] crypto: Use scatterwalk_crypto_chain Use scatterwalk_crypto_chain in favor of locally defined chaining functions. Signed-off-by: Steffen Klassert Signed-off-by: Herbert Xu --- crypto/authenc.c | 22 ++++------------------ crypto/eseqiv.c | 18 ++---------------- crypto/gcm.c | 19 ++----------------- 3 files changed, 8 insertions(+), 51 deletions(-) diff --git a/crypto/authenc.c b/crypto/authenc.c index a5a22cfcd07bd5..5ef7ba6b6a76a3 100644 --- a/crypto/authenc.c +++ b/crypto/authenc.c @@ -107,20 +107,6 @@ static int crypto_authenc_setkey(struct crypto_aead *authenc, const u8 *key, goto out; } -static void authenc_chain(struct scatterlist *head, struct scatterlist *sg, - int chain) -{ - if (chain) { - head->length += sg->length; - sg = scatterwalk_sg_next(sg); - } - - if (sg) - scatterwalk_sg_chain(head, 2, sg); - else - sg_mark_end(head); -} - static void authenc_geniv_ahash_update_done(struct crypto_async_request *areq, int err) { @@ -345,7 +331,7 @@ static int crypto_authenc_genicv(struct aead_request *req, u8 *iv, if (ivsize) { sg_init_table(cipher, 2); sg_set_buf(cipher, iv, ivsize); - authenc_chain(cipher, dst, vdst == iv + ivsize); + scatterwalk_crypto_chain(cipher, dst, vdst == iv + ivsize, 2); dst = cipher; cryptlen += ivsize; } @@ -354,7 +340,7 @@ static int crypto_authenc_genicv(struct aead_request *req, u8 *iv, authenc_ahash_fn = crypto_authenc_ahash; sg_init_table(asg, 2); sg_set_page(asg, sg_page(assoc), assoc->length, assoc->offset); - authenc_chain(asg, dst, 0); + scatterwalk_crypto_chain(asg, dst, 0, 2); dst = asg; cryptlen += req->assoclen; } @@ -499,7 +485,7 @@ static int crypto_authenc_iverify(struct aead_request *req, u8 *iv, if (ivsize) { sg_init_table(cipher, 2); sg_set_buf(cipher, iv, ivsize); - authenc_chain(cipher, src, vsrc == iv + ivsize); + scatterwalk_crypto_chain(cipher, src, vsrc == iv + ivsize, 2); src = cipher; cryptlen += ivsize; } @@ -508,7 +494,7 @@ static int crypto_authenc_iverify(struct aead_request *req, u8 *iv, authenc_ahash_fn = crypto_authenc_ahash; sg_init_table(asg, 2); sg_set_page(asg, sg_page(assoc), assoc->length, assoc->offset); - authenc_chain(asg, src, 0); + scatterwalk_crypto_chain(asg, src, 0, 2); src = asg; cryptlen += req->assoclen; } diff --git a/crypto/eseqiv.c b/crypto/eseqiv.c index 3ca3b669d5d501..42ce9f570aecce 100644 --- a/crypto/eseqiv.c +++ b/crypto/eseqiv.c @@ -62,20 +62,6 @@ static void eseqiv_complete(struct crypto_async_request *base, int err) skcipher_givcrypt_complete(req, err); } -static void eseqiv_chain(struct scatterlist *head, struct scatterlist *sg, - int chain) -{ - if (chain) { - head->length += sg->length; - sg = scatterwalk_sg_next(sg); - } - - if (sg) - scatterwalk_sg_chain(head, 2, sg); - else - sg_mark_end(head); -} - static int eseqiv_givencrypt(struct skcipher_givcrypt_request *req) { struct crypto_ablkcipher *geniv = skcipher_givcrypt_reqtfm(req); @@ -124,13 +110,13 @@ static int eseqiv_givencrypt(struct skcipher_givcrypt_request *req) sg_init_table(reqctx->src, 2); sg_set_buf(reqctx->src, giv, ivsize); - eseqiv_chain(reqctx->src, osrc, vsrc == giv + ivsize); + scatterwalk_crypto_chain(reqctx->src, osrc, vsrc == giv + ivsize, 2); dst = reqctx->src; if (osrc != odst) { sg_init_table(reqctx->dst, 2); sg_set_buf(reqctx->dst, giv, ivsize); - eseqiv_chain(reqctx->dst, odst, vdst == giv + ivsize); + scatterwalk_crypto_chain(reqctx->dst, odst, vdst == giv + ivsize, 2); dst = reqctx->dst; } diff --git a/crypto/gcm.c b/crypto/gcm.c index 2f5fbba6576c23..1a252639ef9154 100644 --- a/crypto/gcm.c +++ b/crypto/gcm.c @@ -1102,21 +1102,6 @@ static int crypto_rfc4543_setauthsize(struct crypto_aead *parent, return crypto_aead_setauthsize(ctx->child, authsize); } -/* this is the same as crypto_authenc_chain */ -static void crypto_rfc4543_chain(struct scatterlist *head, - struct scatterlist *sg, int chain) -{ - if (chain) { - head->length += sg->length; - sg = scatterwalk_sg_next(sg); - } - - if (sg) - scatterwalk_sg_chain(head, 2, sg); - else - sg_mark_end(head); -} - static struct aead_request *crypto_rfc4543_crypt(struct aead_request *req, int enc) { @@ -1154,13 +1139,13 @@ static struct aead_request *crypto_rfc4543_crypt(struct aead_request *req, sg_init_table(payload, 2); sg_set_buf(payload, req->iv, 8); - crypto_rfc4543_chain(payload, dst, vdst == req->iv + 8); + scatterwalk_crypto_chain(payload, dst, vdst == req->iv + 8, 2); assoclen += 8 + req->cryptlen - (enc ? 0 : authsize); sg_init_table(assoc, 2); sg_set_page(assoc, sg_page(req->assoc), req->assoc->length, req->assoc->offset); - crypto_rfc4543_chain(assoc, payload, 0); + scatterwalk_crypto_chain(assoc, payload, 0, 2); aead_request_set_tfm(subreq, ctx->child); aead_request_set_callback(subreq, req->base.flags, req->base.complete, From 3bd2e2216bc82a83fc5048f8e61d2d22dd5d9cda Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Tue, 30 Nov 2010 10:13:27 +0200 Subject: [PATCH 27/46] crypto: omap-aes - DMA initialization fixes for OMAP off mode DMA parameters for constant data were initialized during driver probe(). It seems that those settings sometimes are lost when devices goes to off mode. This patch makes DMA initialization just before use. It solves off mode problems. Signed-off-by: Dmitry Kasatkin Signed-off-by: Herbert Xu --- drivers/crypto/omap-aes.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index 799ca517c121d9..41c91f3c7f1408 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -339,18 +339,6 @@ static int omap_aes_dma_init(struct omap_aes_dev *dd) goto err_dma_out; } - omap_set_dma_dest_params(dd->dma_lch_in, 0, OMAP_DMA_AMODE_CONSTANT, - dd->phys_base + AES_REG_DATA, 0, 4); - - omap_set_dma_dest_burst_mode(dd->dma_lch_in, OMAP_DMA_DATA_BURST_4); - omap_set_dma_src_burst_mode(dd->dma_lch_in, OMAP_DMA_DATA_BURST_4); - - omap_set_dma_src_params(dd->dma_lch_out, 0, OMAP_DMA_AMODE_CONSTANT, - dd->phys_base + AES_REG_DATA, 0, 4); - - omap_set_dma_src_burst_mode(dd->dma_lch_out, OMAP_DMA_DATA_BURST_4); - omap_set_dma_dest_burst_mode(dd->dma_lch_out, OMAP_DMA_DATA_BURST_4); - return 0; err_dma_out: @@ -443,6 +431,12 @@ static int omap_aes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in, len32 = DIV_ROUND_UP(length, sizeof(u32)); /* IN */ + omap_set_dma_dest_params(dd->dma_lch_in, 0, OMAP_DMA_AMODE_CONSTANT, + dd->phys_base + AES_REG_DATA, 0, 4); + + omap_set_dma_dest_burst_mode(dd->dma_lch_in, OMAP_DMA_DATA_BURST_4); + omap_set_dma_src_burst_mode(dd->dma_lch_in, OMAP_DMA_DATA_BURST_4); + omap_set_dma_transfer_params(dd->dma_lch_in, OMAP_DMA_DATA_TYPE_S32, len32, 1, OMAP_DMA_SYNC_PACKET, dd->dma_in, OMAP_DMA_DST_SYNC); @@ -451,6 +445,12 @@ static int omap_aes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in, dma_addr_in, 0, 0); /* OUT */ + omap_set_dma_src_params(dd->dma_lch_out, 0, OMAP_DMA_AMODE_CONSTANT, + dd->phys_base + AES_REG_DATA, 0, 4); + + omap_set_dma_src_burst_mode(dd->dma_lch_out, OMAP_DMA_DATA_BURST_4); + omap_set_dma_dest_burst_mode(dd->dma_lch_out, OMAP_DMA_DATA_BURST_4); + omap_set_dma_transfer_params(dd->dma_lch_out, OMAP_DMA_DATA_TYPE_S32, len32, 1, OMAP_DMA_SYNC_PACKET, dd->dma_out, OMAP_DMA_SRC_SYNC); From eeb2b202c5b886b76c3bfa76f47e450fa69389fb Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Tue, 30 Nov 2010 10:13:28 +0200 Subject: [PATCH 28/46] crypto: omap-aes - redundant locking is removed Submitting request involved double locking for enqueuing and dequeuing. Now it is done under the same lock. FLAGS_BUSY is now handled under the same lock. Signed-off-by: Dmitry Kasatkin Signed-off-by: Herbert Xu --- drivers/crypto/omap-aes.c | 70 ++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 38 deletions(-) diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index 41c91f3c7f1408..2d8f72eaf8963a 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -78,7 +78,7 @@ #define FLAGS_NEW_IV BIT(5) #define FLAGS_INIT BIT(6) #define FLAGS_FAST BIT(7) -#define FLAGS_BUSY 8 +#define FLAGS_BUSY BIT(8) struct omap_aes_ctx { struct omap_aes_dev *dd; @@ -179,9 +179,8 @@ static int omap_aes_wait(struct omap_aes_dev *dd, u32 offset, u32 bit) static int omap_aes_hw_init(struct omap_aes_dev *dd) { - int err = 0; - clk_enable(dd->iclk); + if (!(dd->flags & FLAGS_INIT)) { /* is it necessary to reset before every operation? */ omap_aes_write_mask(dd, AES_REG_MASK, AES_REG_MASK_SOFTRESET, @@ -193,18 +192,15 @@ static int omap_aes_hw_init(struct omap_aes_dev *dd) __asm__ __volatile__("nop"); __asm__ __volatile__("nop"); - err = omap_aes_wait(dd, AES_REG_SYSSTATUS, - AES_REG_SYSSTATUS_RESETDONE); - if (!err) - dd->flags |= FLAGS_INIT; + if (omap_aes_wait(dd, AES_REG_SYSSTATUS, + AES_REG_SYSSTATUS_RESETDONE)) { + clk_disable(dd->iclk); + return -ETIMEDOUT; + } + dd->flags |= FLAGS_INIT; } - return err; -} - -static void omap_aes_hw_cleanup(struct omap_aes_dev *dd) -{ - clk_disable(dd->iclk); + return 0; } static void omap_aes_write_ctrl(struct omap_aes_dev *dd) @@ -538,6 +534,8 @@ static void omap_aes_finish_req(struct omap_aes_dev *dd, int err) pr_debug("err: %d\n", err); + dd->flags &= ~FLAGS_BUSY; + ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(dd->req)); if (!dd->total) @@ -553,7 +551,7 @@ static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd) omap_aes_write_mask(dd, AES_REG_MASK, 0, AES_REG_MASK_START); - omap_aes_hw_cleanup(dd); + clk_disable(dd->iclk); omap_stop_dma(dd->dma_lch_in); omap_stop_dma(dd->dma_lch_out); @@ -580,22 +578,26 @@ static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd) return err; } -static int omap_aes_handle_req(struct omap_aes_dev *dd) +static int omap_aes_handle_req(struct omap_aes_dev *dd, + struct ablkcipher_request *req) { struct crypto_async_request *async_req, *backlog; struct omap_aes_ctx *ctx; struct omap_aes_reqctx *rctx; - struct ablkcipher_request *req; unsigned long flags; - - if (dd->total) - goto start; + int err = 0; spin_lock_irqsave(&dd->lock, flags); + if (req) + err = ablkcipher_enqueue_request(&dd->queue, req); + if (dd->flags & FLAGS_BUSY) { + spin_unlock_irqrestore(&dd->lock, flags); + return err; + } backlog = crypto_get_backlog(&dd->queue); async_req = crypto_dequeue_request(&dd->queue); - if (!async_req) - clear_bit(FLAGS_BUSY, &dd->flags); + if (async_req) + dd->flags |= FLAGS_BUSY; spin_unlock_irqrestore(&dd->lock, flags); if (!async_req) @@ -637,20 +639,23 @@ static int omap_aes_handle_req(struct omap_aes_dev *dd) if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE)) pr_err("request size is not exact amount of AES blocks\n"); -start: - return omap_aes_crypt_dma_start(dd); + omap_aes_crypt_dma_start(dd); + + return err; } static void omap_aes_task(unsigned long data) { struct omap_aes_dev *dd = (struct omap_aes_dev *)data; - int err; pr_debug("enter\n"); - err = omap_aes_crypt_dma_stop(dd); + omap_aes_crypt_dma_stop(dd); - err = omap_aes_handle_req(dd); + if (dd->total) + omap_aes_crypt_dma_start(dd); + else + omap_aes_handle_req(dd, NULL); pr_debug("exit\n"); } @@ -661,8 +666,6 @@ static int omap_aes_crypt(struct ablkcipher_request *req, unsigned long mode) crypto_ablkcipher_reqtfm(req)); struct omap_aes_reqctx *rctx = ablkcipher_request_ctx(req); struct omap_aes_dev *dd; - unsigned long flags; - int err; pr_debug("nbytes: %d, enc: %d, cbc: %d\n", req->nbytes, !!(mode & FLAGS_ENCRYPT), @@ -674,16 +677,7 @@ static int omap_aes_crypt(struct ablkcipher_request *req, unsigned long mode) rctx->mode = mode; - spin_lock_irqsave(&dd->lock, flags); - err = ablkcipher_enqueue_request(&dd->queue, req); - spin_unlock_irqrestore(&dd->lock, flags); - - if (!test_and_set_bit(FLAGS_BUSY, &dd->flags)) - omap_aes_handle_req(dd); - - pr_debug("exit\n"); - - return err; + return omap_aes_handle_req(dd, req); } /* ********************** ALG API ************************************ */ From 21fe9767f3bd56fd9a271dc43b93cd4608d47f4a Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Tue, 30 Nov 2010 10:13:29 +0200 Subject: [PATCH 29/46] crypto: omap-aes - error handling implementation improved Previous version had not error handling. Request could remain uncompleted. Also in the case of DMA error, FLAGS_INIT is unset and accelerator will be initialized again. Buffer size allignment is checked. Signed-off-by: Dmitry Kasatkin Signed-off-by: Herbert Xu --- drivers/crypto/omap-aes.c | 134 ++++++++++++++++++++++++++------------ 1 file changed, 93 insertions(+), 41 deletions(-) diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index 2d8f72eaf8963a..704cc701ab4266 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -103,14 +103,16 @@ struct omap_aes_dev { struct omap_aes_ctx *ctx; struct device *dev; unsigned long flags; + int err; u32 *iv; u32 ctrl; - spinlock_t lock; - struct crypto_queue queue; + spinlock_t lock; + struct crypto_queue queue; - struct tasklet_struct task; + struct tasklet_struct done_task; + struct tasklet_struct queue_task; struct ablkcipher_request *req; size_t total; @@ -198,24 +200,30 @@ static int omap_aes_hw_init(struct omap_aes_dev *dd) return -ETIMEDOUT; } dd->flags |= FLAGS_INIT; + dd->err = 0; } return 0; } -static void omap_aes_write_ctrl(struct omap_aes_dev *dd) +static int omap_aes_write_ctrl(struct omap_aes_dev *dd) { unsigned int key32; - int i; + int i, err, init = dd->flags & FLAGS_INIT; u32 val, mask; + err = omap_aes_hw_init(dd); + if (err) + return err; + val = FLD_VAL(((dd->ctx->keylen >> 3) - 1), 4, 3); if (dd->flags & FLAGS_CBC) val |= AES_REG_CTRL_CBC; if (dd->flags & FLAGS_ENCRYPT) val |= AES_REG_CTRL_DIRECTION; - if (dd->ctrl == val && !(dd->flags & FLAGS_NEW_IV) && + /* check if hw state & mode have not changed */ + if (init && dd->ctrl == val && !(dd->flags & FLAGS_NEW_IV) && !(dd->ctx->flags & FLAGS_NEW_KEY)) goto out; @@ -257,6 +265,8 @@ static void omap_aes_write_ctrl(struct omap_aes_dev *dd) /* start DMA or disable idle mode */ omap_aes_write_mask(dd, AES_REG_MASK, AES_REG_MASK_START, AES_REG_MASK_START); + + return 0; } static struct omap_aes_dev *omap_aes_find_dev(struct omap_aes_ctx *ctx) @@ -284,8 +294,16 @@ static void omap_aes_dma_callback(int lch, u16 ch_status, void *data) { struct omap_aes_dev *dd = data; - if (lch == dd->dma_lch_out) - tasklet_schedule(&dd->task); + if (ch_status != OMAP_DMA_BLOCK_IRQ) { + pr_err("omap-aes DMA error status: 0x%hx\n", ch_status); + dd->err = -EIO; + dd->flags &= ~FLAGS_INIT; /* request to re-initialize */ + } else if (lch == dd->dma_lch_in) { + return; + } + + /* dma_lch_out - completed */ + tasklet_schedule(&dd->done_task); } static int omap_aes_dma_init(struct omap_aes_dev *dd) @@ -390,6 +408,11 @@ static int sg_copy(struct scatterlist **sg, size_t *offset, void *buf, if (!count) return off; + /* + * buflen and total are AES_BLOCK_SIZE size aligned, + * so count should be also aligned + */ + sg_copy_buf(buf + off, *sg, *offset, count, out); off += count; @@ -415,6 +438,7 @@ static int omap_aes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in, struct omap_aes_ctx *ctx = crypto_tfm_ctx(tfm); struct omap_aes_dev *dd = ctx->dd; int len32; + int err; pr_debug("len: %d\n", length); @@ -454,11 +478,13 @@ static int omap_aes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in, omap_set_dma_dest_params(dd->dma_lch_out, 0, OMAP_DMA_AMODE_POST_INC, dma_addr_out, 0, 0); + err = omap_aes_write_ctrl(dd); + if (err) + return err; + omap_start_dma(dd->dma_lch_in); omap_start_dma(dd->dma_lch_out); - omap_aes_write_ctrl(dd); - return 0; } @@ -484,8 +510,10 @@ static int omap_aes_crypt_dma_start(struct omap_aes_dev *dd) count = min(dd->total, sg_dma_len(dd->in_sg)); count = min(count, sg_dma_len(dd->out_sg)); - if (count != dd->total) + if (count != dd->total) { + pr_err("request length != buffer length\n"); return -EINVAL; + } pr_debug("fast\n"); @@ -521,25 +549,28 @@ static int omap_aes_crypt_dma_start(struct omap_aes_dev *dd) dd->total -= count; - err = omap_aes_hw_init(dd); - err = omap_aes_crypt_dma(tfm, addr_in, addr_out, count); + if (err) { + dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE); + dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_TO_DEVICE); + } return err; } static void omap_aes_finish_req(struct omap_aes_dev *dd, int err) { + struct ablkcipher_request *req = dd->req; struct omap_aes_ctx *ctx; pr_debug("err: %d\n", err); dd->flags &= ~FLAGS_BUSY; - ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(dd->req)); + ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req)); - if (!dd->total) - dd->req->base.complete(&dd->req->base, err); + if (req->base.complete) + req->base.complete(&req->base, err); } static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd) @@ -551,11 +582,11 @@ static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd) omap_aes_write_mask(dd, AES_REG_MASK, 0, AES_REG_MASK_START); - clk_disable(dd->iclk); - omap_stop_dma(dd->dma_lch_in); omap_stop_dma(dd->dma_lch_out); + clk_disable(dd->iclk); + if (dd->flags & FLAGS_FAST) { dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_FROM_DEVICE); dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE); @@ -572,27 +603,24 @@ static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd) } } - if (err || !dd->total) - omap_aes_finish_req(dd, err); - return err; } -static int omap_aes_handle_req(struct omap_aes_dev *dd, +static int omap_aes_handle_queue(struct omap_aes_dev *dd, struct ablkcipher_request *req) { struct crypto_async_request *async_req, *backlog; struct omap_aes_ctx *ctx; struct omap_aes_reqctx *rctx; unsigned long flags; - int err = 0; + int err, ret = 0; spin_lock_irqsave(&dd->lock, flags); if (req) - err = ablkcipher_enqueue_request(&dd->queue, req); + ret = ablkcipher_enqueue_request(&dd->queue, req); if (dd->flags & FLAGS_BUSY) { spin_unlock_irqrestore(&dd->lock, flags); - return err; + return ret; } backlog = crypto_get_backlog(&dd->queue); async_req = crypto_dequeue_request(&dd->queue); @@ -601,7 +629,7 @@ static int omap_aes_handle_req(struct omap_aes_dev *dd, spin_unlock_irqrestore(&dd->lock, flags); if (!async_req) - return 0; + return ret; if (backlog) backlog->complete(backlog, -EINPROGRESS); @@ -636,30 +664,46 @@ static int omap_aes_handle_req(struct omap_aes_dev *dd, ctx->flags |= FLAGS_NEW_KEY; } - if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE)) - pr_err("request size is not exact amount of AES blocks\n"); - - omap_aes_crypt_dma_start(dd); + err = omap_aes_crypt_dma_start(dd); + if (err) { + /* aes_task will not finish it, so do it here */ + omap_aes_finish_req(dd, err); + tasklet_schedule(&dd->queue_task); + } - return err; + return ret; /* return ret, which is enqueue return value */ } -static void omap_aes_task(unsigned long data) +static void omap_aes_done_task(unsigned long data) { struct omap_aes_dev *dd = (struct omap_aes_dev *)data; + int err; pr_debug("enter\n"); - omap_aes_crypt_dma_stop(dd); + err = omap_aes_crypt_dma_stop(dd); - if (dd->total) - omap_aes_crypt_dma_start(dd); - else - omap_aes_handle_req(dd, NULL); + err = dd->err ? : err; + + if (dd->total && !err) { + err = omap_aes_crypt_dma_start(dd); + if (!err) + return; /* DMA started. Not fininishing. */ + } + + omap_aes_finish_req(dd, err); + omap_aes_handle_queue(dd, NULL); pr_debug("exit\n"); } +static void omap_aes_queue_task(unsigned long data) +{ + struct omap_aes_dev *dd = (struct omap_aes_dev *)data; + + omap_aes_handle_queue(dd, NULL); +} + static int omap_aes_crypt(struct ablkcipher_request *req, unsigned long mode) { struct omap_aes_ctx *ctx = crypto_ablkcipher_ctx( @@ -671,13 +715,18 @@ static int omap_aes_crypt(struct ablkcipher_request *req, unsigned long mode) !!(mode & FLAGS_ENCRYPT), !!(mode & FLAGS_CBC)); + if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE)) { + pr_err("request size is not exact amount of AES blocks\n"); + return -EINVAL; + } + dd = omap_aes_find_dev(ctx); if (!dd) return -ENODEV; rctx->mode = mode; - return omap_aes_handle_req(dd, req); + return omap_aes_handle_queue(dd, req); } /* ********************** ALG API ************************************ */ @@ -843,7 +892,8 @@ static int omap_aes_probe(struct platform_device *pdev) (reg & AES_REG_REV_MAJOR) >> 4, reg & AES_REG_REV_MINOR); clk_disable(dd->iclk); - tasklet_init(&dd->task, omap_aes_task, (unsigned long)dd); + tasklet_init(&dd->done_task, omap_aes_done_task, (unsigned long)dd); + tasklet_init(&dd->queue_task, omap_aes_queue_task, (unsigned long)dd); err = omap_aes_dma_init(dd); if (err) @@ -870,7 +920,8 @@ static int omap_aes_probe(struct platform_device *pdev) crypto_unregister_alg(&algs[j]); omap_aes_dma_cleanup(dd); err_dma: - tasklet_kill(&dd->task); + tasklet_kill(&dd->done_task); + tasklet_kill(&dd->queue_task); iounmap(dd->io_base); err_io: clk_put(dd->iclk); @@ -897,7 +948,8 @@ static int omap_aes_remove(struct platform_device *pdev) for (i = 0; i < ARRAY_SIZE(algs); i++) crypto_unregister_alg(&algs[i]); - tasklet_kill(&dd->task); + tasklet_kill(&dd->done_task); + tasklet_kill(&dd->queue_task); omap_aes_dma_cleanup(dd); iounmap(dd->io_base); clk_put(dd->iclk); From 67a730ce449561f6df838f0b38a2b72cbf4e3c4c Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Tue, 30 Nov 2010 10:13:30 +0200 Subject: [PATCH 30/46] crypto: omap-aes - unnecessary code removed Key and IV should always be set before AES operation. So no need to check if it has changed or not. Signed-off-by: Dmitry Kasatkin Signed-off-by: Herbert Xu --- drivers/crypto/omap-aes.c | 70 ++++++++++----------------------------- 1 file changed, 17 insertions(+), 53 deletions(-) diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index 704cc701ab4266..0b21dcef028929 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -74,11 +74,9 @@ #define FLAGS_CBC BIT(1) #define FLAGS_GIV BIT(2) -#define FLAGS_NEW_KEY BIT(4) -#define FLAGS_NEW_IV BIT(5) -#define FLAGS_INIT BIT(6) -#define FLAGS_FAST BIT(7) -#define FLAGS_BUSY BIT(8) +#define FLAGS_INIT BIT(4) +#define FLAGS_FAST BIT(5) +#define FLAGS_BUSY BIT(6) struct omap_aes_ctx { struct omap_aes_dev *dd; @@ -105,9 +103,6 @@ struct omap_aes_dev { unsigned long flags; int err; - u32 *iv; - u32 ctrl; - spinlock_t lock; struct crypto_queue queue; @@ -209,28 +204,13 @@ static int omap_aes_hw_init(struct omap_aes_dev *dd) static int omap_aes_write_ctrl(struct omap_aes_dev *dd) { unsigned int key32; - int i, err, init = dd->flags & FLAGS_INIT; + int i, err; u32 val, mask; err = omap_aes_hw_init(dd); if (err) return err; - val = FLD_VAL(((dd->ctx->keylen >> 3) - 1), 4, 3); - if (dd->flags & FLAGS_CBC) - val |= AES_REG_CTRL_CBC; - if (dd->flags & FLAGS_ENCRYPT) - val |= AES_REG_CTRL_DIRECTION; - - /* check if hw state & mode have not changed */ - if (init && dd->ctrl == val && !(dd->flags & FLAGS_NEW_IV) && - !(dd->ctx->flags & FLAGS_NEW_KEY)) - goto out; - - /* only need to write control registers for new settings */ - - dd->ctrl = val; - val = 0; if (dd->dma_lch_out >= 0) val |= AES_REG_MASK_DMA_OUT_EN; @@ -241,27 +221,28 @@ static int omap_aes_write_ctrl(struct omap_aes_dev *dd) omap_aes_write_mask(dd, AES_REG_MASK, val, mask); - pr_debug("Set key\n"); key32 = dd->ctx->keylen / sizeof(u32); - /* set a key */ + + /* it seems a key should always be set even if it has not changed */ for (i = 0; i < key32; i++) { omap_aes_write(dd, AES_REG_KEY(i), __le32_to_cpu(dd->ctx->key[i])); } - dd->ctx->flags &= ~FLAGS_NEW_KEY; - if (dd->flags & FLAGS_NEW_IV) { - pr_debug("Set IV\n"); - omap_aes_write_n(dd, AES_REG_IV(0), dd->iv, 4); - dd->flags &= ~FLAGS_NEW_IV; - } + if ((dd->flags & FLAGS_CBC) && dd->req->info) + omap_aes_write_n(dd, AES_REG_IV(0), dd->req->info, 4); + + val = FLD_VAL(((dd->ctx->keylen >> 3) - 1), 4, 3); + if (dd->flags & FLAGS_CBC) + val |= AES_REG_CTRL_CBC; + if (dd->flags & FLAGS_ENCRYPT) + val |= AES_REG_CTRL_DIRECTION; mask = AES_REG_CTRL_CBC | AES_REG_CTRL_DIRECTION | AES_REG_CTRL_KEY_SIZE; - omap_aes_write_mask(dd, AES_REG_CTRL, dd->ctrl, mask); + omap_aes_write_mask(dd, AES_REG_CTRL, val, mask); -out: /* start DMA or disable idle mode */ omap_aes_write_mask(dd, AES_REG_MASK, AES_REG_MASK_START, AES_REG_MASK_START); @@ -561,16 +542,12 @@ static int omap_aes_crypt_dma_start(struct omap_aes_dev *dd) static void omap_aes_finish_req(struct omap_aes_dev *dd, int err) { struct ablkcipher_request *req = dd->req; - struct omap_aes_ctx *ctx; pr_debug("err: %d\n", err); dd->flags &= ~FLAGS_BUSY; - ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req)); - - if (req->base.complete) - req->base.complete(&req->base, err); + req->base.complete(&req->base, err); } static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd) @@ -636,8 +613,6 @@ static int omap_aes_handle_queue(struct omap_aes_dev *dd, req = ablkcipher_request_cast(async_req); - pr_debug("get new req\n"); - /* assign new request to device */ dd->req = req; dd->total = req->nbytes; @@ -651,18 +626,8 @@ static int omap_aes_handle_queue(struct omap_aes_dev *dd, rctx->mode &= FLAGS_MODE_MASK; dd->flags = (dd->flags & ~FLAGS_MODE_MASK) | rctx->mode; - dd->iv = req->info; - if ((dd->flags & FLAGS_CBC) && dd->iv) - dd->flags |= FLAGS_NEW_IV; - else - dd->flags &= ~FLAGS_NEW_IV; - + dd->ctx = ctx; ctx->dd = dd; - if (dd->ctx != ctx) { - /* assign new context to device */ - dd->ctx = ctx; - ctx->flags |= FLAGS_NEW_KEY; - } err = omap_aes_crypt_dma_start(dd); if (err) { @@ -744,7 +709,6 @@ static int omap_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, memcpy(ctx->key, key, keylen); ctx->keylen = keylen; - ctx->flags |= FLAGS_NEW_KEY; return 0; } From 83ea7e0fe1471508ab8e8d7b317e743fe7a05a5f Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Tue, 30 Nov 2010 10:13:31 +0200 Subject: [PATCH 31/46] crypto: omap-aes - initialize aes module once per request AES module was initialized for every DMA transaction. That is redundant. Now it is initialized once per request. Signed-off-by: Dmitry Kasatkin Signed-off-by: Herbert Xu --- drivers/crypto/omap-aes.c | 54 ++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 26 deletions(-) diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index 0b21dcef028929..b69da4f07c8907 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -176,6 +176,11 @@ static int omap_aes_wait(struct omap_aes_dev *dd, u32 offset, u32 bit) static int omap_aes_hw_init(struct omap_aes_dev *dd) { + /* + * clocks are enabled when request starts and disabled when finished. + * It may be long delays between requests. + * Device might go to off mode to save power. + */ clk_enable(dd->iclk); if (!(dd->flags & FLAGS_INIT)) { @@ -190,10 +195,9 @@ static int omap_aes_hw_init(struct omap_aes_dev *dd) __asm__ __volatile__("nop"); if (omap_aes_wait(dd, AES_REG_SYSSTATUS, - AES_REG_SYSSTATUS_RESETDONE)) { - clk_disable(dd->iclk); + AES_REG_SYSSTATUS_RESETDONE)) return -ETIMEDOUT; - } + dd->flags |= FLAGS_INIT; dd->err = 0; } @@ -243,9 +247,19 @@ static int omap_aes_write_ctrl(struct omap_aes_dev *dd) omap_aes_write_mask(dd, AES_REG_CTRL, val, mask); - /* start DMA or disable idle mode */ - omap_aes_write_mask(dd, AES_REG_MASK, AES_REG_MASK_START, - AES_REG_MASK_START); + /* IN */ + omap_set_dma_dest_params(dd->dma_lch_in, 0, OMAP_DMA_AMODE_CONSTANT, + dd->phys_base + AES_REG_DATA, 0, 4); + + omap_set_dma_dest_burst_mode(dd->dma_lch_in, OMAP_DMA_DATA_BURST_4); + omap_set_dma_src_burst_mode(dd->dma_lch_in, OMAP_DMA_DATA_BURST_4); + + /* OUT */ + omap_set_dma_src_params(dd->dma_lch_out, 0, OMAP_DMA_AMODE_CONSTANT, + dd->phys_base + AES_REG_DATA, 0, 4); + + omap_set_dma_src_burst_mode(dd->dma_lch_out, OMAP_DMA_DATA_BURST_4); + omap_set_dma_dest_burst_mode(dd->dma_lch_out, OMAP_DMA_DATA_BURST_4); return 0; } @@ -419,7 +433,6 @@ static int omap_aes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in, struct omap_aes_ctx *ctx = crypto_tfm_ctx(tfm); struct omap_aes_dev *dd = ctx->dd; int len32; - int err; pr_debug("len: %d\n", length); @@ -432,12 +445,6 @@ static int omap_aes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in, len32 = DIV_ROUND_UP(length, sizeof(u32)); /* IN */ - omap_set_dma_dest_params(dd->dma_lch_in, 0, OMAP_DMA_AMODE_CONSTANT, - dd->phys_base + AES_REG_DATA, 0, 4); - - omap_set_dma_dest_burst_mode(dd->dma_lch_in, OMAP_DMA_DATA_BURST_4); - omap_set_dma_src_burst_mode(dd->dma_lch_in, OMAP_DMA_DATA_BURST_4); - omap_set_dma_transfer_params(dd->dma_lch_in, OMAP_DMA_DATA_TYPE_S32, len32, 1, OMAP_DMA_SYNC_PACKET, dd->dma_in, OMAP_DMA_DST_SYNC); @@ -446,12 +453,6 @@ static int omap_aes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in, dma_addr_in, 0, 0); /* OUT */ - omap_set_dma_src_params(dd->dma_lch_out, 0, OMAP_DMA_AMODE_CONSTANT, - dd->phys_base + AES_REG_DATA, 0, 4); - - omap_set_dma_src_burst_mode(dd->dma_lch_out, OMAP_DMA_DATA_BURST_4); - omap_set_dma_dest_burst_mode(dd->dma_lch_out, OMAP_DMA_DATA_BURST_4); - omap_set_dma_transfer_params(dd->dma_lch_out, OMAP_DMA_DATA_TYPE_S32, len32, 1, OMAP_DMA_SYNC_PACKET, dd->dma_out, OMAP_DMA_SRC_SYNC); @@ -459,13 +460,13 @@ static int omap_aes_crypt_dma(struct crypto_tfm *tfm, dma_addr_t dma_addr_in, omap_set_dma_dest_params(dd->dma_lch_out, 0, OMAP_DMA_AMODE_POST_INC, dma_addr_out, 0, 0); - err = omap_aes_write_ctrl(dd); - if (err) - return err; - omap_start_dma(dd->dma_lch_in); omap_start_dma(dd->dma_lch_out); + /* start DMA or disable idle mode */ + omap_aes_write_mask(dd, AES_REG_MASK, AES_REG_MASK_START, + AES_REG_MASK_START); + return 0; } @@ -545,6 +546,7 @@ static void omap_aes_finish_req(struct omap_aes_dev *dd, int err) pr_debug("err: %d\n", err); + clk_disable(dd->iclk); dd->flags &= ~FLAGS_BUSY; req->base.complete(&req->base, err); @@ -562,8 +564,6 @@ static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd) omap_stop_dma(dd->dma_lch_in); omap_stop_dma(dd->dma_lch_out); - clk_disable(dd->iclk); - if (dd->flags & FLAGS_FAST) { dma_unmap_sg(dd->dev, dd->out_sg, 1, DMA_FROM_DEVICE); dma_unmap_sg(dd->dev, dd->in_sg, 1, DMA_TO_DEVICE); @@ -629,7 +629,9 @@ static int omap_aes_handle_queue(struct omap_aes_dev *dd, dd->ctx = ctx; ctx->dd = dd; - err = omap_aes_crypt_dma_start(dd); + err = omap_aes_write_ctrl(dd); + if (!err) + err = omap_aes_crypt_dma_start(dd); if (err) { /* aes_task will not finish it, so do it here */ omap_aes_finish_req(dd, err); From efce41b65f66251d60484781df305e8a85c9507b Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Tue, 30 Nov 2010 10:13:32 +0200 Subject: [PATCH 32/46] crypto: omap-aes - checkpatch --file warning fixes Signed-off-by: Dmitry Kasatkin Signed-off-by: Herbert Xu --- drivers/crypto/omap-aes.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index b69da4f07c8907..add2a1a72ba4c0 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -96,7 +96,7 @@ struct omap_aes_reqctx { struct omap_aes_dev { struct list_head list; unsigned long phys_base; - void __iomem *io_base; + void __iomem *io_base; struct clk *iclk; struct omap_aes_ctx *ctx; struct device *dev; @@ -759,7 +759,7 @@ static struct crypto_alg algs[] = { .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct omap_aes_ctx), - .cra_alignmask = 0, + .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, .cra_init = omap_aes_cra_init, @@ -779,7 +779,7 @@ static struct crypto_alg algs[] = { .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, .cra_blocksize = AES_BLOCK_SIZE, .cra_ctxsize = sizeof(struct omap_aes_ctx), - .cra_alignmask = 0, + .cra_alignmask = 0, .cra_type = &crypto_ablkcipher_type, .cra_module = THIS_MODULE, .cra_init = omap_aes_cra_init, From f689b34bfbd2154a8fa255060dd872a6db3b4742 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Fri, 3 Dec 2010 13:51:52 +0800 Subject: [PATCH 33/46] include: Install linux/if_alg.h for user-space crypto API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Miloslav Trmač Signed-off-by: Herbert Xu --- include/linux/Kbuild | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 831c4634162c6e..b3cca8c88e86c1 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -154,6 +154,7 @@ header-y += icmpv6.h header-y += if.h header-y += if_addr.h header-y += if_addrlabel.h +header-y += if_alg.h header-y += if_arcnet.h header-y += if_arp.h header-y += if_bonding.h From 6f107b5861ecb09abfa7e2f9927e3884d1d81f91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 8 Dec 2010 14:35:34 +0800 Subject: [PATCH 34/46] net: Add missing lockdep class names for af_alg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Miloslav Trmač Signed-off-by: Herbert Xu --- net/core/sock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/sock.c b/net/core/sock.c index 3eed5424e659a1..634d5bc409bb60 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -157,7 +157,7 @@ static const char *const af_family_key_strings[AF_MAX+1] = { "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" , "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , - "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , + "sk_lock-AF_IEEE802154", "sk_lock-AF_CAIF" , "sk_lock-AF_ALG" , "sk_lock-AF_MAX" }; static const char *const af_family_slock_key_strings[AF_MAX+1] = { @@ -173,7 +173,7 @@ static const char *const af_family_slock_key_strings[AF_MAX+1] = { "slock-27" , "slock-28" , "slock-AF_CAN" , "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , - "slock-AF_IEEE802154", "slock-AF_CAIF" , + "slock-AF_IEEE802154", "slock-AF_CAIF" , "slock-AF_ALG" , "slock-AF_MAX" }; static const char *const af_family_clock_key_strings[AF_MAX+1] = { @@ -189,7 +189,7 @@ static const char *const af_family_clock_key_strings[AF_MAX+1] = { "clock-27" , "clock-28" , "clock-AF_CAN" , "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , - "clock-AF_IEEE802154", "clock-AF_CAIF" , + "clock-AF_IEEE802154", "clock-AF_CAIF" , "clock-AF_ALG" , "clock-AF_MAX" }; From 507cad355fc9e426f2846c46a4edca2d22d25f44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 8 Dec 2010 14:36:19 +0800 Subject: [PATCH 35/46] crypto: af_alg - Make sure sk_security is initialized on accept()ed sockets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Miloslav Trmač Signed-off-by: Herbert Xu --- crypto/af_alg.c | 1 + 1 file changed, 1 insertion(+) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index cabed0e9c9d810..bd9e53c780269d 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -242,6 +242,7 @@ int af_alg_accept(struct sock *sk, struct socket *newsock) goto unlock; sock_init_data(newsock, sk2); + sock_graft(sk2, newsock); err = type->accept(ask->private, sk2); if (err) { From 3c097b800816c0e4c2a34c38f8b2409427770f7a Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Mon, 13 Dec 2010 19:51:15 +0800 Subject: [PATCH 36/46] crypto: aesni-intel - Fixed build with binutils 2.16 This patch fixes the problem with 2.16 binutils. Signed-off-by: Aidan O'Mahony Signed-off-by: Adrian Hoban Signed-off-by: Gabriele Paoloni Signed-off-by: Tadeusz Struk Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 598 ++++++++++++++++++++++++++---- 1 file changed, 519 insertions(+), 79 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index d528fde219d2d2..8fe2a4966b7afe 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -204,9 +204,9 @@ enc: .octa 0x2 * arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified */ -.macro INITIAL_BLOCKS num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ -XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation +.macro INITIAL_BLOCKS_DEC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ +XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation mov arg7, %r10 # %r10 = AAD mov arg8, %r12 # %r12 = aadLen mov %r12, %r11 @@ -228,19 +228,25 @@ _get_AAD_loop2\num_initial_blocks\operation: cmp %r11, %r12 jne _get_AAD_loop2\num_initial_blocks\operation _get_AAD_loop2_done\num_initial_blocks\operation: - pshufb SHUF_MASK(%rip), %xmm\i # byte-reflect the AAD data + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data + xor %r11, %r11 # initialise the data pointer offset as zero # start AES for num_initial_blocks blocks mov %arg5, %rax # %rax = *Y0 movdqu (%rax), \XMM0 # XMM0 = Y0 - pshufb SHUF_MASK(%rip), \XMM0 -.if \i_seq != 0 + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM0 + +.if (\i == 5) || (\i == 6) || (\i == 7) .irpc index, \i_seq paddd ONE(%rip), \XMM0 # INCR Y0 movdqa \XMM0, %xmm\index - pshufb SHUF_MASK(%rip), %xmm\index # perform a 16 byte swap + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap + .endr .irpc index, \i_seq pxor 16*0(%arg1), %xmm\index @@ -291,10 +297,11 @@ _get_AAD_loop2_done\num_initial_blocks\operation: movdqu %xmm\index, (%arg2 , %r11, 1) # write back plaintext/ciphertext for num_initial_blocks add $16, %r11 -.if \operation == dec + movdqa \TMP1, %xmm\index -.endif - pshufb SHUF_MASK(%rip), %xmm\index + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, %xmm\index + # prepare plaintext/ciphertext for GHASH computation .endr .endif @@ -327,16 +334,24 @@ _get_AAD_loop2_done\num_initial_blocks\operation: */ paddd ONE(%rip), \XMM0 # INCR Y0 movdqa \XMM0, \XMM1 - pshufb SHUF_MASK(%rip), \XMM1 # perform a 16 byte swap + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap + paddd ONE(%rip), \XMM0 # INCR Y0 movdqa \XMM0, \XMM2 - pshufb SHUF_MASK(%rip), \XMM2 # perform a 16 byte swap + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap + paddd ONE(%rip), \XMM0 # INCR Y0 movdqa \XMM0, \XMM3 - pshufb SHUF_MASK(%rip), \XMM3 # perform a 16 byte swap + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap + paddd ONE(%rip), \XMM0 # INCR Y0 movdqa \XMM0, \XMM4 - pshufb SHUF_MASK(%rip), \XMM4 # perform a 16 byte swap + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap + pxor 16*0(%arg1), \XMM1 pxor 16*0(%arg1), \XMM2 pxor 16*0(%arg1), \XMM3 @@ -385,41 +400,268 @@ _get_AAD_loop2_done\num_initial_blocks\operation: AESENCLAST \TMP2, \XMM4 movdqu 16*0(%arg3 , %r11 , 1), \TMP1 pxor \TMP1, \XMM1 -.if \operation == dec movdqu \XMM1, 16*0(%arg2 , %r11 , 1) movdqa \TMP1, \XMM1 -.endif movdqu 16*1(%arg3 , %r11 , 1), \TMP1 pxor \TMP1, \XMM2 -.if \operation == dec movdqu \XMM2, 16*1(%arg2 , %r11 , 1) movdqa \TMP1, \XMM2 -.endif movdqu 16*2(%arg3 , %r11 , 1), \TMP1 pxor \TMP1, \XMM3 -.if \operation == dec movdqu \XMM3, 16*2(%arg2 , %r11 , 1) movdqa \TMP1, \XMM3 -.endif movdqu 16*3(%arg3 , %r11 , 1), \TMP1 pxor \TMP1, \XMM4 -.if \operation == dec movdqu \XMM4, 16*3(%arg2 , %r11 , 1) movdqa \TMP1, \XMM4 -.else + add $64, %r11 + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap + pxor \XMMDst, \XMM1 +# combine GHASHed value with the corresponding ciphertext + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap + +_initial_blocks_done\num_initial_blocks\operation: + +.endm + + +/* +* if a = number of total plaintext bytes +* b = floor(a/16) +* num_initial_blocks = b mod 4 +* encrypt the initial num_initial_blocks blocks and apply ghash on +* the ciphertext +* %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers +* are clobbered +* arg1, %arg2, %arg3, %r14 are used as a pointer only, not modified +*/ + + +.macro INITIAL_BLOCKS_ENC num_initial_blocks TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \ +XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation + mov arg7, %r10 # %r10 = AAD + mov arg8, %r12 # %r12 = aadLen + mov %r12, %r11 + pxor %xmm\i, %xmm\i +_get_AAD_loop\num_initial_blocks\operation: + movd (%r10), \TMP1 + pslldq $12, \TMP1 + psrldq $4, %xmm\i + pxor \TMP1, %xmm\i + add $4, %r10 + sub $4, %r12 + jne _get_AAD_loop\num_initial_blocks\operation + cmp $16, %r11 + je _get_AAD_loop2_done\num_initial_blocks\operation + mov $16, %r12 +_get_AAD_loop2\num_initial_blocks\operation: + psrldq $4, %xmm\i + sub $4, %r12 + cmp %r11, %r12 + jne _get_AAD_loop2\num_initial_blocks\operation +_get_AAD_loop2_done\num_initial_blocks\operation: + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data + + xor %r11, %r11 # initialise the data pointer offset as zero + + # start AES for num_initial_blocks blocks + + mov %arg5, %rax # %rax = *Y0 + movdqu (%rax), \XMM0 # XMM0 = Y0 + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM0 + +.if (\i == 5) || (\i == 6) || (\i == 7) +.irpc index, \i_seq + paddd ONE(%rip), \XMM0 # INCR Y0 + movdqa \XMM0, %xmm\index + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, %xmm\index # perform a 16 byte swap + +.endr +.irpc index, \i_seq + pxor 16*0(%arg1), %xmm\index +.endr +.irpc index, \i_seq + movaps 0x10(%rdi), \TMP1 + AESENC \TMP1, %xmm\index # Round 1 +.endr +.irpc index, \i_seq + movaps 0x20(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x30(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x40(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x50(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x60(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x70(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x80(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0x90(%arg1), \TMP1 + AESENC \TMP1, %xmm\index # Round 2 +.endr +.irpc index, \i_seq + movaps 0xa0(%arg1), \TMP1 + AESENCLAST \TMP1, %xmm\index # Round 10 +.endr +.irpc index, \i_seq + movdqu (%arg3 , %r11, 1), \TMP1 + pxor \TMP1, %xmm\index + movdqu %xmm\index, (%arg2 , %r11, 1) + # write back plaintext/ciphertext for num_initial_blocks + add $16, %r11 + + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, %xmm\index + + # prepare plaintext/ciphertext for GHASH computation +.endr +.endif + GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + # apply GHASH on num_initial_blocks blocks + +.if \i == 5 + pxor %xmm5, %xmm6 + GHASH_MUL %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + pxor %xmm6, %xmm7 + GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + pxor %xmm7, %xmm8 + GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 +.elseif \i == 6 + pxor %xmm6, %xmm7 + GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + pxor %xmm7, %xmm8 + GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 +.elseif \i == 7 + pxor %xmm7, %xmm8 + GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 +.endif + cmp $64, %r13 + jl _initial_blocks_done\num_initial_blocks\operation + # no need for precomputed values +/* +* +* Precomputations for HashKey parallel with encryption of first 4 blocks. +* Haskey_i_k holds XORed values of the low and high parts of the Haskey_i +*/ + paddd ONE(%rip), \XMM0 # INCR Y0 + movdqa \XMM0, \XMM1 + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap + + paddd ONE(%rip), \XMM0 # INCR Y0 + movdqa \XMM0, \XMM2 + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap + + paddd ONE(%rip), \XMM0 # INCR Y0 + movdqa \XMM0, \XMM3 + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap + + paddd ONE(%rip), \XMM0 # INCR Y0 + movdqa \XMM0, \XMM4 + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap + + pxor 16*0(%arg1), \XMM1 + pxor 16*0(%arg1), \XMM2 + pxor 16*0(%arg1), \XMM3 + pxor 16*0(%arg1), \XMM4 + movdqa \TMP3, \TMP5 + pshufd $78, \TMP3, \TMP1 + pxor \TMP3, \TMP1 + movdqa \TMP1, HashKey_k(%rsp) + GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 +# TMP5 = HashKey^2<<1 (mod poly) + movdqa \TMP5, HashKey_2(%rsp) +# HashKey_2 = HashKey^2<<1 (mod poly) + pshufd $78, \TMP5, \TMP1 + pxor \TMP5, \TMP1 + movdqa \TMP1, HashKey_2_k(%rsp) +.irpc index, 1234 # do 4 rounds + movaps 0x10*\index(%arg1), \TMP1 + AESENC \TMP1, \XMM1 + AESENC \TMP1, \XMM2 + AESENC \TMP1, \XMM3 + AESENC \TMP1, \XMM4 +.endr + GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 +# TMP5 = HashKey^3<<1 (mod poly) + movdqa \TMP5, HashKey_3(%rsp) + pshufd $78, \TMP5, \TMP1 + pxor \TMP5, \TMP1 + movdqa \TMP1, HashKey_3_k(%rsp) +.irpc index, 56789 # do next 5 rounds + movaps 0x10*\index(%arg1), \TMP1 + AESENC \TMP1, \XMM1 + AESENC \TMP1, \XMM2 + AESENC \TMP1, \XMM3 + AESENC \TMP1, \XMM4 +.endr + GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7 +# TMP5 = HashKey^3<<1 (mod poly) + movdqa \TMP5, HashKey_4(%rsp) + pshufd $78, \TMP5, \TMP1 + pxor \TMP5, \TMP1 + movdqa \TMP1, HashKey_4_k(%rsp) + movaps 0xa0(%arg1), \TMP2 + AESENCLAST \TMP2, \XMM1 + AESENCLAST \TMP2, \XMM2 + AESENCLAST \TMP2, \XMM3 + AESENCLAST \TMP2, \XMM4 + movdqu 16*0(%arg3 , %r11 , 1), \TMP1 + pxor \TMP1, \XMM1 + movdqu 16*1(%arg3 , %r11 , 1), \TMP1 + pxor \TMP1, \XMM2 + movdqu 16*2(%arg3 , %r11 , 1), \TMP1 + pxor \TMP1, \XMM3 + movdqu 16*3(%arg3 , %r11 , 1), \TMP1 + pxor \TMP1, \XMM4 movdqu \XMM1, 16*0(%arg2 , %r11 , 1) movdqu \XMM2, 16*1(%arg2 , %r11 , 1) movdqu \XMM3, 16*2(%arg2 , %r11 , 1) movdqu \XMM4, 16*3(%arg2 , %r11 , 1) -.endif + add $64, %r11 - pshufb SHUF_MASK(%rip), \XMM1 # perform a 16 byte swap + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM1 # perform a 16 byte swap pxor \XMMDst, \XMM1 # combine GHASHed value with the corresponding ciphertext - pshufb SHUF_MASK(%rip), \XMM2 # perform a 16 byte swap - pshufb SHUF_MASK(%rip), \XMM3 # perform a 16 byte swap - pshufb SHUF_MASK(%rip), \XMM4 # perform a 16 byte swap + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM2 # perform a 16 byte swap + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM3 # perform a 16 byte swap + movdqa SHUF_MASK(%rip), %xmm14 + PSHUFB_XMM %xmm14, \XMM4 # perform a 16 byte swap + _initial_blocks_done\num_initial_blocks\operation: + .endm /* @@ -428,7 +670,199 @@ _initial_blocks_done\num_initial_blocks\operation: * arg1, %arg2, %arg3 are used as pointers only, not modified * %r11 is the data offset value */ -.macro GHASH_4_ENCRYPT_4_PARALLEL TMP1 TMP2 TMP3 TMP4 TMP5 \ +.macro GHASH_4_ENCRYPT_4_PARALLEL_ENC TMP1 TMP2 TMP3 TMP4 TMP5 \ +TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation + + movdqa \XMM1, \XMM5 + movdqa \XMM2, \XMM6 + movdqa \XMM3, \XMM7 + movdqa \XMM4, \XMM8 + + movdqa SHUF_MASK(%rip), %xmm15 + # multiply TMP5 * HashKey using karatsuba + + movdqa \XMM5, \TMP4 + pshufd $78, \XMM5, \TMP6 + pxor \XMM5, \TMP6 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqa HashKey_4(%rsp), \TMP5 + PCLMULQDQ 0x11, \TMP5, \TMP4 # TMP4 = a1*b1 + movdqa \XMM0, \XMM1 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqa \XMM0, \XMM2 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqa \XMM0, \XMM3 + paddd ONE(%rip), \XMM0 # INCR CNT + movdqa \XMM0, \XMM4 + PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap + PCLMULQDQ 0x00, \TMP5, \XMM5 # XMM5 = a0*b0 + PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap + + pxor (%arg1), \XMM1 + pxor (%arg1), \XMM2 + pxor (%arg1), \XMM3 + pxor (%arg1), \XMM4 + movdqa HashKey_4_k(%rsp), \TMP5 + PCLMULQDQ 0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0) + movaps 0x10(%arg1), \TMP1 + AESENC \TMP1, \XMM1 # Round 1 + AESENC \TMP1, \XMM2 + AESENC \TMP1, \XMM3 + AESENC \TMP1, \XMM4 + movaps 0x20(%arg1), \TMP1 + AESENC \TMP1, \XMM1 # Round 2 + AESENC \TMP1, \XMM2 + AESENC \TMP1, \XMM3 + AESENC \TMP1, \XMM4 + movdqa \XMM6, \TMP1 + pshufd $78, \XMM6, \TMP2 + pxor \XMM6, \TMP2 + movdqa HashKey_3(%rsp), \TMP5 + PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1 * b1 + movaps 0x30(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 3 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + PCLMULQDQ 0x00, \TMP5, \XMM6 # XMM6 = a0*b0 + movaps 0x40(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 4 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + movdqa HashKey_3_k(%rsp), \TMP5 + PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + movaps 0x50(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 5 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + pxor \TMP1, \TMP4 +# accumulate the results in TMP4:XMM5, TMP6 holds the middle part + pxor \XMM6, \XMM5 + pxor \TMP2, \TMP6 + movdqa \XMM7, \TMP1 + pshufd $78, \XMM7, \TMP2 + pxor \XMM7, \TMP2 + movdqa HashKey_2(%rsp ), \TMP5 + + # Multiply TMP5 * HashKey using karatsuba + + PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 + movaps 0x60(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 6 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + PCLMULQDQ 0x00, \TMP5, \XMM7 # XMM7 = a0*b0 + movaps 0x70(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 7 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + movdqa HashKey_2_k(%rsp), \TMP5 + PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + movaps 0x80(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 8 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + pxor \TMP1, \TMP4 +# accumulate the results in TMP4:XMM5, TMP6 holds the middle part + pxor \XMM7, \XMM5 + pxor \TMP2, \TMP6 + + # Multiply XMM8 * HashKey + # XMM8 and TMP5 hold the values for the two operands + + movdqa \XMM8, \TMP1 + pshufd $78, \XMM8, \TMP2 + pxor \XMM8, \TMP2 + movdqa HashKey(%rsp), \TMP5 + PCLMULQDQ 0x11, \TMP5, \TMP1 # TMP1 = a1*b1 + movaps 0x90(%arg1), \TMP3 + AESENC \TMP3, \XMM1 # Round 9 + AESENC \TMP3, \XMM2 + AESENC \TMP3, \XMM3 + AESENC \TMP3, \XMM4 + PCLMULQDQ 0x00, \TMP5, \XMM8 # XMM8 = a0*b0 + movaps 0xa0(%arg1), \TMP3 + AESENCLAST \TMP3, \XMM1 # Round 10 + AESENCLAST \TMP3, \XMM2 + AESENCLAST \TMP3, \XMM3 + AESENCLAST \TMP3, \XMM4 + movdqa HashKey_k(%rsp), \TMP5 + PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) + movdqu (%arg3,%r11,1), \TMP3 + pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK + movdqu 16(%arg3,%r11,1), \TMP3 + pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK + movdqu 32(%arg3,%r11,1), \TMP3 + pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK + movdqu 48(%arg3,%r11,1), \TMP3 + pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK + movdqu \XMM1, (%arg2,%r11,1) # Write to the ciphertext buffer + movdqu \XMM2, 16(%arg2,%r11,1) # Write to the ciphertext buffer + movdqu \XMM3, 32(%arg2,%r11,1) # Write to the ciphertext buffer + movdqu \XMM4, 48(%arg2,%r11,1) # Write to the ciphertext buffer + PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap + + pxor \TMP4, \TMP1 + pxor \XMM8, \XMM5 + pxor \TMP6, \TMP2 + pxor \TMP1, \TMP2 + pxor \XMM5, \TMP2 + movdqa \TMP2, \TMP3 + pslldq $8, \TMP3 # left shift TMP3 2 DWs + psrldq $8, \TMP2 # right shift TMP2 2 DWs + pxor \TMP3, \XMM5 + pxor \TMP2, \TMP1 # accumulate the results in TMP1:XMM5 + + # first phase of reduction + + movdqa \XMM5, \TMP2 + movdqa \XMM5, \TMP3 + movdqa \XMM5, \TMP4 +# move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently + pslld $31, \TMP2 # packed right shift << 31 + pslld $30, \TMP3 # packed right shift << 30 + pslld $25, \TMP4 # packed right shift << 25 + pxor \TMP3, \TMP2 # xor the shifted versions + pxor \TMP4, \TMP2 + movdqa \TMP2, \TMP5 + psrldq $4, \TMP5 # right shift T5 1 DW + pslldq $12, \TMP2 # left shift T2 3 DWs + pxor \TMP2, \XMM5 + + # second phase of reduction + + movdqa \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4 + movdqa \XMM5,\TMP3 + movdqa \XMM5,\TMP4 + psrld $1, \TMP2 # packed left shift >>1 + psrld $2, \TMP3 # packed left shift >>2 + psrld $7, \TMP4 # packed left shift >>7 + pxor \TMP3,\TMP2 # xor the shifted versions + pxor \TMP4,\TMP2 + pxor \TMP5, \TMP2 + pxor \TMP2, \XMM5 + pxor \TMP1, \XMM5 # result is in TMP1 + + pxor \XMM5, \XMM1 +.endm + +/* +* decrypt 4 blocks at a time +* ghash the 4 previously decrypted ciphertext blocks +* arg1, %arg2, %arg3 are used as pointers only, not modified +* %r11 is the data offset value +*/ +.macro GHASH_4_ENCRYPT_4_PARALLEL_DEC TMP1 TMP2 TMP3 TMP4 TMP5 \ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM1, \XMM5 @@ -436,6 +870,7 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM3, \XMM7 movdqa \XMM4, \XMM8 + movdqa SHUF_MASK(%rip), %xmm15 # multiply TMP5 * HashKey using karatsuba movdqa \XMM5, \TMP4 @@ -451,11 +886,12 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation movdqa \XMM0, \XMM3 paddd ONE(%rip), \XMM0 # INCR CNT movdqa \XMM0, \XMM4 - pshufb SHUF_MASK(%rip), \XMM1 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap PCLMULQDQ 0x00, \TMP5, \XMM5 # XMM5 = a0*b0 - pshufb SHUF_MASK(%rip), \XMM2 # perform a 16 byte swap - pshufb SHUF_MASK(%rip), \XMM3 # perform a 16 byte swap - pshufb SHUF_MASK(%rip), \XMM4 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap + pxor (%arg1), \XMM1 pxor (%arg1), \XMM2 pxor (%arg1), \XMM3 @@ -553,37 +989,24 @@ TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation PCLMULQDQ 0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0) movdqu (%arg3,%r11,1), \TMP3 pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK -.if \operation == dec movdqu \XMM1, (%arg2,%r11,1) # Write to plaintext buffer movdqa \TMP3, \XMM1 -.endif movdqu 16(%arg3,%r11,1), \TMP3 pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK -.if \operation == dec movdqu \XMM2, 16(%arg2,%r11,1) # Write to plaintext buffer movdqa \TMP3, \XMM2 -.endif movdqu 32(%arg3,%r11,1), \TMP3 pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK -.if \operation == dec movdqu \XMM3, 32(%arg2,%r11,1) # Write to plaintext buffer movdqa \TMP3, \XMM3 -.endif movdqu 48(%arg3,%r11,1), \TMP3 pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK -.if \operation == dec movdqu \XMM4, 48(%arg2,%r11,1) # Write to plaintext buffer movdqa \TMP3, \XMM4 -.else - movdqu \XMM1, (%arg2,%r11,1) # Write to the ciphertext buffer - movdqu \XMM2, 16(%arg2,%r11,1) # Write to the ciphertext buffer - movdqu \XMM3, 32(%arg2,%r11,1) # Write to the ciphertext buffer - movdqu \XMM4, 48(%arg2,%r11,1) # Write to the ciphertext buffer -.endif - pshufb SHUF_MASK(%rip), \XMM1 # perform a 16 byte swap - pshufb SHUF_MASK(%rip), \XMM2 # perform a 16 byte swap - pshufb SHUF_MASK(%rip), \XMM3 # perform a 16 byte swap - pshufb SHUF_MASK(%rip), \XMM4 # perform a 16 byte sway + PSHUFB_XMM %xmm15, \XMM1 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM2 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM3 # perform a 16 byte swap + PSHUFB_XMM %xmm15, \XMM4 # perform a 16 byte swap pxor \TMP4, \TMP1 pxor \XMM8, \XMM5 @@ -853,7 +1276,9 @@ ENTRY(aesni_gcm_dec) and $~63, %rsp # align rsp to 64 bytes mov %arg6, %r12 movdqu (%r12), %xmm13 # %xmm13 = HashKey - pshufb SHUF_MASK(%rip), %xmm13 + movdqa SHUF_MASK(%rip), %xmm2 + PSHUFB_XMM %xmm2, %xmm13 + # Precompute HashKey<<1 (mod poly) from the hash key (required for GHASH) @@ -885,22 +1310,22 @@ ENTRY(aesni_gcm_dec) jb _initial_num_blocks_is_1_decrypt je _initial_num_blocks_is_2_decrypt _initial_num_blocks_is_3_decrypt: - INITIAL_BLOCKS 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ + INITIAL_BLOCKS_DEC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, dec sub $48, %r13 jmp _initial_blocks_decrypted _initial_num_blocks_is_2_decrypt: - INITIAL_BLOCKS 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ + INITIAL_BLOCKS_DEC 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, dec sub $32, %r13 jmp _initial_blocks_decrypted _initial_num_blocks_is_1_decrypt: - INITIAL_BLOCKS 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ + INITIAL_BLOCKS_DEC 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, dec sub $16, %r13 jmp _initial_blocks_decrypted _initial_num_blocks_is_0_decrypt: - INITIAL_BLOCKS 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ + INITIAL_BLOCKS_DEC 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, dec _initial_blocks_decrypted: cmp $0, %r13 @@ -908,7 +1333,7 @@ _initial_blocks_decrypted: sub $64, %r13 je _four_cipher_left_decrypt _decrypt_by_4: - GHASH_4_ENCRYPT_4_PARALLEL %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \ + GHASH_4_ENCRYPT_4_PARALLEL_DEC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \ %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, dec add $64, %r11 sub $64, %r13 @@ -924,7 +1349,9 @@ _zero_cipher_left_decrypt: # Handle the last <16 byte block seperately paddd ONE(%rip), %xmm0 # increment CNT to get Yn - pshufb SHUF_MASK(%rip), %xmm0 + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10, %xmm0 + ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Yn) sub $16, %r11 add %r13, %r11 @@ -934,14 +1361,17 @@ _zero_cipher_left_decrypt: # adjust the shuffle mask pointer to be able to shift 16-%r13 bytes # (%r13 is the number of bytes in plaintext mod 16) movdqu (%r12), %xmm2 # get the appropriate shuffle mask - pshufb %xmm2, %xmm1 # right shift 16-%r13 butes + PSHUFB_XMM %xmm2, %xmm1 # right shift 16-%r13 butes + movdqa %xmm1, %xmm2 pxor %xmm1, %xmm0 # Ciphertext XOR E(K, Yn) movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to mask out top 16-%r13 bytes of %xmm0 pand %xmm1, %xmm0 # mask out top 16-%r13 bytes of %xmm0 pand %xmm1, %xmm2 - pshufb SHUF_MASK(%rip),%xmm2 + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10 ,%xmm2 + pxor %xmm2, %xmm8 GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 # GHASH computation for the last <16 byte block @@ -949,13 +1379,13 @@ _zero_cipher_left_decrypt: add $16, %r11 # output %r13 bytes - movq %xmm0, %rax + MOVQ_R64_XMM %xmm0, %rax cmp $8, %r13 jle _less_than_8_bytes_left_decrypt mov %rax, (%arg2 , %r11, 1) add $8, %r11 psrldq $8, %xmm0 - movq %xmm0, %rax + MOVQ_R64_XMM %xmm0, %rax sub $8, %r13 _less_than_8_bytes_left_decrypt: mov %al, (%arg2, %r11, 1) @@ -968,13 +1398,15 @@ _multiple_of_16_bytes_decrypt: shl $3, %r12 # convert into number of bits movd %r12d, %xmm15 # len(A) in %xmm15 shl $3, %arg4 # len(C) in bits (*128) - movq %arg4, %xmm1 + MOVQ_R64_XMM %arg4, %xmm1 pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) pxor %xmm15, %xmm8 GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 # final GHASH computation - pshufb SHUF_MASK(%rip), %xmm8 + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10, %xmm8 + mov %arg5, %rax # %rax = *Y0 movdqu (%rax), %xmm0 # %xmm0 = Y0 ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0) @@ -987,11 +1419,11 @@ _return_T_decrypt: cmp $12, %r11 je _T_12_decrypt _T_8_decrypt: - movq %xmm0, %rax + MOVQ_R64_XMM %xmm0, %rax mov %rax, (%r10) jmp _return_T_done_decrypt _T_12_decrypt: - movq %xmm0, %rax + MOVQ_R64_XMM %xmm0, %rax mov %rax, (%r10) psrldq $8, %xmm0 movd %xmm0, %eax @@ -1103,7 +1535,9 @@ ENTRY(aesni_gcm_enc) and $~63, %rsp mov %arg6, %r12 movdqu (%r12), %xmm13 - pshufb SHUF_MASK(%rip), %xmm13 + movdqa SHUF_MASK(%rip), %xmm2 + PSHUFB_XMM %xmm2, %xmm13 + # precompute HashKey<<1 mod poly from the HashKey (required for GHASH) @@ -1134,22 +1568,22 @@ ENTRY(aesni_gcm_enc) jb _initial_num_blocks_is_1_encrypt je _initial_num_blocks_is_2_encrypt _initial_num_blocks_is_3_encrypt: - INITIAL_BLOCKS 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ + INITIAL_BLOCKS_ENC 3, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, enc sub $48, %r13 jmp _initial_blocks_encrypted _initial_num_blocks_is_2_encrypt: - INITIAL_BLOCKS 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ + INITIAL_BLOCKS_ENC 2, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, enc sub $32, %r13 jmp _initial_blocks_encrypted _initial_num_blocks_is_1_encrypt: - INITIAL_BLOCKS 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ + INITIAL_BLOCKS_ENC 1, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, enc sub $16, %r13 jmp _initial_blocks_encrypted _initial_num_blocks_is_0_encrypt: - INITIAL_BLOCKS 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ + INITIAL_BLOCKS_ENC 0, %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \ %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, enc _initial_blocks_encrypted: @@ -1160,7 +1594,7 @@ _initial_blocks_encrypted: sub $64, %r13 je _four_cipher_left_encrypt _encrypt_by_4_encrypt: - GHASH_4_ENCRYPT_4_PARALLEL %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \ + GHASH_4_ENCRYPT_4_PARALLEL_ENC %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, \ %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, enc add $64, %r11 sub $64, %r13 @@ -1175,7 +1609,9 @@ _zero_cipher_left_encrypt: # Handle the last <16 Byte block seperately paddd ONE(%rip), %xmm0 # INCR CNT to get Yn - pshufb SHUF_MASK(%rip), %xmm0 + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10, %xmm0 + ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn) sub $16, %r11 add %r13, %r11 @@ -1185,29 +1621,31 @@ _zero_cipher_left_encrypt: # adjust the shuffle mask pointer to be able to shift 16-r13 bytes # (%r13 is the number of bytes in plaintext mod 16) movdqu (%r12), %xmm2 # get the appropriate shuffle mask - pshufb %xmm2, %xmm1 # shift right 16-r13 byte + PSHUFB_XMM %xmm2, %xmm1 # shift right 16-r13 byte pxor %xmm1, %xmm0 # Plaintext XOR Encrypt(K, Yn) movdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to mask out top 16-r13 bytes of xmm0 pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0 + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10,%xmm0 - pshufb SHUF_MASK(%rip),%xmm0 pxor %xmm0, %xmm8 GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 # GHASH computation for the last <16 byte block sub %r13, %r11 add $16, %r11 - pshufb SHUF_MASK(%rip), %xmm0 + PSHUFB_XMM %xmm10, %xmm1 + # shuffle xmm0 back to output as ciphertext # Output %r13 bytes - movq %xmm0, %rax + MOVQ_R64_XMM %xmm0, %rax cmp $8, %r13 jle _less_than_8_bytes_left_encrypt mov %rax, (%arg2 , %r11, 1) add $8, %r11 psrldq $8, %xmm0 - movq %xmm0, %rax + MOVQ_R64_XMM %xmm0, %rax sub $8, %r13 _less_than_8_bytes_left_encrypt: mov %al, (%arg2, %r11, 1) @@ -1220,14 +1658,15 @@ _multiple_of_16_bytes_encrypt: shl $3, %r12 movd %r12d, %xmm15 # len(A) in %xmm15 shl $3, %arg4 # len(C) in bits (*128) - movq %arg4, %xmm1 + MOVQ_R64_XMM %arg4, %xmm1 pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000 pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C) pxor %xmm15, %xmm8 GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6 # final GHASH computation + movdqa SHUF_MASK(%rip), %xmm10 + PSHUFB_XMM %xmm10, %xmm8 # perform a 16 byte swap - pshufb SHUF_MASK(%rip), %xmm8 # perform a 16 byte swap mov %arg5, %rax # %rax = *Y0 movdqu (%rax), %xmm0 # %xmm0 = Y0 ENCRYPT_SINGLE_BLOCK %xmm0, %xmm15 # Encrypt(K, Y0) @@ -1240,11 +1679,11 @@ _return_T_encrypt: cmp $12, %r11 je _T_12_encrypt _T_8_encrypt: - movq %xmm0, %rax + MOVQ_R64_XMM %xmm0, %rax mov %rax, (%r10) jmp _return_T_done_encrypt _T_12_encrypt: - movq %xmm0, %rax + MOVQ_R64_XMM %xmm0, %rax mov %rax, (%r10) psrldq $8, %xmm0 movd %xmm0, %eax @@ -1258,6 +1697,7 @@ _return_T_done_encrypt: pop %r13 pop %r12 ret + #endif From 0686952458780e8a29d5a75dea03472fe2302c5a Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 21 Dec 2010 22:22:40 +1100 Subject: [PATCH 37/46] crypto: af_alg - fix af_alg memory_allocated data type Change data type to fix warning: crypto/af_alg.c:35: warning: initialization from incompatible pointer type Signed-off-by: Randy Dunlap Signed-off-by: Herbert Xu --- crypto/af_alg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/af_alg.c b/crypto/af_alg.c index bd9e53c780269d..940d70cb5c2551 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -27,7 +27,7 @@ struct alg_type_list { struct list_head list; }; -static atomic_t alg_memory_allocated; +static atomic_long_t alg_memory_allocated; static struct proto alg_proto = { .name = "ALG", From 8ad225e8e4f530f500c12ec77fd5a51caf6a2f66 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 28 Dec 2010 22:56:26 +1100 Subject: [PATCH 38/46] crypto: gf128mul - Remove experimental tag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This feature no longer needs the experimental tag. Reported-by: Toralf Förster Signed-off-by: Herbert Xu --- crypto/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/crypto/Kconfig b/crypto/Kconfig index 96b0e5542121b3..4b7cb0e691cd00 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -110,7 +110,6 @@ config CRYPTO_MANAGER_DISABLE_TESTS config CRYPTO_GF128MUL tristate "GF(2^128) multiplication functions (EXPERIMENTAL)" - depends on EXPERIMENTAL help Efficient table driven implementation of multiplications in the field GF(2^128). This is needed by some cypher modes. This From 6c39d116ba308ccf9007773a090ca6d20eb68459 Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Wed, 29 Dec 2010 21:52:04 +1100 Subject: [PATCH 39/46] crypto: omap-sham - backlog handling fix Previous commit "removed redundant locking" introduced a bug in handling backlog. In certain cases, when async request complete callback will call complete() on -EINPROGRESS code, it will cause uncompleted requests. It does not happen in implementation similar to crypto test manager, but it will happen in implementation similar to dm-crypt. Backlog needs to be checked before dequeuing next request. Signed-off-by: Dmitry Kasatkin Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index eb988e7a2fd9be..2e71123516e09f 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -664,7 +664,7 @@ static void omap_sham_finish_req(struct ahash_request *req, int err) static int omap_sham_handle_queue(struct omap_sham_dev *dd, struct ahash_request *req) { - struct crypto_async_request *async_req, *backlog = 0; + struct crypto_async_request *async_req, *backlog; struct omap_sham_reqctx *ctx; struct ahash_request *prev_req; unsigned long flags; @@ -677,11 +677,10 @@ static int omap_sham_handle_queue(struct omap_sham_dev *dd, spin_unlock_irqrestore(&dd->lock, flags); return ret; } + backlog = crypto_get_backlog(&dd->queue); async_req = crypto_dequeue_request(&dd->queue); - if (async_req) { + if (async_req) dd->flags |= FLAGS_BUSY; - backlog = crypto_get_backlog(&dd->queue); - } spin_unlock_irqrestore(&dd->lock, flags); if (!async_req) From 3181c22587cfeb1fe415e55b2dd8b83c7cc33e44 Mon Sep 17 00:00:00 2001 From: Adrian-Ken Rueegsegger Date: Tue, 4 Jan 2011 15:35:51 +1100 Subject: [PATCH 40/46] crypto: ripemd - Set module author and update email address Signed-off-by: Adrian-Ken Rueegsegger Signed-off-by: Herbert Xu --- crypto/rmd128.c | 3 ++- crypto/rmd160.c | 3 ++- crypto/rmd256.c | 3 ++- crypto/rmd320.c | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/crypto/rmd128.c b/crypto/rmd128.c index 1ceb6735aa5386..8a0f68b7f257fa 100644 --- a/crypto/rmd128.c +++ b/crypto/rmd128.c @@ -5,7 +5,7 @@ * * Based on the reference implementation by Antoon Bosselaers, ESAT-COSIC * - * Copyright (c) 2008 Adrian-Ken Rueegsegger + * Copyright (c) 2008 Adrian-Ken Rueegsegger * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -325,4 +325,5 @@ module_init(rmd128_mod_init); module_exit(rmd128_mod_fini); MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Adrian-Ken Rueegsegger "); MODULE_DESCRIPTION("RIPEMD-128 Message Digest"); diff --git a/crypto/rmd160.c b/crypto/rmd160.c index 472261fc913fb4..525d7bb752cf6a 100644 --- a/crypto/rmd160.c +++ b/crypto/rmd160.c @@ -5,7 +5,7 @@ * * Based on the reference implementation by Antoon Bosselaers, ESAT-COSIC * - * Copyright (c) 2008 Adrian-Ken Rueegsegger + * Copyright (c) 2008 Adrian-Ken Rueegsegger * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -369,4 +369,5 @@ module_init(rmd160_mod_init); module_exit(rmd160_mod_fini); MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Adrian-Ken Rueegsegger "); MODULE_DESCRIPTION("RIPEMD-160 Message Digest"); diff --git a/crypto/rmd256.c b/crypto/rmd256.c index 72eafa8d2e7bdc..69293d9b56e0c4 100644 --- a/crypto/rmd256.c +++ b/crypto/rmd256.c @@ -5,7 +5,7 @@ * * Based on the reference implementation by Antoon Bosselaers, ESAT-COSIC * - * Copyright (c) 2008 Adrian-Ken Rueegsegger + * Copyright (c) 2008 Adrian-Ken Rueegsegger * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -344,4 +344,5 @@ module_init(rmd256_mod_init); module_exit(rmd256_mod_fini); MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Adrian-Ken Rueegsegger "); MODULE_DESCRIPTION("RIPEMD-256 Message Digest"); diff --git a/crypto/rmd320.c b/crypto/rmd320.c index 86becaba2f05bb..09f97dfdfbba37 100644 --- a/crypto/rmd320.c +++ b/crypto/rmd320.c @@ -5,7 +5,7 @@ * * Based on the reference implementation by Antoon Bosselaers, ESAT-COSIC * - * Copyright (c) 2008 Adrian-Ken Rueegsegger + * Copyright (c) 2008 Adrian-Ken Rueegsegger * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -393,4 +393,5 @@ module_init(rmd320_mod_init); module_exit(rmd320_mod_fini); MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Adrian-Ken Rueegsegger "); MODULE_DESCRIPTION("RIPEMD-320 Message Digest"); From 41f2977d40798ce45f4da7a1291039ffbe9e1dbc Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Tue, 4 Jan 2011 15:37:16 +1100 Subject: [PATCH 41/46] crypto: mv_cesa - dont return PTR_ERR() of wrong pointer Fix a PTR_ERR() return of the wrong pointer Signed-off-by: Roel Kluin Signed-off-by: Herbert Xu --- drivers/crypto/mv_cesa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c index 7d279e578df572..c99305afa58ab2 100644 --- a/drivers/crypto/mv_cesa.c +++ b/drivers/crypto/mv_cesa.c @@ -857,7 +857,7 @@ static int mv_cra_hash_init(struct crypto_tfm *tfm, const char *base_hash_name, printk(KERN_WARNING MV_CESA "Base driver '%s' could not be loaded!\n", base_hash_name); - err = PTR_ERR(fallback_tfm); + err = PTR_ERR(base_hash); goto err_bad_base; } } From c73b7d02da9bfb4fadafc118a24ee868708839b6 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 4 Jan 2011 15:38:44 +1100 Subject: [PATCH 42/46] crypto: mark crypto workqueues CPU_INTENSIVE kcrypto_wq and pcrypt->wq's are used to run ciphers and may consume considerable amount of CPU cycles. Mark both as CPU_INTENSIVE so that they don't block other work items. As the workqueues are primarily used to burn CPU cycles, concurrency levels shouldn't matter much and are left at 1. A higher value may be beneficial and needs investigation. Signed-off-by: Tejun Heo Signed-off-by: Herbert Xu --- crypto/crypto_wq.c | 3 ++- crypto/pcrypt.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/crypto/crypto_wq.c b/crypto/crypto_wq.c index fdcf6248f152cb..b980ee1af459f5 100644 --- a/crypto/crypto_wq.c +++ b/crypto/crypto_wq.c @@ -20,7 +20,8 @@ EXPORT_SYMBOL_GPL(kcrypto_wq); static int __init crypto_wq_init(void) { - kcrypto_wq = create_workqueue("crypto"); + kcrypto_wq = alloc_workqueue("crypto", + WQ_MEM_RECLAIM | WQ_CPU_INTENSIVE, 1); if (unlikely(!kcrypto_wq)) return -ENOMEM; return 0; diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index de3078215fe6ea..806635f5d33114 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -455,7 +455,8 @@ static int pcrypt_init_padata(struct padata_pcrypt *pcrypt, get_online_cpus(); - pcrypt->wq = create_workqueue(name); + pcrypt->wq = alloc_workqueue(name, + WQ_MEM_RECLAIM | WQ_CPU_INTENSIVE, 1); if (!pcrypt->wq) goto err; From dffa18449a334cf436c1fabfb2fcf7d4240c994b Mon Sep 17 00:00:00 2001 From: Dennis Gilmore Date: Thu, 6 Jan 2011 17:15:31 +1100 Subject: [PATCH 43/46] crypto: n2 - use __devexit not __exit in n2_unregister_algs fixes fedora sparc build failure, thanks to kylem for helping with debugging Signed-off-by: Dennis Gilmore Acked-by: David S. Miller Signed-off-by: Herbert Xu --- drivers/crypto/n2_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c index 88ee01510ec013..337249155c055f 100644 --- a/drivers/crypto/n2_core.c +++ b/drivers/crypto/n2_core.c @@ -1542,7 +1542,7 @@ static int __devinit n2_register_algs(void) return err; } -static void __exit n2_unregister_algs(void) +static void __devexit n2_unregister_algs(void) { mutex_lock(&spu_lock); if (!--algs_registered) From 0735ac1f2551d9f9d356126aaf3b1110150918e6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 7 Jan 2011 14:48:57 +1100 Subject: [PATCH 44/46] hwrng: via_rng - Fix asm constraints The inline asm to invoke xstore did not specify the constraints correctly. In particular, dx/di should have been marked as output registers as well as input as they're modified by xstore. Thanks to Mario Holbe for creating this patch and testing it. Tested-by: Mario 'BitKoenig' Holbe Signed-off-by: Herbert Xu --- drivers/char/hw_random/via-rng.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c index 794aacb715c1d8..7f86666bb393ab 100644 --- a/drivers/char/hw_random/via-rng.c +++ b/drivers/char/hw_random/via-rng.c @@ -81,8 +81,7 @@ static inline u32 xstore(u32 *addr, u32 edx_in) ts_state = irq_ts_save(); asm(".byte 0x0F,0xA7,0xC0 /* xstore %%edi (addr=%0) */" - :"=m"(*addr), "=a"(eax_out) - :"D"(addr), "d"(edx_in)); + : "=m" (*addr), "=a" (eax_out), "+d" (edx_in), "+D" (addr)); irq_ts_restore(ts_state); return eax_out; From 21493088733e6e09dac6f54595a1b6b8ab1e68fd Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 7 Jan 2011 14:52:00 +1100 Subject: [PATCH 45/46] crypto: padlock - Move padlock.h into include/crypto This patch moves padlock.h from drivers/crypto into include/crypto so that it may be used by the via-rng driver. Signed-off-by: Herbert Xu --- drivers/crypto/padlock-aes.c | 2 +- drivers/crypto/padlock-sha.c | 8 +------- {drivers => include}/crypto/padlock.h | 8 +++++++- 3 files changed, 9 insertions(+), 9 deletions(-) rename {drivers => include}/crypto/padlock.h (82%) diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 2e992bc8015b7b..2e56508754d076 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -21,7 +22,6 @@ #include #include #include -#include "padlock.h" /* * Number of data blocks actually fetched for each xcrypt insn. diff --git a/drivers/crypto/padlock-sha.c b/drivers/crypto/padlock-sha.c index d3a27e0119bc2e..adf075b6b9a810 100644 --- a/drivers/crypto/padlock-sha.c +++ b/drivers/crypto/padlock-sha.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include @@ -22,13 +23,6 @@ #include #include #include -#include "padlock.h" - -#ifdef CONFIG_64BIT -#define STACK_ALIGN 16 -#else -#define STACK_ALIGN 4 -#endif struct padlock_sha_desc { struct shash_desc fallback; diff --git a/drivers/crypto/padlock.h b/include/crypto/padlock.h similarity index 82% rename from drivers/crypto/padlock.h rename to include/crypto/padlock.h index b728e4518bd158..d2cfa2ef49e822 100644 --- a/drivers/crypto/padlock.h +++ b/include/crypto/padlock.h @@ -15,9 +15,15 @@ #define PADLOCK_ALIGNMENT 16 -#define PFX "padlock: " +#define PFX KBUILD_MODNAME ": " #define PADLOCK_CRA_PRIORITY 300 #define PADLOCK_COMPOSITE_PRIORITY 400 +#ifdef CONFIG_64BIT +#define STACK_ALIGN 16 +#else +#define STACK_ALIGN 4 +#endif + #endif /* _CRYPTO_PADLOCK_H */ From 55db8387a5e8d07407f0b7c6b2526417a2bc6243 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 7 Jan 2011 14:55:06 +1100 Subject: [PATCH 46/46] hwrng: via_rng - Fix memory scribbling on some CPUs It has been reported that on at least one Nano CPU the xstore instruction will write as many as 16 bytes of data to the output buffer. This causes memory corruption as we use rng->priv which is only 4-8 bytes long. This patch fixes this by using an intermediate buffer on the stack with at least 16 bytes and aligned to a 16-byte boundary. The problem was observed on the following processor: processor : 0 vendor_id : CentaurHauls cpu family : 6 model : 15 model name : VIA Nano processor U2250 (1.6GHz Capable) stepping : 3 cpu MHz : 1600.000 cache size : 1024 KB fdiv_bug : no hlt_bug : no f00f_bug : no coma_bug : no fpu : yes fpu_exception : yes cpuid level : 10 wp : yes flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat clflush acpi mmx fxsr sse sse2 ss tm syscall nx lm constant_tsc up rep_good pni monitor vmx est tm2 ssse3 cx16 xtpr rng rng_en ace ace_en ace2 phe phe_en lahf_lm bogomips : 3192.08 clflush size : 64 cache_alignment : 128 address sizes : 36 bits physical, 48 bits virtual power management: Tested-by: Mario 'BitKoenig' Holbe Signed-off-by: Herbert Xu --- drivers/char/hw_random/via-rng.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/char/hw_random/via-rng.c b/drivers/char/hw_random/via-rng.c index 7f86666bb393ab..d0387a84eec136 100644 --- a/drivers/char/hw_random/via-rng.c +++ b/drivers/char/hw_random/via-rng.c @@ -24,6 +24,7 @@ * warranty of any kind, whether express or implied. */ +#include #include #include #include @@ -34,7 +35,6 @@ #include -#define PFX KBUILD_MODNAME ": " enum { @@ -89,8 +89,10 @@ static inline u32 xstore(u32 *addr, u32 edx_in) static int via_rng_data_present(struct hwrng *rng, int wait) { + char buf[16 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__ + ((aligned(STACK_ALIGN))); + u32 *via_rng_datum = (u32 *)PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT); u32 bytes_out; - u32 *via_rng_datum = (u32 *)(&rng->priv); int i; /* We choose the recommended 1-byte-per-instruction RNG rate, @@ -114,6 +116,7 @@ static int via_rng_data_present(struct hwrng *rng, int wait) break; udelay(10); } + rng->priv = *via_rng_datum; return bytes_out ? 1 : 0; }