Skip to content

Commit

Permalink
Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git…
Browse files Browse the repository at this point in the history
…/herbert/crypto-2.6

Pull crypto updates from Herbert Xu:
 "API:
   - Add speed testing on 1420-byte blocks for networking

  Algorithms:
   - Improve performance of chacha on ARM for network packets
   - Improve performance of aegis128 on ARM for network packets

  Drivers:
   - Add support for Keem Bay OCS AES/SM4
   - Add support for QAT 4xxx devices
   - Enable crypto-engine retry mechanism in caam
   - Enable support for crypto engine on sdm845 in qce
   - Add HiSilicon PRNG driver support"

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (161 commits)
  crypto: qat - add capability detection logic in qat_4xxx
  crypto: qat - add AES-XTS support for QAT GEN4 devices
  crypto: qat - add AES-CTR support for QAT GEN4 devices
  crypto: atmel-i2c - select CONFIG_BITREVERSE
  crypto: hisilicon/trng - replace atomic_add_return()
  crypto: keembay - Add support for Keem Bay OCS AES/SM4
  dt-bindings: Add Keem Bay OCS AES bindings
  crypto: aegis128 - avoid spurious references crypto_aegis128_update_simd
  crypto: seed - remove trailing semicolon in macro definition
  crypto: x86/poly1305 - Use TEST %reg,%reg instead of CMP $0,%reg
  crypto: x86/sha512 - Use TEST %reg,%reg instead of CMP $0,%reg
  crypto: aesni - Use TEST %reg,%reg instead of CMP $0,%reg
  crypto: cpt - Fix sparse warnings in cptpf
  hwrng: ks-sa - Add dependency on IOMEM and OF
  crypto: lib/blake2s - Move selftest prototype into header file
  crypto: arm/aes-ce - work around Cortex-A57/A72 silion errata
  crypto: ecdh - avoid unaligned accesses in ecdh_set_secret()
  crypto: ccree - rework cache parameters handling
  crypto: cavium - Use dma_set_mask_and_coherent to simplify code
  crypto: marvell/octeontx - Use dma_set_mask_and_coherent to simplify code
  ...
  • Loading branch information
torvalds committed Dec 14, 2020
2 parents 51895d5 + 93cebeb commit 9e4b0d5
Show file tree
Hide file tree
Showing 264 changed files with 8,428 additions and 1,993 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
%YAML 1.2
---
$id: http://devicetree.org/schemas/crypto/intel,keembay-ocs-aes.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#

title: Intel Keem Bay OCS AES Device Tree Bindings

maintainers:
- Daniele Alessandrelli <[email protected]>

description:
The Intel Keem Bay Offload and Crypto Subsystem (OCS) AES engine provides
hardware-accelerated AES/SM4 encryption/decryption.

properties:
compatible:
const: intel,keembay-ocs-aes

reg:
maxItems: 1

interrupts:
maxItems: 1

clocks:
maxItems: 1

required:
- compatible
- reg
- interrupts
- clocks

additionalProperties: false

examples:
- |
#include <dt-bindings/interrupt-controller/arm-gic.h>
crypto@30008000 {
compatible = "intel,keembay-ocs-aes";
reg = <0x30008000 0x1000>;
interrupts = <GIC_SPI 114 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&scmi_clk 95>;
};
14 changes: 12 additions & 2 deletions MAINTAINERS
Original file line number Diff line number Diff line change
Expand Up @@ -8016,7 +8016,7 @@ F: drivers/staging/hikey9xx/
HISILICON TRUE RANDOM NUMBER GENERATOR V2 SUPPORT
M: Zaibo Xu <[email protected]>
S: Maintained
F: drivers/char/hw_random/hisi-trng-v2.c
F: drivers/crypto/hisilicon/trng/trng.c

HISILICON V3XX SPI NOR FLASH Controller Driver
M: John Garry <[email protected]>
Expand Down Expand Up @@ -8975,13 +8975,23 @@ M: Deepak Saxena <[email protected]>
S: Maintained
F: drivers/char/hw_random/ixp4xx-rng.c

INTEL KEEMBAY DRM DRIVER
INTEL KEEM BAY DRM DRIVER
M: Anitha Chrisanthus <[email protected]>
M: Edmund Dea <[email protected]>
S: Maintained
F: Documentation/devicetree/bindings/display/intel,kmb_display.yaml
F: drivers/gpu/drm/kmb/

INTEL KEEM BAY OCS AES/SM4 CRYPTO DRIVER
M: Daniele Alessandrelli <[email protected]>
S: Maintained
F: Documentation/devicetree/bindings/crypto/intel,keembay-ocs-aes.yaml
F: drivers/crypto/keembay/Kconfig
F: drivers/crypto/keembay/Makefile
F: drivers/crypto/keembay/keembay-ocs-aes-core.c
F: drivers/crypto/keembay/ocs-aes.c
F: drivers/crypto/keembay/ocs-aes.h

INTEL MANAGEMENT ENGINE (mei)
M: Tomas Winkler <[email protected]>
L: [email protected]
Expand Down
32 changes: 22 additions & 10 deletions arch/arm/crypto/aes-ce-core.S
Original file line number Diff line number Diff line change
Expand Up @@ -386,20 +386,32 @@ ENTRY(ce_aes_ctr_encrypt)
.Lctrloop4x:
subs r4, r4, #4
bmi .Lctr1x
add r6, r6, #1

/*
* NOTE: the sequence below has been carefully tweaked to avoid
* a silicon erratum that exists in Cortex-A57 (#1742098) and
* Cortex-A72 (#1655431) cores, where AESE/AESMC instruction pairs
* may produce an incorrect result if they take their input from a
* register of which a single 32-bit lane has been updated the last
* time it was modified. To work around this, the lanes of registers
* q0-q3 below are not manipulated individually, and the different
* counter values are prepared by successive manipulations of q7.
*/
add ip, r6, #1
vmov q0, q7
rev ip, ip
add lr, r6, #2
vmov s31, ip @ set lane 3 of q1 via q7
add ip, r6, #3
rev lr, lr
vmov q1, q7
rev ip, r6
add r6, r6, #1
vmov s31, lr @ set lane 3 of q2 via q7
rev ip, ip
vmov q2, q7
vmov s7, ip
rev ip, r6
add r6, r6, #1
vmov s31, ip @ set lane 3 of q3 via q7
add r6, r6, #4
vmov q3, q7
vmov s11, ip
rev ip, r6
add r6, r6, #1
vmov s15, ip

vld1.8 {q4-q5}, [r1]!
vld1.8 {q6}, [r1]!
vld1.8 {q15}, [r1]!
Expand Down
8 changes: 5 additions & 3 deletions arch/arm/crypto/aes-neonbs-glue.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ MODULE_AUTHOR("Ard Biesheuvel <[email protected]>");
MODULE_LICENSE("GPL v2");

MODULE_ALIAS_CRYPTO("ecb(aes)");
MODULE_ALIAS_CRYPTO("cbc(aes)");
MODULE_ALIAS_CRYPTO("cbc(aes)-all");
MODULE_ALIAS_CRYPTO("ctr(aes)");
MODULE_ALIAS_CRYPTO("xts(aes)");

Expand Down Expand Up @@ -191,7 +191,8 @@ static int cbc_init(struct crypto_skcipher *tfm)
struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
unsigned int reqsize;

ctx->enc_tfm = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC);
ctx->enc_tfm = crypto_alloc_skcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC |
CRYPTO_ALG_NEED_FALLBACK);
if (IS_ERR(ctx->enc_tfm))
return PTR_ERR(ctx->enc_tfm);

Expand Down Expand Up @@ -441,7 +442,8 @@ static struct skcipher_alg aes_algs[] = { {
.base.cra_blocksize = AES_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct aesbs_cbc_ctx),
.base.cra_module = THIS_MODULE,
.base.cra_flags = CRYPTO_ALG_INTERNAL,
.base.cra_flags = CRYPTO_ALG_INTERNAL |
CRYPTO_ALG_NEED_FALLBACK,

.min_keysize = AES_MIN_KEY_SIZE,
.max_keysize = AES_MAX_KEY_SIZE,
Expand Down
34 changes: 17 additions & 17 deletions arch/arm/crypto/chacha-glue.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
int nrounds);
asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
int nrounds);
int nrounds, unsigned int nbytes);
asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds);
asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);

Expand All @@ -42,24 +42,24 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
{
u8 buf[CHACHA_BLOCK_SIZE];

while (bytes >= CHACHA_BLOCK_SIZE * 4) {
chacha_4block_xor_neon(state, dst, src, nrounds);
bytes -= CHACHA_BLOCK_SIZE * 4;
src += CHACHA_BLOCK_SIZE * 4;
dst += CHACHA_BLOCK_SIZE * 4;
state[12] += 4;
}
while (bytes >= CHACHA_BLOCK_SIZE) {
chacha_block_xor_neon(state, dst, src, nrounds);
bytes -= CHACHA_BLOCK_SIZE;
src += CHACHA_BLOCK_SIZE;
dst += CHACHA_BLOCK_SIZE;
state[12]++;
while (bytes > CHACHA_BLOCK_SIZE) {
unsigned int l = min(bytes, CHACHA_BLOCK_SIZE * 4U);

chacha_4block_xor_neon(state, dst, src, nrounds, l);
bytes -= l;
src += l;
dst += l;
state[12] += DIV_ROUND_UP(l, CHACHA_BLOCK_SIZE);
}
if (bytes) {
memcpy(buf, src, bytes);
chacha_block_xor_neon(state, buf, buf, nrounds);
memcpy(dst, buf, bytes);
const u8 *s = src;
u8 *d = dst;

if (bytes != CHACHA_BLOCK_SIZE)
s = d = memcpy(buf, src, bytes);
chacha_block_xor_neon(state, d, s, nrounds);
if (d != dst)
memcpy(dst, buf, bytes);
}
}

Expand Down
97 changes: 90 additions & 7 deletions arch/arm/crypto/chacha-neon-core.S
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
*/

#include <linux/linkage.h>
#include <asm/cache.h>

.text
.fpu neon
Expand Down Expand Up @@ -205,7 +206,7 @@ ENDPROC(hchacha_block_neon)

.align 5
ENTRY(chacha_4block_xor_neon)
push {r4-r5}
push {r4, lr}
mov r4, sp // preserve the stack pointer
sub ip, sp, #0x20 // allocate a 32 byte buffer
bic ip, ip, #0x1f // aligned to 32 bytes
Expand All @@ -229,10 +230,10 @@ ENTRY(chacha_4block_xor_neon)
vld1.32 {q0-q1}, [r0]
vld1.32 {q2-q3}, [ip]

adr r5, .Lctrinc
adr lr, .Lctrinc
vdup.32 q15, d7[1]
vdup.32 q14, d7[0]
vld1.32 {q4}, [r5, :128]
vld1.32 {q4}, [lr, :128]
vdup.32 q13, d6[1]
vdup.32 q12, d6[0]
vdup.32 q11, d5[1]
Expand Down Expand Up @@ -455,7 +456,7 @@ ENTRY(chacha_4block_xor_neon)

// Re-interleave the words in the first two rows of each block (x0..7).
// Also add the counter values 0-3 to x12[0-3].
vld1.32 {q8}, [r5, :128] // load counter values 0-3
vld1.32 {q8}, [lr, :128] // load counter values 0-3
vzip.32 q0, q1 // => (0 1 0 1) (0 1 0 1)
vzip.32 q2, q3 // => (2 3 2 3) (2 3 2 3)
vzip.32 q4, q5 // => (4 5 4 5) (4 5 4 5)
Expand Down Expand Up @@ -493,6 +494,8 @@ ENTRY(chacha_4block_xor_neon)

// Re-interleave the words in the last two rows of each block (x8..15).
vld1.32 {q8-q9}, [sp, :256]
mov sp, r4 // restore original stack pointer
ldr r4, [r4, #8] // load number of bytes
vzip.32 q12, q13 // => (12 13 12 13) (12 13 12 13)
vzip.32 q14, q15 // => (14 15 14 15) (14 15 14 15)
vzip.32 q8, q9 // => (8 9 8 9) (8 9 8 9)
Expand Down Expand Up @@ -520,41 +523,121 @@ ENTRY(chacha_4block_xor_neon)
// XOR the rest of the data with the keystream

vld1.8 {q0-q1}, [r2]!
subs r4, r4, #96
veor q0, q0, q8
veor q1, q1, q12
ble .Lle96
vst1.8 {q0-q1}, [r1]!

vld1.8 {q0-q1}, [r2]!
subs r4, r4, #32
veor q0, q0, q2
veor q1, q1, q6
ble .Lle128
vst1.8 {q0-q1}, [r1]!

vld1.8 {q0-q1}, [r2]!
subs r4, r4, #32
veor q0, q0, q10
veor q1, q1, q14
ble .Lle160
vst1.8 {q0-q1}, [r1]!

vld1.8 {q0-q1}, [r2]!
subs r4, r4, #32
veor q0, q0, q4
veor q1, q1, q5
ble .Lle192
vst1.8 {q0-q1}, [r1]!

vld1.8 {q0-q1}, [r2]!
subs r4, r4, #32
veor q0, q0, q9
veor q1, q1, q13
ble .Lle224
vst1.8 {q0-q1}, [r1]!

vld1.8 {q0-q1}, [r2]!
subs r4, r4, #32
veor q0, q0, q3
veor q1, q1, q7
blt .Llt256
.Lout:
vst1.8 {q0-q1}, [r1]!

vld1.8 {q0-q1}, [r2]
mov sp, r4 // restore original stack pointer
veor q0, q0, q11
veor q1, q1, q15
vst1.8 {q0-q1}, [r1]

pop {r4-r5}
bx lr
pop {r4, pc}

.Lle192:
vmov q4, q9
vmov q5, q13

.Lle160:
// nothing to do

.Lfinalblock:
// Process the final block if processing less than 4 full blocks.
// Entered with 32 bytes of ChaCha cipher stream in q4-q5, and the
// previous 32 byte output block that still needs to be written at
// [r1] in q0-q1.
beq .Lfullblock

.Lpartialblock:
adr lr, .Lpermute + 32
add r2, r2, r4
add lr, lr, r4
add r4, r4, r1

vld1.8 {q2-q3}, [lr]
vld1.8 {q6-q7}, [r2]

add r4, r4, #32

vtbl.8 d4, {q4-q5}, d4
vtbl.8 d5, {q4-q5}, d5
vtbl.8 d6, {q4-q5}, d6
vtbl.8 d7, {q4-q5}, d7

veor q6, q6, q2
veor q7, q7, q3

vst1.8 {q6-q7}, [r4] // overlapping stores
vst1.8 {q0-q1}, [r1]
pop {r4, pc}

.Lfullblock:
vmov q11, q4
vmov q15, q5
b .Lout
.Lle96:
vmov q4, q2
vmov q5, q6
b .Lfinalblock
.Lle128:
vmov q4, q10
vmov q5, q14
b .Lfinalblock
.Lle224:
vmov q4, q3
vmov q5, q7
b .Lfinalblock
.Llt256:
vmov q4, q11
vmov q5, q15
b .Lpartialblock
ENDPROC(chacha_4block_xor_neon)

.align L1_CACHE_SHIFT
.Lpermute:
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
.byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
.byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
.byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
.byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
2 changes: 1 addition & 1 deletion arch/arm/crypto/sha1-ce-glue.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/sha.h>
#include <crypto/sha1.h>
#include <crypto/sha1_base.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
Expand Down
2 changes: 1 addition & 1 deletion arch/arm/crypto/sha1.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#define ASM_ARM_CRYPTO_SHA1_H

#include <linux/crypto.h>
#include <crypto/sha.h>
#include <crypto/sha1.h>

extern int sha1_update_arm(struct shash_desc *desc, const u8 *data,
unsigned int len);
Expand Down
Loading

0 comments on commit 9e4b0d5

Please sign in to comment.