forked from torvalds/linux
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
powerpc: Add a powerpc implementation of SHA-1
This patch adds a crypto driver which provides a powerpc-accelerated implementation of SHA-1, accelerated in that it is written in asm. Original patch by Paul, minor fixups for upstream by moi. Lightly tested on 64-bit with the test program at http://michael.ellerman.id.au/files/junkcode/sha1test.c — it seems to work, and is "not slower" than the generic version. Needs testing on 32-bit. Signed-off-by: Paul Mackerras <[email protected]> Signed-off-by: Michael Ellerman <[email protected]> Signed-off-by: Benjamin Herrenschmidt <[email protected]>
- Loading branch information
Showing
5 changed files
with
353 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# | ||
# powerpc/crypto/Makefile | ||
# | ||
# Arch-specific CryptoAPI modules. | ||
# | ||
|
||
# Build sha1-powerpc.o according to the value of CONFIG_CRYPTO_SHA1_PPC.
obj-$(CONFIG_CRYPTO_SHA1_PPC) += sha1-powerpc.o | ||
|
||
# sha1-powerpc.o is a composite object: the assembly block transform
# (sha1-powerpc-asm.o) plus the C glue code (sha1.o).
sha1-powerpc-y := sha1-powerpc-asm.o sha1.o |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,179 @@ | ||
/* | ||
* SHA-1 implementation for PowerPC. | ||
* | ||
* Copyright (C) 2005 Paul Mackerras <[email protected]> | ||
*/ | ||
|
||
#include <asm/ppc_asm.h> | ||
#include <asm/asm-offsets.h> | ||
|
||
/* | ||
* We roll the registers for T, A, B, C, D, E around on each | ||
* iteration; T on iteration t is A on iteration t+1, and so on. | ||
* We use registers 7 - 12 for this. | ||
*/ | ||
/*
 * Register number (7..12) holding each working variable in round t.
 * The offsets step down by one from RT to RE, so for any t:
 *   RT(t) == RA(t+1), RA(t) == RB(t+1), RB(t) == RC(t+1),
 *   RC(t) == RD(t+1), RD(t) == RE(t+1), and RE(t) == RT(t+1).
 * The register that held E in round t is therefore reused as T in
 * round t+1, and no register-to-register moves are needed between rounds.
 * The mapping has period 6 in t.
 */
#define RT(t) ((((t)+5)%6)+7) | ||
#define RA(t) ((((t)+4)%6)+7) | ||
#define RB(t) ((((t)+3)%6)+7) | ||
#define RC(t) ((((t)+2)%6)+7) | ||
#define RD(t) ((((t)+1)%6)+7) | ||
#define RE(t) ((((t)+0)%6)+7) | ||
|
||
/* We use registers 16 - 31 for the W values */ | ||
/*
 * Register number (16..31) holding message-schedule word t.
 * The index wraps modulo 16, so W(t) and W(t+16) name the same register:
 * the schedule is kept as a 16-entry circular buffer in registers.
 */
#define W(t) (((t)%16)+16) | ||
|
||
/*
 * LOADW(t): load the 32-bit message word at byte offset 4*t from the
 * block pointer in r4 into the register W(t).
 * NOTE(review): lwz is a plain (native byte order) load, so this presumably
 * relies on a big-endian CPU to match SHA-1's big-endian words - confirm.
 */
#define LOADW(t) \ | ||
lwz W(t),(t)*4(r4) | ||
|
||
/*
 * STEPD0_LOAD(t): one round using f = (B & C) | (D & ~B), which also
 * fetches message word t+4 from the input block.
 *
 *   r6     = (B & C) | (D & ~B)
 *   T      = rotl(A, 5) + r6 + E + r15 + W(t)   (r15 holds the round constant)
 *   B      = rotl(B, 30)                        (this register is C in round t+1)
 *   W(t+4) = word at byte offset 4*(t+4) from r4
 *
 * Uses r0, r6 and r14 as scratch registers.
 */
#define STEPD0_LOAD(t) \ | ||
andc r0,RD(t),RB(t); \ | ||
and r6,RB(t),RC(t); \ | ||
rotlwi RT(t),RA(t),5; \ | ||
or r6,r6,r0; \ | ||
add r0,RE(t),r15; \ | ||
add RT(t),RT(t),r6; \ | ||
add r14,r0,W(t); \ | ||
lwz W((t)+4),((t)+4)*4(r4); \ | ||
rotlwi RB(t),RB(t),30; \ | ||
add RT(t),RT(t),r14 | ||
|
||
/*
 * STEPD0_UPDATE(t): one round using f = (B & C) | (D & ~B), interleaved
 * with the computation of message-schedule word t+4.
 *
 *   T      = rotl(A, 5) + f + E + r15 + W(t)    (r15 holds the round constant)
 *   B      = rotl(B, 30)
 *   W(t+4) = rotl(W(t+1) ^ W(t-4) ^ W(t-12) ^ W(t-10), 1)
 *
 * W(t+4-16) is the same register as W(t+4) (indices wrap modulo 16), so the
 * new schedule word overwrites the oldest one in place.
 * Uses r0, r5 and r6 as scratch registers.
 */
#define STEPD0_UPDATE(t) \ | ||
and r6,RB(t),RC(t); \ | ||
andc r0,RD(t),RB(t); \ | ||
rotlwi RT(t),RA(t),5; \ | ||
rotlwi RB(t),RB(t),30; \ | ||
or r6,r6,r0; \ | ||
add r0,RE(t),r15; \ | ||
xor r5,W((t)+4-3),W((t)+4-8); \ | ||
add RT(t),RT(t),r6; \ | ||
xor W((t)+4),W((t)+4-16),W((t)+4-14); \ | ||
add r0,r0,W(t); \ | ||
xor W((t)+4),W((t)+4),r5; \ | ||
add RT(t),RT(t),r0; \ | ||
rotlwi W((t)+4),W((t)+4),1 | ||
|
||
/*
 * STEPD1(t): one round using f = B ^ C ^ D, with no message-schedule
 * update.
 *
 *   T = rotl(A, 5) + (B ^ C ^ D) + E + r15 + W(t)   (r15 holds the round constant)
 *   B = rotl(B, 30)
 *
 * Uses r0 and r6 as scratch registers.
 */
#define STEPD1(t) \ | ||
xor r6,RB(t),RC(t); \ | ||
rotlwi RT(t),RA(t),5; \ | ||
rotlwi RB(t),RB(t),30; \ | ||
xor r6,r6,RD(t); \ | ||
add r0,RE(t),r15; \ | ||
add RT(t),RT(t),r6; \ | ||
add r0,r0,W(t); \ | ||
add RT(t),RT(t),r0 | ||
|
||
/*
 * STEPD1_UPDATE(t): one round using f = B ^ C ^ D, interleaved with the
 * computation of message-schedule word t+4.
 *
 *   T      = rotl(A, 5) + (B ^ C ^ D) + E + r15 + W(t)
 *   B      = rotl(B, 30)
 *   W(t+4) = rotl(W(t+1) ^ W(t-4) ^ W(t-12) ^ W(t-10), 1)
 *
 * W(t+4-16) and W(t+4) are the same register (indices wrap modulo 16).
 * Uses r0, r5 and r6 as scratch registers.
 */
#define STEPD1_UPDATE(t) \ | ||
xor r6,RB(t),RC(t); \ | ||
rotlwi RT(t),RA(t),5; \ | ||
rotlwi RB(t),RB(t),30; \ | ||
xor r6,r6,RD(t); \ | ||
add r0,RE(t),r15; \ | ||
xor r5,W((t)+4-3),W((t)+4-8); \ | ||
add RT(t),RT(t),r6; \ | ||
xor W((t)+4),W((t)+4-16),W((t)+4-14); \ | ||
add r0,r0,W(t); \ | ||
xor W((t)+4),W((t)+4),r5; \ | ||
add RT(t),RT(t),r0; \ | ||
rotlwi W((t)+4),W((t)+4),1 | ||
|
||
/*
 * STEPD2_UPDATE(t): one round using f = (B & C) | (B & D) | (C & D),
 * interleaved with the computation of message-schedule word t+4.
 *
 *   T      = rotl(A, 5) + f + E + r15 + W(t)    (r15 holds the round constant)
 *   B      = rotl(B, 30)
 *   W(t+4) = rotl(W(t+1) ^ W(t-4) ^ W(t-12) ^ W(t-10), 1)
 *
 * B is rotated after (B & C) and (B & D) have been formed; the remaining
 * term (C & D) does not involve B.
 * Uses r0, r5 and r6 as scratch registers.
 */
#define STEPD2_UPDATE(t) \ | ||
and r6,RB(t),RC(t); \ | ||
and r0,RB(t),RD(t); \ | ||
rotlwi RT(t),RA(t),5; \ | ||
or r6,r6,r0; \ | ||
rotlwi RB(t),RB(t),30; \ | ||
and r0,RC(t),RD(t); \ | ||
xor r5,W((t)+4-3),W((t)+4-8); \ | ||
or r6,r6,r0; \ | ||
xor W((t)+4),W((t)+4-16),W((t)+4-14); \ | ||
add r0,RE(t),r15; \ | ||
add RT(t),RT(t),r6; \ | ||
add r0,r0,W(t); \ | ||
xor W((t)+4),W((t)+4),r5; \ | ||
add RT(t),RT(t),r0; \ | ||
rotlwi W((t)+4),W((t)+4),1 | ||
|
||
/*
 * STEP0LD4(t): four consecutive STEPD0_LOAD rounds, t .. t+3.
 * As a side effect, message words t+4 .. t+7 are loaded from the block.
 */
#define STEP0LD4(t) \ | ||
STEPD0_LOAD(t); \ | ||
STEPD0_LOAD((t)+1); \ | ||
STEPD0_LOAD((t)+2); \ | ||
STEPD0_LOAD((t)+3) | ||
|
||
/*
 * STEPUP4(t, fn): four consecutive STEP<fn>_UPDATE rounds, t .. t+3.
 * fn is token-pasted into the macro name, so it must be one of the
 * variants that has an _UPDATE form (D0, D1 or D2).
 */
#define STEPUP4(t, fn) \ | ||
STEP##fn##_UPDATE(t); \ | ||
STEP##fn##_UPDATE((t)+1); \ | ||
STEP##fn##_UPDATE((t)+2); \ | ||
STEP##fn##_UPDATE((t)+3) | ||
|
||
/*
 * STEPUP20(t, fn): twenty consecutive STEP<fn>_UPDATE rounds, t .. t+19,
 * built from five STEPUP4 groups.
 */
#define STEPUP20(t, fn) \ | ||
STEPUP4(t, fn); \ | ||
STEPUP4((t)+4, fn); \ | ||
STEPUP4((t)+8, fn); \ | ||
STEPUP4((t)+12, fn); \ | ||
STEPUP4((t)+16, fn) | ||
|
||
/*
 * powerpc_sha_transform: run the 80 SHA-1 rounds over one 64-byte block
 * and add the result into the hash state.
 *
 * In:   r3 = pointer to the five 32-bit state words, updated in place
 *       r4 = pointer to the 16 message words of the block
 * No instruction reads r5 before it is first written as a scratch register,
 * so any third argument passed by the caller is ignored.
 *
 * Register use: r0, r5, r6, r14 scratch; r15 round constant;
 * r7-r12 rolling T/A/B/C/D/E; r16-r31 message schedule.
 * NOTE(review): SAVE_8GPRS(14)/SAVE_10GPRS(22) and the matching REST_
 * macros presumably save/restore r14-r21 and r22-r31 in the new stack
 * frame - confirm against their definitions.
 */
_GLOBAL(powerpc_sha_transform) | ||
PPC_STLU r1,-STACKFRAMESIZE(r1) | ||
SAVE_8GPRS(14, r1) | ||
SAVE_10GPRS(22, r1) | ||
|
||
/* Load up A - E */ | ||
lwz RA(0),0(r3) /* A */ | ||
lwz RB(0),4(r3) /* B */ | ||
lwz RC(0),8(r3) /* C */ | ||
lwz RD(0),12(r3) /* D */ | ||
lwz RE(0),16(r3) /* E */ | ||
|
||
/* Preload W(0)..W(3); each STEPD0_LOAD round t then fetches W(t+4). */
LOADW(0) | ||
LOADW(1) | ||
LOADW(2) | ||
LOADW(3) | ||
|
||
/* Rounds 0-11 load W(4)..W(15); rounds 12-19 start computing W(16)..W(23). */
lis r15,0x5a82 /* K0-19 */ | ||
ori r15,r15,0x7999 | ||
STEP0LD4(0) | ||
STEP0LD4(4) | ||
STEP0LD4(8) | ||
STEPUP4(12, D0) | ||
STEPUP4(16, D0) | ||
|
||
lis r15,0x6ed9 /* K20-39 */ | ||
ori r15,r15,0xeba1 | ||
STEPUP20(20, D1) | ||
|
||
lis r15,0x8f1b /* K40-59 */ | ||
ori r15,r15,0xbcdc | ||
STEPUP20(40, D2) | ||
|
||
/*
 * Rounds 60-75 still extend the schedule (round 75 produces W(79));
 * rounds 76-79 use STEPD1, which needs no further schedule words.
 * From round 76 on only W(76)..W(79) (r28-r31) are live, so r16-r20
 * are reused to reload the original state, interleaved with the rounds.
 */
lis r15,0xca62 /* K60-79 */ | ||
ori r15,r15,0xc1d6 | ||
STEPUP4(60, D1) | ||
STEPUP4(64, D1) | ||
STEPUP4(68, D1) | ||
STEPUP4(72, D1) | ||
lwz r20,16(r3) | ||
STEPD1(76) | ||
lwz r19,12(r3) | ||
STEPD1(77) | ||
lwz r18,8(r3) | ||
STEPD1(78) | ||
lwz r17,4(r3) | ||
STEPD1(79) | ||
|
||
/*
 * Add the original state to the final working variables, moving them
 * from their round-80 registers back to the round-0 registers.
 * E is summed into r20 first and moved last because RE(0) is the same
 * register as RA(80), which must be read before it is overwritten.
 */
lwz r16,0(r3) | ||
add r20,RE(80),r20 | ||
add RD(0),RD(80),r19 | ||
add RC(0),RC(80),r18 | ||
add RB(0),RB(80),r17 | ||
add RA(0),RA(80),r16 | ||
mr RE(0),r20 | ||
/* Write the updated state back through r3. */
stw RA(0),0(r3) | ||
stw RB(0),4(r3) | ||
stw RC(0),8(r3) | ||
stw RD(0),12(r3) | ||
stw RE(0),16(r3) | ||
|
||
/* Restore the saved registers, pop the stack frame and return. */
REST_8GPRS(14, r1) | ||
REST_10GPRS(22, r1) | ||
addi r1,r1,STACKFRAMESIZE | ||
blr |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,157 @@ | ||
/* | ||
* Cryptographic API. | ||
* | ||
* powerpc implementation of the SHA1 Secure Hash Algorithm. | ||
* | ||
* Derived from cryptoapi implementation, adapted for in-place | ||
* scatterlist interface. | ||
* | ||
* Derived from "crypto/sha1.c" | ||
* Copyright (c) Alan Smithee. | ||
* Copyright (c) Andrew McDonald <[email protected]> | ||
* Copyright (c) Jean-Francois Dive <[email protected]> | ||
* | ||
* This program is free software; you can redistribute it and/or modify it | ||
* under the terms of the GNU General Public License as published by the Free | ||
* Software Foundation; either version 2 of the License, or (at your option) | ||
* any later version. | ||
* | ||
*/ | ||
#include <crypto/internal/hash.h> | ||
#include <linux/init.h> | ||
#include <linux/module.h> | ||
#include <linux/mm.h> | ||
#include <linux/cryptohash.h> | ||
#include <linux/types.h> | ||
#include <crypto/sha.h> | ||
#include <asm/byteorder.h> | ||
|
||
extern void powerpc_sha_transform(u32 *state, const u8 *src, u32 *temp); | ||
|
||
static int sha1_init(struct shash_desc *desc) | ||
{ | ||
struct sha1_state *sctx = shash_desc_ctx(desc); | ||
|
||
*sctx = (struct sha1_state){ | ||
.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 }, | ||
}; | ||
|
||
return 0; | ||
} | ||
|
||
static int sha1_update(struct shash_desc *desc, const u8 *data, | ||
unsigned int len) | ||
{ | ||
struct sha1_state *sctx = shash_desc_ctx(desc); | ||
unsigned int partial, done; | ||
const u8 *src; | ||
|
||
partial = sctx->count & 0x3f; | ||
sctx->count += len; | ||
done = 0; | ||
src = data; | ||
|
||
if ((partial + len) > 63) { | ||
u32 temp[SHA_WORKSPACE_WORDS]; | ||
|
||
if (partial) { | ||
done = -partial; | ||
memcpy(sctx->buffer + partial, data, done + 64); | ||
src = sctx->buffer; | ||
} | ||
|
||
do { | ||
powerpc_sha_transform(sctx->state, src, temp); | ||
done += 64; | ||
src = data + done; | ||
} while (done + 63 < len); | ||
|
||
memset(temp, 0, sizeof(temp)); | ||
partial = 0; | ||
} | ||
memcpy(sctx->buffer + partial, src, len - done); | ||
|
||
return 0; | ||
} | ||
|
||
|
||
/* Add padding and return the message digest. */ | ||
/*
 * sha1_final - pad the message, emit the digest and wipe the context.
 * @desc: shash descriptor whose context is a struct sha1_state
 * @out:  receives the five state words, each stored big-endian
 *
 * The message is padded with 0x80 followed by zeros up to 56 bytes
 * modulo 64, then the bit length is appended as a 64-bit big-endian value.
 * Always returns 0.
 */
static int sha1_final(struct shash_desc *desc, u8 *out)
{
	static const u8 padding[64] = { 0x80, };
	struct sha1_state *sctx = shash_desc_ctx(desc);
	__be32 *digest = (__be32 *)out;
	/* Capture the length in bits before the padding updates change count. */
	__be64 bit_count = cpu_to_be64(sctx->count << 3);
	u32 used = sctx->count & 0x3f;
	u32 pad_bytes;
	u32 word;

	/* Pad out to 56 mod 64, spilling into a second block if needed. */
	if (used < 56)
		pad_bytes = 56 - used;
	else
		pad_bytes = (64 + 56) - used;
	sha1_update(desc, padding, pad_bytes);

	/* Append the message length; this completes the final block. */
	sha1_update(desc, (const u8 *)&bit_count, sizeof(bit_count));

	/* Store the state words in the digest. */
	for (word = 0; word < 5; word++)
		digest[word] = cpu_to_be32(sctx->state[word]);

	/* Wipe the context. */
	memset(sctx, 0, sizeof(*sctx));

	return 0;
}
|
||
static int sha1_export(struct shash_desc *desc, void *out) | ||
{ | ||
struct sha1_state *sctx = shash_desc_ctx(desc); | ||
|
||
memcpy(out, sctx, sizeof(*sctx)); | ||
return 0; | ||
} | ||
|
||
static int sha1_import(struct shash_desc *desc, const void *in) | ||
{ | ||
struct sha1_state *sctx = shash_desc_ctx(desc); | ||
|
||
memcpy(sctx, in, sizeof(*sctx)); | ||
return 0; | ||
} | ||
|
||
static struct shash_alg alg = { | ||
.digestsize = SHA1_DIGEST_SIZE, | ||
.init = sha1_init, | ||
.update = sha1_update, | ||
.final = sha1_final, | ||
.export = sha1_export, | ||
.import = sha1_import, | ||
.descsize = sizeof(struct sha1_state), | ||
.statesize = sizeof(struct sha1_state), | ||
.base = { | ||
.cra_name = "sha1", | ||
.cra_driver_name= "sha1-powerpc", | ||
.cra_flags = CRYPTO_ALG_TYPE_SHASH, | ||
.cra_blocksize = SHA1_BLOCK_SIZE, | ||
.cra_module = THIS_MODULE, | ||
} | ||
}; | ||
|
||
/*
 * Module entry point: register the shash algorithm and return the result
 * of crypto_register_shash() unchanged.
 */
static int __init sha1_powerpc_mod_init(void)
{
	int err = crypto_register_shash(&alg);

	return err;
}
|
||
/* Module exit point: unregister the shash algorithm registered at init. */
static void __exit sha1_powerpc_mod_fini(void)
{
	crypto_unregister_shash(&alg);
}
|
||
/* Hook the register/unregister functions into module load and unload. */
module_init(sha1_powerpc_mod_init); | ||
module_exit(sha1_powerpc_mod_fini); | ||
|
||
/* Module metadata. */
MODULE_LICENSE("GPL"); | ||
MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm"); | ||
|
||
/* Alias matching the driver name set in alg.base.cra_driver_name. */
MODULE_ALIAS("sha1-powerpc"); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters