-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[PATCH] PPC assembly implementation of SHA1
Here is a SHA1 implementation with the core written in PPC assembly. On my 2GHz G5, it does 218MB/s, compared to 135MB/s for the openssl version or 45MB/s for the mozilla version. Signed-off-by: Paul Mackerras <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
- Loading branch information
1 parent
b5af910
commit a6ef351
Showing
4 changed files
with
283 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
/* | ||
* SHA-1 implementation. | ||
* | ||
* Copyright (C) 2005 Paul Mackerras <[email protected]> | ||
* | ||
* This version assumes we are running on a big-endian machine. | ||
* It calls an external sha1_core() to process blocks of 64 bytes. | ||
*/ | ||
#include <stdio.h> | ||
#include <string.h> | ||
#include "sha1.h" | ||
|
||
extern void sha1_core(uint32_t *hash, const unsigned char *p, | ||
unsigned int nblocks); | ||
|
||
int SHA1_Init(SHA_CTX *c) | ||
{ | ||
c->hash[0] = 0x67452301; | ||
c->hash[1] = 0xEFCDAB89; | ||
c->hash[2] = 0x98BADCFE; | ||
c->hash[3] = 0x10325476; | ||
c->hash[4] = 0xC3D2E1F0; | ||
c->len = 0; | ||
c->cnt = 0; | ||
return 0; | ||
} | ||
|
||
int SHA1_Update(SHA_CTX *c, const void *ptr, unsigned long n) | ||
{ | ||
unsigned long nb; | ||
const unsigned char *p = ptr; | ||
|
||
c->len += n << 3; | ||
while (n != 0) { | ||
if (c->cnt || n < 64) { | ||
nb = 64 - c->cnt; | ||
if (nb > n) | ||
nb = n; | ||
memcpy(&c->buf.b[c->cnt], p, nb); | ||
if ((c->cnt += nb) == 64) { | ||
sha1_core(c->hash, c->buf.b, 1); | ||
c->cnt = 0; | ||
} | ||
} else { | ||
nb = n >> 6; | ||
sha1_core(c->hash, p, nb); | ||
nb <<= 6; | ||
} | ||
n -= nb; | ||
p += nb; | ||
} | ||
return 0; | ||
} | ||
|
||
int SHA1_Final(unsigned char *hash, SHA_CTX *c) | ||
{ | ||
unsigned int cnt = c->cnt; | ||
|
||
c->buf.b[cnt++] = 0x80; | ||
if (cnt > 56) { | ||
if (cnt < 64) | ||
memset(&c->buf.b[cnt], 0, 64 - cnt); | ||
sha1_core(c->hash, c->buf.b, 1); | ||
cnt = 0; | ||
} | ||
if (cnt < 56) | ||
memset(&c->buf.b[cnt], 0, 56 - cnt); | ||
c->buf.l[7] = c->len; | ||
sha1_core(c->hash, c->buf.b, 1); | ||
memcpy(hash, c->hash, 20); | ||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
/* | ||
* SHA-1 implementation. | ||
* | ||
* Copyright (C) 2005 Paul Mackerras <[email protected]> | ||
*/ | ||
#include <stdint.h> | ||
|
||
typedef struct sha_context { | ||
uint32_t hash[5]; | ||
uint32_t cnt; | ||
uint64_t len; | ||
union { | ||
unsigned char b[64]; | ||
uint64_t l[8]; | ||
} buf; | ||
} SHA_CTX; | ||
|
||
int SHA1_Init(SHA_CTX *c); | ||
int SHA1_Update(SHA_CTX *c, const void *p, unsigned long n); | ||
int SHA1_Final(unsigned char *hash, SHA_CTX *c); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,185 @@ | ||
/* | ||
* SHA-1 implementation for PowerPC. | ||
* | ||
* Copyright (C) 2005 Paul Mackerras <[email protected]> | ||
*/ | ||
#define FS 80 | ||
|
||
/* | ||
* We roll the registers for T, A, B, C, D, E around on each | ||
* iteration; T on iteration t is A on iteration t+1, and so on. | ||
* We use registers 7 - 12 for this. | ||
*/ | ||
#define RT(t) ((((t)+5)%6)+7) | ||
#define RA(t) ((((t)+4)%6)+7) | ||
#define RB(t) ((((t)+3)%6)+7) | ||
#define RC(t) ((((t)+2)%6)+7) | ||
#define RD(t) ((((t)+1)%6)+7) | ||
#define RE(t) ((((t)+0)%6)+7) | ||
|
||
/* We use registers 16 - 31 for the W values */ | ||
#define W(t) (((t)%16)+16) | ||
|
||
#define STEPD0(t) \ | ||
and %r6,RB(t),RC(t); \ | ||
andc %r0,RD(t),RB(t); \ | ||
rotlwi RT(t),RA(t),5; \ | ||
rotlwi RB(t),RB(t),30; \ | ||
or %r6,%r6,%r0; \ | ||
add %r0,RE(t),%r15; \ | ||
add RT(t),RT(t),%r6; \ | ||
add %r0,%r0,W(t); \ | ||
add RT(t),RT(t),%r0 | ||
|
||
#define STEPD1(t) \ | ||
xor %r6,RB(t),RC(t); \ | ||
rotlwi RT(t),RA(t),5; \ | ||
rotlwi RB(t),RB(t),30; \ | ||
xor %r6,%r6,RD(t); \ | ||
add %r0,RE(t),%r15; \ | ||
add RT(t),RT(t),%r6; \ | ||
add %r0,%r0,W(t); \ | ||
add RT(t),RT(t),%r0 | ||
|
||
#define STEPD2(t) \ | ||
and %r6,RB(t),RC(t); \ | ||
and %r0,RB(t),RD(t); \ | ||
rotlwi RT(t),RA(t),5; \ | ||
rotlwi RB(t),RB(t),30; \ | ||
or %r6,%r6,%r0; \ | ||
and %r0,RC(t),RD(t); \ | ||
or %r6,%r6,%r0; \ | ||
add %r0,RE(t),%r15; \ | ||
add RT(t),RT(t),%r6; \ | ||
add %r0,%r0,W(t); \ | ||
add RT(t),RT(t),%r0 | ||
|
||
#define LOADW(t) \ | ||
lwz W(t),(t)*4(%r4) | ||
|
||
#define UPDATEW(t) \ | ||
xor %r0,W((t)-3),W((t)-8); \ | ||
xor W(t),W((t)-16),W((t)-14); \ | ||
xor W(t),W(t),%r0; \ | ||
rotlwi W(t),W(t),1 | ||
|
||
#define STEP0LD4(t) \ | ||
STEPD0(t); LOADW((t)+4); \ | ||
STEPD0((t)+1); LOADW((t)+5); \ | ||
STEPD0((t)+2); LOADW((t)+6); \ | ||
STEPD0((t)+3); LOADW((t)+7) | ||
|
||
#define STEPUP4(t, fn) \ | ||
STEP##fn(t); UPDATEW((t)+4); \ | ||
STEP##fn((t)+1); UPDATEW((t)+5); \ | ||
STEP##fn((t)+2); UPDATEW((t)+6); \ | ||
STEP##fn((t)+3); UPDATEW((t)+7) | ||
|
||
#define STEPUP20(t, fn) \ | ||
STEPUP4(t, fn); \ | ||
STEPUP4((t)+4, fn); \ | ||
STEPUP4((t)+8, fn); \ | ||
STEPUP4((t)+12, fn); \ | ||
STEPUP4((t)+16, fn) | ||
|
||
.globl sha1_core | ||
sha1_core: | ||
stwu %r1,-FS(%r1) | ||
stw %r15,FS-68(%r1) | ||
stw %r16,FS-64(%r1) | ||
stw %r17,FS-60(%r1) | ||
stw %r18,FS-56(%r1) | ||
stw %r19,FS-52(%r1) | ||
stw %r20,FS-48(%r1) | ||
stw %r21,FS-44(%r1) | ||
stw %r22,FS-40(%r1) | ||
stw %r23,FS-36(%r1) | ||
stw %r24,FS-32(%r1) | ||
stw %r25,FS-28(%r1) | ||
stw %r26,FS-24(%r1) | ||
stw %r27,FS-20(%r1) | ||
stw %r28,FS-16(%r1) | ||
stw %r29,FS-12(%r1) | ||
stw %r30,FS-8(%r1) | ||
stw %r31,FS-4(%r1) | ||
|
||
/* Load up A - E */ | ||
lwz RA(0),0(%r3) /* A */ | ||
lwz RB(0),4(%r3) /* B */ | ||
lwz RC(0),8(%r3) /* C */ | ||
lwz RD(0),12(%r3) /* D */ | ||
lwz RE(0),16(%r3) /* E */ | ||
|
||
mtctr %r5 | ||
|
||
1: LOADW(0) | ||
LOADW(1) | ||
LOADW(2) | ||
LOADW(3) | ||
|
||
lis %r15,0x5a82 /* K0-19 */ | ||
ori %r15,%r15,0x7999 | ||
STEP0LD4(0) | ||
STEP0LD4(4) | ||
STEP0LD4(8) | ||
STEPUP4(12, D0) | ||
STEPUP4(16, D0) | ||
|
||
lis %r15,0x6ed9 /* K20-39 */ | ||
ori %r15,%r15,0xeba1 | ||
STEPUP20(20, D1) | ||
|
||
lis %r15,0x8f1b /* K40-59 */ | ||
ori %r15,%r15,0xbcdc | ||
STEPUP20(40, D2) | ||
|
||
lis %r15,0xca62 /* K60-79 */ | ||
ori %r15,%r15,0xc1d6 | ||
STEPUP4(60, D1) | ||
STEPUP4(64, D1) | ||
STEPUP4(68, D1) | ||
STEPUP4(72, D1) | ||
STEPD1(76) | ||
STEPD1(77) | ||
STEPD1(78) | ||
STEPD1(79) | ||
|
||
lwz %r20,16(%r3) | ||
lwz %r19,12(%r3) | ||
lwz %r18,8(%r3) | ||
lwz %r17,4(%r3) | ||
lwz %r16,0(%r3) | ||
add %r20,RE(80),%r20 | ||
add RD(0),RD(80),%r19 | ||
add RC(0),RC(80),%r18 | ||
add RB(0),RB(80),%r17 | ||
add RA(0),RA(80),%r16 | ||
mr RE(0),%r20 | ||
stw RA(0),0(%r3) | ||
stw RB(0),4(%r3) | ||
stw RC(0),8(%r3) | ||
stw RD(0),12(%r3) | ||
stw RE(0),16(%r3) | ||
|
||
addi %r4,%r4,64 | ||
bdnz 1b | ||
|
||
lwz %r15,FS-68(%r1) | ||
lwz %r16,FS-64(%r1) | ||
lwz %r17,FS-60(%r1) | ||
lwz %r18,FS-56(%r1) | ||
lwz %r19,FS-52(%r1) | ||
lwz %r20,FS-48(%r1) | ||
lwz %r21,FS-44(%r1) | ||
lwz %r22,FS-40(%r1) | ||
lwz %r23,FS-36(%r1) | ||
lwz %r24,FS-32(%r1) | ||
lwz %r25,FS-28(%r1) | ||
lwz %r26,FS-24(%r1) | ||
lwz %r27,FS-20(%r1) | ||
lwz %r28,FS-16(%r1) | ||
lwz %r29,FS-12(%r1) | ||
lwz %r30,FS-8(%r1) | ||
lwz %r31,FS-4(%r1) | ||
addi %r1,%r1,FS | ||
blr |