Commit

Aligned our buffers and switched to the faster aligned SSE load/store ops.
Wolf committed May 22, 2014
1 parent 5447d43 commit 194148b
Showing 2 changed files with 9 additions and 9 deletions.
16 changes: 8 additions & 8 deletions aesb-x64.S
@@ -11,13 +11,13 @@
 fast_aesb_single_round:
 _fast_aesb_single_round:
 #if defined(_WIN64) || defined(__CYGWIN__)
-    movdqu (%rcx), %xmm1
+    movdqa (%rcx), %xmm1
     aesenc (%r8), %xmm1
-    movdqu %xmm1, (%rdx)
+    movdqa %xmm1, (%rdx)
 #else
-    movdqu (%rdi), %xmm1
+    movdqa (%rdi), %xmm1
     aesenc (%rdx), %xmm1
-    movdqu %xmm1, (%rsi)
+    movdqa %xmm1, (%rsi)
 #endif
     ret

@@ -30,26 +30,26 @@ _fast_aesb_pseudo_round_mut:
 #if defined(_WIN64) || defined(__CYGWIN__)
     mov %rdx, %r9
     add $0xA0, %r9
-    movdqu (%rcx), %xmm1
+    movdqa (%rcx), %xmm1
 
 .LOOP:
     aesenc (%rdx), %xmm1
     add $0x10, %rdx
     cmp %r9, %rdx
     jl .LOOP
 
-    movdqu %xmm1, (%rcx)
+    movdqa %xmm1, (%rcx)
 #else
     mov %rsi, %r9
     add $0xA0, %r9
-    movdqu (%rdi), %xmm1
+    movdqa (%rdi), %xmm1
 
 .LOOP:
     aesenc (%rsi), %xmm1
     add $0x10, %rsi
     cmp %r9, %rsi
     jl .LOOP
 
-    movdqu %xmm1, (%rdi)
+    movdqa %xmm1, (%rdi)
 #endif
     ret
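
Both routines now use movdqa instead of movdqu. The aligned form raises #GP if the operand address is not 16-byte aligned, while movdqu accepts any address; the commit trades that flexibility for the faster aligned load it names in its message. A minimal C sketch of the same trade-off using SSE2 intrinsics (the function names and buffers below are illustrative, not part of this repository):

#include <emmintrin.h>  /* SSE2: _mm_load_si128 / _mm_loadu_si128 */
#include <stdint.h>

/* Mirrors the movdqu -> movdqa switch in aesb-x64.S.
 * _mm_load_si128 / _mm_store_si128 compile to movdqa and require a
 * 16-byte-aligned address; the *_loadu / *_storeu forms compile to
 * movdqu and accept any address. */
static void copy_block_aligned(const uint8_t *src, uint8_t *dst)
{
    /* src and dst MUST be 16-byte aligned (e.g. declared with
     * __attribute__((aligned(16)))), otherwise this faults at run time. */
    __m128i x = _mm_load_si128((const __m128i *)src);   /* movdqa */
    _mm_store_si128((__m128i *)dst, x);                  /* movdqa */
}

static void copy_block_unaligned(const uint8_t *src, uint8_t *dst)
{
    /* Works for any address; this is what the code did before the commit. */
    __m128i x = _mm_loadu_si128((const __m128i *)src);  /* movdqu */
    _mm_storeu_si128((__m128i *)dst, x);                 /* movdqu */
}
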
2 changes: 1 addition & 1 deletion cryptonight.c
@@ -123,7 +123,7 @@ static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* d
 struct cryptonight_ctx {
     uint8_t long_state[MEMORY] __attribute((aligned(16)));
     union cn_slow_hash_state state;
-    uint8_t text[INIT_SIZE_BYTE];
+    uint8_t text[INIT_SIZE_BYTE] __attribute((aligned(16)));
     uint8_t a[AES_BLOCK_SIZE] __attribute__((aligned(16)));
     uint8_t b[AES_BLOCK_SIZE] __attribute__((aligned(16)));
     uint8_t c[AES_BLOCK_SIZE] __attribute__((aligned(16)));
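
The struct change gives text the same 16-byte alignment guarantee that long_state, a, b and c already carry, so the movdqa-based routines above can safely load from and store to it. A hedged sketch of what the attribute buys (the reduced struct and the INIT_SIZE_BYTE value below are illustrative stand-ins, not the repository definitions):

#include <assert.h>
#include <stdint.h>

#define INIT_SIZE_BYTE 128  /* illustrative value; the real constant lives in cryptonight.c */

/* Reduced stand-in for struct cryptonight_ctx: the attribute forces the
 * member's address to be a multiple of 16, which is the precondition
 * for the aligned movdqa loads/stores in aesb-x64.S. */
struct ctx_sketch {
    uint8_t text[INIT_SIZE_BYTE] __attribute__((aligned(16)));
};

int main(void)
{
    struct ctx_sketch ctx;
    /* With aligned(16) this can never fire; without it, the member is
     * only guaranteed the 1-byte alignment of uint8_t. */
    assert(((uintptr_t)ctx.text % 16) == 0);
    return 0;
}
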
