diff --git a/aesb-x64.S b/aesb-x64.S
index 120d537c9..06dd189c4 100644
--- a/aesb-x64.S
+++ b/aesb-x64.S
@@ -11,13 +11,13 @@
 fast_aesb_single_round:
 _fast_aesb_single_round:
 #if defined(_WIN64) || defined(__CYGWIN__)
-    movdqu (%rcx), %xmm1
+    movdqa (%rcx), %xmm1
     aesenc (%r8), %xmm1
-    movdqu %xmm1, (%rdx)
+    movdqa %xmm1, (%rdx)
 #else
-    movdqu (%rdi), %xmm1
+    movdqa (%rdi), %xmm1
     aesenc (%rdx), %xmm1
-    movdqu %xmm1, (%rsi)
+    movdqa %xmm1, (%rsi)
 #endif
     ret
 
@@ -28,30 +28,28 @@ _fast_aesb_single_round:
 fast_aesb_pseudo_round_mut:
 _fast_aesb_pseudo_round_mut:
 #if defined(_WIN64) || defined(__CYGWIN__)
-    mov $0, %r9
-    mov $10, %r10
-    movdqu (%rcx), %xmm1
+    mov %rdx, %r9
+    add $0xA0, %r9
+    movdqa (%rcx), %xmm1
 
     .LOOP:
         aesenc (%rdx), %xmm1
         add $0x10, %rdx
-        inc %r9
-        cmp %r10, %r9
+        cmp %r9, %rdx
         jl .LOOP
 
-    movdqu %xmm1, (%rcx)
+    movdqa %xmm1, (%rcx)
 #else
-    mov $0, %r9
-    mov $10, %r10
-    movdqu (%rdi), %xmm1
+    mov %rsi, %r9
+    add $0xA0, %r9
+    movdqa (%rdi), %xmm1
 
     .LOOP:
         aesenc (%rsi), %xmm1
         add $0x10, %rsi
-        inc %r9
-        cmp %r10, %r9
+        cmp %r9, %rsi
        jl .LOOP
 
-    movdqu %xmm1, (%rdi)
+    movdqa %xmm1, (%rdi)
 #endif
     ret
diff --git a/cryptonight.c b/cryptonight.c
index 6042af980..8180dfe16 100644
--- a/cryptonight.c
+++ b/cryptonight.c
@@ -123,7 +123,7 @@ static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* d
 struct cryptonight_ctx {
     uint8_t long_state[MEMORY] __attribute((aligned(16)));
     union cn_slow_hash_state state;
-    uint8_t text[INIT_SIZE_BYTE];
+    uint8_t text[INIT_SIZE_BYTE] __attribute((aligned(16)));
     uint8_t a[AES_BLOCK_SIZE] __attribute__((aligned(16)));
     uint8_t b[AES_BLOCK_SIZE] __attribute__((aligned(16)));
     uint8_t c[AES_BLOCK_SIZE] __attribute__((aligned(16)));
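
Context for the change (a summary, not part of the patch): movdqa requires its 128-bit memory operand to be 16-byte aligned and faults otherwise, while movdqu accepts any address; that is why switching to the aligned forms is paired with adding aligned(16) to the text buffer in cryptonight_ctx (long_state, a, b and c were already aligned). The rewritten key-schedule loop also drops the separate round counter: r9 is set to the round-key pointer plus 0xA0 (10 rounds * 16 bytes per round key) and the loop runs until the key pointer reaches it. The C sketch below only illustrates the alignment requirement with intrinsics; the buffer names and sizes are placeholders, not code from this repository.

/* Standalone illustration (not part of the patch): _mm_load_si128 and
 * _mm_store_si128 compile to movdqa and require 16-byte-aligned addresses,
 * while _mm_loadu_si128 (movdqu) does not. Build with: gcc -O2 -maes demo.c */
#include <stdint.h>
#include <stdio.h>
#include <emmintrin.h>   /* SSE2: _mm_load_si128, _mm_store_si128 */
#include <wmmintrin.h>   /* AES-NI: _mm_aesenc_si128 */

/* Illustrative buffers; the aligned(16) attribute mirrors the
 * cryptonight_ctx change and makes the movdqa forms safe to use. */
static uint8_t text[16] __attribute__((aligned(16)));
static uint8_t key[16]  __attribute__((aligned(16)));

int main(void)
{
    __m128i state = _mm_load_si128((const __m128i *)text); /* movdqa load: would fault if text were unaligned */
    __m128i rk    = _mm_load_si128((const __m128i *)key);
    state = _mm_aesenc_si128(state, rk);                    /* one AES round, as in fast_aesb_single_round */
    _mm_store_si128((__m128i *)text, state);                /* movdqa store */
    printf("text %% 16 = %u\n", (unsigned)((uintptr_t)text % 16));
    return 0;
}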