From f1290371f2cf15b2abd645f5b292b6a11adfea28 Mon Sep 17 00:00:00 2001 From: Erik Mossberg Date: Thu, 22 May 2014 04:31:31 +0200 Subject: [PATCH 1/6] fix segfault and compilation error on OSX10.9 and FreeBSD 10 with clang --- crypto/oaes_lib.c | 2 ++ cryptonight.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/crypto/oaes_lib.c b/crypto/oaes_lib.c index 2063b30e8..94133f782 100644 --- a/crypto/oaes_lib.c +++ b/crypto/oaes_lib.c @@ -36,7 +36,9 @@ static const char _NR[] = { #include #include #include +#if !((defined(__FreeBSD__) && __FreeBSD__ >= 10) || defined(__APPLE__)) #include +#endif #include #include #include diff --git a/cryptonight.c b/cryptonight.c index 6042af980..e0e7f5b5c 100644 --- a/cryptonight.c +++ b/cryptonight.c @@ -194,7 +194,7 @@ void cryptonight_hash_ctx(void* output, const void* input, size_t len, struct cr } void cryptonight_hash(void* output, const void* input, size_t len) { - cryptonight_hash_ctx(output, input, len, alloca(sizeof(struct cryptonight_ctx))); + cryptonight_hash_ctx(output, input, len, malloc(sizeof(struct cryptonight_ctx))); } void cryptonight_hash_ctx_aes_ni(void* output, const void* input, size_t len, struct cryptonight_ctx* ctx) { @@ -269,7 +269,7 @@ int scanhash_cryptonight(int thr_id, uint32_t *pdata, const uint32_t *ptarget, const uint32_t Htarg = ptarget[7]; uint32_t hash[HASH_SIZE / 4] __attribute__((aligned(32))); - struct cryptonight_ctx *ctx = alloca(sizeof(struct cryptonight_ctx)); + struct cryptonight_ctx *ctx = malloc(sizeof(struct cryptonight_ctx)); if (aes_ni) { do { From f507c1f78130ac06813e9391dd893ca4349dfc51 Mon Sep 17 00:00:00 2001 From: Erik Mossberg Date: Thu, 22 May 2014 05:50:31 +0200 Subject: [PATCH 2/6] add deallocation, doh --- cryptonight.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cryptonight.c b/cryptonight.c index e0e7f5b5c..492500c21 100644 --- a/cryptonight.c +++ b/cryptonight.c @@ -194,7 +194,9 @@ void cryptonight_hash_ctx(void* output, const void* input, size_t len, struct cr } void cryptonight_hash(void* output, const void* input, size_t len) { - cryptonight_hash_ctx(output, input, len, malloc(sizeof(struct cryptonight_ctx))); + struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx)); + cryptonight_hash_ctx(output, input, len, ctx); + free(ctx); } void cryptonight_hash_ctx_aes_ni(void* output, const void* input, size_t len, struct cryptonight_ctx* ctx) { @@ -269,7 +271,7 @@ int scanhash_cryptonight(int thr_id, uint32_t *pdata, const uint32_t *ptarget, const uint32_t Htarg = ptarget[7]; uint32_t hash[HASH_SIZE / 4] __attribute__((aligned(32))); - struct cryptonight_ctx *ctx = malloc(sizeof(struct cryptonight_ctx)); + struct cryptonight_ctx *ctx = (struct cryptonight_ctx*)malloc(sizeof(struct cryptonight_ctx)); if (aes_ni) { do { @@ -290,6 +292,8 @@ int scanhash_cryptonight(int thr_id, uint32_t *pdata, const uint32_t *ptarget, } } while (likely((n <= max_nonce && !work_restart[thr_id].restart))); } + + free(ctx); *hashes_done = n - first_nonce + 1; return 0; } From 95fdd41601a67ebb7a05716db6566f0bb63087c2 Mon Sep 17 00:00:00 2001 From: Wolf Date: Thu, 22 May 2014 00:20:42 -0500 Subject: [PATCH 3/6] Tighten up that loop in fast_aesb_pseudu_round_mut(). --- aesb-x64.S | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/aesb-x64.S b/aesb-x64.S index 120d537c9..e1fd4b837 100644 --- a/aesb-x64.S +++ b/aesb-x64.S @@ -28,28 +28,26 @@ _fast_aesb_single_round: fast_aesb_pseudo_round_mut: _fast_aesb_pseudo_round_mut: #if defined(_WIN64) || defined(__CYGWIN__) - mov $0, %r9 - mov $10, %r10 + mov %rsi, %r9 + add $0xA0, %r9 movdqu (%rcx), %xmm1 .LOOP: aesenc (%rdx), %xmm1 add $0x10, %rdx - inc %r9 - cmp %r10, %r9 + cmp %r9, %rsi jl .LOOP movdqu %xmm1, (%rcx) #else - mov $0, %r9 - mov $10, %r10 + mov %rsi, %r9 + add $0xA0, %r9 movdqu (%rdi), %xmm1 .LOOP: aesenc (%rsi), %xmm1 add $0x10, %rsi - inc %r9 - cmp %r10, %r9 + cmp %r9, %rsi jl .LOOP movdqu %xmm1, (%rdi) From 5447d43fd0844c5ab9ce315b2dc418f7bff91ff4 Mon Sep 17 00:00:00 2001 From: Wolf Date: Thu, 22 May 2014 00:22:52 -0500 Subject: [PATCH 4/6] I oopsed in the Windows code last commit. Fixed. --- aesb-x64.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aesb-x64.S b/aesb-x64.S index e1fd4b837..c04fa8942 100644 --- a/aesb-x64.S +++ b/aesb-x64.S @@ -28,14 +28,14 @@ _fast_aesb_single_round: fast_aesb_pseudo_round_mut: _fast_aesb_pseudo_round_mut: #if defined(_WIN64) || defined(__CYGWIN__) - mov %rsi, %r9 + mov %rdx, %r9 add $0xA0, %r9 movdqu (%rcx), %xmm1 .LOOP: aesenc (%rdx), %xmm1 add $0x10, %rdx - cmp %r9, %rsi + cmp %r9, %rdx jl .LOOP movdqu %xmm1, (%rcx) From 194148b6ac8406cc90c7ec2bdefe6f6fb2dbc51c Mon Sep 17 00:00:00 2001 From: Wolf Date: Thu, 22 May 2014 00:39:25 -0500 Subject: [PATCH 5/6] Aligned our shit and used faster SSE load ops. --- aesb-x64.S | 16 ++++++++-------- cryptonight.c | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/aesb-x64.S b/aesb-x64.S index c04fa8942..06dd189c4 100644 --- a/aesb-x64.S +++ b/aesb-x64.S @@ -11,13 +11,13 @@ fast_aesb_single_round: _fast_aesb_single_round: #if defined(_WIN64) || defined(__CYGWIN__) - movdqu (%rcx), %xmm1 + movdqa (%rcx), %xmm1 aesenc (%r8), %xmm1 - movdqu %xmm1, (%rdx) + movdqa %xmm1, (%rdx) #else - movdqu (%rdi), %xmm1 + movdqa (%rdi), %xmm1 aesenc (%rdx), %xmm1 - movdqu %xmm1, (%rsi) + movdqa %xmm1, (%rsi) #endif ret @@ -30,7 +30,7 @@ _fast_aesb_pseudo_round_mut: #if defined(_WIN64) || defined(__CYGWIN__) mov %rdx, %r9 add $0xA0, %r9 - movdqu (%rcx), %xmm1 + movdqa (%rcx), %xmm1 .LOOP: aesenc (%rdx), %xmm1 @@ -38,11 +38,11 @@ _fast_aesb_pseudo_round_mut: cmp %r9, %rdx jl .LOOP - movdqu %xmm1, (%rcx) + movdqa %xmm1, (%rcx) #else mov %rsi, %r9 add $0xA0, %r9 - movdqu (%rdi), %xmm1 + movdqa (%rdi), %xmm1 .LOOP: aesenc (%rsi), %xmm1 @@ -50,6 +50,6 @@ _fast_aesb_pseudo_round_mut: cmp %r9, %rsi jl .LOOP - movdqu %xmm1, (%rdi) + movdqa %xmm1, (%rdi) #endif ret diff --git a/cryptonight.c b/cryptonight.c index 6042af980..8180dfe16 100644 --- a/cryptonight.c +++ b/cryptonight.c @@ -123,7 +123,7 @@ static inline void xor_blocks_dst(const uint8_t* a, const uint8_t* b, uint8_t* d struct cryptonight_ctx { uint8_t long_state[MEMORY] __attribute((aligned(16))); union cn_slow_hash_state state; - uint8_t text[INIT_SIZE_BYTE]; + uint8_t text[INIT_SIZE_BYTE] __attribute((aligned(16))); uint8_t a[AES_BLOCK_SIZE] __attribute__((aligned(16))); uint8_t b[AES_BLOCK_SIZE] __attribute__((aligned(16))); uint8_t c[AES_BLOCK_SIZE] __attribute__((aligned(16))); From 79a4772167cf22d90e411ae4f22656aebd52084d Mon Sep 17 00:00:00 2001 From: Erik Mossberg Date: Thu, 22 May 2014 16:15:36 +0200 Subject: [PATCH 6/6] make sure to free context in all paths --- cryptonight.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cryptonight.c b/cryptonight.c index 492500c21..4aab68ca6 100644 --- a/cryptonight.c +++ b/cryptonight.c @@ -279,6 +279,7 @@ int scanhash_cryptonight(int thr_id, uint32_t *pdata, const uint32_t *ptarget, cryptonight_hash_ctx_aes_ni(hash, pdata, 76, ctx); if (unlikely(hash[7] < ptarget[7])) { *hashes_done = n - first_nonce + 1; + free(ctx); return true; } } while (likely((n <= max_nonce && !work_restart[thr_id].restart))); @@ -288,6 +289,7 @@ int scanhash_cryptonight(int thr_id, uint32_t *pdata, const uint32_t *ptarget, cryptonight_hash_ctx(hash, pdata, 76, ctx); if (unlikely(hash[7] < ptarget[7])) { *hashes_done = n - first_nonce + 1; + free(ctx); return true; } } while (likely((n <= max_nonce && !work_restart[thr_id].restart)));