Finally figured out how to add an autoconf option. Added the ability to disable AES-NI, and split cryptonight.c into four files - a header, functions common to both implementations, the AES-NI implementation, and the slow implementation. Also moved AES-NI specific asm to its own file.
Wolf committed May 26, 2014
1 parent 974348b commit 74a4002
Showing 9 changed files with 544 additions and 559 deletions.
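
The configure.ac hunk that actually adds the option is presumably among the nine changed files but is not expanded on this page. Judging from the new USE_LOBOTOMIZED_AES conditional in Makefile.am below, it would be along these lines - a sketch only, with the switch name --disable-aes-ni assumed rather than confirmed:

    dnl Sketch only: an enable/disable switch driving the USE_LOBOTOMIZED_AES
    dnl Automake conditional used in Makefile.am. The flag name is assumed.
    AC_ARG_ENABLE([aes-ni],
      [AS_HELP_STRING([--disable-aes-ni],
        [build the slow C CryptoNight implementation instead of AES-NI])],
      [enable_aes_ni=$enableval], [enable_aes_ni=yes])
    AM_CONDITIONAL([USE_LOBOTOMIZED_AES], [test "x$enable_aes_ni" = "xno"])

With such a switch, ./configure --disable-aes-ni would select cryptonight_lobotomized.c, while the default build compiles cryptonight_aesni.c plus aesni.S and adds -maes to AM_CFLAGS, as the Makefile.am diff below shows.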
11 changes: 9 additions & 2 deletions Makefile.am
@@ -13,7 +13,7 @@ EXTRA_DIST = example-cfg.json nomacro.pl
SUBDIRS = compat

AM_CPPFLAGS = $(PTHREAD_FLAGS) $(JANSSON_INCLUDES)
-AM_CFLAGS = -O3 -Ofast -flto -fuse-linker-plugin -falign-loops=16 -falign-functions=16 -falign-jumps=16 -falign-loops=16 -falign-labels=16 -maes
+AM_CFLAGS = -O3 -Ofast -flto -fuse-linker-plugin -falign-loops=16 -falign-functions=16 -falign-jumps=16 -falign-loops=16 -falign-labels=16

bin_PROGRAMS = minerd

@@ -32,7 +32,7 @@ minerd_SOURCES = elist.h \
skein.c \
ink.c \
blake.c \
-cryptonight.c \
+cryptonight_common.c \
x11.c \
sha3/sph_keccak.c \
sha3/sph_hefty1.c \
@@ -60,6 +60,13 @@ endif
if ARCH_x86_64
minerd_SOURCES += sha2-x64.S scrypt-x64.S aesb-x64.S
endif
+if USE_LOBOTOMIZED_AES
+minerd_SOURCES += cryptonight_lobotomized.c
+else
+minerd_SOURCES += cryptonight_aesni.c aesni.S
+AM_CFLAGS += -maes
+endif
+
if ARCH_ARM
minerd_SOURCES += sha2-arm.S scrypt-arm.S
endif
205 changes: 0 additions & 205 deletions aesb-x64.S
@@ -4,56 +4,6 @@
.section .note.GNU-stack,"",%progbits
#endif

.text
.p2align 6
.globl fast_aesb_single_round
.globl _fast_aesb_single_round
fast_aesb_single_round:
_fast_aesb_single_round:
#if defined(_WIN64) || defined(__CYGWIN__)
movdqa (%rcx), %xmm1
aesenc (%r8), %xmm1
movdqa %xmm1, (%rdx)
#else
movdqa (%rdi), %xmm1
aesenc (%rdx), %xmm1
movdqa %xmm1, (%rsi)
#endif
ret
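
For reference, the removed routine is a single hardware AES encryption round. A minimal C-intrinsics sketch, assuming the (in, out, expandedKey) argument order the System V path above implies:

    #include <emmintrin.h>
    #include <wmmintrin.h>   /* _mm_aesenc_si128; build with -maes */

    /* Sketch of fast_aesb_single_round: one aesenc round on a 16-byte block. */
    static void aesb_single_round_c(const void *in, void *out, const void *expandedKey)
    {
        __m128i block = _mm_load_si128((const __m128i *)in);
        block = _mm_aesenc_si128(block, _mm_load_si128((const __m128i *)expandedKey));
        _mm_store_si128((__m128i *)out, block);
    }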

.text
.p2align 6
.globl fast_aesb_pseudo_round_mut
.globl _fast_aesb_pseudo_round_mut
fast_aesb_pseudo_round_mut:
_fast_aesb_pseudo_round_mut:
#if defined(_WIN64) || defined(__CYGWIN__)
mov %rdx, %r9
add $0xA0, %r9
movdqa (%rcx), %xmm1

.LOOP:
aesenc (%rdx), %xmm1
add $0x10, %rdx
cmp %r9, %rdx
jl .LOOP

movdqa %xmm1, (%rcx)
#else
mov %rsi, %r9
add $0xA0, %r9
movdqa (%rdi), %xmm1

.LOOP:
aesenc (%rsi), %xmm1
add $0x10, %rsi
cmp %r9, %rsi
jl .LOOP

movdqa %xmm1, (%rdi)
#endif
ret
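
Similarly, this routine runs ten aesenc rounds in place, walking 0xA0 bytes (ten 16-byte round keys) of expanded key. A C sketch under the same assumptions:

    #include <emmintrin.h>
    #include <wmmintrin.h>   /* build with -maes */

    /* Sketch of fast_aesb_pseudo_round_mut: ten AES rounds applied in place. */
    static void aesb_pseudo_round_mut_c(void *val, const void *expandedKey)
    {
        __m128i block = _mm_load_si128((__m128i *)val);
        const __m128i *key = (const __m128i *)expandedKey;
        for (int i = 0; i < 10; i++)
            block = _mm_aesenc_si128(block, key[i]);
        _mm_store_si128((__m128i *)val, block);
    }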

.text
.globl mul128
.globl _mul128
@@ -70,158 +20,3 @@ _mul128:
mov %rdx, (%r8)
#endif
ret

.text
.p2align 4
.globl aesni_parallel_noxor
# void aesni_parallel_noxor(void *output, uint8_t *input, uint8_t *expkey)
aesni_parallel_noxor:
mov $10, %r9
movdqa (%rsi), %xmm0
movdqa 0x10(%rsi), %xmm1
movdqa 0x20(%rsi), %xmm2
movdqa 0x30(%rsi), %xmm3
movdqa 0x40(%rsi), %xmm4
movdqa 0x50(%rsi), %xmm5
movdqa 0x60(%rsi), %xmm6
movdqa 0x70(%rsi), %xmm7

.ENCRYPT:
aesenc (%rdx), %xmm0
aesenc (%rdx), %xmm1
aesenc (%rdx), %xmm2
aesenc (%rdx), %xmm3
aesenc (%rdx), %xmm4
aesenc (%rdx), %xmm5
aesenc (%rdx), %xmm6
aesenc (%rdx), %xmm7
add $0x10, %rdx
dec %r9
jnz .ENCRYPT

movdqa %xmm0, (%rdi)
movdqa %xmm1, 0x10(%rdi)
movdqa %xmm2, 0x20(%rdi)
movdqa %xmm3, 0x30(%rdi)
movdqa %xmm4, 0x40(%rdi)
movdqa %xmm5, 0x50(%rdi)
movdqa %xmm6, 0x60(%rdi)
movdqa %xmm7, 0x70(%rdi)

movdqa %xmm0, (%rsi)
movdqa %xmm1, 0x10(%rsi)
movdqa %xmm2, 0x20(%rsi)
movdqa %xmm3, 0x30(%rsi)
movdqa %xmm4, 0x40(%rsi)
movdqa %xmm5, 0x50(%rsi)
movdqa %xmm6, 0x60(%rsi)
movdqa %xmm7, 0x70(%rsi)

ret
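
The prototype comment above gives the signature; the body runs eight 16-byte blocks through ten AES rounds with a shared key schedule, then writes the result to both output and input, per the two store groups. A C sketch:

    #include <stdint.h>
    #include <emmintrin.h>
    #include <wmmintrin.h>   /* build with -maes */

    /* Sketch of aesni_parallel_noxor: 8 blocks x 10 rounds, shared key schedule. */
    static void aesni_parallel_noxor_c(void *output, uint8_t *input, const uint8_t *expkey)
    {
        __m128i blk[8];
        for (int i = 0; i < 8; i++)
            blk[i] = _mm_load_si128((__m128i *)input + i);
        for (int r = 0; r < 10; r++) {
            __m128i k = _mm_load_si128((const __m128i *)expkey + r);
            for (int i = 0; i < 8; i++)
                blk[i] = _mm_aesenc_si128(blk[i], k);
        }
        for (int i = 0; i < 8; i++) {
            _mm_store_si128((__m128i *)output + i, blk[i]);
            _mm_store_si128((__m128i *)input + i, blk[i]);
        }
    }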

.text
.p2align 4
.globl aesni_parallel_xor
# void aesni_parallel_xor(void *state, uint8_t *expkey, uint8_t *xorval)
aesni_parallel_xor:
mov $10, %r9
movdqa (%rdi), %xmm0
movdqa 0x10(%rdi), %xmm1
movdqa 0x20(%rdi), %xmm2
movdqa 0x30(%rdi), %xmm3
movdqa 0x40(%rdi), %xmm4
movdqa 0x50(%rdi), %xmm5
movdqa 0x60(%rdi), %xmm6
movdqa 0x70(%rdi), %xmm7

pxor (%rdx), %xmm0
pxor 0x10(%rdx), %xmm1
pxor 0x20(%rdx), %xmm2
pxor 0x30(%rdx), %xmm3
pxor 0x40(%rdx), %xmm4
pxor 0x50(%rdx), %xmm5
pxor 0x60(%rdx), %xmm6
pxor 0x70(%rdx), %xmm7

.ENCRYPT2:
aesenc (%rsi), %xmm0
aesenc (%rsi), %xmm1
aesenc (%rsi), %xmm2
aesenc (%rsi), %xmm3
aesenc (%rsi), %xmm4
aesenc (%rsi), %xmm5
aesenc (%rsi), %xmm6
aesenc (%rsi), %xmm7
add $0x10, %rsi
dec %r9
jnz .ENCRYPT2

movdqa %xmm0, (%rdi)
movdqa %xmm1, 0x10(%rdi)
movdqa %xmm2, 0x20(%rdi)
movdqa %xmm3, 0x30(%rdi)
movdqa %xmm4, 0x40(%rdi)
movdqa %xmm5, 0x50(%rdi)
movdqa %xmm6, 0x60(%rdi)
movdqa %xmm7, 0x70(%rdi)

ret
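
This is the same eight-block pattern with one difference: the state blocks are XORed with xorval before the rounds, and only state is written back. A sketch, with the same headers as the noxor sketch above:

    #include <stdint.h>
    #include <emmintrin.h>
    #include <wmmintrin.h>   /* build with -maes */

    /* Sketch of aesni_parallel_xor: XOR prologue, then 8 blocks x 10 rounds. */
    static void aesni_parallel_xor_c(void *state, const uint8_t *expkey, const uint8_t *xorval)
    {
        __m128i blk[8];
        for (int i = 0; i < 8; i++)
            blk[i] = _mm_xor_si128(_mm_load_si128((__m128i *)state + i),
                                   _mm_load_si128((const __m128i *)xorval + i));
        for (int r = 0; r < 10; r++) {
            __m128i k = _mm_load_si128((const __m128i *)expkey + r);
            for (int i = 0; i < 8; i++)
                blk[i] = _mm_aesenc_si128(blk[i], k);
        }
        for (int i = 0; i < 8; i++)
            _mm_store_si128((__m128i *)state + i, blk[i]);
    }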

.text
.p2align 4
.globl that_fucking_loop
# void that_fucking_loop(uint64_t *a, uint64_t *b, uint8_t *long_state)
that_fucking_loop:
mov $0x80000, %r11
movdqa (%rdi), %xmm1
movdqa (%rsi), %xmm2
# b == xmm2 & a == xmm1 from now on

.p2align 4
.HUGELOOP:
movq %xmm1, %r9

and $0x1FFFF0, %r9
add %rdx, %r9

movdqa (%r9), %xmm3
aesenc %xmm1, %xmm3

pxor %xmm3, %xmm2

movdqa %xmm2, (%r9)

movq %xmm3, %r9

and $0x1FFFF0, %r9
add %rdx, %r9

movdqa (%r9), %xmm4
push %rdx

movq %xmm3, %rax
mulq (%r9)

sub $16, %rsp
movdqa %xmm1, (%rsp)
add %rdx, (%rsp)
add %rax, 8(%rsp)

movdqa (%rsp), %xmm1
add $16, %rsp
pop %rdx

movdqa %xmm1, (%r9)

pxor %xmm4, %xmm1
movdqa %xmm3, %xmm2

# This is a branch prediction hint. Don't fuck with it.
dec %r11
cmp $0, %r11
setne %al
movzx %al, %eax
test %rax, %rax
jne .HUGELOOP

ret
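
The loop above is the CryptoNight memory-hard inner loop: 0x80000 (524,288) iterations over a 2 MB scratchpad, each doing one AES round keyed by a, a scratchpad write of b ^ c, a 64x64-to-128-bit multiply, and an add/xor mix. A step-for-step C sketch (the unsigned __int128 multiply assumes GCC or Clang; _mm_extract_epi64 assumes SSE4.1, which any AES-NI CPU has):

    #include <stdint.h>
    #include <emmintrin.h>
    #include <smmintrin.h>   /* _mm_extract_epi64 */
    #include <wmmintrin.h>   /* _mm_aesenc_si128; build with -maes -msse4.1 */

    /* Sketch of the removed loop. long_state is the 2 MB scratchpad. */
    static void cryptonight_inner_loop_c(uint64_t *a_io, uint64_t *b_io, uint8_t *long_state)
    {
        __m128i a = _mm_load_si128((__m128i *)a_io);
        __m128i b = _mm_load_si128((__m128i *)b_io);

        for (uint64_t i = 0; i < 0x80000; i++) {
            /* c = aesenc(scratchpad[a.lo & 0x1FFFF0], key = a) */
            uint64_t idx = (uint64_t)_mm_cvtsi128_si64(a) & 0x1FFFF0;
            __m128i c = _mm_aesenc_si128(_mm_load_si128((__m128i *)(long_state + idx)), a);

            /* scratchpad[idx] = b ^ c */
            _mm_store_si128((__m128i *)(long_state + idx), _mm_xor_si128(b, c));

            /* d = scratchpad[c.lo & 0x1FFFF0]; 128-bit product of the low words */
            idx = (uint64_t)_mm_cvtsi128_si64(c) & 0x1FFFF0;
            __m128i d = _mm_load_si128((__m128i *)(long_state + idx));
            unsigned __int128 prod = (unsigned __int128)(uint64_t)_mm_cvtsi128_si64(c)
                                   * (uint64_t)_mm_cvtsi128_si64(d);

            /* a.lo += hi(prod), a.hi += lo(prod) -- mirrors the stack spill above */
            uint64_t lo = (uint64_t)_mm_cvtsi128_si64(a) + (uint64_t)(prod >> 64);
            uint64_t hi = (uint64_t)_mm_extract_epi64(a, 1) + (uint64_t)prod;
            a = _mm_set_epi64x((long long)hi, (long long)lo);

            _mm_store_si128((__m128i *)(long_state + idx), a);
            a = _mm_xor_si128(a, d);
            b = c;   /* movdqa %xmm3, %xmm2 */
        }
    }

The dec/setne/movzx/test sequence at the bottom of the asm exists only to shape branch prediction, as its comment warns; in C it is simply the loop condition.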