# aesni.S (forked from lucasjones/cpuminer-multi)
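# AES-NI helper routines for the CryptoNight hash. All pointers handed to
# these functions must be 16-byte aligned (movdqa faults on unaligned
# addresses), and the CPU must support the AES-NI extension.
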
#include "cpuminer-config.h"

#if defined(__linux__) && defined(__ELF__)
	.section .note.GNU-stack,"",%progbits
#endif

	.text
	.p2align 6
	.globl fast_aesb_single_round
	.globl _fast_aesb_single_round
# void fast_aesb_single_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey)
# (prototype inferred from register usage)
fast_aesb_single_round:
_fast_aesb_single_round:
#if defined(_WIN64) || defined(__CYGWIN__)
	# Win64 ABI: in = %rcx, out = %rdx, expandedKey = %r8
	movdqa (%rcx), %xmm1		# load the 16-byte input block
	aesenc (%r8), %xmm1		# one AES round with the given round key
	movdqa %xmm1, (%rdx)		# store the result
#else
	# SysV ABI: in = %rdi, out = %rsi, expandedKey = %rdx
	movdqa (%rdi), %xmm1
	aesenc (%rdx), %xmm1
	movdqa %xmm1, (%rsi)
#endif
	ret
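
# Roughly equivalent C sketch (a reading aid, not part of the build; the
# prototype is inferred from register usage, intrinsics from <wmmintrin.h>):
#
#   void fast_aesb_single_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey) {
#       __m128i state = _mm_load_si128((const __m128i *)in);
#       state = _mm_aesenc_si128(state, _mm_load_si128((const __m128i *)expandedKey));
#       _mm_store_si128((__m128i *)out, state);
#   }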
	.text
	.p2align 6
	.globl fast_aesb_pseudo_round_mut
	.globl _fast_aesb_pseudo_round_mut
# void fast_aesb_pseudo_round_mut(uint8_t *val, const uint8_t *expandedKey)
# Runs ten AES rounds over *val in place; 0xA0 = 10 round keys * 16 bytes.
# (Prototype inferred from register usage.)
fast_aesb_pseudo_round_mut:
_fast_aesb_pseudo_round_mut:
#if defined(_WIN64) || defined(__CYGWIN__)
	# Win64 ABI: val = %rcx, expandedKey = %rdx
	mov %rdx, %r9
	add $0xA0, %r9			# %r9 = end of the expanded key
	movdqa (%rcx), %xmm1
.LOOP:
	aesenc (%rdx), %xmm1		# one round with the current round key
	add $0x10, %rdx			# advance to the next round key
	cmp %r9, %rdx
	jl .LOOP			# signed compare; fine for canonical user-space addresses
	movdqa %xmm1, (%rcx)
#else
	# SysV ABI: val = %rdi, expandedKey = %rsi
	mov %rsi, %r9
	add $0xA0, %r9
	movdqa (%rdi), %xmm1
.LOOP:
	aesenc (%rsi), %xmm1
	add $0x10, %rsi
	cmp %r9, %rsi
	jl .LOOP
	movdqa %xmm1, (%rdi)
#endif
	ret
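
# Roughly equivalent C sketch (same caveats as above):
#
#   void fast_aesb_pseudo_round_mut(uint8_t *val, const uint8_t *expandedKey) {
#       __m128i state = _mm_load_si128((const __m128i *)val);
#       for (int r = 0; r < 10; r++)   /* 10 * 16 = 0xA0 bytes of round keys */
#           state = _mm_aesenc_si128(state, _mm_load_si128((const __m128i *)expandedKey + r));
#       _mm_store_si128((__m128i *)val, state);
#   }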
	.text
	.p2align 4
	.globl aesni_parallel_noxor
# void aesni_parallel_noxor(void *output, uint8_t *input, uint8_t *expkey)
# Ten AES rounds over eight 16-byte blocks in parallel; the result is written
# to both output and input. SysV ABI only (no Win64 variant is provided).
aesni_parallel_noxor:
	mov $10, %r9			# round counter
	# load eight consecutive 16-byte blocks from input
	movdqa (%rsi), %xmm0
	movdqa 0x10(%rsi), %xmm1
	movdqa 0x20(%rsi), %xmm2
	movdqa 0x30(%rsi), %xmm3
	movdqa 0x40(%rsi), %xmm4
	movdqa 0x50(%rsi), %xmm5
	movdqa 0x60(%rsi), %xmm6
	movdqa 0x70(%rsi), %xmm7
.ENCRYPT:
	# one AES round on all eight blocks with the same round key
	aesenc (%rdx), %xmm0
	aesenc (%rdx), %xmm1
	aesenc (%rdx), %xmm2
	aesenc (%rdx), %xmm3
	aesenc (%rdx), %xmm4
	aesenc (%rdx), %xmm5
	aesenc (%rdx), %xmm6
	aesenc (%rdx), %xmm7
	add $0x10, %rdx			# next round key
	dec %r9
	jnz .ENCRYPT
	# write the encrypted blocks to output...
	movdqa %xmm0, (%rdi)
	movdqa %xmm1, 0x10(%rdi)
	movdqa %xmm2, 0x20(%rdi)
	movdqa %xmm3, 0x30(%rdi)
	movdqa %xmm4, 0x40(%rdi)
	movdqa %xmm5, 0x50(%rdi)
	movdqa %xmm6, 0x60(%rdi)
	movdqa %xmm7, 0x70(%rdi)
	# ...and back over input
	movdqa %xmm0, (%rsi)
	movdqa %xmm1, 0x10(%rsi)
	movdqa %xmm2, 0x20(%rsi)
	movdqa %xmm3, 0x30(%rsi)
	movdqa %xmm4, 0x40(%rsi)
	movdqa %xmm5, 0x50(%rsi)
	movdqa %xmm6, 0x60(%rsi)
	movdqa %xmm7, 0x70(%rsi)
	ret
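
# Roughly equivalent C sketch (a reading aid, not part of the build;
# intrinsics from <wmmintrin.h> / <emmintrin.h>):
#
#   void aesni_parallel_noxor(void *output, uint8_t *input, uint8_t *expkey) {
#       __m128i blk[8];
#       for (int i = 0; i < 8; i++)
#           blk[i] = _mm_load_si128((__m128i *)input + i);
#       for (int r = 0; r < 10; r++) {
#           __m128i key = _mm_load_si128((__m128i *)expkey + r);
#           for (int i = 0; i < 8; i++)
#               blk[i] = _mm_aesenc_si128(blk[i], key);
#       }
#       for (int i = 0; i < 8; i++) {
#           _mm_store_si128((__m128i *)output + i, blk[i]);
#           _mm_store_si128((__m128i *)input + i, blk[i]);
#       }
#   }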
	.text
	.p2align 4
	.globl aesni_parallel_xor
# void aesni_parallel_xor(void *state, uint8_t *expkey, uint8_t *xorval)
# XORs eight 16-byte blocks of state with xorval, then runs ten AES rounds
# over them in parallel. SysV ABI only (no Win64 variant is provided).
aesni_parallel_xor:
	mov $10, %r9			# round counter
	# load eight consecutive 16-byte blocks of state
	movdqa (%rdi), %xmm0
	movdqa 0x10(%rdi), %xmm1
	movdqa 0x20(%rdi), %xmm2
	movdqa 0x30(%rdi), %xmm3
	movdqa 0x40(%rdi), %xmm4
	movdqa 0x50(%rdi), %xmm5
	movdqa 0x60(%rdi), %xmm6
	movdqa 0x70(%rdi), %xmm7
	# XOR each block with the corresponding 16 bytes of xorval
	pxor (%rdx), %xmm0
	pxor 0x10(%rdx), %xmm1
	pxor 0x20(%rdx), %xmm2
	pxor 0x30(%rdx), %xmm3
	pxor 0x40(%rdx), %xmm4
	pxor 0x50(%rdx), %xmm5
	pxor 0x60(%rdx), %xmm6
	pxor 0x70(%rdx), %xmm7
.ENCRYPT2:
	# one AES round on all eight blocks with the same round key
	aesenc (%rsi), %xmm0
	aesenc (%rsi), %xmm1
	aesenc (%rsi), %xmm2
	aesenc (%rsi), %xmm3
	aesenc (%rsi), %xmm4
	aesenc (%rsi), %xmm5
	aesenc (%rsi), %xmm6
	aesenc (%rsi), %xmm7
	add $0x10, %rsi			# next round key
	dec %r9
	jnz .ENCRYPT2
	# write the encrypted blocks back to state
	movdqa %xmm0, (%rdi)
	movdqa %xmm1, 0x10(%rdi)
	movdqa %xmm2, 0x20(%rdi)
	movdqa %xmm3, 0x30(%rdi)
	movdqa %xmm4, 0x40(%rdi)
	movdqa %xmm5, 0x50(%rdi)
	movdqa %xmm6, 0x60(%rdi)
	movdqa %xmm7, 0x70(%rdi)
	ret
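
# Roughly equivalent C sketch (same caveats as above):
#
#   void aesni_parallel_xor(void *state, uint8_t *expkey, uint8_t *xorval) {
#       __m128i blk[8];
#       for (int i = 0; i < 8; i++)
#           blk[i] = _mm_xor_si128(_mm_load_si128((__m128i *)state + i),
#                                  _mm_load_si128((__m128i *)xorval + i));
#       for (int r = 0; r < 10; r++) {
#           __m128i key = _mm_load_si128((__m128i *)expkey + r);
#           for (int i = 0; i < 8; i++)
#               blk[i] = _mm_aesenc_si128(blk[i], key);
#       }
#       for (int i = 0; i < 8; i++)
#           _mm_store_si128((__m128i *)state + i, blk[i]);
#   }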
	.text
	.p2align 4
	.globl that_fucking_loop
# void that_fucking_loop(uint64_t *a, uint64_t *b, uint8_t *long_state)
# CryptoNight memory-hard inner loop: 0x80000 (524,288) iterations over a
# 2 MiB scratchpad; 0x1FFFF0 masks a value down to a 16-byte-aligned
# scratchpad offset.
that_fucking_loop:
	mov $0x80000, %r11		# iteration counter
	movdqa (%rdi), %xmm1
	movdqa (%rsi), %xmm2
	# b == xmm2 and a == xmm1 from now on
	.p2align 4
.HUGELOOP:
	movq %xmm1, %r9
	and $0x1FFFF0, %r9		# offset from the low 64 bits of a
	add %rdx, %r9
	movdqa (%r9), %xmm3		# c = scratchpad[a]
	aesenc %xmm1, %xmm3		# c = one AES round of c, key = a
	pxor %xmm3, %xmm2		# b ^= c
	movdqa %xmm2, (%r9)		# scratchpad[a] = b
	movq %xmm3, %r9
	and $0x1FFFF0, %r9		# offset from the low 64 bits of c
	add %rdx, %r9
	movdqa (%r9), %xmm4		# d = scratchpad[c]
	push %rdx			# mulq clobbers %rdx: save the scratchpad base
	movq %xmm3, %rax
	mulq (%r9)			# %rdx:%rax = c[0] * d[0] (64x64 -> 128 bits)
	# fold the 128-bit product into a through an aligned stack slot
	sub $16, %rsp
	movdqa %xmm1, (%rsp)
	add %rdx, (%rsp)		# a[0] += high half
	add %rax, 8(%rsp)		# a[1] += low half
	movdqa (%rsp), %xmm1
	add $16, %rsp
	pop %rdx			# restore the scratchpad base
	movdqa %xmm1, (%r9)		# scratchpad[c] = a
	pxor %xmm4, %xmm1		# a ^= d
	movdqa %xmm3, %xmm2		# b = c
	# This is a branch prediction hint. Don't fuck with it.
	dec %r11
	cmp $0, %r11
	setne %al
	movzx %al, %eax
	test %rax, %rax
	jne .HUGELOOP
	ret
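
# Roughly equivalent C sketch (a reading aid, not part of the build; uses
# unsigned __int128, a GCC/Clang extension, for the 64x64 -> 128 multiply;
# like the assembly, the final a and b are never written back to a_/b_):
#
#   void that_fucking_loop(uint64_t *a_, uint64_t *b_, uint8_t *long_state) {
#       __m128i a = _mm_load_si128((__m128i *)a_);
#       __m128i b = _mm_load_si128((__m128i *)b_);
#       for (int i = 0; i < 0x80000; i++) {
#           uint8_t *p = long_state + ((uint64_t)_mm_cvtsi128_si64(a) & 0x1FFFF0);
#           __m128i c = _mm_aesenc_si128(_mm_load_si128((__m128i *)p), a);
#           b = _mm_xor_si128(b, c);
#           _mm_store_si128((__m128i *)p, b);                 /* scratchpad[a] = b ^ c */
#           p = long_state + ((uint64_t)_mm_cvtsi128_si64(c) & 0x1FFFF0);
#           __m128i d = _mm_load_si128((__m128i *)p);
#           unsigned __int128 prod =
#               (unsigned __int128)(uint64_t)_mm_cvtsi128_si64(c) * *(uint64_t *)p;
#           uint64_t t[2];
#           _mm_storeu_si128((__m128i *)t, a);
#           t[0] += (uint64_t)(prod >> 64);                   /* a[0] += high half */
#           t[1] += (uint64_t)prod;                           /* a[1] += low half  */
#           a = _mm_loadu_si128((const __m128i *)t);
#           _mm_store_si128((__m128i *)p, a);                 /* scratchpad[c] = a */
#           a = _mm_xor_si128(a, d);
#           b = c;
#       }
#   }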