forked from protovist/ccminer
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcryptonight.h
144 lines (123 loc) · 4.93 KB
/
cryptonight.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#pragma once
#include <cuda_runtime.h>
#include <miner.h>
#ifdef __INTELLISENSE__
/* avoid red underlining */
#define __CUDA_ARCH__ 520
struct uint3 {
unsigned int x, y, z;
};
struct uint3 threadIdx;
struct uint3 blockIdx;
struct uint3 blockDim;
#define atomicExch(p,y) (*p) = y
#define __funnelshift_r(a,b,c) 1
#define __syncthreads()
#define __threadfence_block()
#define asm(x)
#define __shfl(a,b,c) 1
#define __umul64hi(a,b) a*b
#endif
#define MEMORY (1U << 21) // 2 MiB / 2097152 B
#define ITER (1U << 20) // 1048576
#define E2I_MASK 0x1FFFF0u
#define AES_BLOCK_SIZE 16U
#define AES_KEY_SIZE 32
#define INIT_SIZE_BLK 8
#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE) // 128 B
#define AES_RKEY_LEN 4
#define AES_COL_LEN 4
#define AES_ROUND_BASE 7
#ifndef HASH_SIZE
#define HASH_SIZE 32
#endif
#ifndef HASH_DATA_AREA
#define HASH_DATA_AREA 136
#endif
#define hi_dword(x) (x >> 32)
#define lo_dword(x) (x & 0xFFFFFFFF)
#define C32(x) ((uint32_t)(x ## U))
#define T32(x) ((x) & C32(0xFFFFFFFF))
#ifndef ROTL64
#if __CUDA_ARCH__ >= 350
__forceinline__ __device__ uint64_t cuda_ROTL64(const uint64_t value, const int offset) {
uint2 result;
if(offset >= 32) {
asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(__double2loint(__longlong_as_double(value))), "r"(__double2hiint(__longlong_as_double(value))), "r"(offset));
asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(__double2hiint(__longlong_as_double(value))), "r"(__double2loint(__longlong_as_double(value))), "r"(offset));
} else {
asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(__double2hiint(__longlong_as_double(value))), "r"(__double2loint(__longlong_as_double(value))), "r"(offset));
asm("shf.l.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(__double2loint(__longlong_as_double(value))), "r"(__double2hiint(__longlong_as_double(value))), "r"(offset));
}
return __double_as_longlong(__hiloint2double(result.y, result.x));
}
#define ROTL64(x, n) (cuda_ROTL64(x, n))
#else
#define ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
#endif
#endif
#ifndef ROTL32
#if __CUDA_ARCH__ < 350
#define ROTL32(x, n) T32(((x) << (n)) | ((x) >> (32 - (n))))
#else
#define ROTL32(x, n) __funnelshift_l( (x), (x), (n) )
#endif
#endif
#ifndef ROTR32
#if __CUDA_ARCH__ < 350
#define ROTR32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
#else
#define ROTR32(x, n) __funnelshift_r( (x), (x), (n) )
#endif
#endif
#define MEMSET8(dst,what,cnt) { \
int i_memset8; \
uint64_t *out_memset8 = (uint64_t *)(dst); \
for( i_memset8 = 0; i_memset8 < cnt; i_memset8++ ) \
out_memset8[i_memset8] = (what); }
#define MEMSET4(dst,what,cnt) { \
int i_memset4; \
uint32_t *out_memset4 = (uint32_t *)(dst); \
for( i_memset4 = 0; i_memset4 < cnt; i_memset4++ ) \
out_memset4[i_memset4] = (what); }
#define MEMCPY8(dst,src,cnt) { \
int i_memcpy8; \
uint64_t *in_memcpy8 = (uint64_t *)(src); \
uint64_t *out_memcpy8 = (uint64_t *)(dst); \
for( i_memcpy8 = 0; i_memcpy8 < cnt; i_memcpy8++ ) \
out_memcpy8[i_memcpy8] = in_memcpy8[i_memcpy8]; }
#define MEMCPY4(dst,src,cnt) { \
int i_memcpy4; \
uint32_t *in_memcpy4 = (uint32_t *)(src); \
uint32_t *out_memcpy4 = (uint32_t *)(dst); \
for( i_memcpy4 = 0; i_memcpy4 < cnt; i_memcpy4++ ) \
out_memcpy4[i_memcpy4] = in_memcpy4[i_memcpy4]; }
#define XOR_BLOCKS_DST(x,y,z) { \
((uint64_t *)z)[0] = ((uint64_t *)(x))[0] ^ ((uint64_t *)(y))[0]; \
((uint64_t *)z)[1] = ((uint64_t *)(x))[1] ^ ((uint64_t *)(y))[1]; }
#define E2I(x) ((size_t)(((*((uint64_t*)(x)) >> 4) & 0x1ffff)))
union hash_state {
uint8_t b[200];
uint64_t w[25];
};
union cn_slow_hash_state {
union hash_state hs;
struct {
uint8_t k[64];
uint8_t init[INIT_SIZE_BYTE];
};
};
static inline void exit_if_cudaerror(int thr_id, const char *src, int line)
{
cudaError_t err = cudaGetLastError();
if(err != cudaSuccess) {
gpulog(LOG_ERR, thr_id, "%s %s line %d", cudaGetErrorString(err), src, line);
exit(1);
}
}
void cryptonight_core_cuda(int thr_id, uint32_t blocks, uint32_t threads, uint64_t *d_long_state, uint32_t *d_ctx_state, uint32_t *d_ctx_a, uint32_t *d_ctx_b, uint32_t *d_ctx_key1, uint32_t *d_ctx_key2, int variant, uint64_t *d_ctx_tweak);
void cryptonight_extra_setData(int thr_id, const void *data, const void *ptarget);
void cryptonight_extra_init(int thr_id);
void cryptonight_extra_free(int thr_id);
void cryptonight_extra_prepare(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *d_ctx_state, uint32_t *d_ctx_a, uint32_t *d_ctx_b, uint32_t *d_ctx_key1, uint32_t *d_ctx_key2, int variant, uint64_t *d_ctx_tweak);
void cryptonight_extra_final(int thr_id, uint32_t threads, uint32_t startNonce, uint32_t *resnonce, uint32_t *d_ctx_state);