From b9712854ced2d68d93ab8dc4da15e5503583bc7b Mon Sep 17 00:00:00 2001 From: Tanguy Pruvot Date: Sun, 22 May 2016 06:37:53 +0200 Subject: [PATCH] small changes in --version output + add sph haval which was half there in the vstudio project --- compat.h | 11 + compat/cpuminer-config.h | 6 +- cpu-miner.c | 43 +- cpuminer.vcxproj | 1 + cpuminer.vcxproj.filters | 3 + sha3/haval_helper.c | 195 ++++++++ sha3/sph_haval.c | 975 +++++++++++++++++++++++++++++++++++++++ sha3/sph_haval.h | 969 ++++++++++++++++++++++++++++++++++++++ sha3/sph_types.h | 10 + util.c | 2 +- 10 files changed, 2201 insertions(+), 14 deletions(-) create mode 100644 sha3/haval_helper.c create mode 100644 sha3/sph_haval.c create mode 100644 sha3/sph_haval.h diff --git a/compat.h b/compat.h index 124bc40aa..9f2611fe4 100644 --- a/compat.h +++ b/compat.h @@ -52,6 +52,17 @@ static __inline int setpriority(int which, int who, int prio) #define _ALIGN(x) __declspec(align(x)) typedef int ssize_t; +__inline int msver(void) { + switch (_MSC_VER) { + case 1500: return 2008; + case 1600: return 2010; + case 1700: return 2012; + case 1800: return 2013; + case 1900: return 2015; + default: return (_MSC_VER/100); + } +} + #include // This static var is made to be compatible with linux/mingw (no free on string result) // This is not thread safe but we only use that once on process start diff --git a/compat/cpuminer-config.h b/compat/cpuminer-config.h index e05194d94..a20aa6d72 100644 --- a/compat/cpuminer-config.h +++ b/compat/cpuminer-config.h @@ -94,7 +94,7 @@ #define PACKAGE_NAME "cpuminer-multi" /* Define to the full name and version of this package. */ -#define PACKAGE_STRING "cpuminer-multi 1.2" +#define PACKAGE_STRING "cpuminer-multi 1.3" /* Define to the one symbol short name of this package. */ #define PACKAGE_TARNAME "cpuminer-multi" @@ -103,7 +103,7 @@ #define PACKAGE_URL "" /* Define to the version of this package. */ -#define PACKAGE_VERSION "1.2" +#define PACKAGE_VERSION "1.3" /* If using the C implementation of alloca, define if you know the direction of stack growth for your system; otherwise it will be @@ -132,7 +132,7 @@ #define USE_XOP 1 /* Version number of package */ -#define VERSION "1.2" +#define VERSION "1.3" /* Define to `unsigned int' if does not define. */ /* #undef size_t */ diff --git a/cpu-miner.c b/cpu-miner.c index 3abfb9665..68f4366e0 100644 --- a/cpu-miner.c +++ b/cpu-miner.c @@ -2575,15 +2575,38 @@ static void *stratum_thread(void *userdata) static void show_version_and_exit(void) { - printf(" built on " __DATE__ + printf(" built " #ifdef _MSC_VER - " with VC++ 2013\n"); + "with VC++ %d", msver()); #elif defined(__GNUC__) - " with GCC"); - printf(" %d.%d.%d\n", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__); + "with GCC "); + printf("%d.%d.%d", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__); #endif + printf(" the " __DATE__ "\n"); - printf(" features:" + // Note: if compiled with cpu opts (instruction sets), + // the binary is no more compatible with older ones! + printf(" compiled for" +#if defined(__ARM_NEON__) + " ARM NEON" +#elif defined(__AVX2__) + " AVX2" +#elif defined(__AVX__) + " AVX" +#elif defined(__XOP__) + " XOP" +#elif defined(__SSE4_1__) + " SSE4" +#elif defined(_M_X64) || defined(__x86_64__) + " x64" +#elif defined(_M_IX86) || defined(__x86__) + " x86" +#else + " general use" +#endif + "\n"); + + printf(" config features:" #if defined(USE_ASM) && defined(__i386__) " i386" #endif @@ -2593,15 +2616,15 @@ static void show_version_and_exit(void) #if defined(USE_ASM) && (defined(__i386__) || defined(__x86_64__)) " SSE2" #endif +#if defined(__x86_64__) && defined(USE_XOP) + " XOP" +#endif #if defined(__x86_64__) && defined(USE_AVX) " AVX" #endif #if defined(__x86_64__) && defined(USE_AVX2) " AVX2" #endif -#if defined(__x86_64__) && defined(USE_XOP) - " XOP" -#endif #if defined(USE_ASM) && defined(__arm__) && defined(__APCS_32__) " ARM" #if defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) || \ @@ -3133,8 +3156,8 @@ static int thread_create(struct thr_info *thr, void* func) static void show_credits() { - printf("** " PACKAGE_NAME " " PACKAGE_VERSION " by Tanguy Pruvot (tpruvot@github) **\n"); - printf("BTC donation address: 1FhDPLPpw18X4srecguG3MxJYe4a1JsZnd\n\n"); + printf("** " PACKAGE_NAME " " PACKAGE_VERSION " by tpruvot@github **\n"); + printf("BTC donation address: 1FhDPLPpw18X4srecguG3MxJYe4a1JsZnd (tpruvot)\n\n"); } void get_defconfig_path(char *out, size_t bufsize, char *argv0); diff --git a/cpuminer.vcxproj b/cpuminer.vcxproj index aeffe1e80..afc1467e1 100644 --- a/cpuminer.vcxproj +++ b/cpuminer.vcxproj @@ -268,6 +268,7 @@ true + diff --git a/cpuminer.vcxproj.filters b/cpuminer.vcxproj.filters index 736177fa1..a7b77cbe5 100644 --- a/cpuminer.vcxproj.filters +++ b/cpuminer.vcxproj.filters @@ -31,6 +31,9 @@ sph + + sph + sph diff --git a/sha3/haval_helper.c b/sha3/haval_helper.c new file mode 100644 index 000000000..c5080f75d --- /dev/null +++ b/sha3/haval_helper.c @@ -0,0 +1,195 @@ +/* $Id: haval_helper.c 218 2010-06-08 17:06:34Z tp $ */ +/* + * Helper code, included (three times !) by HAVAL implementation. + * + * TODO: try to merge this with md_helper.c. + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +#undef SPH_XCAT +#define SPH_XCAT(a, b) SPH_XCAT_(a, b) +#undef SPH_XCAT_ +#define SPH_XCAT_(a, b) a ## b + +static void +#ifdef SPH_UPTR +SPH_XCAT(SPH_XCAT(haval, PASSES), _short) +#else +SPH_XCAT(haval, PASSES) +#endif +(sph_haval_context *sc, const void *data, size_t len) +{ + unsigned current; + +#if SPH_64 + current = (unsigned)sc->count & 127U; +#else + current = (unsigned)sc->count_low & 127U; +#endif + while (len > 0) { + unsigned clen; +#if !SPH_64 + sph_u32 clow, clow2; +#endif + + clen = 128U - current; + if (clen > len) + clen = (unsigned) len; + memcpy(sc->buf + current, data, clen); + data = (const unsigned char *)data + clen; + current += clen; + len -= clen; + if (current == 128U) { + DSTATE; + IN_PREPARE(sc->buf); + + RSTATE; + SPH_XCAT(CORE, PASSES)(INW); + WSTATE; + current = 0; + } +#if SPH_64 + sc->count += clen; +#else + clow = sc->count_low; + clow2 = SPH_T32(clow + clen); + sc->count_low = clow2; + if (clow2 < clow) + sc->count_high ++; +#endif + } +} + +#ifdef SPH_UPTR +static void +SPH_XCAT(haval, PASSES)(sph_haval_context *sc, const void *data, size_t len) +{ + unsigned current; + size_t orig_len; +#if !SPH_64 + sph_u32 clow, clow2; +#endif + DSTATE; + + if (len < 256U) { + SPH_XCAT(SPH_XCAT(haval, PASSES), _short)(sc, data, len); + return; + } +#if SPH_64 + current = (unsigned)sc->count & 127U; +#else + current = (unsigned)sc->count_low & 127U; +#endif + if (current > 0) { + unsigned clen; + + clen = 128U - current; + SPH_XCAT(SPH_XCAT(haval, PASSES), _short)(sc, data, clen); + data = (const unsigned char *)data + clen; + len -= clen; + } +#if !SPH_UNALIGNED + if (((SPH_UPTR)data & 3U) != 0) { + SPH_XCAT(SPH_XCAT(haval, PASSES), _short)(sc, data, len); + return; + } +#endif + orig_len = len; + RSTATE; + while (len >= 128U) { + IN_PREPARE(data); + + SPH_XCAT(CORE, PASSES)(INW); + data = (const unsigned char *)data + 128U; + len -= 128U; + } + WSTATE; + if (len > 0) + memcpy(sc->buf, data, len); +#if SPH_64 + sc->count += (sph_u64)orig_len; +#else + clow = sc->count_low; + clow2 = SPH_T32(clow + orig_len); + sc->count_low = clow2; + if (clow2 < clow) + sc->count_high ++; + orig_len >>= 12; + orig_len >>= 10; + orig_len >>= 10; + sc->count_high += orig_len; +#endif +} +#endif + +static void +SPH_XCAT(SPH_XCAT(haval, PASSES), _close)(sph_haval_context *sc, + unsigned ub, unsigned n, void *dst) +{ + unsigned current; + DSTATE; + +#if SPH_64 + current = (unsigned)sc->count & 127U; +#else + current = (unsigned)sc->count_low & 127U; +#endif + sc->buf[current ++] = (0x01 << n) | ((ub & 0xFF) >> (8 - n)); + RSTATE; + if (current > 118U) { + memset(sc->buf + current, 0, 128U - current); + + do { + IN_PREPARE(sc->buf); + + SPH_XCAT(CORE, PASSES)(INW); + } while (0); + current = 0; + } + memset(sc->buf + current, 0, 118U - current); + sc->buf[118] = 0x01 | (PASSES << 3); + sc->buf[119] = sc->olen << 3; +#if SPH_64 + sph_enc64le_aligned(sc->buf + 120, SPH_T64(sc->count << 3)); +#else + sph_enc32le_aligned(sc->buf + 120, SPH_T32(sc->count_low << 3)); + sph_enc32le_aligned(sc->buf + 124, + SPH_T32((sc->count_high << 3) | (sc->count_low >> 29))); +#endif + do { + IN_PREPARE(sc->buf); + + SPH_XCAT(CORE, PASSES)(INW); + } while (0); + + WSTATE; + haval_out(sc, dst); + haval_init(sc, sc->olen, sc->passes); +} + diff --git a/sha3/sph_haval.c b/sha3/sph_haval.c new file mode 100644 index 000000000..90922b638 --- /dev/null +++ b/sha3/sph_haval.c @@ -0,0 +1,975 @@ +/* $Id: haval.c 227 2010-06-16 17:28:38Z tp $ */ +/* + * HAVAL implementation. + * + * The HAVAL reference paper is of questionable clarity with regards to + * some details such as endianness of bits within a byte, bytes within + * a 32-bit word, or the actual ordering of words within a stream of + * words. This implementation has been made compatible with the reference + * implementation available on: http://labs.calyptix.com/haval.php + * + * ==========================(LICENSE BEGIN)============================ + * + * Copyright (c) 2007-2010 Projet RNRT SAPHIR + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * ===========================(LICENSE END)============================= + * + * @author Thomas Pornin + */ + +#include +#include + +#include "sph_haval.h" + +#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_HAVAL +#define SPH_SMALL_FOOTPRINT_HAVAL 1 +#endif + +/* + * Basic definition from the reference paper. + * +#define F1(x6, x5, x4, x3, x2, x1, x0) \ + (((x1) & (x4)) ^ ((x2) & (x5)) ^ ((x3) & (x6)) ^ ((x0) & (x1)) ^ (x0)) + * + */ + +#define F1(x6, x5, x4, x3, x2, x1, x0) \ + (((x1) & ((x0) ^ (x4))) ^ ((x2) & (x5)) ^ ((x3) & (x6)) ^ (x0)) + +/* + * Basic definition from the reference paper. + * +#define F2(x6, x5, x4, x3, x2, x1, x0) \ + (((x1) & (x2) & (x3)) ^ ((x2) & (x4) & (x5)) ^ ((x1) & (x2)) \ + ^ ((x1) & (x4)) ^ ((x2) & (x6)) ^ ((x3) & (x5)) \ + ^ ((x4) & (x5)) ^ ((x0) & (x2)) ^ (x0)) + * + */ + +#define F2(x6, x5, x4, x3, x2, x1, x0) \ + (((x2) & (((x1) & ~(x3)) ^ ((x4) & (x5)) ^ (x6) ^ (x0))) \ + ^ ((x4) & ((x1) ^ (x5))) ^ ((x3 & (x5)) ^ (x0))) + +/* + * Basic definition from the reference paper. + * +#define F3(x6, x5, x4, x3, x2, x1, x0) \ + (((x1) & (x2) & (x3)) ^ ((x1) & (x4)) ^ ((x2) & (x5)) \ + ^ ((x3) & (x6)) ^ ((x0) & (x3)) ^ (x0)) + * + */ + +#define F3(x6, x5, x4, x3, x2, x1, x0) \ + (((x3) & (((x1) & (x2)) ^ (x6) ^ (x0))) \ + ^ ((x1) & (x4)) ^ ((x2) & (x5)) ^ (x0)) + +/* + * Basic definition from the reference paper. + * +#define F4(x6, x5, x4, x3, x2, x1, x0) \ + (((x1) & (x2) & (x3)) ^ ((x2) & (x4) & (x5)) ^ ((x3) & (x4) & (x6)) \ + ^ ((x1) & (x4)) ^ ((x2) & (x6)) ^ ((x3) & (x4)) ^ ((x3) & (x5)) \ + ^ ((x3) & (x6)) ^ ((x4) & (x5)) ^ ((x4) & (x6)) ^ ((x0) & (x4)) ^ (x0)) + * + */ + +#define F4(x6, x5, x4, x3, x2, x1, x0) \ + (((x3) & (((x1) & (x2)) ^ ((x4) | (x6)) ^ (x5))) \ + ^ ((x4) & ((~(x2) & (x5)) ^ (x1) ^ (x6) ^ (x0))) \ + ^ ((x2) & (x6)) ^ (x0)) + +/* + * Basic definition from the reference paper. + * +#define F5(x6, x5, x4, x3, x2, x1, x0) \ + (((x1) & (x4)) ^ ((x2) & (x5)) ^ ((x3) & (x6)) \ + ^ ((x0) & (x1) & (x2) & (x3)) ^ ((x0) & (x5)) ^ (x0)) + * + */ + +#define F5(x6, x5, x4, x3, x2, x1, x0) \ + (((x0) & ~(((x1) & (x2) & (x3)) ^ (x5))) \ + ^ ((x1) & (x4)) ^ ((x2) & (x5)) ^ ((x3) & (x6))) + +/* + * The macros below integrate the phi() permutations, depending on the + * pass and the total number of passes. + */ + +#define FP3_1(x6, x5, x4, x3, x2, x1, x0) \ + F1(x1, x0, x3, x5, x6, x2, x4) +#define FP3_2(x6, x5, x4, x3, x2, x1, x0) \ + F2(x4, x2, x1, x0, x5, x3, x6) +#define FP3_3(x6, x5, x4, x3, x2, x1, x0) \ + F3(x6, x1, x2, x3, x4, x5, x0) + +#define FP4_1(x6, x5, x4, x3, x2, x1, x0) \ + F1(x2, x6, x1, x4, x5, x3, x0) +#define FP4_2(x6, x5, x4, x3, x2, x1, x0) \ + F2(x3, x5, x2, x0, x1, x6, x4) +#define FP4_3(x6, x5, x4, x3, x2, x1, x0) \ + F3(x1, x4, x3, x6, x0, x2, x5) +#define FP4_4(x6, x5, x4, x3, x2, x1, x0) \ + F4(x6, x4, x0, x5, x2, x1, x3) + +#define FP5_1(x6, x5, x4, x3, x2, x1, x0) \ + F1(x3, x4, x1, x0, x5, x2, x6) +#define FP5_2(x6, x5, x4, x3, x2, x1, x0) \ + F2(x6, x2, x1, x0, x3, x4, x5) +#define FP5_3(x6, x5, x4, x3, x2, x1, x0) \ + F3(x2, x6, x0, x4, x3, x1, x5) +#define FP5_4(x6, x5, x4, x3, x2, x1, x0) \ + F4(x1, x5, x3, x2, x0, x4, x6) +#define FP5_5(x6, x5, x4, x3, x2, x1, x0) \ + F5(x2, x5, x0, x6, x4, x3, x1) + +/* + * One step, for "n" passes, pass number "p" (1 <= p <= n), using + * input word number "w" and step constant "c". + */ +#define STEP(n, p, x7, x6, x5, x4, x3, x2, x1, x0, w, c) do { \ + sph_u32 t = FP ## n ## _ ## p(x6, x5, x4, x3, x2, x1, x0); \ + (x7) = SPH_T32(SPH_ROTR32(t, 7) + SPH_ROTR32((x7), 11) \ + + (w) + (c)); \ + } while (0) + +/* + * PASSy(n, in) computes pass number "y", for a total of "n", using the + * one-argument macro "in" to access input words. Current state is assumed + * to be held in variables "s0" to "s7". + */ + +#if SPH_SMALL_FOOTPRINT_HAVAL + +#define PASS1(n, in) do { \ + unsigned pass_count; \ + for (pass_count = 0; pass_count < 32; pass_count += 8) { \ + STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, \ + in(pass_count + 0), SPH_C32(0x00000000)); \ + STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, \ + in(pass_count + 1), SPH_C32(0x00000000)); \ + STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, \ + in(pass_count + 2), SPH_C32(0x00000000)); \ + STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, \ + in(pass_count + 3), SPH_C32(0x00000000)); \ + STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, \ + in(pass_count + 4), SPH_C32(0x00000000)); \ + STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, \ + in(pass_count + 5), SPH_C32(0x00000000)); \ + STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, \ + in(pass_count + 6), SPH_C32(0x00000000)); \ + STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, \ + in(pass_count + 7), SPH_C32(0x00000000)); \ + } \ + } while (0) + +#define PASSG(p, n, in) do { \ + unsigned pass_count; \ + for (pass_count = 0; pass_count < 32; pass_count += 8) { \ + STEP(n, p, s7, s6, s5, s4, s3, s2, s1, s0, \ + in(MP ## p[pass_count + 0]), \ + RK ## p[pass_count + 0]); \ + STEP(n, p, s6, s5, s4, s3, s2, s1, s0, s7, \ + in(MP ## p[pass_count + 1]), \ + RK ## p[pass_count + 1]); \ + STEP(n, p, s5, s4, s3, s2, s1, s0, s7, s6, \ + in(MP ## p[pass_count + 2]), \ + RK ## p[pass_count + 2]); \ + STEP(n, p, s4, s3, s2, s1, s0, s7, s6, s5, \ + in(MP ## p[pass_count + 3]), \ + RK ## p[pass_count + 3]); \ + STEP(n, p, s3, s2, s1, s0, s7, s6, s5, s4, \ + in(MP ## p[pass_count + 4]), \ + RK ## p[pass_count + 4]); \ + STEP(n, p, s2, s1, s0, s7, s6, s5, s4, s3, \ + in(MP ## p[pass_count + 5]), \ + RK ## p[pass_count + 5]); \ + STEP(n, p, s1, s0, s7, s6, s5, s4, s3, s2, \ + in(MP ## p[pass_count + 6]), \ + RK ## p[pass_count + 6]); \ + STEP(n, p, s0, s7, s6, s5, s4, s3, s2, s1, \ + in(MP ## p[pass_count + 7]), \ + RK ## p[pass_count + 7]); \ + } \ + } while (0) + +#define PASS2(n, in) PASSG(2, n, in) +#define PASS3(n, in) PASSG(3, n, in) +#define PASS4(n, in) PASSG(4, n, in) +#define PASS5(n, in) PASSG(5, n, in) + +static const unsigned MP2[32] = { + 5, 14, 26, 18, 11, 28, 7, 16, + 0, 23, 20, 22, 1, 10, 4, 8, + 30, 3, 21, 9, 17, 24, 29, 6, + 19, 12, 15, 13, 2, 25, 31, 27 +}; + +static const unsigned MP3[32] = { + 19, 9, 4, 20, 28, 17, 8, 22, + 29, 14, 25, 12, 24, 30, 16, 26, + 31, 15, 7, 3, 1, 0, 18, 27, + 13, 6, 21, 10, 23, 11, 5, 2 +}; + +static const unsigned MP4[32] = { + 24, 4, 0, 14, 2, 7, 28, 23, + 26, 6, 30, 20, 18, 25, 19, 3, + 22, 11, 31, 21, 8, 27, 12, 9, + 1, 29, 5, 15, 17, 10, 16, 13 +}; + +static const unsigned MP5[32] = { + 27, 3, 21, 26, 17, 11, 20, 29, + 19, 0, 12, 7, 13, 8, 31, 10, + 5, 9, 14, 30, 18, 6, 28, 24, + 2, 23, 16, 22, 4, 1, 25, 15 +}; + +static const sph_u32 RK2[32] = { + SPH_C32(0x452821E6), SPH_C32(0x38D01377), + SPH_C32(0xBE5466CF), SPH_C32(0x34E90C6C), + SPH_C32(0xC0AC29B7), SPH_C32(0xC97C50DD), + SPH_C32(0x3F84D5B5), SPH_C32(0xB5470917), + SPH_C32(0x9216D5D9), SPH_C32(0x8979FB1B), + SPH_C32(0xD1310BA6), SPH_C32(0x98DFB5AC), + SPH_C32(0x2FFD72DB), SPH_C32(0xD01ADFB7), + SPH_C32(0xB8E1AFED), SPH_C32(0x6A267E96), + SPH_C32(0xBA7C9045), SPH_C32(0xF12C7F99), + SPH_C32(0x24A19947), SPH_C32(0xB3916CF7), + SPH_C32(0x0801F2E2), SPH_C32(0x858EFC16), + SPH_C32(0x636920D8), SPH_C32(0x71574E69), + SPH_C32(0xA458FEA3), SPH_C32(0xF4933D7E), + SPH_C32(0x0D95748F), SPH_C32(0x728EB658), + SPH_C32(0x718BCD58), SPH_C32(0x82154AEE), + SPH_C32(0x7B54A41D), SPH_C32(0xC25A59B5) +}; + +static const sph_u32 RK3[32] = { + SPH_C32(0x9C30D539), SPH_C32(0x2AF26013), + SPH_C32(0xC5D1B023), SPH_C32(0x286085F0), + SPH_C32(0xCA417918), SPH_C32(0xB8DB38EF), + SPH_C32(0x8E79DCB0), SPH_C32(0x603A180E), + SPH_C32(0x6C9E0E8B), SPH_C32(0xB01E8A3E), + SPH_C32(0xD71577C1), SPH_C32(0xBD314B27), + SPH_C32(0x78AF2FDA), SPH_C32(0x55605C60), + SPH_C32(0xE65525F3), SPH_C32(0xAA55AB94), + SPH_C32(0x57489862), SPH_C32(0x63E81440), + SPH_C32(0x55CA396A), SPH_C32(0x2AAB10B6), + SPH_C32(0xB4CC5C34), SPH_C32(0x1141E8CE), + SPH_C32(0xA15486AF), SPH_C32(0x7C72E993), + SPH_C32(0xB3EE1411), SPH_C32(0x636FBC2A), + SPH_C32(0x2BA9C55D), SPH_C32(0x741831F6), + SPH_C32(0xCE5C3E16), SPH_C32(0x9B87931E), + SPH_C32(0xAFD6BA33), SPH_C32(0x6C24CF5C) +}; + +static const sph_u32 RK4[32] = { + SPH_C32(0x7A325381), SPH_C32(0x28958677), + SPH_C32(0x3B8F4898), SPH_C32(0x6B4BB9AF), + SPH_C32(0xC4BFE81B), SPH_C32(0x66282193), + SPH_C32(0x61D809CC), SPH_C32(0xFB21A991), + SPH_C32(0x487CAC60), SPH_C32(0x5DEC8032), + SPH_C32(0xEF845D5D), SPH_C32(0xE98575B1), + SPH_C32(0xDC262302), SPH_C32(0xEB651B88), + SPH_C32(0x23893E81), SPH_C32(0xD396ACC5), + SPH_C32(0x0F6D6FF3), SPH_C32(0x83F44239), + SPH_C32(0x2E0B4482), SPH_C32(0xA4842004), + SPH_C32(0x69C8F04A), SPH_C32(0x9E1F9B5E), + SPH_C32(0x21C66842), SPH_C32(0xF6E96C9A), + SPH_C32(0x670C9C61), SPH_C32(0xABD388F0), + SPH_C32(0x6A51A0D2), SPH_C32(0xD8542F68), + SPH_C32(0x960FA728), SPH_C32(0xAB5133A3), + SPH_C32(0x6EEF0B6C), SPH_C32(0x137A3BE4) +}; + +static const sph_u32 RK5[32] = { + SPH_C32(0xBA3BF050), SPH_C32(0x7EFB2A98), + SPH_C32(0xA1F1651D), SPH_C32(0x39AF0176), + SPH_C32(0x66CA593E), SPH_C32(0x82430E88), + SPH_C32(0x8CEE8619), SPH_C32(0x456F9FB4), + SPH_C32(0x7D84A5C3), SPH_C32(0x3B8B5EBE), + SPH_C32(0xE06F75D8), SPH_C32(0x85C12073), + SPH_C32(0x401A449F), SPH_C32(0x56C16AA6), + SPH_C32(0x4ED3AA62), SPH_C32(0x363F7706), + SPH_C32(0x1BFEDF72), SPH_C32(0x429B023D), + SPH_C32(0x37D0D724), SPH_C32(0xD00A1248), + SPH_C32(0xDB0FEAD3), SPH_C32(0x49F1C09B), + SPH_C32(0x075372C9), SPH_C32(0x80991B7B), + SPH_C32(0x25D479D8), SPH_C32(0xF6E8DEF7), + SPH_C32(0xE3FE501A), SPH_C32(0xB6794C3B), + SPH_C32(0x976CE0BD), SPH_C32(0x04C006BA), + SPH_C32(0xC1A94FB6), SPH_C32(0x409F60C4) +}; + +#else + +#define PASS1(n, in) do { \ + STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, in( 0), SPH_C32(0x00000000)); \ + STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, in( 1), SPH_C32(0x00000000)); \ + STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, in( 2), SPH_C32(0x00000000)); \ + STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, in( 3), SPH_C32(0x00000000)); \ + STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, in( 4), SPH_C32(0x00000000)); \ + STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, in( 5), SPH_C32(0x00000000)); \ + STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, in( 6), SPH_C32(0x00000000)); \ + STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, in( 7), SPH_C32(0x00000000)); \ + \ + STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, in( 8), SPH_C32(0x00000000)); \ + STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, in( 9), SPH_C32(0x00000000)); \ + STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, in(10), SPH_C32(0x00000000)); \ + STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, in(11), SPH_C32(0x00000000)); \ + STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, in(12), SPH_C32(0x00000000)); \ + STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, in(13), SPH_C32(0x00000000)); \ + STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, in(14), SPH_C32(0x00000000)); \ + STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, in(15), SPH_C32(0x00000000)); \ + \ + STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, in(16), SPH_C32(0x00000000)); \ + STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, in(17), SPH_C32(0x00000000)); \ + STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, in(18), SPH_C32(0x00000000)); \ + STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, in(19), SPH_C32(0x00000000)); \ + STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, in(20), SPH_C32(0x00000000)); \ + STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, in(21), SPH_C32(0x00000000)); \ + STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, in(22), SPH_C32(0x00000000)); \ + STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, in(23), SPH_C32(0x00000000)); \ + \ + STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, in(24), SPH_C32(0x00000000)); \ + STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, in(25), SPH_C32(0x00000000)); \ + STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, in(26), SPH_C32(0x00000000)); \ + STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, in(27), SPH_C32(0x00000000)); \ + STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, in(28), SPH_C32(0x00000000)); \ + STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, in(29), SPH_C32(0x00000000)); \ + STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, in(30), SPH_C32(0x00000000)); \ + STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, in(31), SPH_C32(0x00000000)); \ + } while (0) + +#define PASS2(n, in) do { \ + STEP(n, 2, s7, s6, s5, s4, s3, s2, s1, s0, in( 5), SPH_C32(0x452821E6)); \ + STEP(n, 2, s6, s5, s4, s3, s2, s1, s0, s7, in(14), SPH_C32(0x38D01377)); \ + STEP(n, 2, s5, s4, s3, s2, s1, s0, s7, s6, in(26), SPH_C32(0xBE5466CF)); \ + STEP(n, 2, s4, s3, s2, s1, s0, s7, s6, s5, in(18), SPH_C32(0x34E90C6C)); \ + STEP(n, 2, s3, s2, s1, s0, s7, s6, s5, s4, in(11), SPH_C32(0xC0AC29B7)); \ + STEP(n, 2, s2, s1, s0, s7, s6, s5, s4, s3, in(28), SPH_C32(0xC97C50DD)); \ + STEP(n, 2, s1, s0, s7, s6, s5, s4, s3, s2, in( 7), SPH_C32(0x3F84D5B5)); \ + STEP(n, 2, s0, s7, s6, s5, s4, s3, s2, s1, in(16), SPH_C32(0xB5470917)); \ + \ + STEP(n, 2, s7, s6, s5, s4, s3, s2, s1, s0, in( 0), SPH_C32(0x9216D5D9)); \ + STEP(n, 2, s6, s5, s4, s3, s2, s1, s0, s7, in(23), SPH_C32(0x8979FB1B)); \ + STEP(n, 2, s5, s4, s3, s2, s1, s0, s7, s6, in(20), SPH_C32(0xD1310BA6)); \ + STEP(n, 2, s4, s3, s2, s1, s0, s7, s6, s5, in(22), SPH_C32(0x98DFB5AC)); \ + STEP(n, 2, s3, s2, s1, s0, s7, s6, s5, s4, in( 1), SPH_C32(0x2FFD72DB)); \ + STEP(n, 2, s2, s1, s0, s7, s6, s5, s4, s3, in(10), SPH_C32(0xD01ADFB7)); \ + STEP(n, 2, s1, s0, s7, s6, s5, s4, s3, s2, in( 4), SPH_C32(0xB8E1AFED)); \ + STEP(n, 2, s0, s7, s6, s5, s4, s3, s2, s1, in( 8), SPH_C32(0x6A267E96)); \ + \ + STEP(n, 2, s7, s6, s5, s4, s3, s2, s1, s0, in(30), SPH_C32(0xBA7C9045)); \ + STEP(n, 2, s6, s5, s4, s3, s2, s1, s0, s7, in( 3), SPH_C32(0xF12C7F99)); \ + STEP(n, 2, s5, s4, s3, s2, s1, s0, s7, s6, in(21), SPH_C32(0x24A19947)); \ + STEP(n, 2, s4, s3, s2, s1, s0, s7, s6, s5, in( 9), SPH_C32(0xB3916CF7)); \ + STEP(n, 2, s3, s2, s1, s0, s7, s6, s5, s4, in(17), SPH_C32(0x0801F2E2)); \ + STEP(n, 2, s2, s1, s0, s7, s6, s5, s4, s3, in(24), SPH_C32(0x858EFC16)); \ + STEP(n, 2, s1, s0, s7, s6, s5, s4, s3, s2, in(29), SPH_C32(0x636920D8)); \ + STEP(n, 2, s0, s7, s6, s5, s4, s3, s2, s1, in( 6), SPH_C32(0x71574E69)); \ + \ + STEP(n, 2, s7, s6, s5, s4, s3, s2, s1, s0, in(19), SPH_C32(0xA458FEA3)); \ + STEP(n, 2, s6, s5, s4, s3, s2, s1, s0, s7, in(12), SPH_C32(0xF4933D7E)); \ + STEP(n, 2, s5, s4, s3, s2, s1, s0, s7, s6, in(15), SPH_C32(0x0D95748F)); \ + STEP(n, 2, s4, s3, s2, s1, s0, s7, s6, s5, in(13), SPH_C32(0x728EB658)); \ + STEP(n, 2, s3, s2, s1, s0, s7, s6, s5, s4, in( 2), SPH_C32(0x718BCD58)); \ + STEP(n, 2, s2, s1, s0, s7, s6, s5, s4, s3, in(25), SPH_C32(0x82154AEE)); \ + STEP(n, 2, s1, s0, s7, s6, s5, s4, s3, s2, in(31), SPH_C32(0x7B54A41D)); \ + STEP(n, 2, s0, s7, s6, s5, s4, s3, s2, s1, in(27), SPH_C32(0xC25A59B5)); \ + } while (0) + +#define PASS3(n, in) do { \ + STEP(n, 3, s7, s6, s5, s4, s3, s2, s1, s0, in(19), SPH_C32(0x9C30D539)); \ + STEP(n, 3, s6, s5, s4, s3, s2, s1, s0, s7, in( 9), SPH_C32(0x2AF26013)); \ + STEP(n, 3, s5, s4, s3, s2, s1, s0, s7, s6, in( 4), SPH_C32(0xC5D1B023)); \ + STEP(n, 3, s4, s3, s2, s1, s0, s7, s6, s5, in(20), SPH_C32(0x286085F0)); \ + STEP(n, 3, s3, s2, s1, s0, s7, s6, s5, s4, in(28), SPH_C32(0xCA417918)); \ + STEP(n, 3, s2, s1, s0, s7, s6, s5, s4, s3, in(17), SPH_C32(0xB8DB38EF)); \ + STEP(n, 3, s1, s0, s7, s6, s5, s4, s3, s2, in( 8), SPH_C32(0x8E79DCB0)); \ + STEP(n, 3, s0, s7, s6, s5, s4, s3, s2, s1, in(22), SPH_C32(0x603A180E)); \ + \ + STEP(n, 3, s7, s6, s5, s4, s3, s2, s1, s0, in(29), SPH_C32(0x6C9E0E8B)); \ + STEP(n, 3, s6, s5, s4, s3, s2, s1, s0, s7, in(14), SPH_C32(0xB01E8A3E)); \ + STEP(n, 3, s5, s4, s3, s2, s1, s0, s7, s6, in(25), SPH_C32(0xD71577C1)); \ + STEP(n, 3, s4, s3, s2, s1, s0, s7, s6, s5, in(12), SPH_C32(0xBD314B27)); \ + STEP(n, 3, s3, s2, s1, s0, s7, s6, s5, s4, in(24), SPH_C32(0x78AF2FDA)); \ + STEP(n, 3, s2, s1, s0, s7, s6, s5, s4, s3, in(30), SPH_C32(0x55605C60)); \ + STEP(n, 3, s1, s0, s7, s6, s5, s4, s3, s2, in(16), SPH_C32(0xE65525F3)); \ + STEP(n, 3, s0, s7, s6, s5, s4, s3, s2, s1, in(26), SPH_C32(0xAA55AB94)); \ + \ + STEP(n, 3, s7, s6, s5, s4, s3, s2, s1, s0, in(31), SPH_C32(0x57489862)); \ + STEP(n, 3, s6, s5, s4, s3, s2, s1, s0, s7, in(15), SPH_C32(0x63E81440)); \ + STEP(n, 3, s5, s4, s3, s2, s1, s0, s7, s6, in( 7), SPH_C32(0x55CA396A)); \ + STEP(n, 3, s4, s3, s2, s1, s0, s7, s6, s5, in( 3), SPH_C32(0x2AAB10B6)); \ + STEP(n, 3, s3, s2, s1, s0, s7, s6, s5, s4, in( 1), SPH_C32(0xB4CC5C34)); \ + STEP(n, 3, s2, s1, s0, s7, s6, s5, s4, s3, in( 0), SPH_C32(0x1141E8CE)); \ + STEP(n, 3, s1, s0, s7, s6, s5, s4, s3, s2, in(18), SPH_C32(0xA15486AF)); \ + STEP(n, 3, s0, s7, s6, s5, s4, s3, s2, s1, in(27), SPH_C32(0x7C72E993)); \ + \ + STEP(n, 3, s7, s6, s5, s4, s3, s2, s1, s0, in(13), SPH_C32(0xB3EE1411)); \ + STEP(n, 3, s6, s5, s4, s3, s2, s1, s0, s7, in( 6), SPH_C32(0x636FBC2A)); \ + STEP(n, 3, s5, s4, s3, s2, s1, s0, s7, s6, in(21), SPH_C32(0x2BA9C55D)); \ + STEP(n, 3, s4, s3, s2, s1, s0, s7, s6, s5, in(10), SPH_C32(0x741831F6)); \ + STEP(n, 3, s3, s2, s1, s0, s7, s6, s5, s4, in(23), SPH_C32(0xCE5C3E16)); \ + STEP(n, 3, s2, s1, s0, s7, s6, s5, s4, s3, in(11), SPH_C32(0x9B87931E)); \ + STEP(n, 3, s1, s0, s7, s6, s5, s4, s3, s2, in( 5), SPH_C32(0xAFD6BA33)); \ + STEP(n, 3, s0, s7, s6, s5, s4, s3, s2, s1, in( 2), SPH_C32(0x6C24CF5C)); \ + } while (0) + +#define PASS4(n, in) do { \ + STEP(n, 4, s7, s6, s5, s4, s3, s2, s1, s0, in(24), SPH_C32(0x7A325381)); \ + STEP(n, 4, s6, s5, s4, s3, s2, s1, s0, s7, in( 4), SPH_C32(0x28958677)); \ + STEP(n, 4, s5, s4, s3, s2, s1, s0, s7, s6, in( 0), SPH_C32(0x3B8F4898)); \ + STEP(n, 4, s4, s3, s2, s1, s0, s7, s6, s5, in(14), SPH_C32(0x6B4BB9AF)); \ + STEP(n, 4, s3, s2, s1, s0, s7, s6, s5, s4, in( 2), SPH_C32(0xC4BFE81B)); \ + STEP(n, 4, s2, s1, s0, s7, s6, s5, s4, s3, in( 7), SPH_C32(0x66282193)); \ + STEP(n, 4, s1, s0, s7, s6, s5, s4, s3, s2, in(28), SPH_C32(0x61D809CC)); \ + STEP(n, 4, s0, s7, s6, s5, s4, s3, s2, s1, in(23), SPH_C32(0xFB21A991)); \ + \ + STEP(n, 4, s7, s6, s5, s4, s3, s2, s1, s0, in(26), SPH_C32(0x487CAC60)); \ + STEP(n, 4, s6, s5, s4, s3, s2, s1, s0, s7, in( 6), SPH_C32(0x5DEC8032)); \ + STEP(n, 4, s5, s4, s3, s2, s1, s0, s7, s6, in(30), SPH_C32(0xEF845D5D)); \ + STEP(n, 4, s4, s3, s2, s1, s0, s7, s6, s5, in(20), SPH_C32(0xE98575B1)); \ + STEP(n, 4, s3, s2, s1, s0, s7, s6, s5, s4, in(18), SPH_C32(0xDC262302)); \ + STEP(n, 4, s2, s1, s0, s7, s6, s5, s4, s3, in(25), SPH_C32(0xEB651B88)); \ + STEP(n, 4, s1, s0, s7, s6, s5, s4, s3, s2, in(19), SPH_C32(0x23893E81)); \ + STEP(n, 4, s0, s7, s6, s5, s4, s3, s2, s1, in( 3), SPH_C32(0xD396ACC5)); \ + \ + STEP(n, 4, s7, s6, s5, s4, s3, s2, s1, s0, in(22), SPH_C32(0x0F6D6FF3)); \ + STEP(n, 4, s6, s5, s4, s3, s2, s1, s0, s7, in(11), SPH_C32(0x83F44239)); \ + STEP(n, 4, s5, s4, s3, s2, s1, s0, s7, s6, in(31), SPH_C32(0x2E0B4482)); \ + STEP(n, 4, s4, s3, s2, s1, s0, s7, s6, s5, in(21), SPH_C32(0xA4842004)); \ + STEP(n, 4, s3, s2, s1, s0, s7, s6, s5, s4, in( 8), SPH_C32(0x69C8F04A)); \ + STEP(n, 4, s2, s1, s0, s7, s6, s5, s4, s3, in(27), SPH_C32(0x9E1F9B5E)); \ + STEP(n, 4, s1, s0, s7, s6, s5, s4, s3, s2, in(12), SPH_C32(0x21C66842)); \ + STEP(n, 4, s0, s7, s6, s5, s4, s3, s2, s1, in( 9), SPH_C32(0xF6E96C9A)); \ + \ + STEP(n, 4, s7, s6, s5, s4, s3, s2, s1, s0, in( 1), SPH_C32(0x670C9C61)); \ + STEP(n, 4, s6, s5, s4, s3, s2, s1, s0, s7, in(29), SPH_C32(0xABD388F0)); \ + STEP(n, 4, s5, s4, s3, s2, s1, s0, s7, s6, in( 5), SPH_C32(0x6A51A0D2)); \ + STEP(n, 4, s4, s3, s2, s1, s0, s7, s6, s5, in(15), SPH_C32(0xD8542F68)); \ + STEP(n, 4, s3, s2, s1, s0, s7, s6, s5, s4, in(17), SPH_C32(0x960FA728)); \ + STEP(n, 4, s2, s1, s0, s7, s6, s5, s4, s3, in(10), SPH_C32(0xAB5133A3)); \ + STEP(n, 4, s1, s0, s7, s6, s5, s4, s3, s2, in(16), SPH_C32(0x6EEF0B6C)); \ + STEP(n, 4, s0, s7, s6, s5, s4, s3, s2, s1, in(13), SPH_C32(0x137A3BE4)); \ + } while (0) + +#define PASS5(n, in) do { \ + STEP(n, 5, s7, s6, s5, s4, s3, s2, s1, s0, in(27), SPH_C32(0xBA3BF050)); \ + STEP(n, 5, s6, s5, s4, s3, s2, s1, s0, s7, in( 3), SPH_C32(0x7EFB2A98)); \ + STEP(n, 5, s5, s4, s3, s2, s1, s0, s7, s6, in(21), SPH_C32(0xA1F1651D)); \ + STEP(n, 5, s4, s3, s2, s1, s0, s7, s6, s5, in(26), SPH_C32(0x39AF0176)); \ + STEP(n, 5, s3, s2, s1, s0, s7, s6, s5, s4, in(17), SPH_C32(0x66CA593E)); \ + STEP(n, 5, s2, s1, s0, s7, s6, s5, s4, s3, in(11), SPH_C32(0x82430E88)); \ + STEP(n, 5, s1, s0, s7, s6, s5, s4, s3, s2, in(20), SPH_C32(0x8CEE8619)); \ + STEP(n, 5, s0, s7, s6, s5, s4, s3, s2, s1, in(29), SPH_C32(0x456F9FB4)); \ + \ + STEP(n, 5, s7, s6, s5, s4, s3, s2, s1, s0, in(19), SPH_C32(0x7D84A5C3)); \ + STEP(n, 5, s6, s5, s4, s3, s2, s1, s0, s7, in( 0), SPH_C32(0x3B8B5EBE)); \ + STEP(n, 5, s5, s4, s3, s2, s1, s0, s7, s6, in(12), SPH_C32(0xE06F75D8)); \ + STEP(n, 5, s4, s3, s2, s1, s0, s7, s6, s5, in( 7), SPH_C32(0x85C12073)); \ + STEP(n, 5, s3, s2, s1, s0, s7, s6, s5, s4, in(13), SPH_C32(0x401A449F)); \ + STEP(n, 5, s2, s1, s0, s7, s6, s5, s4, s3, in( 8), SPH_C32(0x56C16AA6)); \ + STEP(n, 5, s1, s0, s7, s6, s5, s4, s3, s2, in(31), SPH_C32(0x4ED3AA62)); \ + STEP(n, 5, s0, s7, s6, s5, s4, s3, s2, s1, in(10), SPH_C32(0x363F7706)); \ + \ + STEP(n, 5, s7, s6, s5, s4, s3, s2, s1, s0, in( 5), SPH_C32(0x1BFEDF72)); \ + STEP(n, 5, s6, s5, s4, s3, s2, s1, s0, s7, in( 9), SPH_C32(0x429B023D)); \ + STEP(n, 5, s5, s4, s3, s2, s1, s0, s7, s6, in(14), SPH_C32(0x37D0D724)); \ + STEP(n, 5, s4, s3, s2, s1, s0, s7, s6, s5, in(30), SPH_C32(0xD00A1248)); \ + STEP(n, 5, s3, s2, s1, s0, s7, s6, s5, s4, in(18), SPH_C32(0xDB0FEAD3)); \ + STEP(n, 5, s2, s1, s0, s7, s6, s5, s4, s3, in( 6), SPH_C32(0x49F1C09B)); \ + STEP(n, 5, s1, s0, s7, s6, s5, s4, s3, s2, in(28), SPH_C32(0x075372C9)); \ + STEP(n, 5, s0, s7, s6, s5, s4, s3, s2, s1, in(24), SPH_C32(0x80991B7B)); \ + \ + STEP(n, 5, s7, s6, s5, s4, s3, s2, s1, s0, in( 2), SPH_C32(0x25D479D8)); \ + STEP(n, 5, s6, s5, s4, s3, s2, s1, s0, s7, in(23), SPH_C32(0xF6E8DEF7)); \ + STEP(n, 5, s5, s4, s3, s2, s1, s0, s7, s6, in(16), SPH_C32(0xE3FE501A)); \ + STEP(n, 5, s4, s3, s2, s1, s0, s7, s6, s5, in(22), SPH_C32(0xB6794C3B)); \ + STEP(n, 5, s3, s2, s1, s0, s7, s6, s5, s4, in( 4), SPH_C32(0x976CE0BD)); \ + STEP(n, 5, s2, s1, s0, s7, s6, s5, s4, s3, in( 1), SPH_C32(0x04C006BA)); \ + STEP(n, 5, s1, s0, s7, s6, s5, s4, s3, s2, in(25), SPH_C32(0xC1A94FB6)); \ + STEP(n, 5, s0, s7, s6, s5, s4, s3, s2, s1, in(15), SPH_C32(0x409F60C4)); \ + } while (0) + +#endif + +#define SAVE_STATE \ + sph_u32 u0, u1, u2, u3, u4, u5, u6, u7; \ + do { \ + u0 = s0; \ + u1 = s1; \ + u2 = s2; \ + u3 = s3; \ + u4 = s4; \ + u5 = s5; \ + u6 = s6; \ + u7 = s7; \ + } while (0) + +#define UPDATE_STATE do { \ + s0 = SPH_T32(s0 + u0); \ + s1 = SPH_T32(s1 + u1); \ + s2 = SPH_T32(s2 + u2); \ + s3 = SPH_T32(s3 + u3); \ + s4 = SPH_T32(s4 + u4); \ + s5 = SPH_T32(s5 + u5); \ + s6 = SPH_T32(s6 + u6); \ + s7 = SPH_T32(s7 + u7); \ + } while (0) + +/* + * COREn(in) performs the core HAVAL computation for "n" passes, using + * the one-argument macro "in" to access the input words. Running state + * is held in variable "s0" to "s7". + */ + +#define CORE3(in) do { \ + SAVE_STATE; \ + PASS1(3, in); \ + PASS2(3, in); \ + PASS3(3, in); \ + UPDATE_STATE; \ + } while (0) + +#define CORE4(in) do { \ + SAVE_STATE; \ + PASS1(4, in); \ + PASS2(4, in); \ + PASS3(4, in); \ + PASS4(4, in); \ + UPDATE_STATE; \ + } while (0) + +#define CORE5(in) do { \ + SAVE_STATE; \ + PASS1(5, in); \ + PASS2(5, in); \ + PASS3(5, in); \ + PASS4(5, in); \ + PASS5(5, in); \ + UPDATE_STATE; \ + } while (0) + +/* + * DSTATE declares the state variables "s0" to "s7". + */ +#define DSTATE sph_u32 s0, s1, s2, s3, s4, s5, s6, s7 + +/* + * RSTATE fills the state variables from the context "sc". + */ +#define RSTATE do { \ + s0 = sc->s0; \ + s1 = sc->s1; \ + s2 = sc->s2; \ + s3 = sc->s3; \ + s4 = sc->s4; \ + s5 = sc->s5; \ + s6 = sc->s6; \ + s7 = sc->s7; \ + } while (0) + +/* + * WSTATE updates the context "sc" from the state variables. + */ +#define WSTATE do { \ + sc->s0 = s0; \ + sc->s1 = s1; \ + sc->s2 = s2; \ + sc->s3 = s3; \ + sc->s4 = s4; \ + sc->s5 = s5; \ + sc->s6 = s6; \ + sc->s7 = s7; \ + } while (0) + +/* + * Initialize a context. "olen" is the output length, in 32-bit words + * (between 4 and 8, inclusive). "passes" is the number of passes + * (3, 4 or 5). + */ +static void +haval_init(sph_haval_context *sc, unsigned olen, unsigned passes) +{ + sc->s0 = SPH_C32(0x243F6A88); + sc->s1 = SPH_C32(0x85A308D3); + sc->s2 = SPH_C32(0x13198A2E); + sc->s3 = SPH_C32(0x03707344); + sc->s4 = SPH_C32(0xA4093822); + sc->s5 = SPH_C32(0x299F31D0); + sc->s6 = SPH_C32(0x082EFA98); + sc->s7 = SPH_C32(0xEC4E6C89); + sc->olen = olen; + sc->passes = passes; +#if SPH_64 + sc->count = 0; +#else + sc->count_high = 0; + sc->count_low = 0; +#endif +} + +/* + * IN_PREPARE(data) contains declarations and code to prepare for + * reading input words pointed to by "data". + * INW(i) reads the word number "i" (from 0 to 31). + */ +#if SPH_LITTLE_FAST +#define IN_PREPARE(indata) const unsigned char *const load_ptr = \ + (const unsigned char *)(indata) +#define INW(i) sph_dec32le_aligned(load_ptr + 4 * (i)) +#else +#define IN_PREPARE(indata) \ + sph_u32 X_var[32]; \ + int load_index; \ + \ + for (load_index = 0; load_index < 32; load_index ++) \ + X_var[load_index] = sph_dec32le_aligned( \ + (const unsigned char *)(indata) + 4 * load_index) +#define INW(i) X_var[i] +#endif + +/* + * Mixing operation used for 128-bit output tailoring. This function + * takes the byte 0 from a0, byte 1 from a1, byte 2 from a2 and byte 3 + * from a3, and combines them into a 32-bit word, which is then rotated + * to the left by n bits. + */ +static SPH_INLINE sph_u32 +mix128(sph_u32 a0, sph_u32 a1, sph_u32 a2, sph_u32 a3, int n) +{ + sph_u32 tmp; + + tmp = (a0 & SPH_C32(0x000000FF)) + | (a1 & SPH_C32(0x0000FF00)) + | (a2 & SPH_C32(0x00FF0000)) + | (a3 & SPH_C32(0xFF000000)); + if (n > 0) + tmp = SPH_ROTL32(tmp, n); + return tmp; +} + +/* + * Mixing operation used to compute output word 0 for 160-bit output. + */ +static SPH_INLINE sph_u32 +mix160_0(sph_u32 x5, sph_u32 x6, sph_u32 x7) +{ + sph_u32 tmp; + + tmp = (x5 & SPH_C32(0x01F80000)) + | (x6 & SPH_C32(0xFE000000)) + | (x7 & SPH_C32(0x0000003F)); + return SPH_ROTL32(tmp, 13); +} + +/* + * Mixing operation used to compute output word 1 for 160-bit output. + */ +static SPH_INLINE sph_u32 +mix160_1(sph_u32 x5, sph_u32 x6, sph_u32 x7) +{ + sph_u32 tmp; + + tmp = (x5 & SPH_C32(0xFE000000)) + | (x6 & SPH_C32(0x0000003F)) + | (x7 & SPH_C32(0x00000FC0)); + return SPH_ROTL32(tmp, 7); +} + +/* + * Mixing operation used to compute output word 2 for 160-bit output. + */ +static SPH_INLINE sph_u32 +mix160_2(sph_u32 x5, sph_u32 x6, sph_u32 x7) +{ + sph_u32 tmp; + + tmp = (x5 & SPH_C32(0x0000003F)) + | (x6 & SPH_C32(0x00000FC0)) + | (x7 & SPH_C32(0x0007F000)); + return tmp; +} + +/* + * Mixing operation used to compute output word 3 for 160-bit output. + */ +static SPH_INLINE sph_u32 +mix160_3(sph_u32 x5, sph_u32 x6, sph_u32 x7) +{ + sph_u32 tmp; + + tmp = (x5 & SPH_C32(0x00000FC0)) + | (x6 & SPH_C32(0x0007F000)) + | (x7 & SPH_C32(0x01F80000)); + return tmp >> 6; +} + +/* + * Mixing operation used to compute output word 4 for 160-bit output. + */ +static SPH_INLINE sph_u32 +mix160_4(sph_u32 x5, sph_u32 x6, sph_u32 x7) +{ + sph_u32 tmp; + + tmp = (x5 & SPH_C32(0x0007F000)) + | (x6 & SPH_C32(0x01F80000)) + | (x7 & SPH_C32(0xFE000000)); + return tmp >> 12; +} + +/* + * Mixing operation used to compute output word 0 for 192-bit output. + */ +static SPH_INLINE sph_u32 +mix192_0(sph_u32 x6, sph_u32 x7) +{ + sph_u32 tmp; + + tmp = (x6 & SPH_C32(0xFC000000)) | (x7 & SPH_C32(0x0000001F)); + return SPH_ROTL32(tmp, 6); +} + +/* + * Mixing operation used to compute output word 1 for 192-bit output. + */ +static SPH_INLINE sph_u32 +mix192_1(sph_u32 x6, sph_u32 x7) +{ + return (x6 & SPH_C32(0x0000001F)) | (x7 & SPH_C32(0x000003E0)); +} + +/* + * Mixing operation used to compute output word 2 for 192-bit output. + */ +static SPH_INLINE sph_u32 +mix192_2(sph_u32 x6, sph_u32 x7) +{ + return ((x6 & SPH_C32(0x000003E0)) | (x7 & SPH_C32(0x0000FC00))) >> 5; +} + +/* + * Mixing operation used to compute output word 3 for 192-bit output. + */ +static SPH_INLINE sph_u32 +mix192_3(sph_u32 x6, sph_u32 x7) +{ + return ((x6 & SPH_C32(0x0000FC00)) | (x7 & SPH_C32(0x001F0000))) >> 10; +} + +/* + * Mixing operation used to compute output word 4 for 192-bit output. + */ +static SPH_INLINE sph_u32 +mix192_4(sph_u32 x6, sph_u32 x7) +{ + return ((x6 & SPH_C32(0x001F0000)) | (x7 & SPH_C32(0x03E00000))) >> 16; +} + +/* + * Mixing operation used to compute output word 5 for 192-bit output. + */ +static SPH_INLINE sph_u32 +mix192_5(sph_u32 x6, sph_u32 x7) +{ + return ((x6 & SPH_C32(0x03E00000)) | (x7 & SPH_C32(0xFC000000))) >> 21; +} + +/* + * Write out HAVAL output. The output length is tailored to the requested + * length. + */ +static void +haval_out(sph_haval_context *sc, void *dst) +{ + DSTATE; + unsigned char *buf; + + buf = (unsigned char*)dst; + RSTATE; + switch (sc->olen) { + case 4: + sph_enc32le(buf, SPH_T32(s0 + mix128(s7, s4, s5, s6, 24))); + sph_enc32le(buf + 4, SPH_T32(s1 + mix128(s6, s7, s4, s5, 16))); + sph_enc32le(buf + 8, SPH_T32(s2 + mix128(s5, s6, s7, s4, 8))); + sph_enc32le(buf + 12, SPH_T32(s3 + mix128(s4, s5, s6, s7, 0))); + break; + case 5: + sph_enc32le(buf, SPH_T32(s0 + mix160_0(s5, s6, s7))); + sph_enc32le(buf + 4, SPH_T32(s1 + mix160_1(s5, s6, s7))); + sph_enc32le(buf + 8, SPH_T32(s2 + mix160_2(s5, s6, s7))); + sph_enc32le(buf + 12, SPH_T32(s3 + mix160_3(s5, s6, s7))); + sph_enc32le(buf + 16, SPH_T32(s4 + mix160_4(s5, s6, s7))); + break; + case 6: + sph_enc32le(buf, SPH_T32(s0 + mix192_0(s6, s7))); + sph_enc32le(buf + 4, SPH_T32(s1 + mix192_1(s6, s7))); + sph_enc32le(buf + 8, SPH_T32(s2 + mix192_2(s6, s7))); + sph_enc32le(buf + 12, SPH_T32(s3 + mix192_3(s6, s7))); + sph_enc32le(buf + 16, SPH_T32(s4 + mix192_4(s6, s7))); + sph_enc32le(buf + 20, SPH_T32(s5 + mix192_5(s6, s7))); + break; + case 7: + sph_enc32le(buf, SPH_T32(s0 + ((s7 >> 27) & 0x1F))); + sph_enc32le(buf + 4, SPH_T32(s1 + ((s7 >> 22) & 0x1F))); + sph_enc32le(buf + 8, SPH_T32(s2 + ((s7 >> 18) & 0x0F))); + sph_enc32le(buf + 12, SPH_T32(s3 + ((s7 >> 13) & 0x1F))); + sph_enc32le(buf + 16, SPH_T32(s4 + ((s7 >> 9) & 0x0F))); + sph_enc32le(buf + 20, SPH_T32(s5 + ((s7 >> 4) & 0x1F))); + sph_enc32le(buf + 24, SPH_T32(s6 + ((s7 ) & 0x0F))); + break; + case 8: + sph_enc32le(buf, s0); + sph_enc32le(buf + 4, s1); + sph_enc32le(buf + 8, s2); + sph_enc32le(buf + 12, s3); + sph_enc32le(buf + 16, s4); + sph_enc32le(buf + 20, s5); + sph_enc32le(buf + 24, s6); + sph_enc32le(buf + 28, s7); + break; + } +} + +/* + * The main core functions inline the code with the COREx() macros. We + * use a helper file, included three times, which avoids code copying. + */ + +#undef PASSES +#define PASSES 3 +#include "haval_helper.c" + +#undef PASSES +#define PASSES 4 +#include "haval_helper.c" + +#undef PASSES +#define PASSES 5 +#include "haval_helper.c" + +/* ====================================================================== */ + +#define API(xxx, y) \ +void \ +sph_haval ## xxx ## _ ## y ## _init(void *cc) \ +{ \ + haval_init((sph_haval_context*)cc, xxx >> 5, y); \ +} \ + \ +void \ +sph_haval ## xxx ## _ ## y (void *cc, const void *data, size_t len) \ +{ \ + haval ## y((sph_haval_context*)cc, data, len); \ +} \ + \ +void \ +sph_haval ## xxx ## _ ## y ## _close(void *cc, void *dst) \ +{ \ + haval ## y ## _close((sph_haval_context*)cc, 0, 0, dst); \ +} \ + \ +void \ +sph_haval ## xxx ## _ ## y ## addbits_and_close( \ + void *cc, unsigned ub, unsigned n, void *dst) \ +{ \ + haval ## y ## _close((sph_haval_context*)cc, ub, n, dst); \ +} + +API(128, 3) +API(128, 4) +API(128, 5) +API(160, 3) +API(160, 4) +API(160, 5) +API(192, 3) +API(192, 4) +API(192, 5) +API(224, 3) +API(224, 4) +API(224, 5) +API(256, 3) +API(256, 4) +API(256, 5) + +#define RVAL do { \ + s0 = val[0]; \ + s1 = val[1]; \ + s2 = val[2]; \ + s3 = val[3]; \ + s4 = val[4]; \ + s5 = val[5]; \ + s6 = val[6]; \ + s7 = val[7]; \ + } while (0) + +#define WVAL do { \ + val[0] = s0; \ + val[1] = s1; \ + val[2] = s2; \ + val[3] = s3; \ + val[4] = s4; \ + val[5] = s5; \ + val[6] = s6; \ + val[7] = s7; \ + } while (0) + +#define INMSG(i) msg[i] + +/* see sph_haval.h */ +void +sph_haval_3_comp(const sph_u32 msg[32], sph_u32 val[8]) +{ + DSTATE; + + RVAL; + CORE3(INMSG); + WVAL; +} + +/* see sph_haval.h */ +void +sph_haval_4_comp(const sph_u32 msg[32], sph_u32 val[8]) +{ + DSTATE; + + RVAL; + CORE4(INMSG); + WVAL; +} + +/* see sph_haval.h */ +void +sph_haval_5_comp(const sph_u32 msg[32], sph_u32 val[8]) +{ + DSTATE; + + RVAL; + CORE5(INMSG); + WVAL; +} + diff --git a/sha3/sph_haval.h b/sha3/sph_haval.h new file mode 100644 index 000000000..6334a9226 --- /dev/null +++ b/sha3/sph_haval.h @@ -0,0 +1,969 @@ +/* $Id: sph_haval.h 218 2010-06-08 17:06:34Z tp $ */ +/** +* HAVAL interface. +* +* HAVAL is actually a family of 15 hash functions, depending on whether +* the internal computation uses 3, 4 or 5 passes, and on the output +* length, which is 128, 160, 192, 224 or 256 bits. This implementation +* provides interface functions for all 15, which internally map to +* three cores (depending on the number of passes). Note that output +* lengths other than 256 bits are not obtained by a simple truncation +* of a longer result; the requested length is encoded within the +* padding data. +* +* HAVAL was published in: Yuliang Zheng, Josef Pieprzyk and Jennifer +* Seberry: "HAVAL -- a one-way hashing algorithm with variable length +* of output", Advances in Cryptology -- AUSCRYPT'92, Lecture Notes in +* Computer Science, Vol.718, pp.83-104, Springer-Verlag, 1993. +* +* This paper, and a reference implementation, are available on the +* Calyptix web site: http://labs.calyptix.com/haval.php +* +* The HAVAL reference paper is quite unclear on the data encoding +* details, i.e. endianness (both byte order within a 32-bit word, and +* word order within a message block). This implementation has been +* made compatible with the reference implementation referenced above. +* +* @warning A collision for HAVAL-128/3 (HAVAL with three passes and +* 128-bit output) has been published; this function is thus considered +* as cryptographically broken. The status for other variants is unclear; +* use only with care. +* +* ==========================(LICENSE BEGIN)============================ +* +* Copyright (c) 2007-2010 Projet RNRT SAPHIR +* +* Permission is hereby granted, free of charge, to any person obtaining +* a copy of this software and associated documentation files (the +* "Software"), to deal in the Software without restriction, including +* without limitation the rights to use, copy, modify, merge, publish, +* distribute, sublicense, and/or sell copies of the Software, and to +* permit persons to whom the Software is furnished to do so, subject to +* the following conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +* +* ===========================(LICENSE END)============================= +* +* @file sph_haval.h +* @author Thomas Pornin +*/ + +#ifndef SPH_HAVAL_H__ +#define SPH_HAVAL_H__ + +#include +#include "sph_types.h" + +/** +* Output size (in bits) for HAVAL-128/3. +*/ +#define SPH_SIZE_haval128_3 128 + +/** +* Output size (in bits) for HAVAL-128/4. +*/ +#define SPH_SIZE_haval128_4 128 + +/** +* Output size (in bits) for HAVAL-128/5. +*/ +#define SPH_SIZE_haval128_5 128 + +/** +* Output size (in bits) for HAVAL-160/3. +*/ +#define SPH_SIZE_haval160_3 160 + +/** +* Output size (in bits) for HAVAL-160/4. +*/ +#define SPH_SIZE_haval160_4 160 + +/** +* Output size (in bits) for HAVAL-160/5. +*/ +#define SPH_SIZE_haval160_5 160 + +/** +* Output size (in bits) for HAVAL-192/3. +*/ +#define SPH_SIZE_haval192_3 192 + +/** +* Output size (in bits) for HAVAL-192/4. +*/ +#define SPH_SIZE_haval192_4 192 + +/** +* Output size (in bits) for HAVAL-192/5. +*/ +#define SPH_SIZE_haval192_5 192 + +/** +* Output size (in bits) for HAVAL-224/3. +*/ +#define SPH_SIZE_haval224_3 224 + +/** +* Output size (in bits) for HAVAL-224/4. +*/ +#define SPH_SIZE_haval224_4 224 + +/** +* Output size (in bits) for HAVAL-224/5. +*/ +#define SPH_SIZE_haval224_5 224 + +/** +* Output size (in bits) for HAVAL-256/3. +*/ +#define SPH_SIZE_haval256_3 256 + +/** +* Output size (in bits) for HAVAL-256/4. +*/ +#define SPH_SIZE_haval256_4 256 + +/** +* Output size (in bits) for HAVAL-256/5. +*/ +#define SPH_SIZE_haval256_5 256 + +/** +* This structure is a context for HAVAL computations: it contains the +* intermediate values and some data from the last entered block. Once +* a HAVAL computation has been performed, the context can be reused for +* another computation. +* +* The contents of this structure are private. A running HAVAL computation +* can be cloned by copying the context (e.g. with a simple +* memcpy()). +*/ +typedef struct { +#ifndef DOXYGEN_IGNORE +unsigned char buf[128]; /* first field, for alignment */ +sph_u32 s0, s1, s2, s3, s4, s5, s6, s7; +unsigned olen, passes; +#if SPH_64 +sph_u64 count; +#else +sph_u32 count_high, count_low; +#endif +#endif +} sph_haval_context; + +/** +* Type for a HAVAL-128/3 context (identical to the common context). +*/ +typedef sph_haval_context sph_haval128_3_context; + +/** +* Type for a HAVAL-128/4 context (identical to the common context). +*/ +typedef sph_haval_context sph_haval128_4_context; + +/** +* Type for a HAVAL-128/5 context (identical to the common context). +*/ +typedef sph_haval_context sph_haval128_5_context; + +/** +* Type for a HAVAL-160/3 context (identical to the common context). +*/ +typedef sph_haval_context sph_haval160_3_context; + +/** +* Type for a HAVAL-160/4 context (identical to the common context). +*/ +typedef sph_haval_context sph_haval160_4_context; + +/** +* Type for a HAVAL-160/5 context (identical to the common context). +*/ +typedef sph_haval_context sph_haval160_5_context; + +/** +* Type for a HAVAL-192/3 context (identical to the common context). +*/ +typedef sph_haval_context sph_haval192_3_context; + +/** +* Type for a HAVAL-192/4 context (identical to the common context). +*/ +typedef sph_haval_context sph_haval192_4_context; + +/** +* Type for a HAVAL-192/5 context (identical to the common context). +*/ +typedef sph_haval_context sph_haval192_5_context; + +/** +* Type for a HAVAL-224/3 context (identical to the common context). +*/ +typedef sph_haval_context sph_haval224_3_context; + +/** +* Type for a HAVAL-224/4 context (identical to the common context). +*/ +typedef sph_haval_context sph_haval224_4_context; + +/** +* Type for a HAVAL-224/5 context (identical to the common context). +*/ +typedef sph_haval_context sph_haval224_5_context; + +/** +* Type for a HAVAL-256/3 context (identical to the common context). +*/ +typedef sph_haval_context sph_haval256_3_context; + +/** +* Type for a HAVAL-256/4 context (identical to the common context). +*/ +typedef sph_haval_context sph_haval256_4_context; + +/** +* Type for a HAVAL-256/5 context (identical to the common context). +*/ +typedef sph_haval_context sph_haval256_5_context; + +/** +* Initialize the context for HAVAL-128/3. +* +* @param cc context to initialize (pointer to a +* sph_haval128_3_context structure) +*/ +void sph_haval128_3_init(void *cc); + +/** +* Process some data bytes for HAVAL-128/3. If len is 0, +* then this function does nothing. +* +* @param cc the HAVAL-128/3 context +* @param data the input data +* @param len the input data length (in bytes) +*/ +void sph_haval128_3(void *cc, const void *data, size_t len); + +/** +* Close a HAVAL-128/3 computation. The output buffer must be wide +* enough to accomodate the result (16 bytes). The context is automatically +* reinitialized. +* +* @param cc the HAVAL-128/3 context +* @param dst the output buffer +*/ +void sph_haval128_3_close(void *cc, void *dst); + +/** +* Close a HAVAL-128/3 computation. Up to 7 extra input bits may be added +* to the input message; these are the n upper bits of +* the ub byte (i.e. the first extra bit has value 128 in +* ub, the second extra bit has value 64, and so on). Other +* bits in ub are ignored. +* +* The output buffer must be wide enough to accomodate the result (16 +* bytes). The context is automatically reinitialized. +* +* @param cc the HAVAL-128/3 context +* @param ub the extra bits +* @param n the number of extra bits (0 to 7) +* @param dst the output buffer +*/ +void sph_haval128_3_addbits_and_close(void *cc, +unsigned ub, unsigned n, void *dst); + +/** +* Initialize the context for HAVAL-128/4. +* +* @param cc context to initialize (pointer to a +* sph_haval128_4_context structure) +*/ +void sph_haval128_4_init(void *cc); + +/** +* Process some data bytes for HAVAL-128/4. If len is 0, +* then this function does nothing. +* +* @param cc the HAVAL-128/4 context +* @param data the input data +* @param len the input data length (in bytes) +*/ +void sph_haval128_4(void *cc, const void *data, size_t len); + +/** +* Close a HAVAL-128/4 computation. The output buffer must be wide +* enough to accomodate the result (16 bytes). The context is automatically +* reinitialized. +* +* @param cc the HAVAL-128/4 context +* @param dst the output buffer +*/ +void sph_haval128_4_close(void *cc, void *dst); + +/** +* Close a HAVAL-128/4 computation. Up to 7 extra input bits may be added +* to the input message; these are the n upper bits of +* the ub byte (i.e. the first extra bit has value 128 in +* ub, the second extra bit has value 64, and so on). Other +* bits in ub are ignored. +* +* The output buffer must be wide enough to accomodate the result (16 +* bytes). The context is automatically reinitialized. +* +* @param cc the HAVAL-128/4 context +* @param ub the extra bits +* @param n the number of extra bits (0 to 7) +* @param dst the output buffer +*/ +void sph_haval128_4_addbits_and_close(void *cc, +unsigned ub, unsigned n, void *dst); + +/** +* Initialize the context for HAVAL-128/5. +* +* @param cc context to initialize (pointer to a +* sph_haval128_5_context structure) +*/ +void sph_haval128_5_init(void *cc); + +/** +* Process some data bytes for HAVAL-128/5. If len is 0, +* then this function does nothing. +* +* @param cc the HAVAL-128/5 context +* @param data the input data +* @param len the input data length (in bytes) +*/ +void sph_haval128_5(void *cc, const void *data, size_t len); + +/** +* Close a HAVAL-128/5 computation. The output buffer must be wide +* enough to accomodate the result (16 bytes). The context is automatically +* reinitialized. +* +* @param cc the HAVAL-128/5 context +* @param dst the output buffer +*/ +void sph_haval128_5_close(void *cc, void *dst); + +/** +* Close a HAVAL-128/5 computation. Up to 7 extra input bits may be added +* to the input message; these are the n upper bits of +* the ub byte (i.e. the first extra bit has value 128 in +* ub, the second extra bit has value 64, and so on). Other +* bits in ub are ignored. +* +* The output buffer must be wide enough to accomodate the result (16 +* bytes). The context is automatically reinitialized. +* +* @param cc the HAVAL-128/5 context +* @param ub the extra bits +* @param n the number of extra bits (0 to 7) +* @param dst the output buffer +*/ +void sph_haval128_5_addbits_and_close(void *cc, +unsigned ub, unsigned n, void *dst); + +/** +* Initialize the context for HAVAL-160/3. +* +* @param cc context to initialize (pointer to a +* sph_haval160_3_context structure) +*/ +void sph_haval160_3_init(void *cc); + +/** +* Process some data bytes for HAVAL-160/3. If len is 0, +* then this function does nothing. +* +* @param cc the HAVAL-160/3 context +* @param data the input data +* @param len the input data length (in bytes) +*/ +void sph_haval160_3(void *cc, const void *data, size_t len); + +/** +* Close a HAVAL-160/3 computation. The output buffer must be wide +* enough to accomodate the result (20 bytes). The context is automatically +* reinitialized. +* +* @param cc the HAVAL-160/3 context +* @param dst the output buffer +*/ +void sph_haval160_3_close(void *cc, void *dst); + +/** +* Close a HAVAL-160/3 computation. Up to 7 extra input bits may be added +* to the input message; these are the n upper bits of +* the ub byte (i.e. the first extra bit has value 128 in +* ub, the second extra bit has value 64, and so on). Other +* bits in ub are ignored. +* +* The output buffer must be wide enough to accomodate the result (20 +* bytes). The context is automatically reinitialized. +* +* @param cc the HAVAL-160/3 context +* @param ub the extra bits +* @param n the number of extra bits (0 to 7) +* @param dst the output buffer +*/ +void sph_haval160_3_addbits_and_close(void *cc, +unsigned ub, unsigned n, void *dst); + +/** +* Initialize the context for HAVAL-160/4. +* +* @param cc context to initialize (pointer to a +* sph_haval160_4_context structure) +*/ +void sph_haval160_4_init(void *cc); + +/** +* Process some data bytes for HAVAL-160/4. If len is 0, +* then this function does nothing. +* +* @param cc the HAVAL-160/4 context +* @param data the input data +* @param len the input data length (in bytes) +*/ +void sph_haval160_4(void *cc, const void *data, size_t len); + +/** +* Close a HAVAL-160/4 computation. The output buffer must be wide +* enough to accomodate the result (20 bytes). The context is automatically +* reinitialized. +* +* @param cc the HAVAL-160/4 context +* @param dst the output buffer +*/ +void sph_haval160_4_close(void *cc, void *dst); + +/** +* Close a HAVAL-160/4 computation. Up to 7 extra input bits may be added +* to the input message; these are the n upper bits of +* the ub byte (i.e. the first extra bit has value 128 in +* ub, the second extra bit has value 64, and so on). Other +* bits in ub are ignored. +* +* The output buffer must be wide enough to accomodate the result (20 +* bytes). The context is automatically reinitialized. +* +* @param cc the HAVAL-160/4 context +* @param ub the extra bits +* @param n the number of extra bits (0 to 7) +* @param dst the output buffer +*/ +void sph_haval160_3_addbits_and_close(void *cc, +unsigned ub, unsigned n, void *dst); + +/** +* Initialize the context for HAVAL-160/5. +* +* @param cc context to initialize (pointer to a +* sph_haval160_5_context structure) +*/ +void sph_haval160_5_init(void *cc); + +/** +* Process some data bytes for HAVAL-160/5. If len is 0, +* then this function does nothing. +* +* @param cc the HAVAL-160/5 context +* @param data the input data +* @param len the input data length (in bytes) +*/ +void sph_haval160_5(void *cc, const void *data, size_t len); + +/** +* Close a HAVAL-160/5 computation. The output buffer must be wide +* enough to accomodate the result (20 bytes). The context is automatically +* reinitialized. +* +* @param cc the HAVAL-160/5 context +* @param dst the output buffer +*/ +void sph_haval160_5_close(void *cc, void *dst); + +/** +* Close a HAVAL-160/5 computation. Up to 7 extra input bits may be added +* to the input message; these are the n upper bits of +* the ub byte (i.e. the first extra bit has value 128 in +* ub, the second extra bit has value 64, and so on). Other +* bits in ub are ignored. +* +* The output buffer must be wide enough to accomodate the result (20 +* bytes). The context is automatically reinitialized. +* +* @param cc the HAVAL-160/5 context +* @param ub the extra bits +* @param n the number of extra bits (0 to 7) +* @param dst the output buffer +*/ +void sph_haval160_5_addbits_and_close(void *cc, +unsigned ub, unsigned n, void *dst); + +/** +* Initialize the context for HAVAL-192/3. +* +* @param cc context to initialize (pointer to a +* sph_haval192_3_context structure) +*/ +void sph_haval192_3_init(void *cc); + +/** +* Process some data bytes for HAVAL-192/3. If len is 0, +* then this function does nothing. +* +* @param cc the HAVAL-192/3 context +* @param data the input data +* @param len the input data length (in bytes) +*/ +void sph_haval192_3(void *cc, const void *data, size_t len); + +/** +* Close a HAVAL-192/3 computation. The output buffer must be wide +* enough to accomodate the result (24 bytes). The context is automatically +* reinitialized. +* +* @param cc the HAVAL-192/3 context +* @param dst the output buffer +*/ +void sph_haval192_3_close(void *cc, void *dst); + +/** +* Close a HAVAL-192/3 computation. Up to 7 extra input bits may be added +* to the input message; these are the n upper bits of +* the ub byte (i.e. the first extra bit has value 128 in +* ub, the second extra bit has value 64, and so on). Other +* bits in ub are ignored. +* +* The output buffer must be wide enough to accomodate the result (24 +* bytes). The context is automatically reinitialized. +* +* @param cc the HAVAL-192/3 context +* @param ub the extra bits +* @param n the number of extra bits (0 to 7) +* @param dst the output buffer +*/ +void sph_haval192_3_addbits_and_close(void *cc, +unsigned ub, unsigned n, void *dst); + +/** +* Initialize the context for HAVAL-192/4. +* +* @param cc context to initialize (pointer to a +* sph_haval192_4_context structure) +*/ +void sph_haval192_4_init(void *cc); + +/** +* Process some data bytes for HAVAL-192/4. If len is 0, +* then this function does nothing. +* +* @param cc the HAVAL-192/4 context +* @param data the input data +* @param len the input data length (in bytes) +*/ +void sph_haval192_4(void *cc, const void *data, size_t len); + +/** +* Close a HAVAL-192/4 computation. The output buffer must be wide +* enough to accomodate the result (24 bytes). The context is automatically +* reinitialized. +* +* @param cc the HAVAL-192/4 context +* @param dst the output buffer +*/ +void sph_haval192_4_close(void *cc, void *dst); + +/** +* Close a HAVAL-192/4 computation. Up to 7 extra input bits may be added +* to the input message; these are the n upper bits of +* the ub byte (i.e. the first extra bit has value 128 in +* ub, the second extra bit has value 64, and so on). Other +* bits in ub are ignored. +* +* The output buffer must be wide enough to accomodate the result (24 +* bytes). The context is automatically reinitialized. +* +* @param cc the HAVAL-192/4 context +* @param ub the extra bits +* @param n the number of extra bits (0 to 7) +* @param dst the output buffer +*/ +void sph_haval192_4_addbits_and_close(void *cc, +unsigned ub, unsigned n, void *dst); + +/** +* Initialize the context for HAVAL-192/5. +* +* @param cc context to initialize (pointer to a +* sph_haval192_5_context structure) +*/ +void sph_haval192_5_init(void *cc); + +/** +* Process some data bytes for HAVAL-192/5. If len is 0, +* then this function does nothing. +* +* @param cc the HAVAL-192/5 context +* @param data the input data +* @param len the input data length (in bytes) +*/ +void sph_haval192_5(void *cc, const void *data, size_t len); + +/** +* Close a HAVAL-192/5 computation. The output buffer must be wide +* enough to accomodate the result (24 bytes). The context is automatically +* reinitialized. +* +* @param cc the HAVAL-192/5 context +* @param dst the output buffer +*/ +void sph_haval192_5_close(void *cc, void *dst); + +/** +* Close a HAVAL-192/5 computation. Up to 7 extra input bits may be added +* to the input message; these are the n upper bits of +* the ub byte (i.e. the first extra bit has value 128 in +* ub, the second extra bit has value 64, and so on). Other +* bits in ub are ignored. +* +* The output buffer must be wide enough to accomodate the result (24 +* bytes). The context is automatically reinitialized. +* +* @param cc the HAVAL-192/5 context +* @param ub the extra bits +* @param n the number of extra bits (0 to 7) +* @param dst the output buffer +*/ +void sph_haval192_5_addbits_and_close(void *cc, +unsigned ub, unsigned n, void *dst); + +/** +* Initialize the context for HAVAL-224/3. +* +* @param cc context to initialize (pointer to a +* sph_haval224_3_context structure) +*/ +void sph_haval224_3_init(void *cc); + +/** +* Process some data bytes for HAVAL-224/3. If len is 0, +* then this function does nothing. +* +* @param cc the HAVAL-224/3 context +* @param data the input data +* @param len the input data length (in bytes) +*/ +void sph_haval224_3(void *cc, const void *data, size_t len); + +/** +* Close a HAVAL-224/3 computation. The output buffer must be wide +* enough to accomodate the result (28 bytes). The context is automatically +* reinitialized. +* +* @param cc the HAVAL-224/3 context +* @param dst the output buffer +*/ +void sph_haval224_3_close(void *cc, void *dst); + +/** +* Close a HAVAL-224/3 computation. Up to 7 extra input bits may be added +* to the input message; these are the n upper bits of +* the ub byte (i.e. the first extra bit has value 128 in +* ub, the second extra bit has value 64, and so on). Other +* bits in ub are ignored. +* +* The output buffer must be wide enough to accomodate the result (28 +* bytes). The context is automatically reinitialized. +* +* @param cc the HAVAL-224/3 context +* @param ub the extra bits +* @param n the number of extra bits (0 to 7) +* @param dst the output buffer +*/ +void sph_haval224_3_addbits_and_close(void *cc, +unsigned ub, unsigned n, void *dst); + +/** +* Initialize the context for HAVAL-224/4. +* +* @param cc context to initialize (pointer to a +* sph_haval224_4_context structure) +*/ +void sph_haval224_4_init(void *cc); + +/** +* Process some data bytes for HAVAL-224/4. If len is 0, +* then this function does nothing. +* +* @param cc the HAVAL-224/4 context +* @param data the input data +* @param len the input data length (in bytes) +*/ +void sph_haval224_4(void *cc, const void *data, size_t len); + +/** +* Close a HAVAL-224/4 computation. The output buffer must be wide +* enough to accomodate the result (28 bytes). The context is automatically +* reinitialized. +* +* @param cc the HAVAL-224/4 context +* @param dst the output buffer +*/ +void sph_haval224_4_close(void *cc, void *dst); + +/** +* Close a HAVAL-224/4 computation. Up to 7 extra input bits may be added +* to the input message; these are the n upper bits of +* the ub byte (i.e. the first extra bit has value 128 in +* ub, the second extra bit has value 64, and so on). Other +* bits in ub are ignored. +* +* The output buffer must be wide enough to accomodate the result (28 +* bytes). The context is automatically reinitialized. +* +* @param cc the HAVAL-224/4 context +* @param ub the extra bits +* @param n the number of extra bits (0 to 7) +* @param dst the output buffer +*/ +void sph_haval224_4_addbits_and_close(void *cc, +unsigned ub, unsigned n, void *dst); + +/** +* Initialize the context for HAVAL-224/5. +* +* @param cc context to initialize (pointer to a +* sph_haval224_5_context structure) +*/ +void sph_haval224_5_init(void *cc); + +/** +* Process some data bytes for HAVAL-224/5. If len is 0, +* then this function does nothing. +* +* @param cc the HAVAL-224/5 context +* @param data the input data +* @param len the input data length (in bytes) +*/ +void sph_haval224_5(void *cc, const void *data, size_t len); + +/** +* Close a HAVAL-224/5 computation. The output buffer must be wide +* enough to accomodate the result (28 bytes). The context is automatically +* reinitialized. +* +* @param cc the HAVAL-224/5 context +* @param dst the output buffer +*/ +void sph_haval224_5_close(void *cc, void *dst); + +/** +* Close a HAVAL-224/5 computation. Up to 7 extra input bits may be added +* to the input message; these are the n upper bits of +* the ub byte (i.e. the first extra bit has value 128 in +* ub, the second extra bit has value 64, and so on). Other +* bits in ub are ignored. +* +* The output buffer must be wide enough to accomodate the result (28 +* bytes). The context is automatically reinitialized. +* +* @param cc the HAVAL-224/5 context +* @param ub the extra bits +* @param n the number of extra bits (0 to 7) +* @param dst the output buffer +*/ +void sph_haval224_5_addbits_and_close(void *cc, +unsigned ub, unsigned n, void *dst); + +/** +* Initialize the context for HAVAL-256/3. +* +* @param cc context to initialize (pointer to a +* sph_haval256_3_context structure) +*/ +void sph_haval256_3_init(void *cc); + +/** +* Process some data bytes for HAVAL-256/3. If len is 0, +* then this function does nothing. +* +* @param cc the HAVAL-256/3 context +* @param data the input data +* @param len the input data length (in bytes) +*/ +void sph_haval256_3(void *cc, const void *data, size_t len); + +/** +* Close a HAVAL-256/3 computation. The output buffer must be wide +* enough to accomodate the result (32 bytes). The context is automatically +* reinitialized. +* +* @param cc the HAVAL-256/3 context +* @param dst the output buffer +*/ +void sph_haval256_3_close(void *cc, void *dst); + +/** +* Close a HAVAL-256/3 computation. Up to 7 extra input bits may be added +* to the input message; these are the n upper bits of +* the ub byte (i.e. the first extra bit has value 128 in +* ub, the second extra bit has value 64, and so on). Other +* bits in ub are ignored. +* +* The output buffer must be wide enough to accomodate the result (32 +* bytes). The context is automatically reinitialized. +* +* @param cc the HAVAL-256/3 context +* @param ub the extra bits +* @param n the number of extra bits (0 to 7) +* @param dst the output buffer +*/ +void sph_haval256_3_addbits_and_close(void *cc, +unsigned ub, unsigned n, void *dst); + +/** +* Initialize the context for HAVAL-256/4. +* +* @param cc context to initialize (pointer to a +* sph_haval256_4_context structure) +*/ +void sph_haval256_4_init(void *cc); + +/** +* Process some data bytes for HAVAL-256/4. If len is 0, +* then this function does nothing. +* +* @param cc the HAVAL-256/4 context +* @param data the input data +* @param len the input data length (in bytes) +*/ +void sph_haval256_4(void *cc, const void *data, size_t len); + +/** +* Close a HAVAL-256/4 computation. The output buffer must be wide +* enough to accomodate the result (32 bytes). The context is automatically +* reinitialized. +* +* @param cc the HAVAL-256/4 context +* @param dst the output buffer +*/ +void sph_haval256_4_close(void *cc, void *dst); + +/** +* Close a HAVAL-256/4 computation. Up to 7 extra input bits may be added +* to the input message; these are the n upper bits of +* the ub byte (i.e. the first extra bit has value 128 in +* ub, the second extra bit has value 64, and so on). Other +* bits in ub are ignored. +* +* The output buffer must be wide enough to accomodate the result (32 +* bytes). The context is automatically reinitialized. +* +* @param cc the HAVAL-256/4 context +* @param ub the extra bits +* @param n the number of extra bits (0 to 7) +* @param dst the output buffer +*/ +void sph_haval256_4_addbits_and_close(void *cc, +unsigned ub, unsigned n, void *dst); + +/** +* Initialize the context for HAVAL-256/5. +* +* @param cc context to initialize (pointer to a +* sph_haval256_5_context structure) +*/ +void sph_haval256_5_init(void *cc); + +/** +* Process some data bytes for HAVAL-256/5. If len is 0, +* then this function does nothing. +* +* @param cc the HAVAL-256/5 context +* @param data the input data +* @param len the input data length (in bytes) +*/ +void sph_haval256_5(void *cc, const void *data, size_t len); + +/** +* Close a HAVAL-256/5 computation. The output buffer must be wide +* enough to accomodate the result (32 bytes). The context is automatically +* reinitialized. +* +* @param cc the HAVAL-256/5 context +* @param dst the output buffer +*/ +void sph_haval256_5_close(void *cc, void *dst); + +/** +* Close a HAVAL-256/5 computation. Up to 7 extra input bits may be added +* to the input message; these are the n upper bits of +* the ub byte (i.e. the first extra bit has value 128 in +* ub, the second extra bit has value 64, and so on). Other +* bits in ub are ignored. +* +* The output buffer must be wide enough to accomodate the result (32 +* bytes). The context is automatically reinitialized. +* +* @param cc the HAVAL-256/5 context +* @param ub the extra bits +* @param n the number of extra bits (0 to 7) +* @param dst the output buffer +*/ +void sph_haval256_5_addbits_and_close(void *cc, +unsigned ub, unsigned n, void *dst); + +/** +* Apply the HAVAL compression function on the provided data. The +* msg parameter contains the 32 32-bit input blocks, +* as numerical values (hence after the little-endian decoding). The +* val parameter contains the 8 32-bit input blocks for +* the compression function; the output is written in place in this +* array. This function uses three internal passes. +* +* @param msg the message block (32 values) +* @param val the function 256-bit input and output +*/ +void sph_haval_3_comp(const sph_u32 msg[32], sph_u32 val[8]); + +/** +* Apply the HAVAL compression function on the provided data. The +* msg parameter contains the 32 32-bit input blocks, +* as numerical values (hence after the little-endian decoding). The +* val parameter contains the 8 32-bit input blocks for +* the compression function; the output is written in place in this +* array. This function uses four internal passes. +* +* @param msg the message block (32 values) +* @param val the function 256-bit input and output +*/ +void sph_haval_4_comp(const sph_u32 msg[32], sph_u32 val[8]); + +/** +* Apply the HAVAL compression function on the provided data. The +* msg parameter contains the 32 32-bit input blocks, +* as numerical values (hence after the little-endian decoding). The +* val parameter contains the 8 32-bit input blocks for +* the compression function; the output is written in place in this +* array. This function uses five internal passes. +* +* @param msg the message block (32 values) +* @param val the function 256-bit input and output +*/ +void sph_haval_5_comp(const sph_u32 msg[32], sph_u32 val[8]); + +#endif diff --git a/sha3/sph_types.h b/sha3/sph_types.h index 7295b0b37..6c8ecf4b6 100644 --- a/sha3/sph_types.h +++ b/sha3/sph_types.h @@ -930,14 +930,24 @@ typedef long long sph_s64; */ #define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) +#ifdef _MSC_VER +#define SPH_ROTL32(x, n) _rotl(x, n) +#define SPH_ROTR32(x, n) _rotr(x, n) +#else #define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n)))) #define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) +#endif #if SPH_64 #define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF)) +#ifdef _MSC_VER +#define SPH_ROTL64(x, n) _rotl64(x, n) +#define SPH_ROTR64(x, n) _rotr64(x, n) +#else #define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n)))) #define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n))) +#endif #endif diff --git a/util.c b/util.c index 362b9f8f1..afef6fd82 100644 --- a/util.c +++ b/util.c @@ -1841,7 +1841,7 @@ static bool stratum_benchdata(json_t *result, json_t *params, int thr_id) #endif #ifdef _MSC_VER - sprintf(compiler, "VC++ %d\n", _MSC_VER / 100); + sprintf(compiler, "MSVC %d\n", msver()); #elif defined(__clang__) sprintf(compiler, "clang %s\n", __clang_version__); #elif defined(__GNUC__)