From b9712854ced2d68d93ab8dc4da15e5503583bc7b Mon Sep 17 00:00:00 2001
From: Tanguy Pruvot <tanguy.pruvot@gmail.com>
Date: Sun, 22 May 2016 06:37:53 +0200
Subject: [PATCH] small changes in --version output

+ add sph haval which was half there in the vstudio project
---
 compat.h                 |  11 +
 compat/cpuminer-config.h |   6 +-
 cpu-miner.c              |  43 +-
 cpuminer.vcxproj         |   1 +
 cpuminer.vcxproj.filters |   3 +
 sha3/haval_helper.c      | 195 ++++++++
 sha3/sph_haval.c         | 975 +++++++++++++++++++++++++++++++++++++++
 sha3/sph_haval.h         | 969 ++++++++++++++++++++++++++++++++++++++
 sha3/sph_types.h         |  10 +
 util.c                   |   2 +-
 10 files changed, 2201 insertions(+), 14 deletions(-)
 create mode 100644 sha3/haval_helper.c
 create mode 100644 sha3/sph_haval.c
 create mode 100644 sha3/sph_haval.h

diff --git a/compat.h b/compat.h
index 124bc40aa..9f2611fe4 100644
--- a/compat.h
+++ b/compat.h
@@ -52,6 +52,17 @@ static __inline int setpriority(int which, int who, int prio)
 #define _ALIGN(x) __declspec(align(x))
 typedef int ssize_t;
 
+static __inline int msver(void) { /* Visual Studio release year of the compiler in use */
+	switch (_MSC_VER) {
+	case 1500: return 2008;
+	case 1600: return 2010;
+	case 1700: return 2012;
+	case 1800: return 2013;
+	case 1900: return 2015;
+	default: return (_MSC_VER/100); /* unlisted compiler: fall back to _MSC_VER / 100 */
+	}
+}
+
 #include <stdlib.h>
 // This static var is made to be compatible with linux/mingw (no free on string result)
 // This is not thread safe but we only use that once on process start
diff --git a/compat/cpuminer-config.h b/compat/cpuminer-config.h
index e05194d94..a20aa6d72 100644
--- a/compat/cpuminer-config.h
+++ b/compat/cpuminer-config.h
@@ -94,7 +94,7 @@
 #define PACKAGE_NAME "cpuminer-multi"
 
 /* Define to the full name and version of this package. */
-#define PACKAGE_STRING "cpuminer-multi 1.2"
+#define PACKAGE_STRING "cpuminer-multi 1.3"
 
 /* Define to the one symbol short name of this package. */
 #define PACKAGE_TARNAME "cpuminer-multi"
@@ -103,7 +103,7 @@
 #define PACKAGE_URL ""
 
 /* Define to the version of this package. */
-#define PACKAGE_VERSION "1.2"
+#define PACKAGE_VERSION "1.3"
 
 /* If using the C implementation of alloca, define if you know the
    direction of stack growth for your system; otherwise it will be
@@ -132,7 +132,7 @@
 #define USE_XOP 1
 
 /* Version number of package */
-#define VERSION "1.2"
+#define VERSION "1.3"
 
 /* Define to `unsigned int' if <sys/types.h> does not define. */
 /* #undef size_t */
diff --git a/cpu-miner.c b/cpu-miner.c
index 3abfb9665..68f4366e0 100644
--- a/cpu-miner.c
+++ b/cpu-miner.c
@@ -2575,15 +2575,38 @@ static void *stratum_thread(void *userdata)
 
 static void show_version_and_exit(void)
 {
-	printf(" built on " __DATE__
+	printf(" built "
 #ifdef _MSC_VER
-	 " with VC++ 2013\n");
+	 "with VC++ %d", msver());
 #elif defined(__GNUC__)
-	 " with GCC");
-	printf(" %d.%d.%d\n", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
+	 "with GCC ");
+	printf("%d.%d.%d", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__);
 #endif
+	printf(" the " __DATE__ "\n");
 
-	printf(" features:"
+	// Note: a binary built with cpu opts (instruction sets) will not run on
+	// older cpus. XOP is tested before AVX: compilers enabling XOP also define __AVX__.
+	printf(" compiled for"
+#if defined(__ARM_NEON__)
+		" ARM NEON"
+#elif defined(__AVX2__)
+		" AVX2"
+#elif defined(__XOP__)
+		" XOP"
+#elif defined(__AVX__)
+		" AVX"
+#elif defined(__SSE4_1__)
+		" SSE4"
+#elif defined(_M_X64) || defined(__x86_64__)
+		" x64"
+#elif defined(_M_IX86) || defined(__i386__)
+		" x86"
+#else
+		" general use"
+#endif
+		"\n");
+
+	printf(" config features:"
 #if defined(USE_ASM) && defined(__i386__)
 		" i386"
 #endif
@@ -2593,15 +2616,15 @@ static void show_version_and_exit(void)
 #if defined(USE_ASM) && (defined(__i386__) || defined(__x86_64__))
 		" SSE2"
 #endif
+#if defined(__x86_64__) && defined(USE_XOP)
+		" XOP"
+#endif
 #if defined(__x86_64__) && defined(USE_AVX)
 		" AVX"
 #endif
 #if defined(__x86_64__) && defined(USE_AVX2)
 		" AVX2"
 #endif
-#if defined(__x86_64__) && defined(USE_XOP)
-		" XOP"
-#endif
 #if defined(USE_ASM) && defined(__arm__) && defined(__APCS_32__)
 		" ARM"
 #if defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) || \
@@ -3133,8 +3156,8 @@ static int thread_create(struct thr_info *thr, void* func)
 
 static void show_credits()
 {
-	printf("** " PACKAGE_NAME " " PACKAGE_VERSION " by Tanguy Pruvot (tpruvot@github) **\n");
-	printf("BTC donation address: 1FhDPLPpw18X4srecguG3MxJYe4a1JsZnd\n\n");
+	printf("** " PACKAGE_NAME " " PACKAGE_VERSION " by tpruvot@github **\n");
+	printf("BTC donation address: 1FhDPLPpw18X4srecguG3MxJYe4a1JsZnd (tpruvot)\n\n");
 }
 
 void get_defconfig_path(char *out, size_t bufsize, char *argv0);
diff --git a/cpuminer.vcxproj b/cpuminer.vcxproj
index aeffe1e80..afc1467e1 100644
--- a/cpuminer.vcxproj
+++ b/cpuminer.vcxproj
@@ -268,6 +268,7 @@
     <ClCompile Include="sha3\sph_hamsi_helper.c">
       <ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</ExcludedFromBuild>
     </ClCompile>
+    <ClCompile Include="sha3\sph_haval.c" />
     <ClCompile Include="sha3\sph_whirlpool.c" />
     <ClCompile Include="sha3\sph_gost.c" />
     <ClCompile Include="sha3\md_helper.c">
diff --git a/cpuminer.vcxproj.filters b/cpuminer.vcxproj.filters
index 736177fa1..a7b77cbe5 100644
--- a/cpuminer.vcxproj.filters
+++ b/cpuminer.vcxproj.filters
@@ -31,6 +31,9 @@
     <ClCompile Include="sha3\sph_hamsi_helper.c">
       <Filter>sph</Filter>
     </ClCompile>
+    <ClCompile Include="sha3\sph_haval.c">
+      <Filter>sph</Filter>
+    </ClCompile>
     <ClCompile Include="sha3\sph_hefty1.c">
       <Filter>sph</Filter>
     </ClCompile>
diff --git a/sha3/haval_helper.c b/sha3/haval_helper.c
new file mode 100644
index 000000000..c5080f75d
--- /dev/null
+++ b/sha3/haval_helper.c
@@ -0,0 +1,195 @@
+/* $Id: haval_helper.c 218 2010-06-08 17:06:34Z tp $ */
+/*
+ * Helper code, included (three times !) by HAVAL implementation.
+ *
+ * TODO: try to merge this with md_helper.c.
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#undef SPH_XCAT
+#define SPH_XCAT(a, b)    SPH_XCAT_(a, b)
+#undef SPH_XCAT_
+#define SPH_XCAT_(a, b)   a ## b
+
+static void /* buffered update: absorbs len bytes through sc->buf, one 128-byte block at a time */
+#ifdef SPH_UPTR
+SPH_XCAT(SPH_XCAT(haval, PASSES), _short)
+#else
+SPH_XCAT(haval, PASSES)
+#endif
+(sph_haval_context *sc, const void *data, size_t len)
+{
+	unsigned current;
+	/* bytes already pending in sc->buf: the running byte count modulo 128 */
+#if SPH_64
+	current = (unsigned)sc->count & 127U;
+#else
+	current = (unsigned)sc->count_low & 127U;
+#endif
+	while (len > 0) {
+		unsigned clen;
+#if !SPH_64
+		sph_u32 clow, clow2;
+#endif
+		/* take at most the free space left in the block buffer */
+		clen = 128U - current;
+		if (clen > len)
+			clen = (unsigned) len;
+		memcpy(sc->buf + current, data, clen);
+		data = (const unsigned char *)data + clen;
+		current += clen;
+		len -= clen;
+		if (current == 128U) {
+			DSTATE;
+			IN_PREPARE(sc->buf);
+			/* NOTE(review): DSTATE/RSTATE/WSTATE, IN_PREPARE and INW come from the including file; presumably they declare, load and store the state words and expose the input words -- confirm there */
+			RSTATE;
+			SPH_XCAT(CORE, PASSES)(INW);
+			WSTATE;
+			current = 0;
+		}
+#if SPH_64
+		sc->count += clen;
+#else
+		clow = sc->count_low;
+		clow2 = SPH_T32(clow + clen);
+		sc->count_low = clow2;
+		if (clow2 < clow) /* low word wrapped around: carry into the high word */
+			sc->count_high ++;
+#endif
+	}
+}
+
+#ifdef SPH_UPTR
+static void /* bulk update: runs the core on whole 128-byte blocks taken from data, using the _short variant for the rest */
+SPH_XCAT(haval, PASSES)(sph_haval_context *sc, const void *data, size_t len)
+{
+	unsigned current;
+	size_t orig_len;
+#if !SPH_64
+	sph_u32 clow, clow2;
+#endif
+	DSTATE;
+	/* inputs shorter than 256 bytes go entirely through the buffered variant */
+	if (len < 256U) {
+		SPH_XCAT(SPH_XCAT(haval, PASSES), _short)(sc, data, len);
+		return;
+	}
+#if SPH_64
+	current = (unsigned)sc->count & 127U;
+#else
+	current = (unsigned)sc->count_low & 127U;
+#endif
+	if (current > 0) {
+		unsigned clen;
+		/* complete the partially filled buffer so the loop below starts on a block boundary */
+		clen = 128U - current;
+		SPH_XCAT(SPH_XCAT(haval, PASSES), _short)(sc, data, clen);
+		data = (const unsigned char *)data + clen;
+		len -= clen;
+	}
+#if !SPH_UNALIGNED
+	if (((SPH_UPTR)data & 3U) != 0) { /* data is not 4-byte aligned: use the buffered variant instead */
+		SPH_XCAT(SPH_XCAT(haval, PASSES), _short)(sc, data, len);
+		return;
+	}
+#endif
+	orig_len = len;
+	RSTATE;
+	while (len >= 128U) {
+		IN_PREPARE(data);
+		/* the block is handed over from the caller's data pointer, not from sc->buf */
+		SPH_XCAT(CORE, PASSES)(INW);
+		data = (const unsigned char *)data + 128U;
+		len -= 128U;
+	}
+	WSTATE;
+	if (len > 0) /* keep the trailing partial block in sc->buf for a later call */
+		memcpy(sc->buf, data, len);
+#if SPH_64
+	sc->count += (sph_u64)orig_len;
+#else
+	clow = sc->count_low;
+	clow2 = SPH_T32(clow + orig_len);
+	sc->count_low = clow2;
+	if (clow2 < clow)
+		sc->count_high ++;
+	orig_len >>= 12; /* NOTE(review): the three shifts total 32 bits; presumably split so no single shift equals the width of a 32-bit size_t */
+	orig_len >>= 10;
+	orig_len >>= 10;
+	sc->count_high += orig_len;
+#endif
+}
+#endif
+
+static void /* finalization: pads the last block, appends the 10-byte trailer, then calls haval_out() and haval_init() */
+SPH_XCAT(SPH_XCAT(haval, PASSES), _close)(sph_haval_context *sc,
+	unsigned ub, unsigned n, void *dst)
+{
+	unsigned current;
+	DSTATE;
+	/* pending byte count; the byte stored below combines a single set bit (0x01 << n) with the top n bits of ub */
+#if SPH_64
+	current = (unsigned)sc->count & 127U;
+#else
+	current = (unsigned)sc->count_low & 127U;
+#endif
+	sc->buf[current ++] = (0x01 << n) | ((ub & 0xFF) >> (8 - n));
+	RSTATE;
+	if (current > 118U) {
+		memset(sc->buf + current, 0, 128U - current);
+		/* bytes 118..127 are needed for the trailer and are already in use: process this block first */
+		do {
+			IN_PREPARE(sc->buf);
+
+			SPH_XCAT(CORE, PASSES)(INW);
+		} while (0);
+		current = 0;
+	}
+	memset(sc->buf + current, 0, 118U - current);
+	sc->buf[118] = 0x01 | (PASSES << 3); /* value 1 in the low bits, pass count from bit 3 upward */
+	sc->buf[119] = sc->olen << 3; /* sc->olen shifted left by 3 */
+#if SPH_64
+	sph_enc64le_aligned(sc->buf + 120, SPH_T64(sc->count << 3));
+#else
+	sph_enc32le_aligned(sc->buf + 120, SPH_T32(sc->count_low << 3));
+	sph_enc32le_aligned(sc->buf + 124,
+		SPH_T32((sc->count_high << 3) | (sc->count_low >> 29)));
+#endif
+	do {
+		IN_PREPARE(sc->buf);
+
+		SPH_XCAT(CORE, PASSES)(INW);
+	} while (0);
+	/* NOTE(review): WSTATE presumably writes the working state back into sc before the output step -- confirm in the including file */
+	WSTATE;
+	haval_out(sc, dst);
+	haval_init(sc, sc->olen, sc->passes);
+}
+
diff --git a/sha3/sph_haval.c b/sha3/sph_haval.c
new file mode 100644
index 000000000..90922b638
--- /dev/null
+++ b/sha3/sph_haval.c
@@ -0,0 +1,975 @@
+/* $Id: haval.c 227 2010-06-16 17:28:38Z tp $ */
+/*
+ * HAVAL implementation.
+ *
+ * The HAVAL reference paper is of questionable clarity with regards to
+ * some details such as endianness of bits within a byte, bytes within
+ * a 32-bit word, or the actual ordering of words within a stream of
+ * words. This implementation has been made compatible with the reference
+ * implementation available on: http://labs.calyptix.com/haval.php
+ *
+ * ==========================(LICENSE BEGIN)============================
+ *
+ * Copyright (c) 2007-2010  Projet RNRT SAPHIR
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * ===========================(LICENSE END)=============================
+ *
+ * @author   Thomas Pornin <thomas.pornin@cryptolog.com>
+ */
+
+#include <stddef.h>
+#include <string.h>
+
+#include "sph_haval.h"
+
+#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_HAVAL
+#define SPH_SMALL_FOOTPRINT_HAVAL   1
+#endif
+
+/*
+ * Basic definition from the reference paper.
+ *
+#define F1(x6, x5, x4, x3, x2, x1, x0) \
+	(((x1) & (x4)) ^ ((x2) & (x5)) ^ ((x3) & (x6)) ^ ((x0) & (x1)) ^ (x0))
+ *
+ */
+
+#define F1(x6, x5, x4, x3, x2, x1, x0) \
+	(((x1) & ((x0) ^ (x4))) ^ ((x2) & (x5)) ^ ((x3) & (x6)) ^ (x0))
+
+/*
+ * Basic definition from the reference paper.
+ *
+#define F2(x6, x5, x4, x3, x2, x1, x0) \
+	(((x1) & (x2) & (x3)) ^ ((x2) & (x4) & (x5)) ^ ((x1) & (x2)) \
+	^ ((x1) & (x4)) ^ ((x2) & (x6)) ^ ((x3) & (x5)) \
+	^ ((x4) & (x5)) ^ ((x0) & (x2)) ^ (x0))
+ * The macro below is an equivalent factored form; every argument is parenthesized.
+ */
+
+#define F2(x6, x5, x4, x3, x2, x1, x0) \
+	(((x2) & (((x1) & ~(x3)) ^ ((x4) & (x5)) ^ (x6) ^ (x0))) \
+	^ ((x4) & ((x1) ^ (x5))) ^ (((x3) & (x5)) ^ (x0)))
+
+/*
+ * Basic definition from the reference paper.
+ *
+#define F3(x6, x5, x4, x3, x2, x1, x0) \
+	(((x1) & (x2) & (x3)) ^ ((x1) & (x4)) ^ ((x2) & (x5)) \
+	^ ((x3) & (x6)) ^ ((x0) & (x3)) ^ (x0))
+ *
+ */
+
+#define F3(x6, x5, x4, x3, x2, x1, x0) \
+	(((x3) & (((x1) & (x2)) ^ (x6) ^ (x0))) \
+	^ ((x1) & (x4)) ^ ((x2) & (x5)) ^ (x0))
+
+/*
+ * Basic definition from the reference paper.
+ *
+#define F4(x6, x5, x4, x3, x2, x1, x0) \
+	(((x1) & (x2) & (x3)) ^ ((x2) & (x4) & (x5)) ^ ((x3) & (x4) & (x6)) \
+	^ ((x1) & (x4)) ^ ((x2) & (x6)) ^ ((x3) & (x4)) ^ ((x3) & (x5)) \
+	^ ((x3) & (x6)) ^ ((x4) & (x5)) ^ ((x4) & (x6)) ^ ((x0) & (x4)) ^ (x0))
+ *
+ */
+
+#define F4(x6, x5, x4, x3, x2, x1, x0) \
+	(((x3) & (((x1) & (x2)) ^ ((x4) | (x6)) ^ (x5))) \
+	^ ((x4) & ((~(x2) & (x5)) ^ (x1) ^ (x6) ^ (x0))) \
+	^ ((x2) & (x6)) ^ (x0))
+
+/*
+ * Basic definition from the reference paper.
+ *
+#define F5(x6, x5, x4, x3, x2, x1, x0) \
+	(((x1) & (x4)) ^ ((x2) & (x5)) ^ ((x3) & (x6)) \
+	^ ((x0) & (x1) & (x2) & (x3)) ^ ((x0) & (x5)) ^ (x0))
+ *
+ */
+
+#define F5(x6, x5, x4, x3, x2, x1, x0) \
+	(((x0) & ~(((x1) & (x2) & (x3)) ^ (x5))) \
+	^ ((x1) & (x4)) ^ ((x2) & (x5)) ^ ((x3) & (x6)))
+
+/*
+ * The macros below integrate the phi() permutations, depending on the
+ * pass and the total number of passes.
+ */
+
+#define FP3_1(x6, x5, x4, x3, x2, x1, x0) \
+	F1(x1, x0, x3, x5, x6, x2, x4)
+#define FP3_2(x6, x5, x4, x3, x2, x1, x0) \
+	F2(x4, x2, x1, x0, x5, x3, x6)
+#define FP3_3(x6, x5, x4, x3, x2, x1, x0) \
+	F3(x6, x1, x2, x3, x4, x5, x0)
+
+#define FP4_1(x6, x5, x4, x3, x2, x1, x0) \
+	F1(x2, x6, x1, x4, x5, x3, x0)
+#define FP4_2(x6, x5, x4, x3, x2, x1, x0) \
+	F2(x3, x5, x2, x0, x1, x6, x4)
+#define FP4_3(x6, x5, x4, x3, x2, x1, x0) \
+	F3(x1, x4, x3, x6, x0, x2, x5)
+#define FP4_4(x6, x5, x4, x3, x2, x1, x0) \
+	F4(x6, x4, x0, x5, x2, x1, x3)
+
+#define FP5_1(x6, x5, x4, x3, x2, x1, x0) \
+	F1(x3, x4, x1, x0, x5, x2, x6)
+#define FP5_2(x6, x5, x4, x3, x2, x1, x0) \
+	F2(x6, x2, x1, x0, x3, x4, x5)
+#define FP5_3(x6, x5, x4, x3, x2, x1, x0) \
+	F3(x2, x6, x0, x4, x3, x1, x5)
+#define FP5_4(x6, x5, x4, x3, x2, x1, x0) \
+	F4(x1, x5, x3, x2, x0, x4, x6)
+#define FP5_5(x6, x5, x4, x3, x2, x1, x0) \
+	F5(x2, x5, x0, x6, x4, x3, x1)
+
+/*
+ * One step, for "n" passes, pass number "p" (1 <= p <= n), using
+ * input word number "w" and step constant "c".
+ */
+#define STEP(n, p, x7, x6, x5, x4, x3, x2, x1, x0, w, c)  do { \
+		sph_u32 t = FP ## n ## _ ## p(x6, x5, x4, x3, x2, x1, x0); \
+		(x7) = SPH_T32(SPH_ROTR32(t, 7) + SPH_ROTR32((x7), 11) \
+			+ (w) + (c)); \
+	} while (0)
+
+/*
+ * PASSy(n, in) computes pass number "y", for a total of "n", using the
+ * one-argument macro "in" to access input words. Current state is assumed
+ * to be held in variables "s0" to "s7".
+ */
+
+#if SPH_SMALL_FOOTPRINT_HAVAL
+
+#define PASS1(n, in)   do { \
+		unsigned pass_count; \
+		for (pass_count = 0; pass_count < 32; pass_count += 8) { \
+			STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, \
+				in(pass_count + 0), SPH_C32(0x00000000)); \
+			STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, \
+				in(pass_count + 1), SPH_C32(0x00000000)); \
+			STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, \
+				in(pass_count + 2), SPH_C32(0x00000000)); \
+			STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, \
+				in(pass_count + 3), SPH_C32(0x00000000)); \
+			STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, \
+				in(pass_count + 4), SPH_C32(0x00000000)); \
+			STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, \
+				in(pass_count + 5), SPH_C32(0x00000000)); \
+			STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, \
+				in(pass_count + 6), SPH_C32(0x00000000)); \
+			STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, \
+				in(pass_count + 7), SPH_C32(0x00000000)); \
+   		} \
+	} while (0)
+
+#define PASSG(p, n, in)   do { \
+		unsigned pass_count; \
+		for (pass_count = 0; pass_count < 32; pass_count += 8) { \
+			STEP(n, p, s7, s6, s5, s4, s3, s2, s1, s0, \
+				in(MP ## p[pass_count + 0]), \
+				RK ## p[pass_count + 0]); \
+			STEP(n, p, s6, s5, s4, s3, s2, s1, s0, s7, \
+				in(MP ## p[pass_count + 1]), \
+				RK ## p[pass_count + 1]); \
+			STEP(n, p, s5, s4, s3, s2, s1, s0, s7, s6, \
+				in(MP ## p[pass_count + 2]), \
+				RK ## p[pass_count + 2]); \
+			STEP(n, p, s4, s3, s2, s1, s0, s7, s6, s5, \
+				in(MP ## p[pass_count + 3]), \
+				RK ## p[pass_count + 3]); \
+			STEP(n, p, s3, s2, s1, s0, s7, s6, s5, s4, \
+				in(MP ## p[pass_count + 4]), \
+				RK ## p[pass_count + 4]); \
+			STEP(n, p, s2, s1, s0, s7, s6, s5, s4, s3, \
+				in(MP ## p[pass_count + 5]), \
+				RK ## p[pass_count + 5]); \
+			STEP(n, p, s1, s0, s7, s6, s5, s4, s3, s2, \
+				in(MP ## p[pass_count + 6]), \
+				RK ## p[pass_count + 6]); \
+			STEP(n, p, s0, s7, s6, s5, s4, s3, s2, s1, \
+				in(MP ## p[pass_count + 7]), \
+				RK ## p[pass_count + 7]); \
+   		} \
+	} while (0)
+
+#define PASS2(n, in)    PASSG(2, n, in)
+#define PASS3(n, in)    PASSG(3, n, in)
+#define PASS4(n, in)    PASSG(4, n, in)
+#define PASS5(n, in)    PASSG(5, n, in)
+
+static const unsigned MP2[32] = { /* index of the input word read at each step of pass 2 (used by PASSG) */
+	 5, 14, 26, 18, 11, 28,  7, 16,
+	 0, 23, 20, 22,  1, 10,  4,  8,
+	30,  3, 21,  9, 17, 24, 29,  6,
+	19, 12, 15, 13,  2, 25, 31, 27
+};
+
+static const unsigned MP3[32] = { /* index of the input word read at each step of pass 3 */
+	19,  9,  4, 20, 28, 17,  8, 22,
+	29, 14, 25, 12, 24, 30, 16, 26,
+	31, 15,  7,  3,  1,  0, 18, 27,
+	13,  6, 21, 10, 23, 11,  5,  2
+};
+
+static const unsigned MP4[32] = { /* index of the input word read at each step of pass 4 */
+	24,  4,  0, 14,  2,  7, 28, 23,
+	26,  6, 30, 20, 18, 25, 19,  3,
+	22, 11, 31, 21,  8, 27, 12,  9,
+	 1, 29,  5, 15, 17, 10, 16, 13
+};
+
+static const unsigned MP5[32] = { /* index of the input word read at each step of pass 5 */
+	27,  3, 21, 26, 17, 11, 20, 29,
+	19,  0, 12,  7, 13,  8, 31, 10,
+	 5,  9, 14, 30, 18,  6, 28, 24,
+	 2, 23, 16, 22,  4,  1, 25, 15
+};
+
+static const sph_u32 RK2[32] = { /* step constant passed to STEP at each step of pass 2 (used by PASSG) */
+	SPH_C32(0x452821E6), SPH_C32(0x38D01377),
+	SPH_C32(0xBE5466CF), SPH_C32(0x34E90C6C),
+	SPH_C32(0xC0AC29B7), SPH_C32(0xC97C50DD),
+	SPH_C32(0x3F84D5B5), SPH_C32(0xB5470917),
+	SPH_C32(0x9216D5D9), SPH_C32(0x8979FB1B),
+	SPH_C32(0xD1310BA6), SPH_C32(0x98DFB5AC),
+	SPH_C32(0x2FFD72DB), SPH_C32(0xD01ADFB7),
+	SPH_C32(0xB8E1AFED), SPH_C32(0x6A267E96),
+	SPH_C32(0xBA7C9045), SPH_C32(0xF12C7F99),
+	SPH_C32(0x24A19947), SPH_C32(0xB3916CF7),
+	SPH_C32(0x0801F2E2), SPH_C32(0x858EFC16),
+	SPH_C32(0x636920D8), SPH_C32(0x71574E69),
+	SPH_C32(0xA458FEA3), SPH_C32(0xF4933D7E),
+	SPH_C32(0x0D95748F), SPH_C32(0x728EB658),
+	SPH_C32(0x718BCD58), SPH_C32(0x82154AEE),
+	SPH_C32(0x7B54A41D), SPH_C32(0xC25A59B5)
+};
+
+static const sph_u32 RK3[32] = { /* step constant passed to STEP at each step of pass 3 */
+	SPH_C32(0x9C30D539), SPH_C32(0x2AF26013),
+	SPH_C32(0xC5D1B023), SPH_C32(0x286085F0),
+	SPH_C32(0xCA417918), SPH_C32(0xB8DB38EF),
+	SPH_C32(0x8E79DCB0), SPH_C32(0x603A180E),
+	SPH_C32(0x6C9E0E8B), SPH_C32(0xB01E8A3E),
+	SPH_C32(0xD71577C1), SPH_C32(0xBD314B27),
+	SPH_C32(0x78AF2FDA), SPH_C32(0x55605C60),
+	SPH_C32(0xE65525F3), SPH_C32(0xAA55AB94),
+	SPH_C32(0x57489862), SPH_C32(0x63E81440),
+	SPH_C32(0x55CA396A), SPH_C32(0x2AAB10B6),
+	SPH_C32(0xB4CC5C34), SPH_C32(0x1141E8CE),
+	SPH_C32(0xA15486AF), SPH_C32(0x7C72E993),
+	SPH_C32(0xB3EE1411), SPH_C32(0x636FBC2A),
+	SPH_C32(0x2BA9C55D), SPH_C32(0x741831F6),
+	SPH_C32(0xCE5C3E16), SPH_C32(0x9B87931E),
+	SPH_C32(0xAFD6BA33), SPH_C32(0x6C24CF5C)
+};
+
+static const sph_u32 RK4[32] = { /* step constant passed to STEP at each step of pass 4 */
+	SPH_C32(0x7A325381), SPH_C32(0x28958677),
+	SPH_C32(0x3B8F4898), SPH_C32(0x6B4BB9AF),
+	SPH_C32(0xC4BFE81B), SPH_C32(0x66282193),
+	SPH_C32(0x61D809CC), SPH_C32(0xFB21A991),
+	SPH_C32(0x487CAC60), SPH_C32(0x5DEC8032),
+	SPH_C32(0xEF845D5D), SPH_C32(0xE98575B1),
+	SPH_C32(0xDC262302), SPH_C32(0xEB651B88),
+	SPH_C32(0x23893E81), SPH_C32(0xD396ACC5),
+	SPH_C32(0x0F6D6FF3), SPH_C32(0x83F44239),
+	SPH_C32(0x2E0B4482), SPH_C32(0xA4842004),
+	SPH_C32(0x69C8F04A), SPH_C32(0x9E1F9B5E),
+	SPH_C32(0x21C66842), SPH_C32(0xF6E96C9A),
+	SPH_C32(0x670C9C61), SPH_C32(0xABD388F0),
+	SPH_C32(0x6A51A0D2), SPH_C32(0xD8542F68),
+	SPH_C32(0x960FA728), SPH_C32(0xAB5133A3),
+	SPH_C32(0x6EEF0B6C), SPH_C32(0x137A3BE4)
+};
+
+static const sph_u32 RK5[32] = { /* step constant passed to STEP at each step of pass 5 */
+	SPH_C32(0xBA3BF050), SPH_C32(0x7EFB2A98),
+	SPH_C32(0xA1F1651D), SPH_C32(0x39AF0176),
+	SPH_C32(0x66CA593E), SPH_C32(0x82430E88),
+	SPH_C32(0x8CEE8619), SPH_C32(0x456F9FB4),
+	SPH_C32(0x7D84A5C3), SPH_C32(0x3B8B5EBE),
+	SPH_C32(0xE06F75D8), SPH_C32(0x85C12073),
+	SPH_C32(0x401A449F), SPH_C32(0x56C16AA6),
+	SPH_C32(0x4ED3AA62), SPH_C32(0x363F7706),
+	SPH_C32(0x1BFEDF72), SPH_C32(0x429B023D),
+	SPH_C32(0x37D0D724), SPH_C32(0xD00A1248),
+	SPH_C32(0xDB0FEAD3), SPH_C32(0x49F1C09B),
+	SPH_C32(0x075372C9), SPH_C32(0x80991B7B),
+	SPH_C32(0x25D479D8), SPH_C32(0xF6E8DEF7),
+	SPH_C32(0xE3FE501A), SPH_C32(0xB6794C3B),
+	SPH_C32(0x976CE0BD), SPH_C32(0x04C006BA),
+	SPH_C32(0xC1A94FB6), SPH_C32(0x409F60C4)
+};
+
+#else
+
+#define PASS1(n, in)   do { \
+   STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, in( 0), SPH_C32(0x00000000)); \
+   STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, in( 1), SPH_C32(0x00000000)); \
+   STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, in( 2), SPH_C32(0x00000000)); \
+   STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, in( 3), SPH_C32(0x00000000)); \
+   STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, in( 4), SPH_C32(0x00000000)); \
+   STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, in( 5), SPH_C32(0x00000000)); \
+   STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, in( 6), SPH_C32(0x00000000)); \
+   STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, in( 7), SPH_C32(0x00000000)); \
+ \
+   STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, in( 8), SPH_C32(0x00000000)); \
+   STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, in( 9), SPH_C32(0x00000000)); \
+   STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, in(10), SPH_C32(0x00000000)); \
+   STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, in(11), SPH_C32(0x00000000)); \
+   STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, in(12), SPH_C32(0x00000000)); \
+   STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, in(13), SPH_C32(0x00000000)); \
+   STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, in(14), SPH_C32(0x00000000)); \
+   STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, in(15), SPH_C32(0x00000000)); \
+ \
+   STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, in(16), SPH_C32(0x00000000)); \
+   STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, in(17), SPH_C32(0x00000000)); \
+   STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, in(18), SPH_C32(0x00000000)); \
+   STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, in(19), SPH_C32(0x00000000)); \
+   STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, in(20), SPH_C32(0x00000000)); \
+   STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, in(21), SPH_C32(0x00000000)); \
+   STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, in(22), SPH_C32(0x00000000)); \
+   STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, in(23), SPH_C32(0x00000000)); \
+ \
+   STEP(n, 1, s7, s6, s5, s4, s3, s2, s1, s0, in(24), SPH_C32(0x00000000)); \
+   STEP(n, 1, s6, s5, s4, s3, s2, s1, s0, s7, in(25), SPH_C32(0x00000000)); \
+   STEP(n, 1, s5, s4, s3, s2, s1, s0, s7, s6, in(26), SPH_C32(0x00000000)); \
+   STEP(n, 1, s4, s3, s2, s1, s0, s7, s6, s5, in(27), SPH_C32(0x00000000)); \
+   STEP(n, 1, s3, s2, s1, s0, s7, s6, s5, s4, in(28), SPH_C32(0x00000000)); \
+   STEP(n, 1, s2, s1, s0, s7, s6, s5, s4, s3, in(29), SPH_C32(0x00000000)); \
+   STEP(n, 1, s1, s0, s7, s6, s5, s4, s3, s2, in(30), SPH_C32(0x00000000)); \
+   STEP(n, 1, s0, s7, s6, s5, s4, s3, s2, s1, in(31), SPH_C32(0x00000000)); \
+	} while (0)
+
+#define PASS2(n, in)   do { \
+   STEP(n, 2, s7, s6, s5, s4, s3, s2, s1, s0, in( 5), SPH_C32(0x452821E6)); \
+   STEP(n, 2, s6, s5, s4, s3, s2, s1, s0, s7, in(14), SPH_C32(0x38D01377)); \
+   STEP(n, 2, s5, s4, s3, s2, s1, s0, s7, s6, in(26), SPH_C32(0xBE5466CF)); \
+   STEP(n, 2, s4, s3, s2, s1, s0, s7, s6, s5, in(18), SPH_C32(0x34E90C6C)); \
+   STEP(n, 2, s3, s2, s1, s0, s7, s6, s5, s4, in(11), SPH_C32(0xC0AC29B7)); \
+   STEP(n, 2, s2, s1, s0, s7, s6, s5, s4, s3, in(28), SPH_C32(0xC97C50DD)); \
+   STEP(n, 2, s1, s0, s7, s6, s5, s4, s3, s2, in( 7), SPH_C32(0x3F84D5B5)); \
+   STEP(n, 2, s0, s7, s6, s5, s4, s3, s2, s1, in(16), SPH_C32(0xB5470917)); \
+ \
+   STEP(n, 2, s7, s6, s5, s4, s3, s2, s1, s0, in( 0), SPH_C32(0x9216D5D9)); \
+   STEP(n, 2, s6, s5, s4, s3, s2, s1, s0, s7, in(23), SPH_C32(0x8979FB1B)); \
+   STEP(n, 2, s5, s4, s3, s2, s1, s0, s7, s6, in(20), SPH_C32(0xD1310BA6)); \
+   STEP(n, 2, s4, s3, s2, s1, s0, s7, s6, s5, in(22), SPH_C32(0x98DFB5AC)); \
+   STEP(n, 2, s3, s2, s1, s0, s7, s6, s5, s4, in( 1), SPH_C32(0x2FFD72DB)); \
+   STEP(n, 2, s2, s1, s0, s7, s6, s5, s4, s3, in(10), SPH_C32(0xD01ADFB7)); \
+   STEP(n, 2, s1, s0, s7, s6, s5, s4, s3, s2, in( 4), SPH_C32(0xB8E1AFED)); \
+   STEP(n, 2, s0, s7, s6, s5, s4, s3, s2, s1, in( 8), SPH_C32(0x6A267E96)); \
+ \
+   STEP(n, 2, s7, s6, s5, s4, s3, s2, s1, s0, in(30), SPH_C32(0xBA7C9045)); \
+   STEP(n, 2, s6, s5, s4, s3, s2, s1, s0, s7, in( 3), SPH_C32(0xF12C7F99)); \
+   STEP(n, 2, s5, s4, s3, s2, s1, s0, s7, s6, in(21), SPH_C32(0x24A19947)); \
+   STEP(n, 2, s4, s3, s2, s1, s0, s7, s6, s5, in( 9), SPH_C32(0xB3916CF7)); \
+   STEP(n, 2, s3, s2, s1, s0, s7, s6, s5, s4, in(17), SPH_C32(0x0801F2E2)); \
+   STEP(n, 2, s2, s1, s0, s7, s6, s5, s4, s3, in(24), SPH_C32(0x858EFC16)); \
+   STEP(n, 2, s1, s0, s7, s6, s5, s4, s3, s2, in(29), SPH_C32(0x636920D8)); \
+   STEP(n, 2, s0, s7, s6, s5, s4, s3, s2, s1, in( 6), SPH_C32(0x71574E69)); \
+ \
+   STEP(n, 2, s7, s6, s5, s4, s3, s2, s1, s0, in(19), SPH_C32(0xA458FEA3)); \
+   STEP(n, 2, s6, s5, s4, s3, s2, s1, s0, s7, in(12), SPH_C32(0xF4933D7E)); \
+   STEP(n, 2, s5, s4, s3, s2, s1, s0, s7, s6, in(15), SPH_C32(0x0D95748F)); \
+   STEP(n, 2, s4, s3, s2, s1, s0, s7, s6, s5, in(13), SPH_C32(0x728EB658)); \
+   STEP(n, 2, s3, s2, s1, s0, s7, s6, s5, s4, in( 2), SPH_C32(0x718BCD58)); \
+   STEP(n, 2, s2, s1, s0, s7, s6, s5, s4, s3, in(25), SPH_C32(0x82154AEE)); \
+   STEP(n, 2, s1, s0, s7, s6, s5, s4, s3, s2, in(31), SPH_C32(0x7B54A41D)); \
+   STEP(n, 2, s0, s7, s6, s5, s4, s3, s2, s1, in(27), SPH_C32(0xC25A59B5)); \
+	} while (0)
+
+#define PASS3(n, in)   do { \
+   STEP(n, 3, s7, s6, s5, s4, s3, s2, s1, s0, in(19), SPH_C32(0x9C30D539)); \
+   STEP(n, 3, s6, s5, s4, s3, s2, s1, s0, s7, in( 9), SPH_C32(0x2AF26013)); \
+   STEP(n, 3, s5, s4, s3, s2, s1, s0, s7, s6, in( 4), SPH_C32(0xC5D1B023)); \
+   STEP(n, 3, s4, s3, s2, s1, s0, s7, s6, s5, in(20), SPH_C32(0x286085F0)); \
+   STEP(n, 3, s3, s2, s1, s0, s7, s6, s5, s4, in(28), SPH_C32(0xCA417918)); \
+   STEP(n, 3, s2, s1, s0, s7, s6, s5, s4, s3, in(17), SPH_C32(0xB8DB38EF)); \
+   STEP(n, 3, s1, s0, s7, s6, s5, s4, s3, s2, in( 8), SPH_C32(0x8E79DCB0)); \
+   STEP(n, 3, s0, s7, s6, s5, s4, s3, s2, s1, in(22), SPH_C32(0x603A180E)); \
+ \
+   STEP(n, 3, s7, s6, s5, s4, s3, s2, s1, s0, in(29), SPH_C32(0x6C9E0E8B)); \
+   STEP(n, 3, s6, s5, s4, s3, s2, s1, s0, s7, in(14), SPH_C32(0xB01E8A3E)); \
+   STEP(n, 3, s5, s4, s3, s2, s1, s0, s7, s6, in(25), SPH_C32(0xD71577C1)); \
+   STEP(n, 3, s4, s3, s2, s1, s0, s7, s6, s5, in(12), SPH_C32(0xBD314B27)); \
+   STEP(n, 3, s3, s2, s1, s0, s7, s6, s5, s4, in(24), SPH_C32(0x78AF2FDA)); \
+   STEP(n, 3, s2, s1, s0, s7, s6, s5, s4, s3, in(30), SPH_C32(0x55605C60)); \
+   STEP(n, 3, s1, s0, s7, s6, s5, s4, s3, s2, in(16), SPH_C32(0xE65525F3)); \
+   STEP(n, 3, s0, s7, s6, s5, s4, s3, s2, s1, in(26), SPH_C32(0xAA55AB94)); \
+ \
+   STEP(n, 3, s7, s6, s5, s4, s3, s2, s1, s0, in(31), SPH_C32(0x57489862)); \
+   STEP(n, 3, s6, s5, s4, s3, s2, s1, s0, s7, in(15), SPH_C32(0x63E81440)); \
+   STEP(n, 3, s5, s4, s3, s2, s1, s0, s7, s6, in( 7), SPH_C32(0x55CA396A)); \
+   STEP(n, 3, s4, s3, s2, s1, s0, s7, s6, s5, in( 3), SPH_C32(0x2AAB10B6)); \
+   STEP(n, 3, s3, s2, s1, s0, s7, s6, s5, s4, in( 1), SPH_C32(0xB4CC5C34)); \
+   STEP(n, 3, s2, s1, s0, s7, s6, s5, s4, s3, in( 0), SPH_C32(0x1141E8CE)); \
+   STEP(n, 3, s1, s0, s7, s6, s5, s4, s3, s2, in(18), SPH_C32(0xA15486AF)); \
+   STEP(n, 3, s0, s7, s6, s5, s4, s3, s2, s1, in(27), SPH_C32(0x7C72E993)); \
+ \
+   STEP(n, 3, s7, s6, s5, s4, s3, s2, s1, s0, in(13), SPH_C32(0xB3EE1411)); \
+   STEP(n, 3, s6, s5, s4, s3, s2, s1, s0, s7, in( 6), SPH_C32(0x636FBC2A)); \
+   STEP(n, 3, s5, s4, s3, s2, s1, s0, s7, s6, in(21), SPH_C32(0x2BA9C55D)); \
+   STEP(n, 3, s4, s3, s2, s1, s0, s7, s6, s5, in(10), SPH_C32(0x741831F6)); \
+   STEP(n, 3, s3, s2, s1, s0, s7, s6, s5, s4, in(23), SPH_C32(0xCE5C3E16)); \
+   STEP(n, 3, s2, s1, s0, s7, s6, s5, s4, s3, in(11), SPH_C32(0x9B87931E)); \
+   STEP(n, 3, s1, s0, s7, s6, s5, s4, s3, s2, in( 5), SPH_C32(0xAFD6BA33)); \
+   STEP(n, 3, s0, s7, s6, s5, s4, s3, s2, s1, in( 2), SPH_C32(0x6C24CF5C)); \
+	} while (0)
+
+#define PASS4(n, in)   do { /* pass 4: 32 STEPs, each input word used once */ \
+   STEP(n, 4, s7, s6, s5, s4, s3, s2, s1, s0, in(24), SPH_C32(0x7A325381)); \
+   STEP(n, 4, s6, s5, s4, s3, s2, s1, s0, s7, in( 4), SPH_C32(0x28958677)); \
+   STEP(n, 4, s5, s4, s3, s2, s1, s0, s7, s6, in( 0), SPH_C32(0x3B8F4898)); \
+   STEP(n, 4, s4, s3, s2, s1, s0, s7, s6, s5, in(14), SPH_C32(0x6B4BB9AF)); \
+   STEP(n, 4, s3, s2, s1, s0, s7, s6, s5, s4, in( 2), SPH_C32(0xC4BFE81B)); \
+   STEP(n, 4, s2, s1, s0, s7, s6, s5, s4, s3, in( 7), SPH_C32(0x66282193)); \
+   STEP(n, 4, s1, s0, s7, s6, s5, s4, s3, s2, in(28), SPH_C32(0x61D809CC)); \
+   STEP(n, 4, s0, s7, s6, s5, s4, s3, s2, s1, in(23), SPH_C32(0xFB21A991)); \
+ \
+   STEP(n, 4, s7, s6, s5, s4, s3, s2, s1, s0, in(26), SPH_C32(0x487CAC60)); \
+   STEP(n, 4, s6, s5, s4, s3, s2, s1, s0, s7, in( 6), SPH_C32(0x5DEC8032)); \
+   STEP(n, 4, s5, s4, s3, s2, s1, s0, s7, s6, in(30), SPH_C32(0xEF845D5D)); \
+   STEP(n, 4, s4, s3, s2, s1, s0, s7, s6, s5, in(20), SPH_C32(0xE98575B1)); \
+   STEP(n, 4, s3, s2, s1, s0, s7, s6, s5, s4, in(18), SPH_C32(0xDC262302)); \
+   STEP(n, 4, s2, s1, s0, s7, s6, s5, s4, s3, in(25), SPH_C32(0xEB651B88)); \
+   STEP(n, 4, s1, s0, s7, s6, s5, s4, s3, s2, in(19), SPH_C32(0x23893E81)); \
+   STEP(n, 4, s0, s7, s6, s5, s4, s3, s2, s1, in( 3), SPH_C32(0xD396ACC5)); \
+ \
+   STEP(n, 4, s7, s6, s5, s4, s3, s2, s1, s0, in(22), SPH_C32(0x0F6D6FF3)); \
+   STEP(n, 4, s6, s5, s4, s3, s2, s1, s0, s7, in(11), SPH_C32(0x83F44239)); \
+   STEP(n, 4, s5, s4, s3, s2, s1, s0, s7, s6, in(31), SPH_C32(0x2E0B4482)); \
+   STEP(n, 4, s4, s3, s2, s1, s0, s7, s6, s5, in(21), SPH_C32(0xA4842004)); \
+   STEP(n, 4, s3, s2, s1, s0, s7, s6, s5, s4, in( 8), SPH_C32(0x69C8F04A)); \
+   STEP(n, 4, s2, s1, s0, s7, s6, s5, s4, s3, in(27), SPH_C32(0x9E1F9B5E)); \
+   STEP(n, 4, s1, s0, s7, s6, s5, s4, s3, s2, in(12), SPH_C32(0x21C66842)); \
+   STEP(n, 4, s0, s7, s6, s5, s4, s3, s2, s1, in( 9), SPH_C32(0xF6E96C9A)); \
+ \
+   STEP(n, 4, s7, s6, s5, s4, s3, s2, s1, s0, in( 1), SPH_C32(0x670C9C61)); \
+   STEP(n, 4, s6, s5, s4, s3, s2, s1, s0, s7, in(29), SPH_C32(0xABD388F0)); \
+   STEP(n, 4, s5, s4, s3, s2, s1, s0, s7, s6, in( 5), SPH_C32(0x6A51A0D2)); \
+   STEP(n, 4, s4, s3, s2, s1, s0, s7, s6, s5, in(15), SPH_C32(0xD8542F68)); \
+   STEP(n, 4, s3, s2, s1, s0, s7, s6, s5, s4, in(17), SPH_C32(0x960FA728)); \
+   STEP(n, 4, s2, s1, s0, s7, s6, s5, s4, s3, in(10), SPH_C32(0xAB5133A3)); \
+   STEP(n, 4, s1, s0, s7, s6, s5, s4, s3, s2, in(16), SPH_C32(0x6EEF0B6C)); \
+   STEP(n, 4, s0, s7, s6, s5, s4, s3, s2, s1, in(13), SPH_C32(0x137A3BE4)); \
+	} while (0)
+
+#define PASS5(n, in)   do { /* pass 5: 32 STEPs, each input word used once */ \
+   STEP(n, 5, s7, s6, s5, s4, s3, s2, s1, s0, in(27), SPH_C32(0xBA3BF050)); \
+   STEP(n, 5, s6, s5, s4, s3, s2, s1, s0, s7, in( 3), SPH_C32(0x7EFB2A98)); \
+   STEP(n, 5, s5, s4, s3, s2, s1, s0, s7, s6, in(21), SPH_C32(0xA1F1651D)); \
+   STEP(n, 5, s4, s3, s2, s1, s0, s7, s6, s5, in(26), SPH_C32(0x39AF0176)); \
+   STEP(n, 5, s3, s2, s1, s0, s7, s6, s5, s4, in(17), SPH_C32(0x66CA593E)); \
+   STEP(n, 5, s2, s1, s0, s7, s6, s5, s4, s3, in(11), SPH_C32(0x82430E88)); \
+   STEP(n, 5, s1, s0, s7, s6, s5, s4, s3, s2, in(20), SPH_C32(0x8CEE8619)); \
+   STEP(n, 5, s0, s7, s6, s5, s4, s3, s2, s1, in(29), SPH_C32(0x456F9FB4)); \
+ \
+   STEP(n, 5, s7, s6, s5, s4, s3, s2, s1, s0, in(19), SPH_C32(0x7D84A5C3)); \
+   STEP(n, 5, s6, s5, s4, s3, s2, s1, s0, s7, in( 0), SPH_C32(0x3B8B5EBE)); \
+   STEP(n, 5, s5, s4, s3, s2, s1, s0, s7, s6, in(12), SPH_C32(0xE06F75D8)); \
+   STEP(n, 5, s4, s3, s2, s1, s0, s7, s6, s5, in( 7), SPH_C32(0x85C12073)); \
+   STEP(n, 5, s3, s2, s1, s0, s7, s6, s5, s4, in(13), SPH_C32(0x401A449F)); \
+   STEP(n, 5, s2, s1, s0, s7, s6, s5, s4, s3, in( 8), SPH_C32(0x56C16AA6)); \
+   STEP(n, 5, s1, s0, s7, s6, s5, s4, s3, s2, in(31), SPH_C32(0x4ED3AA62)); \
+   STEP(n, 5, s0, s7, s6, s5, s4, s3, s2, s1, in(10), SPH_C32(0x363F7706)); \
+ \
+   STEP(n, 5, s7, s6, s5, s4, s3, s2, s1, s0, in( 5), SPH_C32(0x1BFEDF72)); \
+   STEP(n, 5, s6, s5, s4, s3, s2, s1, s0, s7, in( 9), SPH_C32(0x429B023D)); \
+   STEP(n, 5, s5, s4, s3, s2, s1, s0, s7, s6, in(14), SPH_C32(0x37D0D724)); \
+   STEP(n, 5, s4, s3, s2, s1, s0, s7, s6, s5, in(30), SPH_C32(0xD00A1248)); \
+   STEP(n, 5, s3, s2, s1, s0, s7, s6, s5, s4, in(18), SPH_C32(0xDB0FEAD3)); \
+   STEP(n, 5, s2, s1, s0, s7, s6, s5, s4, s3, in( 6), SPH_C32(0x49F1C09B)); \
+   STEP(n, 5, s1, s0, s7, s6, s5, s4, s3, s2, in(28), SPH_C32(0x075372C9)); \
+   STEP(n, 5, s0, s7, s6, s5, s4, s3, s2, s1, in(24), SPH_C32(0x80991B7B)); \
+ \
+   STEP(n, 5, s7, s6, s5, s4, s3, s2, s1, s0, in( 2), SPH_C32(0x25D479D8)); \
+   STEP(n, 5, s6, s5, s4, s3, s2, s1, s0, s7, in(23), SPH_C32(0xF6E8DEF7)); \
+   STEP(n, 5, s5, s4, s3, s2, s1, s0, s7, s6, in(16), SPH_C32(0xE3FE501A)); \
+   STEP(n, 5, s4, s3, s2, s1, s0, s7, s6, s5, in(22), SPH_C32(0xB6794C3B)); \
+   STEP(n, 5, s3, s2, s1, s0, s7, s6, s5, s4, in( 4), SPH_C32(0x976CE0BD)); \
+   STEP(n, 5, s2, s1, s0, s7, s6, s5, s4, s3, in( 1), SPH_C32(0x04C006BA)); \
+   STEP(n, 5, s1, s0, s7, s6, s5, s4, s3, s2, in(25), SPH_C32(0xC1A94FB6)); \
+   STEP(n, 5, s0, s7, s6, s5, s4, s3, s2, s1, in(15), SPH_C32(0x409F60C4)); \
+	} while (0)
+
+#endif
+
+#define SAVE_STATE /* declares u0..u7 and snapshots s0..s7 into them */ \
+	sph_u32 u0, u1, u2, u3, u4, u5, u6, u7; /* a declaration: use where one is legal */ \
+	do { \
+		u0 = s0; \
+		u1 = s1; \
+		u2 = s2; \
+		u3 = s3; \
+		u4 = s4; \
+		u5 = s5; \
+		u6 = s6; \
+		u7 = s7; \
+	} while (0)
+
+#define UPDATE_STATE   do { /* adds the u0..u7 saved by SAVE_STATE back into s0..s7 */ \
+		s0 = SPH_T32(s0 + u0); \
+		s1 = SPH_T32(s1 + u1); \
+		s2 = SPH_T32(s2 + u2); \
+		s3 = SPH_T32(s3 + u3); \
+		s4 = SPH_T32(s4 + u4); \
+		s5 = SPH_T32(s5 + u5); \
+		s6 = SPH_T32(s6 + u6); \
+		s7 = SPH_T32(s7 + u7); \
+	} while (0)
+
+/*
+ * COREn(in) runs the "n"-pass HAVAL core on the state held in "s0" to
+ * "s7": it saves the state, applies PASS1..PASSn reading input words via
+ * the one-argument macro "in", then adds the saved state back in.
+ */
+
+#define CORE3(in)  do { \
+		SAVE_STATE; \
+		PASS1(3, in); \
+		PASS2(3, in); \
+		PASS3(3, in); \
+		UPDATE_STATE; \
+	} while (0)
+
+#define CORE4(in)  do { \
+		SAVE_STATE; \
+		PASS1(4, in); \
+		PASS2(4, in); \
+		PASS3(4, in); \
+		PASS4(4, in); \
+		UPDATE_STATE; \
+	} while (0)
+
+#define CORE5(in)  do { \
+		SAVE_STATE; \
+		PASS1(5, in); \
+		PASS2(5, in); \
+		PASS3(5, in); \
+		PASS4(5, in); \
+		PASS5(5, in); \
+		UPDATE_STATE; \
+	} while (0)
+
+/*
+ * DSTATE declares the local state words "s0" to "s7" (no trailing ';').
+ */
+#define DSTATE   sph_u32 s0, s1, s2, s3, s4, s5, s6, s7
+
+/*
+ * RSTATE loads "s0" to "s7" from the context pointer "sc" in scope.
+ */
+#define RSTATE   do { \
+		s0 = sc->s0; \
+		s1 = sc->s1; \
+		s2 = sc->s2; \
+		s3 = sc->s3; \
+		s4 = sc->s4; \
+		s5 = sc->s5; \
+		s6 = sc->s6; \
+		s7 = sc->s7; \
+	} while (0)
+
+/*
+ * WSTATE stores "s0" to "s7" back into the context pointer "sc" in scope.
+ */
+#define WSTATE   do { \
+		sc->s0 = s0; \
+		sc->s1 = s1; \
+		sc->s2 = s2; \
+		sc->s3 = s3; \
+		sc->s4 = s4; \
+		sc->s5 = s5; \
+		sc->s6 = s6; \
+		sc->s7 = s7; \
+	} while (0)
+
+/*
+ * Reset a context for a new computation. "olen" is the digest size in
+ * 32-bit words (4 to 8) and "passes" the pass count (3, 4 or 5); both
+ * are recorded as given, without validation.
+ */
+static void
+haval_init(sph_haval_context *sc, unsigned olen, unsigned passes)
+{
+	sc->passes = passes;
+	sc->olen = olen;
+#if SPH_64
+	sc->count = 0;
+#else
+	sc->count_low = 0;
+	sc->count_high = 0;
+#endif
+	sc->s0 = SPH_C32(0x243F6A88);
+	sc->s1 = SPH_C32(0x85A308D3);
+	sc->s2 = SPH_C32(0x13198A2E);
+	sc->s3 = SPH_C32(0x03707344);
+	sc->s4 = SPH_C32(0xA4093822);
+	sc->s5 = SPH_C32(0x299F31D0);
+	sc->s6 = SPH_C32(0x082EFA98);
+	sc->s7 = SPH_C32(0xEC4E6C89);
+}
+
+/*
+ * IN_PREPARE(data) sets up reading of the 32 input words at "data" and
+ * INW(i) yields word "i" (0 to 31). With SPH_LITTLE_FAST each INW(i)
+ * decodes in place; otherwise all words are decoded once into X_var[].
+ */
+#if SPH_LITTLE_FAST
+#define IN_PREPARE(indata)   const unsigned char *const load_ptr = \
+                             (const unsigned char *)(indata)
+#define INW(i)   sph_dec32le_aligned(load_ptr + 4 * (i))
+#else
+#define IN_PREPARE(indata) \
+	sph_u32 X_var[32]; \
+	int load_index; \
+ \
+	for (load_index = 0; load_index < 32; load_index ++) \
+		X_var[load_index] = sph_dec32le_aligned( \
+			(const unsigned char *)(indata) + 4 * load_index)
+#define INW(i)   X_var[i]
+#endif
+
+/*
+ * Byte-select helper for the 128-bit output folding: builds one word
+ * from the lowest byte of a0, the second byte of a1, the third byte of
+ * a2 and the top byte of a3, then rotates it left by n bits. The
+ * rotation is skipped when n is not positive.
+ */
+static SPH_INLINE sph_u32
+mix128(sph_u32 a0, sph_u32 a1, sph_u32 a2, sph_u32 a3, int n)
+{
+	const sph_u32 low_half = (a0 & SPH_C32(0x000000FF))
+		| (a1 & SPH_C32(0x0000FF00));
+	const sph_u32 high_half = (a2 & SPH_C32(0x00FF0000))
+		| (a3 & SPH_C32(0xFF000000));
+	const sph_u32 picked = low_half | high_half;
+
+	if (n <= 0)
+		return picked;
+	return SPH_ROTL32(picked, n);
+}
+
+/*
+ * 160-bit folding, word 0: x5[19..24] | x6[25..31] | x7[0..5], rotl 13.
+ */
+static SPH_INLINE sph_u32
+mix160_0(sph_u32 x5, sph_u32 x6, sph_u32 x7)
+{
+	const sph_u32 f5 = x5 & SPH_C32(0x01F80000);
+	const sph_u32 f6 = x6 & SPH_C32(0xFE000000);
+	const sph_u32 f7 = x7 & SPH_C32(0x0000003F);
+	const sph_u32 merged = f5 | f6 | f7;
+
+	return SPH_ROTL32(merged, 13);
+}
+
+/*
+ * 160-bit folding, word 1: x5[25..31] | x6[0..5] | x7[6..11], rotl 7.
+ */
+static SPH_INLINE sph_u32
+mix160_1(sph_u32 x5, sph_u32 x6, sph_u32 x7)
+{
+	const sph_u32 f5 = x5 & SPH_C32(0xFE000000);
+	const sph_u32 f6 = x6 & SPH_C32(0x0000003F);
+	const sph_u32 f7 = x7 & SPH_C32(0x00000FC0);
+	const sph_u32 merged = f5 | f6 | f7;
+
+	return SPH_ROTL32(merged, 7);
+}
+
+/*
+ * 160-bit folding, word 2: x5[0..5] | x6[6..11] | x7[12..18], unshifted.
+ */
+static SPH_INLINE sph_u32
+mix160_2(sph_u32 x5, sph_u32 x6, sph_u32 x7)
+{
+	const sph_u32 f5 = x5 & SPH_C32(0x0000003F);
+	const sph_u32 f6 = x6 & SPH_C32(0x00000FC0);
+	const sph_u32 f7 = x7 & SPH_C32(0x0007F000);
+	const sph_u32 merged = f5 | f6 | f7;
+
+	return merged;
+}
+
+/*
+ * 160-bit folding, word 3: x5[6..11] | x6[12..18] | x7[19..24], >> 6.
+ */
+static SPH_INLINE sph_u32
+mix160_3(sph_u32 x5, sph_u32 x6, sph_u32 x7)
+{
+	const sph_u32 f5 = x5 & SPH_C32(0x00000FC0);
+	const sph_u32 f6 = x6 & SPH_C32(0x0007F000);
+	const sph_u32 f7 = x7 & SPH_C32(0x01F80000);
+	const sph_u32 merged = f5 | f6 | f7;
+
+	return merged >> 6;
+}
+
+/*
+ * 160-bit folding, word 4: x5[12..18] | x6[19..24] | x7[25..31], >> 12.
+ */
+static SPH_INLINE sph_u32
+mix160_4(sph_u32 x5, sph_u32 x6, sph_u32 x7)
+{
+	const sph_u32 f5 = x5 & SPH_C32(0x0007F000);
+	const sph_u32 f6 = x6 & SPH_C32(0x01F80000);
+	const sph_u32 f7 = x7 & SPH_C32(0xFE000000);
+	const sph_u32 merged = f5 | f6 | f7;
+
+	return merged >> 12;
+}
+
+/*
+ * 192-bit folding, word 0: x6[26..31] | x7[0..4], rotated left by 6.
+ */
+static SPH_INLINE sph_u32
+mix192_0(sph_u32 x6, sph_u32 x7)
+{
+	const sph_u32 f6 = x6 & SPH_C32(0xFC000000);
+	const sph_u32 f7 = x7 & SPH_C32(0x0000001F);
+	const sph_u32 merged = f6 | f7;
+	return SPH_ROTL32(merged, 6);
+}
+
+/*
+ * 192-bit folding, output word 1: x6 bits 0-4 | x7 bits 5-9, unshifted.
+ */
+static SPH_INLINE sph_u32
+mix192_1(sph_u32 x6, sph_u32 x7)
+{
+	return (x6 & SPH_C32(0x0000001F)) | (x7 & SPH_C32(0x000003E0));
+}
+
+/*
+ * 192-bit folding, output word 2: (x6 bits 5-9 | x7 bits 10-15) >> 5.
+ */
+static SPH_INLINE sph_u32
+mix192_2(sph_u32 x6, sph_u32 x7)
+{
+	return ((x6 & SPH_C32(0x000003E0)) | (x7 & SPH_C32(0x0000FC00))) >> 5;
+}
+
+/*
+ * 192-bit folding, output word 3: (x6 bits 10-15 | x7 bits 16-20) >> 10.
+ */
+static SPH_INLINE sph_u32
+mix192_3(sph_u32 x6, sph_u32 x7)
+{
+	return ((x6 & SPH_C32(0x0000FC00)) | (x7 & SPH_C32(0x001F0000))) >> 10;
+}
+
+/*
+ * 192-bit folding, output word 4: (x6 bits 16-20 | x7 bits 21-25) >> 16.
+ */
+static SPH_INLINE sph_u32
+mix192_4(sph_u32 x6, sph_u32 x7)
+{
+	return ((x6 & SPH_C32(0x001F0000)) | (x7 & SPH_C32(0x03E00000))) >> 16;
+}
+
+/*
+ * 192-bit folding, output word 5: (x6 bits 21-25 | x7 bits 26-31) >> 21.
+ */
+static SPH_INLINE sph_u32
+mix192_5(sph_u32 x6, sph_u32 x7)
+{
+	return ((x6 & SPH_C32(0x03E00000)) | (x7 & SPH_C32(0xFC000000))) >> 21;
+}
+
+/*
+ * Write the digest to "dst" as sc->olen 32-bit words using sph_enc32le().
+ * Short outputs fold the unused high state words into the ones kept.
+ */
+static void
+haval_out(sph_haval_context *sc, void *dst)
+{
+	DSTATE;
+	unsigned char *buf;
+
+	buf = (unsigned char*)dst;
+	RSTATE;
+	switch (sc->olen) {
+	case 4: /* 128-bit: bytes of s4..s7 folded into s0..s3 via mix128() */
+		sph_enc32le(buf,      SPH_T32(s0 + mix128(s7, s4, s5, s6, 24)));
+		sph_enc32le(buf + 4,  SPH_T32(s1 + mix128(s6, s7, s4, s5, 16)));
+		sph_enc32le(buf + 8,  SPH_T32(s2 + mix128(s5, s6, s7, s4, 8)));
+		sph_enc32le(buf + 12, SPH_T32(s3 + mix128(s4, s5, s6, s7, 0)));
+		break;
+	case 5: /* 160-bit: s5..s7 folded into s0..s4 via mix160_*() */
+		sph_enc32le(buf,      SPH_T32(s0 + mix160_0(s5, s6, s7)));
+		sph_enc32le(buf + 4,  SPH_T32(s1 + mix160_1(s5, s6, s7)));
+		sph_enc32le(buf + 8,  SPH_T32(s2 + mix160_2(s5, s6, s7)));
+		sph_enc32le(buf + 12, SPH_T32(s3 + mix160_3(s5, s6, s7)));
+		sph_enc32le(buf + 16, SPH_T32(s4 + mix160_4(s5, s6, s7)));
+		break;
+	case 6: /* 192-bit: s6 and s7 folded into s0..s5 via mix192_*() */
+		sph_enc32le(buf,      SPH_T32(s0 + mix192_0(s6, s7)));
+		sph_enc32le(buf + 4,  SPH_T32(s1 + mix192_1(s6, s7)));
+		sph_enc32le(buf + 8,  SPH_T32(s2 + mix192_2(s6, s7)));
+		sph_enc32le(buf + 12, SPH_T32(s3 + mix192_3(s6, s7)));
+		sph_enc32le(buf + 16, SPH_T32(s4 + mix192_4(s6, s7)));
+		sph_enc32le(buf + 20, SPH_T32(s5 + mix192_5(s6, s7)));
+		break;
+	case 7: /* 224-bit: s7 split into 5/5/4/5/4/5/4-bit fields added to s0..s6 */
+		sph_enc32le(buf,      SPH_T32(s0 + ((s7 >> 27) & 0x1F)));
+		sph_enc32le(buf + 4,  SPH_T32(s1 + ((s7 >> 22) & 0x1F)));
+		sph_enc32le(buf + 8,  SPH_T32(s2 + ((s7 >> 18) & 0x0F)));
+		sph_enc32le(buf + 12, SPH_T32(s3 + ((s7 >> 13) & 0x1F)));
+		sph_enc32le(buf + 16, SPH_T32(s4 + ((s7 >>  9) & 0x0F)));
+		sph_enc32le(buf + 20, SPH_T32(s5 + ((s7 >>  4) & 0x1F)));
+		sph_enc32le(buf + 24, SPH_T32(s6 + ((s7      ) & 0x0F)));
+		break;
+	case 8: /* 256-bit: the eight state words are written unchanged */
+		sph_enc32le(buf,      s0);
+		sph_enc32le(buf + 4,  s1);
+		sph_enc32le(buf + 8,  s2);
+		sph_enc32le(buf + 12, s3);
+		sph_enc32le(buf + 16, s4);
+		sph_enc32le(buf + 20, s5);
+		sph_enc32le(buf + 24, s6);
+		sph_enc32le(buf + 28, s7);
+		break;
+	} /* no default: any other olen writes nothing to dst */
+}
+
+/*
+ * The main core functions inline the code with the COREx() macros. We
+ * use a helper file, included three times, which avoids code copying.
+ */
+
+#undef PASSES
+#define PASSES   3
+#include "haval_helper.c"
+
+#undef PASSES
+#define PASSES   4
+#include "haval_helper.c"
+
+#undef PASSES
+#define PASSES   5
+#include "haval_helper.c"
+
+/* ====================================================================== */
+
+#define API(xxx, y) /* public entry points of HAVAL-xxx/y; olen = xxx/32 words */ \
+void \
+sph_haval ## xxx ## _ ## y ## _init(void *cc) \
+{ \
+	haval_init((sph_haval_context*)cc, xxx >> 5, y); \
+} \
+/* Feed len bytes of data to the y-pass core. */ \
+void \
+sph_haval ## xxx ## _ ## y (void *cc, const void *data, size_t len) \
+{ \
+	haval ## y((sph_haval_context*)cc, data, len); \
+} \
+/* Finish the computation with no extra bits and write the digest to dst. */ \
+void \
+sph_haval ## xxx ## _ ## y ## _close(void *cc, void *dst) \
+{ \
+	haval ## y ## _close((sph_haval_context*)cc, 0, 0, dst); \
+} \
+/* Finish with n extra bits from ub; the name must match sph_haval.h. */ \
+void \
+sph_haval ## xxx ## _ ## y ## _addbits_and_close( \
+	void *cc, unsigned ub, unsigned n, void *dst) \
+{ \
+	haval ## y ## _close((sph_haval_context*)cc, ub, n, dst); \
+}
+
+API(128, 3)
+API(128, 4)
+API(128, 5)
+API(160, 3)
+API(160, 4)
+API(160, 5)
+API(192, 3)
+API(192, 4)
+API(192, 5)
+API(224, 3)
+API(224, 4)
+API(224, 5)
+API(256, 3)
+API(256, 4)
+API(256, 5)
+
+#define RVAL   do { /* load s0..s7 from the caller's val[0..7] */ \
+		s0 = val[0]; \
+		s1 = val[1]; \
+		s2 = val[2]; \
+		s3 = val[3]; \
+		s4 = val[4]; \
+		s5 = val[5]; \
+		s6 = val[6]; \
+		s7 = val[7]; \
+	} while (0)
+
+#define WVAL   do { /* store s0..s7 back into the caller's val[0..7] */ \
+		val[0] = s0; \
+		val[1] = s1; \
+		val[2] = s2; \
+		val[3] = s3; \
+		val[4] = s4; \
+		val[5] = s5; \
+		val[6] = s6; \
+		val[7] = s7; \
+	} while (0)
+
+#define INMSG(i)   msg[i] /* input accessor for COREn: word i of msg[], used as-is */
+
+/* 3-pass core applied in place to val[0..7] with words msg[0..31]; see sph_haval.h */
+void
+sph_haval_3_comp(const sph_u32 msg[32], sph_u32 val[8])
+{
+	DSTATE;
+
+	RVAL;
+	CORE3(INMSG);
+	WVAL;
+}
+
+/* 4-pass core applied in place to val[0..7] with words msg[0..31]; see sph_haval.h */
+void
+sph_haval_4_comp(const sph_u32 msg[32], sph_u32 val[8])
+{
+	DSTATE;
+
+	RVAL;
+	CORE4(INMSG);
+	WVAL;
+}
+
+/* 5-pass core applied in place to val[0..7] with words msg[0..31]; see sph_haval.h */
+void
+sph_haval_5_comp(const sph_u32 msg[32], sph_u32 val[8])
+{
+	DSTATE;
+
+	RVAL;
+	CORE5(INMSG);
+	WVAL;
+}
+
diff --git a/sha3/sph_haval.h b/sha3/sph_haval.h
new file mode 100644
index 000000000..6334a9226
--- /dev/null
+++ b/sha3/sph_haval.h
@@ -0,0 +1,969 @@
+/* $Id: sph_haval.h 218 2010-06-08 17:06:34Z tp $ */
+/**
+* HAVAL interface.
+*
+* HAVAL is actually a family of 15 hash functions, depending on whether
+* the internal computation uses 3, 4 or 5 passes, and on the output
+* length, which is 128, 160, 192, 224 or 256 bits. This implementation
+* provides interface functions for all 15, which internally map to
+* three cores (depending on the number of passes). Note that output
+* lengths other than 256 bits are not obtained by a simple truncation
+* of a longer result; the requested length is encoded within the
+* padding data.
+*
+* HAVAL was published in: Yuliang Zheng, Josef Pieprzyk and Jennifer
+* Seberry: "HAVAL -- a one-way hashing algorithm with variable length
+* of output", Advances in Cryptology -- AUSCRYPT'92, Lecture Notes in
+* Computer Science, Vol.718, pp.83-104, Springer-Verlag, 1993.
+*
+* This paper, and a reference implementation, are available on the
+* Calyptix web site: http://labs.calyptix.com/haval.php
+*
+* The HAVAL reference paper is quite unclear on the data encoding
+* details, i.e. endianness (both byte order within a 32-bit word, and
+* word order within a message block). This implementation has been
+* made compatible with the reference implementation referenced above.
+*
+* @warning A collision for HAVAL-128/3 (HAVAL with three passes and
+* 128-bit output) has been published; this function is thus considered
+* as cryptographically broken. The status for other variants is unclear;
+* use only with care.
+*
+* ==========================(LICENSE BEGIN)============================
+*
+* Copyright (c) 2007-2010 Projet RNRT SAPHIR
+*
+* Permission is hereby granted, free of charge, to any person obtaining
+* a copy of this software and associated documentation files (the
+* "Software"), to deal in the Software without restriction, including
+* without limitation the rights to use, copy, modify, merge, publish,
+* distribute, sublicense, and/or sell copies of the Software, and to
+* permit persons to whom the Software is furnished to do so, subject to
+* the following conditions:
+*
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*
+* ===========================(LICENSE END)=============================
+*
+* @file sph_haval.h
+* @author Thomas Pornin <thomas.pornin@cryptolog.com>
+*/
+
+#ifndef SPH_HAVAL_H__
+#define SPH_HAVAL_H__
+
+#include <stddef.h>
+#include "sph_types.h"
+
+/**
+* Output size (in bits) for HAVAL-128/3.
+*/
+#define SPH_SIZE_haval128_3 128
+
+/**
+* Output size (in bits) for HAVAL-128/4.
+*/
+#define SPH_SIZE_haval128_4 128
+
+/**
+* Output size (in bits) for HAVAL-128/5.
+*/
+#define SPH_SIZE_haval128_5 128
+
+/**
+* Output size (in bits) for HAVAL-160/3.
+*/
+#define SPH_SIZE_haval160_3 160
+
+/**
+* Output size (in bits) for HAVAL-160/4.
+*/
+#define SPH_SIZE_haval160_4 160
+
+/**
+* Output size (in bits) for HAVAL-160/5.
+*/
+#define SPH_SIZE_haval160_5 160
+
+/**
+* Output size (in bits) for HAVAL-192/3.
+*/
+#define SPH_SIZE_haval192_3 192
+
+/**
+* Output size (in bits) for HAVAL-192/4.
+*/
+#define SPH_SIZE_haval192_4 192
+
+/**
+* Output size (in bits) for HAVAL-192/5.
+*/
+#define SPH_SIZE_haval192_5 192
+
+/**
+* Output size (in bits) for HAVAL-224/3.
+*/
+#define SPH_SIZE_haval224_3 224
+
+/**
+* Output size (in bits) for HAVAL-224/4.
+*/
+#define SPH_SIZE_haval224_4 224
+
+/**
+* Output size (in bits) for HAVAL-224/5.
+*/
+#define SPH_SIZE_haval224_5 224
+
+/**
+* Output size (in bits) for HAVAL-256/3.
+*/
+#define SPH_SIZE_haval256_3 256
+
+/**
+* Output size (in bits) for HAVAL-256/4.
+*/
+#define SPH_SIZE_haval256_4 256
+
+/**
+* Output size (in bits) for HAVAL-256/5.
+*/
+#define SPH_SIZE_haval256_5 256
+
+/**
+* This structure is a context for HAVAL computations: it contains the
+* intermediate values and some data from the last entered block. Once
+* a HAVAL computation has been performed, the context can be reused for
+* another computation.
+*
+* The contents of this structure are private. A running HAVAL computation
+* can be cloned by copying the context (e.g. with a simple
+* <code>memcpy()</code>).
+*/
+typedef struct {
+#ifndef DOXYGEN_IGNORE
+unsigned char buf[128]; /* first field, for alignment; 128 bytes = 32 words of 4 bytes */
+sph_u32 s0, s1, s2, s3, s4, s5, s6, s7; /* running state words */
+unsigned olen, passes; /* output length in 32-bit words (4..8); pass count (3..5) */
+#if SPH_64
+sph_u64 count; /* NOTE(review): presumably the input length counter; confirm in haval_helper.c */
+#else
+sph_u32 count_high, count_low; /* NOTE(review): presumably the same counter in two halves; confirm */
+#endif
+#endif
+} sph_haval_context;
+
+/**
+* Type for a HAVAL-128/3 context (identical to the common context).
+*/
+typedef sph_haval_context sph_haval128_3_context;
+
+/**
+* Type for a HAVAL-128/4 context (identical to the common context).
+*/
+typedef sph_haval_context sph_haval128_4_context;
+
+/**
+* Type for a HAVAL-128/5 context (identical to the common context).
+*/
+typedef sph_haval_context sph_haval128_5_context;
+
+/**
+* Type for a HAVAL-160/3 context (identical to the common context).
+*/
+typedef sph_haval_context sph_haval160_3_context;
+
+/**
+* Type for a HAVAL-160/4 context (identical to the common context).
+*/
+typedef sph_haval_context sph_haval160_4_context;
+
+/**
+* Type for a HAVAL-160/5 context (identical to the common context).
+*/
+typedef sph_haval_context sph_haval160_5_context;
+
+/**
+* Type for a HAVAL-192/3 context (identical to the common context).
+*/
+typedef sph_haval_context sph_haval192_3_context;
+
+/**
+* Type for a HAVAL-192/4 context (identical to the common context).
+*/
+typedef sph_haval_context sph_haval192_4_context;
+
+/**
+* Type for a HAVAL-192/5 context (identical to the common context).
+*/
+typedef sph_haval_context sph_haval192_5_context;
+
+/**
+* Type for a HAVAL-224/3 context (identical to the common context).
+*/
+typedef sph_haval_context sph_haval224_3_context;
+
+/**
+* Type for a HAVAL-224/4 context (identical to the common context).
+*/
+typedef sph_haval_context sph_haval224_4_context;
+
+/**
+* Type for a HAVAL-224/5 context (identical to the common context).
+*/
+typedef sph_haval_context sph_haval224_5_context;
+
+/**
+* Type for a HAVAL-256/3 context (identical to the common context).
+*/
+typedef sph_haval_context sph_haval256_3_context;
+
+/**
+* Type for a HAVAL-256/4 context (identical to the common context).
+*/
+typedef sph_haval_context sph_haval256_4_context;
+
+/**
+* Type for a HAVAL-256/5 context (identical to the common context).
+*/
+typedef sph_haval_context sph_haval256_5_context;
+
+/**
+* Initialize the context for HAVAL-128/3.
+*
+* @param cc context to initialize (pointer to a
+* <code>sph_haval128_3_context</code> structure)
+*/
+void sph_haval128_3_init(void *cc);
+
+/**
+* Process some data bytes for HAVAL-128/3. If <code>len</code> is 0,
+* then this function does nothing.
+*
+* @param cc the HAVAL-128/3 context
+* @param data the input data
+* @param len the input data length (in bytes)
+*/
+void sph_haval128_3(void *cc, const void *data, size_t len);
+
+/**
+* Close a HAVAL-128/3 computation. The output buffer must be wide
+* enough to accommodate the result (16 bytes). The context is automatically
+* reinitialized.
+*
+* @param cc the HAVAL-128/3 context
+* @param dst the output buffer
+*/
+void sph_haval128_3_close(void *cc, void *dst);
+
+/**
+* Close a HAVAL-128/3 computation. Up to 7 extra input bits may be added
+* to the input message; these are the <code>n</code> upper bits of
+* the <code>ub</code> byte (i.e. the first extra bit has value 128 in
+* <code>ub</code>, the second extra bit has value 64, and so on). Other
+* bits in <code>ub</code> are ignored.
+*
+* The output buffer must be wide enough to accommodate the result (16
+* bytes). The context is automatically reinitialized.
+*
+* @param cc the HAVAL-128/3 context
+* @param ub the extra bits
+* @param n the number of extra bits (0 to 7)
+* @param dst the output buffer
+*/
+void sph_haval128_3_addbits_and_close(void *cc,
+unsigned ub, unsigned n, void *dst);
+
+/**
+* Initialize the context for HAVAL-128/4.
+*
+* @param cc context to initialize (pointer to a
+* <code>sph_haval128_4_context</code> structure)
+*/
+void sph_haval128_4_init(void *cc);
+
+/**
+* Process some data bytes for HAVAL-128/4. If <code>len</code> is 0,
+* then this function does nothing.
+*
+* @param cc the HAVAL-128/4 context
+* @param data the input data
+* @param len the input data length (in bytes)
+*/
+void sph_haval128_4(void *cc, const void *data, size_t len);
+
+/**
+* Close a HAVAL-128/4 computation. The output buffer must be wide
+* enough to accommodate the result (16 bytes). The context is automatically
+* reinitialized.
+*
+* @param cc the HAVAL-128/4 context
+* @param dst the output buffer
+*/
+void sph_haval128_4_close(void *cc, void *dst);
+
+/**
+* Close a HAVAL-128/4 computation. Up to 7 extra input bits may be added
+* to the input message; these are the <code>n</code> upper bits of
+* the <code>ub</code> byte (i.e. the first extra bit has value 128 in
+* <code>ub</code>, the second extra bit has value 64, and so on). Other
+* bits in <code>ub</code> are ignored.
+*
+* The output buffer must be wide enough to accommodate the result (16
+* bytes). The context is automatically reinitialized.
+*
+* @param cc the HAVAL-128/4 context
+* @param ub the extra bits
+* @param n the number of extra bits (0 to 7)
+* @param dst the output buffer
+*/
+void sph_haval128_4_addbits_and_close(void *cc,
+unsigned ub, unsigned n, void *dst);
+
+/**
+* Initialize the context for HAVAL-128/5.
+*
+* @param cc context to initialize (pointer to a
+* <code>sph_haval128_5_context</code> structure)
+*/
+void sph_haval128_5_init(void *cc);
+
+/**
+* Process some data bytes for HAVAL-128/5. If <code>len</code> is 0,
+* then this function does nothing.
+*
+* @param cc the HAVAL-128/5 context
+* @param data the input data
+* @param len the input data length (in bytes)
+*/
+void sph_haval128_5(void *cc, const void *data, size_t len);
+
+/**
+* Close a HAVAL-128/5 computation. The output buffer must be wide
+* enough to accommodate the result (16 bytes). The context is automatically
+* reinitialized.
+*
+* @param cc the HAVAL-128/5 context
+* @param dst the output buffer
+*/
+void sph_haval128_5_close(void *cc, void *dst);
+
+/**
+* Close a HAVAL-128/5 computation. Up to 7 extra input bits may be added
+* to the input message; these are the <code>n</code> upper bits of
+* the <code>ub</code> byte (i.e. the first extra bit has value 128 in
+* <code>ub</code>, the second extra bit has value 64, and so on). Other
+* bits in <code>ub</code> are ignored.
+*
+* The output buffer must be wide enough to accommodate the result (16
+* bytes). The context is automatically reinitialized.
+*
+* @param cc the HAVAL-128/5 context
+* @param ub the extra bits
+* @param n the number of extra bits (0 to 7)
+* @param dst the output buffer
+*/
+void sph_haval128_5_addbits_and_close(void *cc,
+unsigned ub, unsigned n, void *dst);
+
+/**
+* Initialize the context for HAVAL-160/3.
+*
+* @param cc context to initialize (pointer to a
+* <code>sph_haval160_3_context</code> structure)
+*/
+void sph_haval160_3_init(void *cc);
+
+/**
+* Process some data bytes for HAVAL-160/3. If <code>len</code> is 0,
+* then this function does nothing.
+*
+* @param cc the HAVAL-160/3 context
+* @param data the input data
+* @param len the input data length (in bytes)
+*/
+void sph_haval160_3(void *cc, const void *data, size_t len);
+
+/**
+* Close a HAVAL-160/3 computation. The output buffer must be wide
+* enough to accommodate the result (20 bytes). The context is automatically
+* reinitialized.
+*
+* @param cc the HAVAL-160/3 context
+* @param dst the output buffer
+*/
+void sph_haval160_3_close(void *cc, void *dst);
+
+/**
+* Close a HAVAL-160/3 computation. Up to 7 extra input bits may be added
+* to the input message; these are the <code>n</code> upper bits of
+* the <code>ub</code> byte (i.e. the first extra bit has value 128 in
+* <code>ub</code>, the second extra bit has value 64, and so on). Other
+* bits in <code>ub</code> are ignored.
+*
+* The output buffer must be wide enough to accommodate the result (20
+* bytes). The context is automatically reinitialized.
+*
+* @param cc the HAVAL-160/3 context
+* @param ub the extra bits
+* @param n the number of extra bits (0 to 7)
+* @param dst the output buffer
+*/
+void sph_haval160_3_addbits_and_close(void *cc,
+unsigned ub, unsigned n, void *dst);
+
+/**
+* Initialize the context for HAVAL-160/4.
+*
+* @param cc context to initialize (pointer to a
+* <code>sph_haval160_4_context</code> structure)
+*/
+void sph_haval160_4_init(void *cc);
+
+/**
+* Process some data bytes for HAVAL-160/4. If <code>len</code> is 0,
+* then this function does nothing.
+*
+* @param cc the HAVAL-160/4 context
+* @param data the input data
+* @param len the input data length (in bytes)
+*/
+void sph_haval160_4(void *cc, const void *data, size_t len);
+
+/**
+* Close a HAVAL-160/4 computation. The output buffer must be wide
+* enough to accommodate the result (20 bytes). The context is automatically
+* reinitialized.
+*
+* @param cc the HAVAL-160/4 context
+* @param dst the output buffer
+*/
+void sph_haval160_4_close(void *cc, void *dst);
+
+/**
+* Close a HAVAL-160/4 computation. Up to 7 extra input bits may be added
+* to the input message; these are the <code>n</code> upper bits of
+* the <code>ub</code> byte (i.e. the first extra bit has value 128 in
+* <code>ub</code>, the second extra bit has value 64, and so on). Other
+* bits in <code>ub</code> are ignored.
+*
+* The output buffer must be wide enough to accommodate the result (20
+* bytes). The context is automatically reinitialized.
+*
+* @param cc the HAVAL-160/4 context
+* @param ub the extra bits
+* @param n the number of extra bits (0 to 7)
+* @param dst the output buffer
+*/
+void sph_haval160_4_addbits_and_close(void *cc,
+unsigned ub, unsigned n, void *dst);
+
+/**
+* Initialize the context for HAVAL-160/5.
+*
+* @param cc context to initialize (pointer to a
+* <code>sph_haval160_5_context</code> structure)
+*/
+void sph_haval160_5_init(void *cc);
+
+/**
+* Process some data bytes for HAVAL-160/5. If <code>len</code> is 0,
+* then this function does nothing.
+*
+* @param cc the HAVAL-160/5 context
+* @param data the input data
+* @param len the input data length (in bytes)
+*/
+void sph_haval160_5(void *cc, const void *data, size_t len);
+
+/**
+* Close a HAVAL-160/5 computation. The output buffer must be wide
+* enough to accommodate the result (20 bytes). The context is automatically
+* reinitialized.
+*
+* @param cc the HAVAL-160/5 context
+* @param dst the output buffer
+*/
+void sph_haval160_5_close(void *cc, void *dst);
+
+/**
+* Close a HAVAL-160/5 computation. Up to 7 extra input bits may be added
+* to the input message; these are the <code>n</code> upper bits of
+* the <code>ub</code> byte (i.e. the first extra bit has value 128 in
+* <code>ub</code>, the second extra bit has value 64, and so on). Other
+* bits in <code>ub</code> are ignored.
+*
+* The output buffer must be wide enough to accommodate the result (20
+* bytes). The context is automatically reinitialized.
+*
+* @param cc the HAVAL-160/5 context
+* @param ub the extra bits
+* @param n the number of extra bits (0 to 7)
+* @param dst the output buffer
+*/
+void sph_haval160_5_addbits_and_close(void *cc,
+unsigned ub, unsigned n, void *dst);
+
+/**
+* Initialize the context for HAVAL-192/3.
+*
+* @param cc context to initialize (pointer to a
+* <code>sph_haval192_3_context</code> structure)
+*/
+void sph_haval192_3_init(void *cc);
+
+/**
+* Process some data bytes for HAVAL-192/3. If <code>len</code> is 0,
+* then this function does nothing.
+*
+* @param cc the HAVAL-192/3 context
+* @param data the input data
+* @param len the input data length (in bytes)
+*/
+void sph_haval192_3(void *cc, const void *data, size_t len);
+
+/**
+* Close a HAVAL-192/3 computation. The output buffer must be wide
+* enough to accommodate the result (24 bytes). The context is automatically
+* reinitialized.
+*
+* @param cc the HAVAL-192/3 context
+* @param dst the output buffer
+*/
+void sph_haval192_3_close(void *cc, void *dst);
+
+/**
+* Close a HAVAL-192/3 computation. Up to 7 extra input bits may be added
+* to the input message; these are the <code>n</code> upper bits of
+* the <code>ub</code> byte (i.e. the first extra bit has value 128 in
+* <code>ub</code>, the second extra bit has value 64, and so on). Other
+* bits in <code>ub</code> are ignored.
+*
+* The output buffer must be wide enough to accommodate the result (24
+* bytes). The context is automatically reinitialized.
+*
+* @param cc the HAVAL-192/3 context
+* @param ub the extra bits
+* @param n the number of extra bits (0 to 7)
+* @param dst the output buffer
+*/
+void sph_haval192_3_addbits_and_close(void *cc,
+unsigned ub, unsigned n, void *dst);
+
+/**
+* Initialize the context for HAVAL-192/4.
+*
+* @param cc context to initialize (pointer to a
+* <code>sph_haval192_4_context</code> structure)
+*/
+void sph_haval192_4_init(void *cc);
+
+/**
+* Process some data bytes for HAVAL-192/4. If <code>len</code> is 0,
+* then this function does nothing.
+*
+* @param cc the HAVAL-192/4 context
+* @param data the input data
+* @param len the input data length (in bytes)
+*/
+void sph_haval192_4(void *cc, const void *data, size_t len);
+
+/**
+* Close a HAVAL-192/4 computation. The output buffer must be wide
+* enough to accommodate the result (24 bytes). The context is automatically
+* reinitialized.
+*
+* @param cc the HAVAL-192/4 context
+* @param dst the output buffer
+*/
+void sph_haval192_4_close(void *cc, void *dst);
+
+/**
+* Close a HAVAL-192/4 computation. Up to 7 extra input bits may be added
+* to the input message; these are the <code>n</code> upper bits of
+* the <code>ub</code> byte (i.e. the first extra bit has value 128 in
+* <code>ub</code>, the second extra bit has value 64, and so on). Other
+* bits in <code>ub</code> are ignored.
+*
+* The output buffer must be wide enough to accommodate the result (24
+* bytes). The context is automatically reinitialized.
+*
+* @param cc the HAVAL-192/4 context
+* @param ub the extra bits
+* @param n the number of extra bits (0 to 7)
+* @param dst the output buffer
+*/
+void sph_haval192_4_addbits_and_close(void *cc,
+unsigned ub, unsigned n, void *dst);
+
+/**
+* Initialize the context for HAVAL-192/5.
+*
+* @param cc context to initialize (pointer to a
+* <code>sph_haval192_5_context</code> structure)
+*/
+void sph_haval192_5_init(void *cc);
+
+/**
+* Process some data bytes for HAVAL-192/5. If <code>len</code> is 0,
+* then this function does nothing.
+*
+* @param cc the HAVAL-192/5 context
+* @param data the input data
+* @param len the input data length (in bytes)
+*/
+void sph_haval192_5(void *cc, const void *data, size_t len);
+
+/**
+* Close a HAVAL-192/5 computation. The output buffer must be wide
+* enough to accommodate the result (24 bytes). The context is automatically
+* reinitialized.
+*
+* @param cc the HAVAL-192/5 context
+* @param dst the output buffer
+*/
+void sph_haval192_5_close(void *cc, void *dst);
+
+/**
+* Close a HAVAL-192/5 computation. Up to 7 extra input bits may be added
+* to the input message; these are the <code>n</code> upper bits of
+* the <code>ub</code> byte (i.e. the first extra bit has value 128 in
+* <code>ub</code>, the second extra bit has value 64, and so on). Other
+* bits in <code>ub</code> are ignored.
+*
+* The output buffer must be wide enough to accommodate the result (24
+* bytes). The context is automatically reinitialized.
+*
+* @param cc the HAVAL-192/5 context
+* @param ub the extra bits
+* @param n the number of extra bits (0 to 7)
+* @param dst the output buffer
+*/
+void sph_haval192_5_addbits_and_close(void *cc,
+unsigned ub, unsigned n, void *dst);
+
+/**
+* Initialize the context for HAVAL-224/3.
+*
+* @param cc context to initialize (pointer to a
+* <code>sph_haval224_3_context</code> structure)
+*/
+void sph_haval224_3_init(void *cc);
+
+/**
+* Process some data bytes for HAVAL-224/3. If <code>len</code> is 0,
+* then this function does nothing.
+*
+* @param cc the HAVAL-224/3 context
+* @param data the input data
+* @param len the input data length (in bytes)
+*/
+void sph_haval224_3(void *cc, const void *data, size_t len);
+
+/**
+* Close a HAVAL-224/3 computation. The output buffer must be wide
+* enough to accommodate the result (28 bytes). The context is automatically
+* reinitialized.
+*
+* @param cc the HAVAL-224/3 context
+* @param dst the output buffer
+*/
+void sph_haval224_3_close(void *cc, void *dst);
+
+/**
+* Close a HAVAL-224/3 computation. Up to 7 extra input bits may be added
+* to the input message; these are the <code>n</code> upper bits of
+* the <code>ub</code> byte (i.e. the first extra bit has value 128 in
+* <code>ub</code>, the second extra bit has value 64, and so on). Other
+* bits in <code>ub</code> are ignored.
+*
+* The output buffer must be wide enough to accommodate the result (28
+* bytes). The context is automatically reinitialized.
+*
+* @param cc the HAVAL-224/3 context
+* @param ub the extra bits
+* @param n the number of extra bits (0 to 7)
+* @param dst the output buffer
+*/
+void sph_haval224_3_addbits_and_close(void *cc,
+unsigned ub, unsigned n, void *dst);
+
+/**
+* Initialize the context for HAVAL-224/4.
+*
+* @param cc context to initialize (pointer to a
+* <code>sph_haval224_4_context</code> structure)
+*/
+void sph_haval224_4_init(void *cc);
+
+/**
+* Process some data bytes for HAVAL-224/4. If <code>len</code> is 0,
+* then this function does nothing.
+*
+* @param cc the HAVAL-224/4 context
+* @param data the input data
+* @param len the input data length (in bytes)
+*/
+void sph_haval224_4(void *cc, const void *data, size_t len);
+
+/**
+* Close a HAVAL-224/4 computation. The output buffer must be wide
+* enough to accommodate the result (28 bytes). The context is automatically
+* reinitialized.
+*
+* @param cc the HAVAL-224/4 context
+* @param dst the output buffer
+*/
+void sph_haval224_4_close(void *cc, void *dst);
+
+/**
+* Close a HAVAL-224/4 computation. Up to 7 extra input bits may be added
+* to the input message; these are the <code>n</code> upper bits of
+* the <code>ub</code> byte (i.e. the first extra bit has value 128 in
+* <code>ub</code>, the second extra bit has value 64, and so on). Other
+* bits in <code>ub</code> are ignored.
+*
+* The output buffer must be wide enough to accommodate the result (28
+* bytes). The context is automatically reinitialized.
+*
+* @param cc the HAVAL-224/4 context
+* @param ub the extra bits
+* @param n the number of extra bits (0 to 7)
+* @param dst the output buffer
+*/
+void sph_haval224_4_addbits_and_close(void *cc,
+unsigned ub, unsigned n, void *dst);
+
+/**
+* Initialize the context for HAVAL-224/5.
+*
+* @param cc context to initialize (pointer to a
+* <code>sph_haval224_5_context</code> structure)
+*/
+void sph_haval224_5_init(void *cc);
+
+/**
+* Process some data bytes for HAVAL-224/5. If <code>len</code> is 0,
+* then this function does nothing.
+*
+* @param cc the HAVAL-224/5 context
+* @param data the input data
+* @param len the input data length (in bytes)
+*/
+void sph_haval224_5(void *cc, const void *data, size_t len);
+
+/**
+* Close a HAVAL-224/5 computation. The output buffer must be wide
+* enough to accommodate the result (28 bytes). The context is automatically
+* reinitialized.
+*
+* @param cc the HAVAL-224/5 context
+* @param dst the output buffer
+*/
+void sph_haval224_5_close(void *cc, void *dst);
+
+/**
+* Close a HAVAL-224/5 computation. Up to 7 extra input bits may be added
+* to the input message; these are the <code>n</code> upper bits of
+* the <code>ub</code> byte (i.e. the first extra bit has value 128 in
+* <code>ub</code>, the second extra bit has value 64, and so on). Other
+* bits in <code>ub</code> are ignored.
+*
+* The output buffer must be wide enough to accommodate the result (28
+* bytes). The context is automatically reinitialized.
+*
+* @param cc the HAVAL-224/5 context
+* @param ub the extra bits
+* @param n the number of extra bits (0 to 7)
+* @param dst the output buffer
+*/
+void sph_haval224_5_addbits_and_close(void *cc,
+unsigned ub, unsigned n, void *dst);
+
+/**
+* Initialize the context for HAVAL-256/3.
+*
+* @param cc context to initialize (pointer to a
+* <code>sph_haval256_3_context</code> structure)
+*/
+void sph_haval256_3_init(void *cc);
+
+/**
+* Process some data bytes for HAVAL-256/3. If <code>len</code> is 0,
+* then this function does nothing.
+*
+* @param cc the HAVAL-256/3 context
+* @param data the input data
+* @param len the input data length (in bytes)
+*/
+void sph_haval256_3(void *cc, const void *data, size_t len);
+
+/**
+* Close a HAVAL-256/3 computation. The output buffer must be wide
+* enough to accommodate the result (32 bytes). The context is automatically
+* reinitialized.
+*
+* @param cc the HAVAL-256/3 context
+* @param dst the output buffer
+*/
+void sph_haval256_3_close(void *cc, void *dst);
+
+/**
+* Close a HAVAL-256/3 computation. Up to 7 extra input bits may be added
+* to the input message; these are the <code>n</code> upper bits of
+* the <code>ub</code> byte (i.e. the first extra bit has value 128 in
+* <code>ub</code>, the second extra bit has value 64, and so on). Other
+* bits in <code>ub</code> are ignored.
+*
+* The output buffer must be wide enough to accommodate the result (32
+* bytes). The context is automatically reinitialized.
+*
+* @param cc the HAVAL-256/3 context
+* @param ub the extra bits
+* @param n the number of extra bits (0 to 7)
+* @param dst the output buffer
+*/
+void sph_haval256_3_addbits_and_close(void *cc,
+unsigned ub, unsigned n, void *dst);
+
+/**
+* Initialize the context for HAVAL-256/4.
+*
+* @param cc context to initialize (pointer to a
+* <code>sph_haval256_4_context</code> structure)
+*/
+void sph_haval256_4_init(void *cc);
+
+/**
+* Process some data bytes for HAVAL-256/4. If <code>len</code> is 0,
+* then this function does nothing.
+*
+* @param cc the HAVAL-256/4 context
+* @param data the input data
+* @param len the input data length (in bytes)
+*/
+void sph_haval256_4(void *cc, const void *data, size_t len);
+
+/**
+* Close a HAVAL-256/4 computation. The output buffer must be wide
+* enough to accommodate the result (32 bytes). The context is automatically
+* reinitialized.
+*
+* @param cc the HAVAL-256/4 context
+* @param dst the output buffer
+*/
+void sph_haval256_4_close(void *cc, void *dst);
+
+/**
+* Close a HAVAL-256/4 computation. Up to 7 extra input bits may be added
+* to the input message; these are the <code>n</code> upper bits of
+* the <code>ub</code> byte (i.e. the first extra bit has value 128 in
+* <code>ub</code>, the second extra bit has value 64, and so on). Other
+* bits in <code>ub</code> are ignored.
+*
+* The output buffer must be wide enough to accommodate the result (32
+* bytes). The context is automatically reinitialized.
+*
+* @param cc the HAVAL-256/4 context
+* @param ub the extra bits
+* @param n the number of extra bits (0 to 7)
+* @param dst the output buffer
+*/
+void sph_haval256_4_addbits_and_close(void *cc,
+unsigned ub, unsigned n, void *dst);
+
+/**
+* Initialize the context for HAVAL-256/5.
+*
+* @param cc context to initialize (pointer to a
+* <code>sph_haval256_5_context</code> structure)
+*/
+void sph_haval256_5_init(void *cc);
+
+/**
+* Process some data bytes for HAVAL-256/5. If <code>len</code> is 0,
+* then this function does nothing.
+*
+* @param cc the HAVAL-256/5 context
+* @param data the input data
+* @param len the input data length (in bytes)
+*/
+void sph_haval256_5(void *cc, const void *data, size_t len);
+
+/**
+* Close a HAVAL-256/5 computation. The output buffer must be wide
+* enough to accommodate the result (32 bytes). The context is automatically
+* reinitialized.
+*
+* @param cc the HAVAL-256/5 context
+* @param dst the output buffer
+*/
+void sph_haval256_5_close(void *cc, void *dst);
+
+/**
+* Close a HAVAL-256/5 computation. Up to 7 extra input bits may be added
+* to the input message; these are the <code>n</code> upper bits of
+* the <code>ub</code> byte (i.e. the first extra bit has value 128 in
+* <code>ub</code>, the second extra bit has value 64, and so on). Other
+* bits in <code>ub</code> are ignored.
+*
+* The output buffer must be wide enough to accommodate the result (32
+* bytes). The context is automatically reinitialized.
+*
+* @param cc the HAVAL-256/5 context
+* @param ub the extra bits
+* @param n the number of extra bits (0 to 7)
+* @param dst the output buffer
+*/
+void sph_haval256_5_addbits_and_close(void *cc,
+unsigned ub, unsigned n, void *dst);
+
+/**
+* Apply the HAVAL compression function on the provided data. The
+* <code>msg</code> parameter contains the 32 32-bit input blocks,
+* as numerical values (hence after the little-endian decoding). The
+* <code>val</code> parameter contains the 8 32-bit input blocks for
+* the compression function; the output is written in place in this
+* array. This function uses three internal passes.
+*
+* @param msg the message block (32 values)
+* @param val the function 256-bit input and output
+*/
+void sph_haval_3_comp(const sph_u32 msg[32], sph_u32 val[8]);
+
+/**
+* Apply the HAVAL compression function on the provided data. The
+* <code>msg</code> parameter contains the 32 32-bit input blocks,
+* as numerical values (hence after the little-endian decoding). The
+* <code>val</code> parameter contains the 8 32-bit input blocks for
+* the compression function; the output is written in place in this
+* array. This function uses four internal passes.
+*
+* @param msg the message block (32 values)
+* @param val the function 256-bit input and output
+*/
+void sph_haval_4_comp(const sph_u32 msg[32], sph_u32 val[8]);
+
+/**
+* Apply the HAVAL compression function on the provided data. The
+* <code>msg</code> parameter contains the 32 32-bit input blocks,
+* as numerical values (hence after the little-endian decoding). The
+* <code>val</code> parameter contains the 8 32-bit input blocks for
+* the compression function; the output is written in place in this
+* array. This function uses five internal passes.
+*
+* @param msg the message block (32 values)
+* @param val the function 256-bit input and output
+*/
+void sph_haval_5_comp(const sph_u32 msg[32], sph_u32 val[8]);
+
+#endif
diff --git a/sha3/sph_types.h b/sha3/sph_types.h
index 7295b0b37..6c8ecf4b6 100644
--- a/sha3/sph_types.h
+++ b/sha3/sph_types.h
@@ -930,14 +930,24 @@ typedef long long sph_s64;
  */
 
 #define SPH_T32(x)    ((x) & SPH_C32(0xFFFFFFFF))
+#ifdef _MSC_VER /* MSVC build: 32-bit rotations map to _rotl/_rotr */
+#define SPH_ROTL32(x, n)   _rotl(x, n)
+#define SPH_ROTR32(x, n)   _rotr(x, n)
+#else /* other compilers: rotation built from shifts, masked by SPH_T32 */
 #define SPH_ROTL32(x, n)   SPH_T32(((x) << (n)) | ((x) >> (32 - (n))))
 #define SPH_ROTR32(x, n)   SPH_ROTL32(x, (32 - (n)))
+#endif /* _MSC_VER */
 
 #if SPH_64
 
 #define SPH_T64(x)    ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF))
+#ifdef _MSC_VER /* MSVC build: 64-bit rotations map to _rotl64/_rotr64 */
+#define SPH_ROTL64(x, n)   _rotl64(x, n)
+#define SPH_ROTR64(x, n)   _rotr64(x, n)
+#else /* other compilers: rotation built from shifts, masked by SPH_T64 */
 #define SPH_ROTL64(x, n)   SPH_T64(((x) << (n)) | ((x) >> (64 - (n))))
 #define SPH_ROTR64(x, n)   SPH_ROTL64(x, (64 - (n)))
+#endif /* _MSC_VER */
 
 #endif
 
diff --git a/util.c b/util.c
index 362b9f8f1..afef6fd82 100644
--- a/util.c
+++ b/util.c
@@ -1841,7 +1841,7 @@ static bool stratum_benchdata(json_t *result, json_t *params, int thr_id)
 #endif
 
 #ifdef _MSC_VER
-	sprintf(compiler, "VC++ %d\n", _MSC_VER / 100);
+	sprintf(compiler, "MSVC %d\n", msver());
 #elif defined(__clang__)
 	sprintf(compiler, "clang %s\n", __clang_version__);
 #elif defined(__GNUC__)