Skip to content

Commit

Permalink
Shabal Optimisations
Browse files Browse the repository at this point in the history
  • Loading branch information
JohnnyFFM committed Sep 14, 2018
1 parent 21a176a commit 69419d4
Show file tree
Hide file tree
Showing 6 changed files with 14 additions and 14 deletions.
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "scavenger"
version = "1.2.0"
version = "1.3.0"
license = "GPL-3.0"
authors = ["PoC Consortium <[email protected]>"]
description = """
Expand Down
6 changes: 3 additions & 3 deletions src/c/mshabal_128_avx.c
Original file line number Diff line number Diff line change
Expand Up @@ -578,9 +578,9 @@ static void simd128_avx_mshabal_compress_fast(mshabal_context_fast* sc, void* u1
}

// transfer results to ram
for (j = 0; j < 12; j++) _mm_storeu_si128((__m128i*)sc->state + j, A[j]);
for (j = 0; j < 16; j++) {
_mm_storeu_si128((__m128i*)sc->state + j + 12, B[j]);
//for (j = 0; j < 12; j++) _mm_storeu_si128((__m128i*)sc->state + j, A[j]);
for (j = 8; j < 10; j++) {
// _mm_storeu_si128((__m128i*)sc->state + j + 12, B[j]);
_mm_storeu_si128((__m128i*)sc->state + j + 28, C[j]);
}
}
Expand Down
6 changes: 3 additions & 3 deletions src/c/mshabal_128_sse2.c
Original file line number Diff line number Diff line change
Expand Up @@ -588,9 +588,9 @@ static void simd128_sse2_mshabal_compress_fast(mshabal_context_fast* sc, void* u
}

// transfer results to ram
for (j = 0; j < 12; j++) _mm_storeu_si128((__m128i*)sc->state + j, A[j]);
for (j = 0; j < 16; j++) {
_mm_storeu_si128((__m128i*)sc->state + j + 12, B[j]);
//for (j = 0; j < 12; j++) _mm_storeu_si128((__m128i*)sc->state + j, A[j]);
for (j = 8; j < 10; j++) {
// _mm_storeu_si128((__m128i*)sc->state + j + 12, B[j]);
_mm_storeu_si128((__m128i*)sc->state + j + 28, C[j]);
}
}
Expand Down
6 changes: 3 additions & 3 deletions src/c/mshabal_256_avx2.c
Original file line number Diff line number Diff line change
Expand Up @@ -638,9 +638,9 @@ static void simd256_mshabal_compress_fast(mshabal256_context_fast* sc, void* u1,
}

// transfer results to ram
for (j = 0; j < 12; j++) _mm256_storeu_si256((__m256i*)sc->state + j, A[j]);
for (j = 0; j < 16; j++) {
_mm256_storeu_si256((__m256i*)sc->state + j + 12, B[j]);
//for (j = 0; j < 12; j++) _mm256_storeu_si256((__m256i*)sc->state + j, A[j]);
for (j = 8; j < 10; j++) {
//_mm256_storeu_si256((__m256i*)sc->state + j + 12, B[j]);
_mm256_storeu_si256((__m256i*)sc->state + j + 28, C[j]);
}
}
Expand Down
6 changes: 3 additions & 3 deletions src/c/mshabal_512_avx512f.c
Original file line number Diff line number Diff line change
Expand Up @@ -737,9 +737,9 @@ static void simd512_mshabal_compress_fast(mshabal512_context_fast *sc, void *u1,
}

// transfer results to ram
for (j = 0; j < 12; j++) _mm512_storeu_si512((__m512i *)sc->state + j, A[j]);
for (j = 0; j < 16; j++) {
_mm512_storeu_si512((__m512i *)sc->state + j + 12, B[j]);
//for (j = 0; j < 12; j++) _mm512_storeu_si512((__m512i *)sc->state + j, A[j]);
for (j = 8; j < 10; j++) {
//_mm512_storeu_si512((__m512i *)sc->state + j + 12, B[j]);
_mm512_storeu_si512((__m512i *)sc->state + j + 28, C[j]);
}
}
Expand Down

0 comments on commit 69419d4

Please sign in to comment.