Skip to content

Commit

Permalink
Fix NEON errors
Browse files Browse the repository at this point in the history
also add cmpge/cmple
  • Loading branch information
tcdude committed Apr 11, 2022
1 parent 483d806 commit 52a7563
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 5 deletions.
64 changes: 61 additions & 3 deletions Sources/kinc/simd/int8x16.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#include "types.h"

/*! \file int8x16.h
\brief Provides 128bit sixteen-element signed 8-bit integer SIMD operations which are mapped to equivalent SSE2 or Neon operations.
*/

#ifdef __cplusplus
Expand Down Expand Up @@ -52,10 +52,18 @@ static inline kinc_int8x16_mask_t kinc_int8x16_cmpeq(kinc_int8x16_t a, kinc_int8
return _mm_cmpeq_epi8(a, b);
}

/*! \brief Lane-wise "a >= b" comparison (SSE2 path).
    The result is assembled as (a > b) OR (a == b) from the two
    individual lane compares.
    \param a Left-hand operand.
    \param b Right-hand operand.
    \return Mask marking the lanes in which a is greater than or equal to b.
*/
static inline kinc_int8x16_mask_t kinc_int8x16_cmpge(kinc_int8x16_t a, kinc_int8x16_t b) {
	const kinc_int8x16_mask_t greater = _mm_cmpgt_epi8(a, b);
	const kinc_int8x16_mask_t equal = _mm_cmpeq_epi8(a, b);
	return _mm_or_si128(greater, equal);
}

/*! \brief Lane-wise "a > b" comparison (SSE2 path).
    \param a Left-hand operand.
    \param b Right-hand operand.
    \return Mask marking the lanes in which a is greater than b.
*/
static inline kinc_int8x16_mask_t kinc_int8x16_cmpgt(kinc_int8x16_t a, kinc_int8x16_t b) {
	const kinc_int8x16_mask_t greater = _mm_cmpgt_epi8(a, b);
	return greater;
}

/*! \brief Lane-wise "a <= b" comparison (SSE2 path).
    The result is assembled as (a < b) OR (a == b) from the two
    individual lane compares.
    \param a Left-hand operand.
    \param b Right-hand operand.
    \return Mask marking the lanes in which a is less than or equal to b.
*/
static inline kinc_int8x16_mask_t kinc_int8x16_cmple(kinc_int8x16_t a, kinc_int8x16_t b) {
	const kinc_int8x16_mask_t less = _mm_cmplt_epi8(a, b);
	const kinc_int8x16_mask_t equal = _mm_cmpeq_epi8(a, b);
	return _mm_or_si128(less, equal);
}

/*! \brief Lane-wise "a < b" comparison (SSE2 path).
    \param a Left-hand operand.
    \param b Right-hand operand.
    \return Mask marking the lanes in which a is less than b.
*/
static inline kinc_int8x16_mask_t kinc_int8x16_cmplt(kinc_int8x16_t a, kinc_int8x16_t b) {
	const kinc_int8x16_mask_t less = _mm_cmplt_epi8(a, b);
	return less;
}
Expand Down Expand Up @@ -90,7 +98,7 @@ static inline kinc_int8x16_t kinc_int8x16_not(kinc_int8x16_t t) {
#elif defined(KINC_NEON)

/*! \brief Builds a vector from sixteen signed 8-bit values (NEON path).
    \param values Array supplying the sixteen elements, listed in index order 0..15.
    \return Vector initialised from values[0] through values[15].
*/
static inline kinc_int8x16_t kinc_int8x16_load(const int8_t values[16]) {
	// `values` is a pointer here; casting it straight to the vector type does
	// not read the sixteen bytes, so every element is listed explicitly.
	return (kinc_int8x16_t){values[0], values[1], values[2],  values[3],  values[4],  values[5],  values[6],  values[7],
	                        values[8], values[9], values[10], values[11], values[12], values[13], values[14], values[15]};
}

static inline kinc_int8x16_t kinc_int8x16_load_all(int8_t t) {
Expand Down Expand Up @@ -121,16 +129,24 @@ static inline kinc_int8x16_mask_t kinc_int8x16_cmpeq(kinc_int8x16_t a, kinc_int8
return vceqq_s8(a, b);
}

/*! \brief Lane-wise "a >= b" comparison (NEON path, via vcgeq_s8).
    \param a Left-hand operand.
    \param b Right-hand operand.
    \return Mask marking the lanes in which a is greater than or equal to b.
*/
static inline kinc_int8x16_mask_t kinc_int8x16_cmpge(kinc_int8x16_t a, kinc_int8x16_t b) {
	const kinc_int8x16_mask_t greater_or_equal = vcgeq_s8(a, b);
	return greater_or_equal;
}

/*! \brief Lane-wise "a > b" comparison (NEON path, via vcgtq_s8).
    \param a Left-hand operand.
    \param b Right-hand operand.
    \return Mask marking the lanes in which a is greater than b.
*/
static inline kinc_int8x16_mask_t kinc_int8x16_cmpgt(kinc_int8x16_t a, kinc_int8x16_t b) {
	const kinc_int8x16_mask_t greater = vcgtq_s8(a, b);
	return greater;
}

/*! \brief Lane-wise "a <= b" comparison (NEON path, via vcleq_s8).
    \param a Left-hand operand.
    \param b Right-hand operand.
    \return Mask marking the lanes in which a is less than or equal to b.
*/
static inline kinc_int8x16_mask_t kinc_int8x16_cmple(kinc_int8x16_t a, kinc_int8x16_t b) {
	const kinc_int8x16_mask_t less_or_equal = vcleq_s8(a, b);
	return less_or_equal;
}

/*! \brief Lane-wise "a < b" comparison (NEON path, via vcltq_s8).
    \param a Left-hand operand.
    \param b Right-hand operand.
    \return Mask marking the lanes in which a is less than b.
*/
static inline kinc_int8x16_mask_t kinc_int8x16_cmplt(kinc_int8x16_t a, kinc_int8x16_t b) {
	const kinc_int8x16_mask_t less = vcltq_s8(a, b);
	return less;
}

/*! \brief Lane-wise "a != b" comparison (NEON path).
    Computed as the bitwise complement of the equality mask.
    \param a Left-hand operand.
    \param b Right-hand operand.
    \return Mask marking the lanes in which a differs from b.
*/
static inline kinc_int8x16_mask_t kinc_int8x16_cmpneq(kinc_int8x16_t a, kinc_int8x16_t b) {
	// The equality mask produced by vceqq_s8 has 8-bit lanes, so it is
	// inverted with the matching 8-bit variant rather than vmvnq_u32.
	return vmvnq_u8(vceqq_s8(a, b));
}

static inline kinc_int8x16_t kinc_int8x16_sel(kinc_int8x16_t a, kinc_int8x16_t b, kinc_int8x16_mask_t mask) {
Expand Down Expand Up @@ -306,6 +322,27 @@ static inline kinc_int8x16_mask_t kinc_int8x16_cmpeq(kinc_int8x16_t a, kinc_int8
return mask;
}

/*! \brief Lane-wise "a >= b" comparison (scalar fallback path).
    \param a Left-hand operand.
    \param b Right-hand operand.
    \return Mask whose element i is 0xff when a.values[i] >= b.values[i] and 0 otherwise.
*/
static inline kinc_int8x16_mask_t kinc_int8x16_cmpge(kinc_int8x16_t a, kinc_int8x16_t b) {
	kinc_int8x16_mask_t mask;
	// Walk all sixteen lanes; each one is decided independently of the others.
	for (int lane = 0; lane < 16; ++lane) {
		mask.values[lane] = a.values[lane] >= b.values[lane] ? 0xff : 0;
	}
	return mask;
}

static inline kinc_int8x16_mask_t kinc_int8x16_cmpgt(kinc_int8x16_t a, kinc_int8x16_t b) {
kinc_int8x16_mask_t mask;
mask.values[0] = a.values[0] > b.values[0] ? 0xff : 0;
Expand All @@ -327,6 +364,27 @@ static inline kinc_int8x16_mask_t kinc_int8x16_cmpgt(kinc_int8x16_t a, kinc_int8
return mask;
}

/*! \brief Lane-wise "a <= b" comparison (scalar fallback path).
    \param a Left-hand operand.
    \param b Right-hand operand.
    \return Mask whose element i is 0xff when a.values[i] <= b.values[i] and 0 otherwise.
*/
static inline kinc_int8x16_mask_t kinc_int8x16_cmple(kinc_int8x16_t a, kinc_int8x16_t b) {
	kinc_int8x16_mask_t mask;
	// Walk all sixteen lanes; each one is decided independently of the others.
	for (int lane = 0; lane < 16; ++lane) {
		mask.values[lane] = a.values[lane] <= b.values[lane] ? 0xff : 0;
	}
	return mask;
}

static inline kinc_int8x16_mask_t kinc_int8x16_cmplt(kinc_int8x16_t a, kinc_int8x16_t b) {
kinc_int8x16_mask_t mask;
mask.values[0] = a.values[0] < b.values[0] ? 0xff : 0;
Expand Down
16 changes: 14 additions & 2 deletions Tests/SIMD/Sources/simd.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@

#define EPSILON 0.00001f

// Running count of checks performed; incremented by each check_f32/check_i8 call
// and reported in the final summary log line.
static int total_tests = 0;

static bool check_f32(const char *name, kinc_float32x4_t result, const float expected[4], float epsilon) {
++total_tests;
bool success = true;
for (int i = 0; i < 4; ++i) {
if (kinc_abs(kinc_float32x4_get(result, i) - expected[i]) > epsilon) {
Expand All @@ -23,6 +26,7 @@ static bool check_f32(const char *name, kinc_float32x4_t result, const float exp
}

static bool check_i8(const char *name, kinc_int8x16_t result, const int8_t expected[16]) {
++total_tests;
bool success = true;
for (int i = 0; i < 16; ++i) {
if (kinc_int8x16_get(result, i) != expected[i]) {
Expand Down Expand Up @@ -144,10 +148,18 @@ int kickstart(int argc, char **argv) {
result = kinc_int8x16_sel(a, b, mask);
failed += check_i8("int8x16 cmpeq & sel", result, (int8_t[16]){2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}) ? 0 : 1;

mask = kinc_int8x16_cmpge(a, b);
result = kinc_int8x16_sel(a, b, mask);
failed += check_i8("int8x16 cmpge & sel", result, (int8_t[16]){2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 4, 5, 6, 7, 8}) ? 0 : 1;

mask = kinc_int8x16_cmpgt(a, b);
result = kinc_int8x16_sel(a, b, mask);
failed += check_i8("int8x16 cmpgt & sel", result, (int8_t[16]){2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 4, 5, 6, 7, 8}) ? 0 : 1;

mask = kinc_int8x16_cmple(a, b);
result = kinc_int8x16_sel(a, b, mask);
failed += check_i8("int8x16 cmple & sel", result, (int8_t[16]){-8, -7, -6, -5, -4, -3, -2, -1, 1, 2, 2, 2, 2, 2, 2, 2}) ? 0 : 1;

mask = kinc_int8x16_cmplt(a, b);
result = kinc_int8x16_sel(a, b, mask);
failed += check_i8("int8x16 cmplt & sel", result, (int8_t[16]){-8, -7, -6, -5, -4, -3, -2, -1, 1, 2, 2, 2, 2, 2, 2, 2}) ? 0 : 1;
Expand All @@ -170,10 +182,10 @@ int kickstart(int argc, char **argv) {
}

if (failed) {
kinc_log(KINC_LOG_LEVEL_ERROR, "\nERROR! %d test(s) failed", failed);
kinc_log(KINC_LOG_LEVEL_ERROR, "\nERROR! %d of %d test(s) failed", failed, total_tests);
}
else {
kinc_log(KINC_LOG_LEVEL_INFO, "\nSUCCESS all tests passed");
kinc_log(KINC_LOG_LEVEL_INFO, "\nSUCCESS %d tests passed", total_tests);
}

return failed;
Expand Down

0 comments on commit 52a7563

Please sign in to comment.