Skip to content

Commit

Permalink
Don't use saturated add/sub
Browse files (browse the repository at this point in the history)
  • Loading branch information
tcdude committed Apr 12, 2022
1 parent 08275d4 commit c4739b6
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 16 deletions.
4 changes: 2 additions & 2 deletions Sources/kinc/simd/uint8x16.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,11 @@ static inline uint8_t kinc_uint8x16_get(kinc_uint8x16_t t, int index) {
}

/*
 * Adds the two kinc_uint8x16_t vectors a and b with an SSE2 intrinsic and returns the result.
 *
 * NOTE(review): this listing is a rendered diff with the +/- markers stripped. The two
 * return statements below are presumably the removed line followed by its replacement
 * (the commit is titled "Don't use saturated add/sub"); only one of them exists in the
 * real header at any given revision.
 */
static inline kinc_uint8x16_t kinc_uint8x16_add(kinc_uint8x16_t a, kinc_uint8x16_t b) {
/* Old line (removed by this commit): the saturating variant. */
return _mm_adds_epu8(a, b);
/* New line (added by this commit): the non-saturating variant. */
return _mm_add_epi8(a, b);
}

/*
 * Subtracts vector b from vector a (both kinc_uint8x16_t) with an SSE2 intrinsic and
 * returns the result.
 *
 * NOTE(review): this listing is a rendered diff with the +/- markers stripped. The two
 * return statements below are presumably the removed line followed by its replacement
 * (the commit is titled "Don't use saturated add/sub"); only one of them exists in the
 * real header at any given revision.
 */
static inline kinc_uint8x16_t kinc_uint8x16_sub(kinc_uint8x16_t a, kinc_uint8x16_t b) {
/* Old line (removed by this commit): the saturating variant. */
return _mm_subs_epu8(a, b);
/* New line (added by this commit): the non-saturating variant. The updated test in the
 * same commit expects 1 - 2 to yield 255, i.e. the result wraps around instead of
 * stopping at 0. */
return _mm_sub_epi8(a, b);
}

static inline kinc_uint8x16_t kinc_uint8x16_max(kinc_uint8x16_t a, kinc_uint8x16_t b) {
Expand Down
28 changes: 14 additions & 14 deletions Tests/SIMD/Sources/simd.c
Original file line number Diff line number Diff line change
Expand Up @@ -206,59 +206,59 @@ int kickstart(int argc, char **argv) {


{
kinc_uint8x16_t a = kinc_uint8x16_load((uint8_t[16]){4, 2, 3, 4, 5, 6, 7, 8, 4, 2, 3, 4, 5, 6, 7, 8});
kinc_uint8x16_t a = kinc_uint8x16_load((uint8_t[16]){1, 2, 3, 4, 5, 6, 7, 8, 4, 2, 3, 4, 5, 6, 7, 8});
kinc_uint8x16_t b = kinc_uint8x16_load_all(2);

kinc_uint8x16_mask_t mask;
kinc_uint8x16_t result;

result = kinc_uint8x16_add(a, b);
failed += check_u8("uint8x16 add", result, (uint8_t[16]){6, 4, 5, 6, 7, 8, 9, 10, 6, 4, 5, 6, 7, 8, 9, 10}) ? 0 : 1;
failed += check_u8("uint8x16 add", result, (uint8_t[16]){3, 4, 5, 6, 7, 8, 9, 10, 6, 4, 5, 6, 7, 8, 9, 10}) ? 0 : 1;

result = kinc_uint8x16_sub(a, b);
failed += check_u8("uint8x16 sub", result, (uint8_t[16]){2, 0, 1, 2, 3, 4, 5, 6, 2, 0, 1, 2, 3, 4, 5, 6}) ? 0 : 1;
failed += check_u8("uint8x16 sub", result, (uint8_t[16]){255, 0, 1, 2, 3, 4, 5, 6, 2, 0, 1, 2, 3, 4, 5, 6}) ? 0 : 1;

result = kinc_uint8x16_max(a, b);
failed += check_u8("uint8x16 max", result, (uint8_t[16]){4, 2, 3, 4, 5, 6, 7, 8, 4, 2, 3, 4, 5, 6, 7, 8}) ? 0 : 1;
failed += check_u8("uint8x16 max", result, (uint8_t[16]){2, 2, 3, 4, 5, 6, 7, 8, 4, 2, 3, 4, 5, 6, 7, 8}) ? 0 : 1;

result = kinc_uint8x16_min(a, b);
failed += check_u8("uint8x16 min", result, (uint8_t[16]){2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}) ? 0 : 1;
failed += check_u8("uint8x16 min", result, (uint8_t[16]){1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}) ? 0 : 1;

mask = kinc_uint8x16_cmpeq(a, b);
result = kinc_uint8x16_sel(a, b, mask);
failed += check_u8("uint8x16 cmpeq & sel", result, (uint8_t[16]){2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}) ? 0 : 1;

mask = kinc_uint8x16_cmpge(a, b);
result = kinc_uint8x16_sel(a, b, mask);
failed += check_u8("uint8x16 cmpge & sel", result, (uint8_t[16]){4, 2, 3, 4, 5, 6, 7, 8, 4, 2, 3, 4, 5, 6, 7, 8}) ? 0 : 1;
failed += check_u8("uint8x16 cmpge & sel", result, (uint8_t[16]){2, 2, 3, 4, 5, 6, 7, 8, 4, 2, 3, 4, 5, 6, 7, 8}) ? 0 : 1;

mask = kinc_uint8x16_cmpgt(a, b);
result = kinc_uint8x16_sel(a, b, mask);
failed += check_u8("uint8x16 cmpgt & sel", result, (uint8_t[16]){4, 2, 3, 4, 5, 6, 7, 8, 4, 2, 3, 4, 5, 6, 7, 8}) ? 0 : 1;
failed += check_u8("uint8x16 cmpgt & sel", result, (uint8_t[16]){2, 2, 3, 4, 5, 6, 7, 8, 4, 2, 3, 4, 5, 6, 7, 8}) ? 0 : 1;

mask = kinc_uint8x16_cmple(a, b);
result = kinc_uint8x16_sel(a, b, mask);
failed += check_u8("uint8x16 cmple & sel", result, (uint8_t[16]){2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}) ? 0 : 1;
failed += check_u8("uint8x16 cmple & sel", result, (uint8_t[16]){1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}) ? 0 : 1;

mask = kinc_uint8x16_cmplt(a, b);
result = kinc_uint8x16_sel(a, b, mask);
failed += check_u8("uint8x16 cmplt & sel", result, (uint8_t[16]){2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}) ? 0 : 1;
failed += check_u8("uint8x16 cmplt & sel", result, (uint8_t[16]){1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}) ? 0 : 1;

mask = kinc_uint8x16_cmpneq(a, b);
result = kinc_uint8x16_sel(a, b, mask);
failed += check_u8("uint8x16 cmpneq & sel", result, (uint8_t[16]){4, 2, 3, 4, 5, 6, 7, 8, 4, 2, 3, 4, 5, 6, 7, 8}) ? 0 : 1;
failed += check_u8("uint8x16 cmpneq & sel", result, (uint8_t[16]){1, 2, 3, 4, 5, 6, 7, 8, 4, 2, 3, 4, 5, 6, 7, 8}) ? 0 : 1;

result = kinc_uint8x16_or(a, b);
failed += check_u8("uint8x16 or", result, (uint8_t[16]){4 | 2, 2 | 2, 3 | 2, 4 | 2, 5 | 2, 6 | 2, 7 | 2, 8 | 2, 4 | 2, 2 | 2, 3 | 2, 4 | 2, 5 | 2, 6 | 2, 7 | 2, 8 | 2}) ? 0 : 1;
failed += check_u8("uint8x16 or", result, (uint8_t[16]){1 | 2, 2 | 2, 3 | 2, 4 | 2, 5 | 2, 6 | 2, 7 | 2, 8 | 2, 4 | 2, 2 | 2, 3 | 2, 4 | 2, 5 | 2, 6 | 2, 7 | 2, 8 | 2}) ? 0 : 1;

result = kinc_uint8x16_and(a, b);
failed += check_u8("uint8x16 and", result, (uint8_t[16]){4 & 2, 2 & 2, 3 & 2, 4 & 2, 5 & 2, 6 & 2, 7 & 2, 8 & 2, 4 & 2, 2 & 2, 3 & 2, 4 & 2, 5 & 2, 6 & 2, 7 & 2, 8 & 2}) ? 0 : 1;
failed += check_u8("uint8x16 and", result, (uint8_t[16]){1 & 2, 2 & 2, 3 & 2, 4 & 2, 5 & 2, 6 & 2, 7 & 2, 8 & 2, 4 & 2, 2 & 2, 3 & 2, 4 & 2, 5 & 2, 6 & 2, 7 & 2, 8 & 2}) ? 0 : 1;

result = kinc_uint8x16_xor(a, b);
failed += check_u8("uint8x16 xor", result, (uint8_t[16]){4 ^ 2, 2 ^ 2, 3 ^ 2, 4 ^ 2, 5 ^ 2, 6 ^ 2, 7 ^ 2, 8 ^ 2, 4 ^ 2, 2 ^ 2, 3 ^ 2, 4 ^ 2, 5 ^ 2, 6 ^ 2, 7 ^ 2, 8 ^ 2}) ? 0 : 1;
failed += check_u8("uint8x16 xor", result, (uint8_t[16]){1 ^ 2, 2 ^ 2, 3 ^ 2, 4 ^ 2, 5 ^ 2, 6 ^ 2, 7 ^ 2, 8 ^ 2, 4 ^ 2, 2 ^ 2, 3 ^ 2, 4 ^ 2, 5 ^ 2, 6 ^ 2, 7 ^ 2, 8 ^ 2}) ? 0 : 1;

result = kinc_uint8x16_not(a);
uint8_t chk[16] = {4, 2, 3, 4, 5, 6, 7, 8, 4, 2, 3, 4, 5, 6, 7, 8};
uint8_t chk[16] = {1, 2, 3, 4, 5, 6, 7, 8, 4, 2, 3, 4, 5, 6, 7, 8};
for (int i = 0; i < 16; ++i) chk[i] = (uint8_t)(~chk[i]);
failed += check_u8("uint8x16 not", result, chk) ? 0 : 1;
}
Expand Down

0 comments on commit c4739b6

Please sign in to comment.