Skip to content

Commit

Permalink
Fix unsupported intrinsics for int64 (#18)
Browse files Browse the repository at this point in the history
  • Loading branch information
Menooker authored Jul 11, 2024
1 parent 85ce1aa commit 52a6fd3
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 14 deletions.
2 changes: 1 addition & 1 deletion cpp/KunSIMD/cpu/cast.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ INLINE vec_s32x8 fast_cast(vec_f32x8 v) {
return cast<vec_s32x8>(v);
}

#ifndef __AVX512F__
#if !defined(__AVX512DQ__) || !defined(__AVX512VL__)

// https://stackoverflow.com/a/77376595
// Only works for inputs in the range: [-2^51, 2^51]
Expand Down
28 changes: 15 additions & 13 deletions cpp/KunSIMD/cpu/s64x4.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,25 @@ INLINE vec_s64x4 operator-(vec_s64x4 const &a) {
return _mm256_sub_epi64(_mm256_setzero_si256(), a.v);
}

#if defined(__AVX512DQ__) && defined(__AVX512VL__)

/// Lane-wise multiplication of two vectors of four signed 64-bit integers.
///
/// Each result lane holds the low 64 bits of the corresponding product.
/// Relies on _mm256_mullo_epi64, so it is only compiled inside the enclosing
/// AVX512DQ + AVX512VL guard.
INLINE vec_s64x4 operator*(vec_s64x4 const &a, vec_s64x4 const &b) {
    const auto product = _mm256_mullo_epi64(a.v, b.v);
    return product;
}

/// Lane-wise maximum of two vectors of four signed 64-bit integers.
///
/// Relies on _mm256_max_epi64, so it is only compiled inside the enclosing
/// AVX-512 feature guard.
INLINE vec_s64x4 sc_max(vec_s64x4 const &a, vec_s64x4 const &b) {
    const auto larger = _mm256_max_epi64(a.v, b.v);
    return larger;
}
/// Lane-wise minimum of two vectors of four signed 64-bit integers.
///
/// Relies on _mm256_min_epi64, so it is only compiled inside the enclosing
/// AVX-512 feature guard.
INLINE vec_s64x4 sc_min(vec_s64x4 const &a, vec_s64x4 const &b) {
    const auto smaller = _mm256_min_epi64(a.v, b.v);
    return smaller;
}

/// Lane-wise absolute value of a vector of four signed 64-bit integers.
///
/// Relies on _mm256_abs_epi64, so it is only compiled inside the enclosing
/// AVX-512 feature guard.
INLINE vec_s64x4 sc_abs(vec_s64x4 const &a) {
    const auto magnitude = _mm256_abs_epi64(a.v);
    return magnitude;
}

#endif

// INLINE vec_s64x4 operator/(vec_s64x4 const &a, vec_s64x4 const &b) {
// return _mm256_div_epi64(a.v, b.v);
// }
Expand Down Expand Up @@ -121,19 +136,6 @@ INLINE vec_s64x4 operator>>(vec_s64x4 const &a, vec_s64x4 const &b) {
/// Lane-wise logical (zero-filling) right shift with a per-lane shift count.
///
/// Lane i of `a` is shifted right by lane i of `b` via _mm256_srlv_epi64.
INLINE vec_s64x4 logical_shr(vec_s64x4 const &a, vec_s64x4 const &b) {
    const auto shifted = _mm256_srlv_epi64(a.v, b.v);
    return shifted;
}

// operator /

// Lane-wise maximum of two vectors of signed 64-bit integers, computed with
// _mm256_max_epi64.
// NOTE(review): _mm256_max_epi64 is an AVX-512 intrinsic and this definition
// is not inside any feature-test guard visible here — confirm the build target
// supports it, or that this is the pre-change copy of the definition.
INLINE vec_s64x4 sc_max(vec_s64x4 const &a, vec_s64x4 const &b) {
return _mm256_max_epi64(a.v, b.v);
}
// Lane-wise minimum of two vectors of signed 64-bit integers, computed with
// _mm256_min_epi64.
// NOTE(review): _mm256_min_epi64 is an AVX-512 intrinsic and this definition
// is not inside any feature-test guard visible here — confirm the build target
// supports it, or that this is the pre-change copy of the definition.
INLINE vec_s64x4 sc_min(vec_s64x4 const &a, vec_s64x4 const &b) {
return _mm256_min_epi64(a.v, b.v);
}

// Lane-wise absolute value of a vector of signed 64-bit integers, computed
// with _mm256_abs_epi64.
// NOTE(review): _mm256_abs_epi64 is an AVX-512 intrinsic and this definition
// is not inside any feature-test guard visible here — confirm the build target
// supports it, or that this is the pre-change copy of the definition.
INLINE vec_s64x4 sc_abs(vec_s64x4 const &a) {
return _mm256_abs_epi64(a.v);
}
}

#endif

0 comments on commit 52a6fd3

Please sign in to comment.