Skip to content

Commit

Permalink
BUG: Resolve build issue on ppc64 with Power9 or higher as baseline
Browse files Browse the repository at this point in the history
  This fix addresses two issues:
   * Corrects the use of instructions that the assembler does not support
     in the half-precision to double-precision conversion.
   * Resolves a code error caused by mismatched variable names in the conversion code.
  • Loading branch information
seiko2plus committed Oct 27, 2023
1 parent 7a84442 commit 771beb0
Showing 1 changed file with 10 additions and 13 deletions.
23 changes: 10 additions & 13 deletions numpy/_core/src/common/half.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class Half final {
#endif
) || (
std::is_same_v<T, double> &&
#if defined(NPY_HAVE_AVX512FP16) || defined(NPY_HAVE_VSX3)
#if defined(NPY_HAVE_AVX512FP16) || (defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX3_HALF_DOUBLE))
true
#else
false
Expand Down Expand Up @@ -73,11 +73,8 @@ class Half final {
#if defined(NPY_HAVE_AVX512FP16)
__m128d md = _mm_load_sd(&f);
bits_ = static_cast<uint16_t>(_mm_cvtsi128_si32(_mm_castph_si128(_mm_cvtpd_ph(md))));
#elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX_ASM)
__vector double vf64 = vec_splats(f);
__vector unsigned short vf16;
__asm__ __volatile__ ("xvcvdphp %x0,%x1" : "=wa" (vf16) : "wa" (vf64));
bits_ = vec_extract(vf16, 0);
#elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX3_HALF_DOUBLE)
__asm__ __volatile__ ("xscvdphp %x0,%x1" : "=wa" (bits_) : "wa" (f));
#else
bits_ = half_private::FromDoubleBits(BitCast<uint64_t>(f));
#endif
Expand All @@ -96,7 +93,7 @@ class Half final {
__vector float vf32;
__asm__ __volatile__("xvcvhpsp %x0,%x1"
: "=wa"(vf32)
: "wa"(vec_splats(bits_.u)));
: "wa"(vec_splats(bits_)));
return vec_extract(vf32, 0);
#else
return BitCast<float>(half_private::ToFloatBits(bits_));
Expand All @@ -110,12 +107,12 @@ class Half final {
double ret;
_mm_store_sd(&ret, _mm_cvtph_pd(_mm_castsi128_ph(_mm_cvtsi32_si128(bits_))));
return ret;
#elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX_ASM)
__vector float vf64;
__asm__ __volatile__("xvcvhpdp %x0,%x1"
: "=wa"(vf32)
: "wa"(vec_splats(bits_)));
return vec_extract(vf64, 0);
#elif defined(NPY_HAVE_VSX3) && defined(NPY_HAVE_VSX3_HALF_DOUBLE)
double f64;
__asm__ __volatile__("xscvhpdp %x0,%x1"
: "=wa"(f64)
: "wa"(bits_));
return f64;
#else
return BitCast<double>(half_private::ToDoubleBits(bits_));
#endif
Expand Down

0 comments on commit 771beb0

Please sign in to comment.