Skip to content

Commit

Permalink
[PPC64] Add vector pack/unpack support from ISA 2.07
Browse files Browse the repository at this point in the history
This patch adds support for the following new instructions in the
Power ISA 2.07:

  vpksdss
  vpksdus
  vpkudus
  vpkudum
  vupkhsw
  vupklsw

These instructions are available through the vec_packs, vec_packsu,
vec_unpackh, and vec_unpackl built-in interfaces.  These are
lane-sensitive instructions, so the built-ins have different
implementations for big- and little-endian, and the instructions must
be marked as killing the vector swap optimization for now.

The first three instructions perform saturating pack operations.  The
fourth performs a modulo pack operation, which means it can be
represented with a vector shuffle, and conversely the appropriate
vector shuffles may cause this instruction to be generated.  The other
instructions are only generated via built-in support for now.

I noticed during patch preparation that the macro __VSX__ was not
previously predefined when the power8-vector or direct-move features
are requested.  This is an error, and I've corrected that here as
well.

Appropriate tests have been added.

There is a companion patch to llvm for the rest of this support.

llvm-svn: 237500
  • Loading branch information
wschmidt-ibm committed May 16, 2015
1 parent 5ed84cd commit 41e14c4
Show file tree
Hide file tree
Showing 5 changed files with 345 additions and 2 deletions.
6 changes: 6 additions & 0 deletions clang/include/clang/Basic/BuiltinsPPC.def
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,10 @@ BUILTIN(__builtin_altivec_vpkuwus, "V8UsV4UiV4Ui", "")
BUILTIN(__builtin_altivec_vpkswss, "V8SsV4SiV4Si", "")
BUILTIN(__builtin_altivec_vpkshus, "V16UcV8SsV8Ss", "")
BUILTIN(__builtin_altivec_vpkswus, "V8UsV4SiV4Si", "")
// Power ISA 2.07 doubleword pack builtins: vpksdss, vpksdus and vpkudus are
// the saturating packs, vpkudum is the modulo pack.  Per the type strings,
// each takes two vectors of 2 x long long (signed or unsigned) and returns a
// vector of 4 x int (signed or unsigned).
BUILTIN(__builtin_altivec_vpksdss, "V4SiV2SLLiV2SLLi", "")
BUILTIN(__builtin_altivec_vpksdus, "V4UiV2SLLiV2SLLi", "")
BUILTIN(__builtin_altivec_vpkudus, "V4UiV2ULLiV2ULLi", "")
BUILTIN(__builtin_altivec_vpkudum, "V4UiV2ULLiV2ULLi", "")

BUILTIN(__builtin_altivec_vperm_4si, "V4iV4iV4iV16Uc", "")

Expand Down Expand Up @@ -194,10 +198,12 @@ BUILTIN(__builtin_altivec_vrfiz, "V4fV4f", "")
BUILTIN(__builtin_altivec_vupkhsb, "V8sV16c", "")
BUILTIN(__builtin_altivec_vupkhpx, "V4UiV8s", "")
BUILTIN(__builtin_altivec_vupkhsh, "V4iV8s", "")
// Power ISA 2.07 word unpack (high): vector of 4 x int in, vector of
// 2 x long long out.
BUILTIN(__builtin_altivec_vupkhsw, "V2LLiV4i", "")

BUILTIN(__builtin_altivec_vupklsb, "V8sV16c", "")
BUILTIN(__builtin_altivec_vupklpx, "V4UiV8s", "")
BUILTIN(__builtin_altivec_vupklsh, "V4iV8s", "")
// Power ISA 2.07 word unpack (low): vector of 4 x int in, vector of
// 2 x long long out.
BUILTIN(__builtin_altivec_vupklsw, "V2LLiV4i", "")

BUILTIN(__builtin_altivec_vcmpbfp_p, "iiV4fV4f", "")

Expand Down
2 changes: 2 additions & 0 deletions clang/lib/Basic/Targets.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1028,6 +1028,7 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,

// Requesting power8-vector also turns on VSX, so that VSX-dependent state
// (presumably including the __VSX__ predefine) is set up as well.
if (Feature == "power8-vector") {
HasP8Vector = true;
HasVSX = true;
continue;
}

Expand All @@ -1038,6 +1039,7 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector<std::string> &Features,

// Requesting direct-move also turns on VSX, so that VSX-dependent state
// (presumably including the __VSX__ predefine) is set up as well.
if (Feature == "direct-move") {
HasDirectMove = true;
HasVSX = true;
continue;
}

Expand Down
242 changes: 242 additions & 0 deletions clang/lib/Headers/altivec.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,18 @@ vec_perm(vector bool int __a, vector bool int __b, vector unsigned char __c);
static vector float __ATTRS_o_ai
vec_perm(vector float __a, vector float __b, vector unsigned char __c);

#ifdef __VSX__
/* Prototypes only (no bodies) for the vec_perm overloads on long long,
   unsigned long long and double vectors, available when __VSX__ is defined.
   Declaring them here lets code earlier in the header call these overloads
   before their definitions appear. */
static vector long long __ATTRS_o_ai
vec_perm(vector long long __a, vector long long __b, vector unsigned char __c);

static vector unsigned long long __ATTRS_o_ai
vec_perm(vector unsigned long long __a, vector unsigned long long __b,
vector unsigned char __c);

static vector double __ATTRS_o_ai
vec_perm(vector double __a, vector double __b, vector unsigned char __c);
#endif

static vector unsigned char __ATTRS_o_ai
vec_xor(vector unsigned char __a, vector unsigned char __b);

Expand Down Expand Up @@ -4626,6 +4638,58 @@ vec_vpkuwum(vector bool int __a, vector bool int __b)
#endif
}

/* vec_vpkudum */

#ifdef __POWER8_VECTOR__
/* Redirect uses of the builtin name to the vec_vpkudum overloads defined
   below, which implement the modulo pack as a vec_perm. */
#define __builtin_altivec_vpkudum vec_vpkudum

/* Modulo pack of signed doublewords: keeps the low-order word of every
   doubleword of __a, then of __b.  Implemented as a byte permute; only the
   permute control vector differs between endiannesses. */
static vector int __ATTRS_o_ai
vec_vpkudum(vector long long __a, vector long long __b)
{
#ifndef __LITTLE_ENDIAN__
  /* Big-endian: the low-order word is bytes 4-7 of each doubleword. */
  vector unsigned char __mask = (vector unsigned char)
    (0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, 0x0E, 0x0F,
     0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F);
#else
  /* Little-endian: the low-order word is bytes 0-3 of each doubleword. */
  vector unsigned char __mask = (vector unsigned char)
    (0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B,
     0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1A, 0x1B);
#endif
  return (vector int)vec_perm(__a, __b, __mask);
}

/* Modulo pack of unsigned doublewords: keeps the low-order word of every
   doubleword of __a, then of __b.  Implemented as a byte permute; only the
   permute control vector differs between endiannesses. */
static vector unsigned int __ATTRS_o_ai
vec_vpkudum(vector unsigned long long __a, vector unsigned long long __b)
{
#ifndef __LITTLE_ENDIAN__
  /* Big-endian: the low-order word is bytes 4-7 of each doubleword. */
  vector unsigned char __mask = (vector unsigned char)
    (0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, 0x0E, 0x0F,
     0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F);
#else
  /* Little-endian: the low-order word is bytes 0-3 of each doubleword. */
  vector unsigned char __mask = (vector unsigned char)
    (0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B,
     0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1A, 0x1B);
#endif
  return (vector unsigned int)vec_perm(__a, __b, __mask);
}

/* Modulo pack of bool doublewords.  The operands are reinterpreted as
   vector long long so the long long vec_perm overload can be used, and the
   permuted result is cast back to a bool vector. */
static vector bool int __ATTRS_o_ai
vec_vpkudum(vector bool long long __a, vector bool long long __b)
{
#ifndef __LITTLE_ENDIAN__
  /* Big-endian: the low-order word is bytes 4-7 of each doubleword. */
  vector unsigned char __mask = (vector unsigned char)
    (0x04, 0x05, 0x06, 0x07, 0x0C, 0x0D, 0x0E, 0x0F,
     0x14, 0x15, 0x16, 0x17, 0x1C, 0x1D, 0x1E, 0x1F);
#else
  /* Little-endian: the low-order word is bytes 0-3 of each doubleword. */
  vector unsigned char __mask = (vector unsigned char)
    (0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0A, 0x0B,
     0x10, 0x11, 0x12, 0x13, 0x18, 0x19, 0x1A, 0x1B);
#endif
  return (vector bool int)vec_perm((vector long long)__a,
                                   (vector long long)__b,
                                   __mask);
}
#endif

/* vec_packpx */

static vector pixel __attribute__((__always_inline__))
Expand Down Expand Up @@ -4692,6 +4756,28 @@ vec_packs(vector unsigned int __a, vector unsigned int __b)
#endif
}

#ifdef __POWER8_VECTOR__
/* Saturating pack of two signed doubleword vectors into one signed word
   vector, via vpksdss. */
static vector int __ATTRS_o_ai
vec_packs(vector long long __a, vector long long __b)
{
#if !defined(__LITTLE_ENDIAN__)
  return __builtin_altivec_vpksdss(__a, __b);
#else
  /* The instruction is lane-sensitive: swap the operands on little-endian. */
  return __builtin_altivec_vpksdss(__b, __a);
#endif
}

/* Saturating pack of two unsigned doubleword vectors into one unsigned word
   vector, via vpkudus. */
static vector unsigned int __ATTRS_o_ai
vec_packs(vector unsigned long long __a, vector unsigned long long __b)
{
#if !defined(__LITTLE_ENDIAN__)
  return __builtin_altivec_vpkudus(__a, __b);
#else
  /* The instruction is lane-sensitive: swap the operands on little-endian. */
  return __builtin_altivec_vpkudus(__b, __a);
#endif
}
#endif

/* vec_vpkshss */

static vector signed char __attribute__((__always_inline__))
Expand All @@ -4704,6 +4790,20 @@ vec_vpkshss(vector short __a, vector short __b)
#endif
}

/* vec_vpksdss */

#ifdef __POWER8_VECTOR__
/* Instruction-named wrapper for vpksdss: saturating pack of signed
   doublewords into signed words. */
static vector int __ATTRS_o_ai
vec_vpksdss(vector long long __a, vector long long __b)
{
#if !defined(__LITTLE_ENDIAN__)
  return __builtin_altivec_vpksdss(__a, __b);
#else
  /* Lane-sensitive instruction: operands are swapped on little-endian. */
  return __builtin_altivec_vpksdss(__b, __a);
#endif
}
#endif

/* vec_vpkuhus */

static vector unsigned char __attribute__((__always_inline__))
Expand All @@ -4716,6 +4816,20 @@ vec_vpkuhus(vector unsigned short __a, vector unsigned short __b)
#endif
}

/* vec_vpkudus */

#ifdef __POWER8_VECTOR__
/* Instruction-named wrapper for vpkudus: saturating pack of unsigned
   doublewords into unsigned words. */
static vector unsigned int __attribute__((__always_inline__))
vec_vpkudus(vector unsigned long long __a, vector unsigned long long __b)
{
#if !defined(__LITTLE_ENDIAN__)
  return __builtin_altivec_vpkudus(__a, __b);
#else
  /* Lane-sensitive instruction: operands are swapped on little-endian. */
  return __builtin_altivec_vpkudus(__b, __a);
#endif
}
#endif

/* vec_vpkswss */

static vector signed short __attribute__((__always_inline__))
Expand Down Expand Up @@ -4782,6 +4896,28 @@ vec_packsu(vector unsigned int __a, vector unsigned int __b)
#endif
}

#ifdef __POWER8_VECTOR__
/* Saturating pack of two signed doubleword vectors into one unsigned word
   vector, via vpksdus. */
static vector unsigned int __ATTRS_o_ai
vec_packsu(vector long long __a, vector long long __b)
{
#if !defined(__LITTLE_ENDIAN__)
  return __builtin_altivec_vpksdus(__a, __b);
#else
  /* The instruction is lane-sensitive: swap the operands on little-endian. */
  return __builtin_altivec_vpksdus(__b, __a);
#endif
}

/* Saturating pack of two unsigned doubleword vectors into one unsigned word
   vector, via vpkudus. */
static vector unsigned int __ATTRS_o_ai
vec_packsu(vector unsigned long long __a, vector unsigned long long __b)
{
#if !defined(__LITTLE_ENDIAN__)
  return __builtin_altivec_vpkudus(__a, __b);
#else
  /* The instruction is lane-sensitive: swap the operands on little-endian. */
  return __builtin_altivec_vpkudus(__b, __a);
#endif
}
#endif

/* vec_vpkshus */

static vector unsigned char __ATTRS_o_ai
Expand Down Expand Up @@ -4826,6 +4962,20 @@ vec_vpkswus(vector unsigned int __a, vector unsigned int __b)
#endif
}

/* vec_vpksdus */

#ifdef __POWER8_VECTOR__
/* Instruction-named wrapper for vpksdus: saturating pack of signed
   doublewords into unsigned words. */
static vector unsigned int __ATTRS_o_ai
vec_vpksdus(vector long long __a, vector long long __b)
{
#if !defined(__LITTLE_ENDIAN__)
  return __builtin_altivec_vpksdus(__a, __b);
#else
  /* Lane-sensitive instruction: operands are swapped on little-endian. */
  return __builtin_altivec_vpksdus(__b, __a);
#endif
}
#endif

/* vec_perm */

// The vperm instruction is defined architecturally with a big-endian bias.
Expand Down Expand Up @@ -8954,6 +9104,28 @@ vec_unpackh(vector pixel __a)
#endif
}

#ifdef __POWER8_VECTOR__
/* Unpacks words of __a into a vector of doublewords.  The instruction is
   lane-sensitive, so little-endian uses vupklsw where big-endian uses
   vupkhsw. */
static vector long long __ATTRS_o_ai
vec_unpackh(vector int __a)
{
#if !defined(__LITTLE_ENDIAN__)
  return __builtin_altivec_vupkhsw(__a);
#else
  return __builtin_altivec_vupklsw(__a);
#endif
}

/* Bool variant of vec_unpackh: the operand is reinterpreted as vector int
   for the builtin and the result is cast back to a bool vector.
   Little-endian uses vupklsw where big-endian uses vupkhsw. */
static vector bool long long __ATTRS_o_ai
vec_unpackh(vector bool int __a)
{
  vector int __words = (vector int)__a;
#if !defined(__LITTLE_ENDIAN__)
  return (vector bool long long)__builtin_altivec_vupkhsw(__words);
#else
  return (vector bool long long)__builtin_altivec_vupklsw(__words);
#endif
}
#endif

/* vec_vupkhsb */

static vector short __ATTRS_o_ai
Expand Down Expand Up @@ -9008,6 +9180,30 @@ vec_vupkhsh(vector pixel __a)
#endif
}

/* vec_vupkhsw */

#ifdef __POWER8_VECTOR__
/* Instruction-named wrapper for the high word unpack.  Because the
   instruction is lane-sensitive, little-endian issues vupklsw instead of
   vupkhsw. */
static vector long long __ATTRS_o_ai
vec_vupkhsw(vector int __a)
{
#if !defined(__LITTLE_ENDIAN__)
  return __builtin_altivec_vupkhsw(__a);
#else
  return __builtin_altivec_vupklsw(__a);
#endif
}

/* Bool variant of vec_vupkhsw: the operand is reinterpreted as vector int
   for the builtin and the result is cast back to a bool vector.
   Little-endian issues vupklsw instead of vupkhsw. */
static vector bool long long __ATTRS_o_ai
vec_vupkhsw(vector bool int __a)
{
  vector int __words = (vector int)__a;
#if !defined(__LITTLE_ENDIAN__)
  return (vector bool long long)__builtin_altivec_vupkhsw(__words);
#else
  return (vector bool long long)__builtin_altivec_vupklsw(__words);
#endif
}
#endif

/* vec_unpackl */

static vector short __ATTRS_o_ai
Expand Down Expand Up @@ -9060,6 +9256,28 @@ vec_unpackl(vector pixel __a)
#endif
}

#ifdef __POWER8_VECTOR__
/* Unpacks words of __a into a vector of doublewords.  The instruction is
   lane-sensitive, so little-endian uses vupkhsw where big-endian uses
   vupklsw. */
static vector long long __ATTRS_o_ai
vec_unpackl(vector int __a)
{
#if !defined(__LITTLE_ENDIAN__)
  return __builtin_altivec_vupklsw(__a);
#else
  return __builtin_altivec_vupkhsw(__a);
#endif
}

/* Bool variant of vec_unpackl: the operand is reinterpreted as vector int
   for the builtin and the result is cast back to a bool vector.
   Little-endian uses vupkhsw where big-endian uses vupklsw. */
static vector bool long long __ATTRS_o_ai
vec_unpackl(vector bool int __a)
{
  vector int __words = (vector int)__a;
#if !defined(__LITTLE_ENDIAN__)
  return (vector bool long long)__builtin_altivec_vupklsw(__words);
#else
  return (vector bool long long)__builtin_altivec_vupkhsw(__words);
#endif
}
#endif

/* vec_vupklsb */

static vector short __ATTRS_o_ai
Expand Down Expand Up @@ -9114,6 +9332,30 @@ vec_vupklsh(vector pixel __a)
#endif
}

/* vec_vupklsw */

#ifdef __POWER8_VECTOR__
/* Instruction-named wrapper for the low word unpack.  Because the
   instruction is lane-sensitive, little-endian issues vupkhsw instead of
   vupklsw. */
static vector long long __ATTRS_o_ai
vec_vupklsw(vector int __a)
{
#if !defined(__LITTLE_ENDIAN__)
  return __builtin_altivec_vupklsw(__a);
#else
  return __builtin_altivec_vupkhsw(__a);
#endif
}

/* Bool variant of vec_vupklsw: the operand is reinterpreted as vector int
   for the builtin and the result is cast back to a bool vector.
   Little-endian issues vupkhsw instead of vupklsw. */
static vector bool long long __ATTRS_o_ai
vec_vupklsw(vector bool int __a)
{
  vector int __words = (vector int)__a;
#if !defined(__LITTLE_ENDIAN__)
  return (vector bool long long)__builtin_altivec_vupklsw(__words);
#else
  return (vector bool long long)__builtin_altivec_vupkhsw(__words);
#endif
}
#endif

/* vec_vsx_ld */

#ifdef __VSX__
Expand Down
Loading

0 comments on commit 41e14c4

Please sign in to comment.