forked from llvm/llvm-project
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[libclc] Optimize CLC vector any/all builtins (llvm#124568)
By using the vector reduction builtins we can avoid scalarization. Targets that don't support vector reductions will scalarize later on anyway. The vector reduction builtins should be well-enough supported by the middle-end to be a generic solution. This produces conceptually equivalent code: all vector elements are OR'd/AND'd together and the final scalar is bit-shifted and masked to produce the final result. The 'normalize' builtin uses 'all' so its code has similarly improved in places.
- Loading branch information
1 parent
38b3f45
commit c3a0fcc
Showing
2 changed files
with
30 additions
and
44 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,28 +1,21 @@ | ||
#include <clc/internal/clc.h> | ||
|
||
#define _CLC_ALL(v) (((v) >> ((sizeof(v) * 8) - 1)) & 0x1) | ||
#define _CLC_ALL2(v) (_CLC_ALL((v).s0) & _CLC_ALL((v).s1)) | ||
#define _CLC_ALL3(v) (_CLC_ALL2((v)) & _CLC_ALL((v).s2)) | ||
#define _CLC_ALL4(v) (_CLC_ALL3((v)) & _CLC_ALL((v).s3)) | ||
#define _CLC_ALL8(v) \ | ||
(_CLC_ALL4((v)) & _CLC_ALL((v).s4) & _CLC_ALL((v).s5) & _CLC_ALL((v).s6) & \ | ||
_CLC_ALL((v).s7)) | ||
#define _CLC_ALL16(v) \ | ||
(_CLC_ALL8((v)) & _CLC_ALL((v).s8) & _CLC_ALL((v).s9) & _CLC_ALL((v).sA) & \ | ||
_CLC_ALL((v).sB) & _CLC_ALL((v).sC) & _CLC_ALL((v).sD) & _CLC_ALL((v).sE) & \ | ||
_CLC_ALL((v).sf)) | ||
|
||
#define ALL_ID(TYPE) _CLC_OVERLOAD _CLC_DEF int __clc_all(TYPE v) | ||
#define _CLC_ALL_VEC(TYPE) \ | ||
_CLC_OVERLOAD _CLC_DEF int __clc_all(TYPE v) { \ | ||
return _CLC_ALL(__builtin_reduce_and(v)); \ | ||
} | ||
|
||
#define ALL_VECTORIZE(TYPE) \ | ||
ALL_ID(TYPE) { return _CLC_ALL(v); } \ | ||
ALL_ID(TYPE##2) { return _CLC_ALL2(v); } \ | ||
ALL_ID(TYPE##3) { return _CLC_ALL3(v); } \ | ||
ALL_ID(TYPE##4) { return _CLC_ALL4(v); } \ | ||
ALL_ID(TYPE##8) { return _CLC_ALL8(v); } \ | ||
ALL_ID(TYPE##16) { return _CLC_ALL16(v); } | ||
#define _CLC_DEFINE_ALL(TYPE) \ | ||
_CLC_OVERLOAD _CLC_DEF int __clc_all(TYPE v) { return _CLC_ALL(v); } \ | ||
_CLC_ALL_VEC(TYPE##2) \ | ||
_CLC_ALL_VEC(TYPE##3) \ | ||
_CLC_ALL_VEC(TYPE##4) \ | ||
_CLC_ALL_VEC(TYPE##8) \ | ||
_CLC_ALL_VEC(TYPE##16) | ||
|
||
ALL_VECTORIZE(char) | ||
ALL_VECTORIZE(short) | ||
ALL_VECTORIZE(int) | ||
ALL_VECTORIZE(long) | ||
_CLC_DEFINE_ALL(char) | ||
_CLC_DEFINE_ALL(short) | ||
_CLC_DEFINE_ALL(int) | ||
_CLC_DEFINE_ALL(long) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,28 +1,21 @@ | ||
#include <clc/internal/clc.h> | ||
|
||
#define _CLC_ANY(v) (((v) >> ((sizeof(v) * 8) - 1)) & 0x1) | ||
#define _CLC_ANY2(v) (_CLC_ANY((v).s0) | _CLC_ANY((v).s1)) | ||
#define _CLC_ANY3(v) (_CLC_ANY2((v)) | _CLC_ANY((v).s2)) | ||
#define _CLC_ANY4(v) (_CLC_ANY3((v)) | _CLC_ANY((v).s3)) | ||
#define _CLC_ANY8(v) \ | ||
(_CLC_ANY4((v)) | _CLC_ANY((v).s4) | _CLC_ANY((v).s5) | _CLC_ANY((v).s6) | \ | ||
_CLC_ANY((v).s7)) | ||
#define _CLC_ANY16(v) \ | ||
(_CLC_ANY8((v)) | _CLC_ANY((v).s8) | _CLC_ANY((v).s9) | _CLC_ANY((v).sA) | \ | ||
_CLC_ANY((v).sB) | _CLC_ANY((v).sC) | _CLC_ANY((v).sD) | _CLC_ANY((v).sE) | \ | ||
_CLC_ANY((v).sf)) | ||
|
||
#define ANY_ID(TYPE) _CLC_OVERLOAD _CLC_DEF int __clc_any(TYPE v) | ||
#define _CLC_ANY_VEC(TYPE) \ | ||
_CLC_OVERLOAD _CLC_DEF int __clc_any(TYPE v) { \ | ||
return _CLC_ANY(__builtin_reduce_or(v)); \ | ||
} | ||
|
||
#define ANY_VECTORIZE(TYPE) \ | ||
ANY_ID(TYPE) { return _CLC_ANY(v); } \ | ||
ANY_ID(TYPE##2) { return _CLC_ANY2(v); } \ | ||
ANY_ID(TYPE##3) { return _CLC_ANY3(v); } \ | ||
ANY_ID(TYPE##4) { return _CLC_ANY4(v); } \ | ||
ANY_ID(TYPE##8) { return _CLC_ANY8(v); } \ | ||
ANY_ID(TYPE##16) { return _CLC_ANY16(v); } | ||
#define _CLC_DEFINE_ANY(TYPE) \ | ||
_CLC_OVERLOAD _CLC_DEF int __clc_any(TYPE v) { return _CLC_ANY(v); } \ | ||
_CLC_ANY_VEC(TYPE##2) \ | ||
_CLC_ANY_VEC(TYPE##3) \ | ||
_CLC_ANY_VEC(TYPE##4) \ | ||
_CLC_ANY_VEC(TYPE##8) \ | ||
_CLC_ANY_VEC(TYPE##16) | ||
|
||
ANY_VECTORIZE(char) | ||
ANY_VECTORIZE(short) | ||
ANY_VECTORIZE(int) | ||
ANY_VECTORIZE(long) | ||
_CLC_DEFINE_ANY(char) | ||
_CLC_DEFINE_ANY(short) | ||
_CLC_DEFINE_ANY(int) | ||
_CLC_DEFINE_ANY(long) |