forked from intel/llvm
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SYCL][HIP] Add AMDGPU reflect pass to choose between safe and unsafe…
… AMDGPU atomics (intel#11467) AMDGPU reflect pass is needed to choose between safe and unsafe atomics at the libclc level. In the long run we will delete this patch as work is being done to ensure correct lowering of atomic instructions. See patches: llvm/llvm-project#85052 llvm/llvm-project#69229 This work is necessary as malloc shared atomics rely on PCIe atomics which can have patchy and unreliable support. Therefore, we want to be able to choose at compile time whether we should use safe atomics using CAS (which PCIe should support), or if we want to rely on the availability of the newest PCIe atomics, if malloc shared atomics are desired. Also changes the implementation of `atomic_or`, `atomic_and` so that they can choose between the safe or unsafe version based on the AMDGPU reflect value.
- Loading branch information
Showing
13 changed files
with
196 additions
and
36 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
// Before including, define: __SPIRV_BUILTIN, __CLC_OP, __HIP_BUILTIN | ||
// and include atomic_helpers.h to get AMDGPU_SAFE_ATOMIC | ||
|
||
// Instantiate AMDGPU_SAFE_ATOMIC once per supported integer type: int, | ||
// unsigned int, long and unsigned long. The single-letter third argument | ||
// (i, j, l, m) is presumably the mangled type code used to form the builtin | ||
// name -- confirm against the macro definition in atomic_helpers.h. | ||
AMDGPU_SAFE_ATOMIC(__SPIRV_BUILTIN, int, i, __CLC_OP, __HIP_BUILTIN) | ||
AMDGPU_SAFE_ATOMIC(__SPIRV_BUILTIN, unsigned int, j, __CLC_OP, | ||
__HIP_BUILTIN) | ||
AMDGPU_SAFE_ATOMIC(__SPIRV_BUILTIN, long, l, __CLC_OP, __HIP_BUILTIN) | ||
AMDGPU_SAFE_ATOMIC(__SPIRV_BUILTIN, unsigned long, m, __CLC_OP, | ||
__HIP_BUILTIN) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
//===- AMDGPUOclcReflect.cpp ----------------------------------------------===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// | ||
// This pass searches for occurrences of the AMDGPU_OCLC_REFLECT function, and | ||
// replaces the calls with a value dependent on the operand of the function. | ||
// This can be used to select between different implementations of functions | ||
// at compile time based on a compiler flag or some other means. This pass | ||
// currently supports use cases: | ||
// | ||
// 1. Choose a safe or unsafe version of atomic_xor, atomic_and and atomic_or | ||
// at compile time by setting the flag | ||
// --amdgpu-oclc-unsafe-int-atomics=true. | ||
// | ||
// This pass is similar to the NVPTX pass NVVMReflect. | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "AMDGPU.h" | ||
#include "llvm/IR/Constants.h" | ||
#include "llvm/IR/Dominators.h" | ||
#include "llvm/IR/InstIterator.h" | ||
#include "llvm/IR/Instructions.h" | ||
#include "llvm/Pass.h" | ||
#include "llvm/Support/CommandLine.h" | ||
|
||
using namespace llvm; | ||
|
||
#define AMDGPU_OCLC_REFLECT "__oclc_amdgpu_reflect" | ||
|
||
static cl::opt<bool> | ||
AMDGPUReflectEnabled("amdgpu-oclc-reflect-enable", cl::init(true), | ||
cl::Hidden, | ||
cl::desc("AMDGPU reflection, enabled by default")); | ||
static cl::opt<bool> AMDGPUUnsafeIntAtomicsEnable( | ||
"amdgpu-oclc-unsafe-int-atomics", cl::init(false), cl::Hidden, | ||
cl::desc("Should unsafe int atomics be chosen. Disabled by default.")); | ||
|
||
/// Folds every direct call to `__oclc_amdgpu_reflect` in \p F into an integer
/// constant selected by the string passed as the call's single argument, and
/// erases the call.
///
/// The only recognised query string is "AMDGPU_OCLC_UNSAFE_INT_ATOMICS", which
/// folds to 1 when -amdgpu-oclc-unsafe-int-atomics is set and to 0 otherwise.
/// An argument that is not a constant string, or a string that is not
/// recognised, is reported as a fatal error.
///
/// \param F  Function whose reflect calls are folded.
/// \param AM Function analysis manager (unused).
/// \returns PreservedAnalyses::all() when the pass is disabled, when \p F is
/// the reflect declaration itself, or when \p F contains no reflect call;
/// otherwise a set that preserves the CFG analyses and the dominator tree,
/// because the only change made is replacing call instructions by constants.
PreservedAnalyses AMDGPUOclcReflectPass::run(Function &F,
                                             FunctionAnalysisManager &AM) {
  if (!AMDGPUReflectEnabled)
    return PreservedAnalyses::all();

  if (F.getName() == AMDGPU_OCLC_REFLECT) {
    assert(F.isDeclaration() &&
           "__oclc_amdgpu_reflect function should not have a body");
    return PreservedAnalyses::all();
  }

  // Collect first and rewrite afterwards: erasing a call while walking
  // instructions(F) would invalidate the iterator.
  SmallVector<CallInst *, 4> ToRemove;

  for (Instruction &I : instructions(F)) {
    auto *Call = dyn_cast<CallInst>(&I);
    if (!Call)
      continue;

    // Indirect calls have no statically known callee and are never reflect
    // queries.
    const Function *Callee = Call->getCalledFunction();
    if (!Callee || Callee->getName() != AMDGPU_OCLC_REFLECT)
      continue;

    assert(Call->arg_size() == 1 &&
           "Wrong number of operands to __oclc_amdgpu_reflect function");

    ToRemove.push_back(Call);
  }

  if (ToRemove.empty())
    return PreservedAnalyses::all();

  for (CallInst *Call : ToRemove) {
    // The argument may reach the call through a bitcast/addrspacecast constant
    // expression, so look through pointer casts to find the underlying symbol.
    const auto *Sym =
        dyn_cast<Constant>(Call->getArgOperand(0)->stripPointerCasts());

    // Operand 0 of a global variable definition is its initializer; a
    // declaration without an initializer has no operands at all.
    const ConstantDataSequential *Init = nullptr;
    if (Sym && Sym->getNumOperands() != 0)
      Init = dyn_cast<ConstantDataSequential>(Sym->getOperand(0));
    if (!Init || !Init->isString())
      report_fatal_error(
          "__oclc_amdgpu_reflect argument must be a constant string");

    // C string literals carry a trailing NUL that is not part of the query
    // name; only strip it when it is actually present.
    StringRef ReflectArg = Init->getAsString();
    if (!ReflectArg.empty() && ReflectArg.back() == '\0')
      ReflectArg = ReflectArg.drop_back();

    if (ReflectArg != "AMDGPU_OCLC_UNSAFE_INT_ATOMICS")
      report_fatal_error("Invalid arg passed to __oclc_amdgpu_reflect");

    const int ReflectVal = AMDGPUUnsafeIntAtomicsEnable ? 1 : 0;
    Call->replaceAllUsesWith(ConstantInt::get(Call->getType(), ReflectVal));
    Call->eraseFromParent();
  }

  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  PA.preserve<DominatorTreeAnalysis>();
  return PA;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 | ||
; RUN: opt -S -p amdgpu-oclc-reflect %s | FileCheck %s -check-prefixes=CHECK,CHECK-SAFE-ATOMICS | ||
; RUN: opt -S -p amdgpu-oclc-reflect -amdgpu-oclc-unsafe-int-atomics=true %s | FileCheck %s -check-prefixes=CHECK,CHECK-UNSAFE-ATOMICS | ||
|
||
target triple = "amdgcn-amd-amdhsa" | ||
|
||
@.str = private unnamed_addr addrspace(4) constant [31 x i8] c"AMDGPU_OCLC_UNSAFE_INT_ATOMICS\00", align 1 | ||
|
||
declare hidden i32 @__oclc_amdgpu_reflect(ptr addrspace(4) noundef) local_unnamed_addr | ||
|
||
; Returns the result of querying @__oclc_amdgpu_reflect with the | ||
; "AMDGPU_OCLC_UNSAFE_INT_ATOMICS" string. The autogenerated assertions below | ||
; expect the call to be folded to 0 in the default RUN line and to 1 when | ||
; -amdgpu-oclc-unsafe-int-atomics=true is passed. | ||
define i32 @foo() { | ||
; CHECK-SAFE-ATOMICS-LABEL: define i32 @foo() { | ||
; CHECK-SAFE-ATOMICS-NEXT: ret i32 0 | ||
; | ||
; CHECK-UNSAFE-ATOMICS-LABEL: define i32 @foo() { | ||
; CHECK-UNSAFE-ATOMICS-NEXT: ret i32 1 | ||
; | ||
%call = tail call i32 @__oclc_amdgpu_reflect(ptr addrspace(4) noundef @.str) | ||
ret i32 %call | ||
} | ||
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: | ||
; CHECK: {{.*}} |
24 changes: 24 additions & 0 deletions
24
sycl/test/check_device_code/hip/atomic/amdgpu_unsafe_atomics.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
// REQUIRES: hip | ||
// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx906 %s -S -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SAFE | ||
// RUN: %clangxx -fsycl -fsycl-targets=amd_gpu_gfx906 %s -mllvm --amdgpu-oclc-unsafe-int-atomics=true -S -emit-llvm -o - | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE | ||
|
||
#include <sycl/sycl.hpp> | ||
|
||
// Submits a single_task that performs fetch_xor, fetch_and and fetch_or on a | ||
// relaxed, device-scope atomic_ref<int>. The FileCheck directives inside the | ||
// kernel expect the emitted device IR to use cmpxchg (and no atomicrmw) in | ||
// the default RUN line, and atomicrmw xor/and/or (and no cmpxchg) when | ||
// --amdgpu-oclc-unsafe-int-atomics=true is passed. | ||
int main() { | ||
sycl::queue{}.single_task([=] { | ||
int a; | ||
sycl::atomic_ref<int, sycl::memory_order_relaxed, sycl::memory_scope_device> | ||
atomicInt(a); | ||
atomicInt.fetch_xor(1); | ||
atomicInt.fetch_and(1); | ||
atomicInt.fetch_or(1); | ||
// CHECK: __CLANG_OFFLOAD_BUNDLE____START__ sycl-amdgcn-amd-amdhsa- | ||
// CHECK-SAFE: cmpxchg volatile | ||
// CHECK-SAFE-NOT: atomicrmw | ||
// CHECK-UNSAFE: atomicrmw volatile xor | ||
// CHECK-UNSAFE: atomicrmw volatile and | ||
// CHECK-UNSAFE: atomicrmw volatile or | ||
// CHECK-UNSAFE-NOT: cmpxchg | ||
// CHECK: __CLANG_OFFLOAD_BUNDLE____END__ sycl-amdgcn-amd-amdhsa- | ||
}); | ||
} |