Skip to content

Commit

Permalink
[AMDGPU][llvm-mc] Predefined symbols to access register counts (.kern…
Browse files Browse the repository at this point in the history
…el.{v|s}gpr_count)

The feature allows for conditional assembly, filling the entries
of .amd_kernel_code_t etc.

Symbols are defined with value 0 at the beginning of each kernel scope.
After each register usage, the respective symbol is set to:
	value = max( value, ( register index + 1 ) )
Thus, at the end of scope the value represents a count of used registers.

A kernel scope begins at an .amdgpu_hsa_kernel directive and ends at the
next .amdgpu_hsa_kernel (or EOF, whichever comes first). There is also
a dummy scope that spans from the beginning of the source file until the
first .amdgpu_hsa_kernel.

Test added.

Differential Revision: https://reviews.llvm.org/D27859

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@290608 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
atamazov committed Dec 27, 2016
1 parent 4c397d7 commit d8dc65b
Show file tree
Hide file tree
Showing 3 changed files with 115 additions and 7 deletions.
63 changes: 56 additions & 7 deletions lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -661,13 +661,57 @@ raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
// AsmParser
//===----------------------------------------------------------------------===//

// Per-kernel bookkeeping for the assembler: remembers the lowest SGPR and
// VGPR dword index that has not been referenced yet and mirrors those values
// into the symbols .kernel.sgpr_count and .kernel.vgpr_count.
// A kernel scope starts at an .amdgpu_hsa_kernel directive and lasts until
// the next .amdgpu_hsa_kernel or EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  // Sets the assembler symbol Name to the constant Count. Does nothing
  // until initialize() has supplied a context.
  void publishCount(const char *Name, int Count) {
    if (!Ctx)
      return;
    MCSymbol *const Sym = Ctx->getOrCreateSymbol(Twine(Name));
    Sym->setVariableValue(MCConstantExpr::create(Count, *Ctx));
  }

  // Records a reference to the SGPR dword at Index; raises the tracked
  // minimum unused index (and the symbol) when Index reaches or exceeds it.
  void usesSgprAt(int Index) {
    if (Index < SgprIndexUnusedMin)
      return;
    SgprIndexUnusedMin = Index + 1;
    publishCount(".kernel.sgpr_count", SgprIndexUnusedMin);
  }

  // Same as usesSgprAt, but for VGPRs and .kernel.vgpr_count.
  void usesVgprAt(int Index) {
    if (Index < VgprIndexUnusedMin)
      return;
    VgprIndexUnusedMin = Index + 1;
    publishCount(".kernel.vgpr_count", VgprIndexUnusedMin);
  }

public:
  KernelScopeInfo() = default;

  // Opens a fresh scope: binds the context and resets both trackers.
  // Feeding index -1 through the regular path leaves each tracker at 0 and
  // (re)defines both symbols with value 0.
  void initialize(MCContext &Context) {
    Ctx = &Context;
    SgprIndexUnusedMin = -1;
    usesSgprAt(-1);
    VgprIndexUnusedMin = -1;
    usesVgprAt(-1);
  }

  // Notes that a register of RegWidth dwords starting at DwordRegIndex was
  // referenced. Only SGPRs and VGPRs are tracked; other kinds are ignored.
  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    // Index of the last dword the register occupies.
    const int LastDword = DwordRegIndex + RegWidth - 1;
    if (RegKind == IS_SGPR)
      usesSgprAt(LastDword);
    else if (RegKind == IS_VGPR)
      usesVgprAt(LastDword);
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
const MCInstrInfo &MII;
MCAsmParser &Parser;

unsigned ForcedEncodingSize;
bool ForcedDPP;
bool ForcedSDWA;
KernelScopeInfo KernelScope;

/// @name Auto-generated Match Functions
/// {
Expand All @@ -693,7 +737,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
bool ParseSectionDirectiveHSADataGlobalProgram();
bool ParseSectionDirectiveHSARodataReadonlyAgent();
bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, RegisterKind RegKind, unsigned Reg1, unsigned RegNum);
bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth);
bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth, unsigned *DwordRegIndex);
void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, bool IsAtomic, bool IsAtomicReturn);

public:
Expand Down Expand Up @@ -731,6 +775,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
Sym->setVariableValue(MCConstantExpr::create(Isa.Stepping, Ctx));
}
KernelScope.initialize(getContext());
}

bool isSI() const {
Expand Down Expand Up @@ -1240,8 +1285,9 @@ bool AMDGPUAsmParser::AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, R
}
}

bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth)
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth, unsigned *DwordRegIndex)
{
if (DwordRegIndex) { *DwordRegIndex = 0; }
const MCRegisterInfo *TRI = getContext().getRegisterInfo();
if (getLexer().is(AsmToken::Identifier)) {
StringRef RegName = Parser.getTok().getString();
Expand Down Expand Up @@ -1301,7 +1347,7 @@ bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
} else if (getLexer().is(AsmToken::LBrac)) {
// List of consecutive registers: [s0,s1,s2,s3]
Parser.Lex();
if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
return false;
if (RegWidth != 1)
return false;
Expand All @@ -1313,7 +1359,7 @@ bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
} else if (getLexer().is(AsmToken::RBrac)) {
Parser.Lex();
break;
} else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1)) {
} else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
if (RegWidth1 != 1) {
return false;
}
Expand Down Expand Up @@ -1341,11 +1387,12 @@ bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
{
unsigned Size = 1;
if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
// SGPR and TTMP registers must be are aligned. Max required alignment is 4 dwords.
// SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
Size = std::min(RegWidth, 4u);
}
if (RegNum % Size != 0)
return false;
if (DwordRegIndex) { *DwordRegIndex = RegNum; }
RegNum = RegNum / Size;
int RCID = getRegClass(RegKind, RegWidth);
if (RCID == -1)
Expand All @@ -1371,11 +1418,12 @@ std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
SMLoc StartLoc = Tok.getLoc();
SMLoc EndLoc = Tok.getEndLoc();
RegisterKind RegKind;
unsigned Reg, RegNum, RegWidth;
unsigned Reg, RegNum, RegWidth, DwordRegIndex;

if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
return nullptr;
}
KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
}

Expand Down Expand Up @@ -1842,6 +1890,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
ELF::STT_AMDGPU_HSA_KERNEL);
Lex();
KernelScope.initialize(getContext());
return false;
}

Expand Down
59 changes: 59 additions & 0 deletions test/MC/AMDGPU/sym_kernel_scope.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// RUN: llvm-mc -arch=amdgcn %s 2>&1 | FileCheck %s

// Exercises the .kernel.sgpr_count / .kernel.vgpr_count symbols: each one is
// expected to read 0 at the start of a scope and (highest register index
// used + 1) afterwards.

// Scope before the first .amdgpu_hsa_kernel directive.
.byte .kernel.sgpr_count
// CHECK: .byte 0
.byte .kernel.vgpr_count
// CHECK: .byte 0
v_mov_b32_e32 v5, s8
s_endpgm
// s8 and v5 were used, so the expected counts are 9 and 6.
.byte .kernel.sgpr_count
// CHECK: .byte 9
.byte .kernel.vgpr_count
// CHECK: .byte 6

// A new kernel directive is expected to reset both counts to 0.
.amdgpu_hsa_kernel K1
K1:
.byte .kernel.sgpr_count
// CHECK: .byte 0
.byte .kernel.vgpr_count
// CHECK: .byte 0
v_mov_b32_e32 v1, s86
s_endpgm
// s86 and v1 were used, so the expected counts are 87 and 2.
.byte .kernel.sgpr_count
// CHECK: .byte 87
.byte .kernel.vgpr_count
// CHECK: .byte 2

// Here the counts are read before the K2 label: the reset is expected to
// happen at the directive itself, not at the label.
.amdgpu_hsa_kernel K2
.byte .kernel.sgpr_count
// CHECK: .byte 0
.byte .kernel.vgpr_count
// CHECK: .byte 0
K2:
s_load_dwordx8 s[16:23], s[0:1], 0x0
v_mov_b32_e32 v0, v0
s_endpgm
// Multi-dword operands count up to their last dword: s[16:23] gives 24.
.byte .kernel.sgpr_count
// CHECK: .byte 24
.byte .kernel.vgpr_count
// CHECK: .byte 1

// Uses the counts as "next free register" allocators inside a kernel.
.text
.amdgpu_hsa_kernel K3
K3:
// A, B and C each capture the VGPR count at that point (presumably 0, 1, 2,
// consistent with the final expected value of 3).
A = .kernel.vgpr_count
v_mov_b32_e32 v[A], s0
B = .kernel.vgpr_count
v_mov_b32_e32 v[B], s0
v_mov_b32_e32 v[B], v[A]
C = .kernel.vgpr_count
v_mov_b32_e32 v[C], v[A]
// Only s0 has been used so far, so D is presumably 1 + 3 = 4 and E is 8;
// s[E:E+1] then ends at s9, matching the expected SGPR count of 10.
D = .kernel.sgpr_count + 3 // align
E = D + 4
s_load_dwordx4 s[D:D+3], s[E:E+1], 0x0
s_endpgm

.byte .kernel.sgpr_count
// CHECK: .byte 10
.byte .kernel.vgpr_count
// CHECK: .byte 3
File renamed without changes.

0 comments on commit d8dc65b

Please sign in to comment.