Skip to content

Commit

Permalink
[AMDGPU][llvm-mc] Support of Trap Handler registers (TTMP0..11 and TB…
Browse files Browse the repository at this point in the history
…A/TMA)

Tests added along with implemented feature.
Note that there is a small leftover of an unnecessary MI scheduling issue
(more info in the review). CodeGen/AMDGPU/salu-to-valu.ll updated to fix
the false regression.

TODO: Support for TTMP quads, comma-separated syntax in "[]" and more.

Differential Revision: http://reviews.llvm.org/D17825

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@266205 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
atamazov committed Apr 13, 2016
1 parent d61f2a5 commit 075abcb
Show file tree
Hide file tree
Showing 7 changed files with 272 additions and 53 deletions.
15 changes: 15 additions & 0 deletions lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -384,17 +384,32 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
FlatUsed = true;
continue;

case AMDGPU::TBA:
case AMDGPU::TBA_LO:
case AMDGPU::TBA_HI:
case AMDGPU::TMA:
case AMDGPU::TMA_LO:
case AMDGPU::TMA_HI:
llvm_unreachable("Trap Handler registers should not be used");
continue;

default:
break;
}

if (AMDGPU::SReg_32RegClass.contains(reg)) {
if (AMDGPU::TTMP_32RegClass.contains(reg)) {
llvm_unreachable("Trap Handler registers should not be used");
}
isSGPR = true;
width = 1;
} else if (AMDGPU::VGPR_32RegClass.contains(reg)) {
isSGPR = false;
width = 1;
} else if (AMDGPU::SReg_64RegClass.contains(reg)) {
if (AMDGPU::TTMP_64RegClass.contains(reg)) {
llvm_unreachable("Trap Handler registers should not be used");
}
isSGPR = true;
width = 2;
} else if (AMDGPU::VReg_64RegClass.contains(reg)) {
Expand Down
55 changes: 34 additions & 21 deletions lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -574,8 +574,10 @@ struct OptionalOperand {

}

static int getRegClass(bool IsVgpr, unsigned RegWidth) {
if (IsVgpr) {
enum RegisterKind { IS_VGPR, IS_SGPR, IS_TTMP };

static int getRegClass(RegisterKind Is, unsigned RegWidth) {
if (Is == IS_VGPR) {
switch (RegWidth) {
default: return -1;
case 1: return AMDGPU::VGPR_32RegClassID;
Expand All @@ -585,16 +587,23 @@ static int getRegClass(bool IsVgpr, unsigned RegWidth) {
case 8: return AMDGPU::VReg_256RegClassID;
case 16: return AMDGPU::VReg_512RegClassID;
}
} else if (Is == IS_TTMP) {
switch (RegWidth) {
default: return -1;
case 1: return AMDGPU::TTMP_32RegClassID;
case 2: return AMDGPU::TTMP_64RegClassID;
}
} else if (Is == IS_SGPR) {
switch (RegWidth) {
default: return -1;
case 1: return AMDGPU::SGPR_32RegClassID;
case 2: return AMDGPU::SGPR_64RegClassID;
case 4: return AMDGPU::SReg_128RegClassID;
case 8: return AMDGPU::SReg_256RegClassID;
case 16: return AMDGPU::SReg_512RegClassID;
}
}

switch (RegWidth) {
default: return -1;
case 1: return AMDGPU::SGPR_32RegClassID;
case 2: return AMDGPU::SGPR_64RegClassID;
case 4: return AMDGPU::SReg_128RegClassID;
case 8: return AMDGPU::SReg_256RegClassID;
case 16: return AMDGPU::SReg_512RegClassID;
}
return -1;
}

static unsigned getRegForName(StringRef RegName) {
Expand All @@ -611,6 +620,10 @@ static unsigned getRegForName(StringRef RegName) {
.Case("vcc_hi", AMDGPU::VCC_HI)
.Case("exec_lo", AMDGPU::EXEC_LO)
.Case("exec_hi", AMDGPU::EXEC_HI)
.Case("tma_lo", AMDGPU::TMA_LO)
.Case("tma_hi", AMDGPU::TMA_HI)
.Case("tba_lo", AMDGPU::TBA_LO)
.Case("tba_hi", AMDGPU::TBA_HI)
.Default(0);
}

Expand Down Expand Up @@ -641,21 +654,21 @@ std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
TRI, &getSTI(), false);
}

// Match vgprs and sgprs
if (RegName[0] != 's' && RegName[0] != 'v')
// Match vgprs, sgprs and ttmps
if (RegName[0] != 's' && RegName[0] != 'v' && !RegName.startswith("ttmp"))
return nullptr;

bool IsVgpr = RegName[0] == 'v';
const RegisterKind Is = RegName[0] == 'v' ? IS_VGPR : RegName[0] == 's' ? IS_SGPR : IS_TTMP;
unsigned RegWidth;
unsigned RegIndexInClass;
if (RegName.size() > 1) {
// We have a 32-bit register
if (RegName.size() > (Is == IS_TTMP ? strlen("ttmp") : 1) ) {
// We have a single 32-bit register. Syntax: vXX
RegWidth = 1;
if (RegName.substr(1).getAsInteger(10, RegIndexInClass))
if (RegName.substr(Is == IS_TTMP ? strlen("ttmp") : 1).getAsInteger(10, RegIndexInClass))
return nullptr;
Parser.Lex();
} else {
// We have a register greater than 32-bits.
// We have a register greater than 32-bits (a range of single registers). Syntax: v[XX:YY]

int64_t RegLo, RegHi;
Parser.Lex();
Expand All @@ -678,11 +691,11 @@ std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {

Parser.Lex();
RegWidth = (RegHi - RegLo) + 1;
if (IsVgpr) {
if (Is == IS_VGPR) {
// VGPR registers aren't aligned.
RegIndexInClass = RegLo;
} else {
// SGPR registers are aligned. Max alignment is 4 dwords.
// SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
unsigned Size = std::min(RegWidth, 4u);
if (RegLo % Size != 0)
return nullptr;
Expand All @@ -691,7 +704,7 @@ std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
}
}

int RCID = getRegClass(IsVgpr, RegWidth);
int RCID = getRegClass(Is, RegWidth);
if (RCID == -1)
return nullptr;

Expand Down
45 changes: 32 additions & 13 deletions lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

#include <string>

using namespace llvm;

void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
Expand Down Expand Up @@ -189,6 +191,18 @@ void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O,
case AMDGPU::VCC_HI:
O << "vcc_hi";
return;
case AMDGPU::TBA_LO:
O << "tba_lo";
return;
case AMDGPU::TBA_HI:
O << "tba_hi";
return;
case AMDGPU::TMA_LO:
O << "tma_lo";
return;
case AMDGPU::TMA_HI:
O << "tma_hi";
return;
case AMDGPU::EXEC_LO:
O << "exec_lo";
return;
Expand All @@ -205,41 +219,44 @@ void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O,
break;
}

char Type;
std::string Type;
unsigned NumRegs;

if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(reg)) {
Type = 'v';
Type = "v";
NumRegs = 1;
} else if (MRI.getRegClass(AMDGPU::SGPR_32RegClassID).contains(reg)) {
Type = 's';
Type = "s";
NumRegs = 1;
} else if (MRI.getRegClass(AMDGPU::VReg_64RegClassID).contains(reg)) {
Type = 'v';
Type = "v";
NumRegs = 2;
} else if (MRI.getRegClass(AMDGPU::SGPR_64RegClassID).contains(reg)) {
Type = "s";
NumRegs = 2;
} else if (MRI.getRegClass(AMDGPU::SReg_64RegClassID).contains(reg)) {
Type = 's';
} else if (MRI.getRegClass(AMDGPU::TTMP_64RegClassID).contains(reg)) {
Type = "ttmp";
NumRegs = 2;
} else if (MRI.getRegClass(AMDGPU::VReg_128RegClassID).contains(reg)) {
Type = 'v';
Type = "v";
NumRegs = 4;
} else if (MRI.getRegClass(AMDGPU::SReg_128RegClassID).contains(reg)) {
Type = 's';
Type = "s";
NumRegs = 4;
} else if (MRI.getRegClass(AMDGPU::VReg_96RegClassID).contains(reg)) {
Type = 'v';
Type = "v";
NumRegs = 3;
} else if (MRI.getRegClass(AMDGPU::VReg_256RegClassID).contains(reg)) {
Type = 'v';
Type = "v";
NumRegs = 8;
} else if (MRI.getRegClass(AMDGPU::SReg_256RegClassID).contains(reg)) {
Type = 's';
Type = "s";
NumRegs = 8;
} else if (MRI.getRegClass(AMDGPU::VReg_512RegClassID).contains(reg)) {
Type = 'v';
Type = "v";
NumRegs = 16;
} else if (MRI.getRegClass(AMDGPU::SReg_512RegClassID).contains(reg)) {
Type = 's';
Type = "s";
NumRegs = 16;
} else {
O << getRegisterName(reg);
Expand All @@ -249,6 +266,8 @@ void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O,
// The low 8 bits of the encoding value is the register index, for both VGPRs
// and SGPRs.
unsigned RegIdx = MRI.getEncodingValue(reg) & ((1 << 8) - 1);
if (Type == "ttmp")
RegIdx -= 112; // Trap temps start at offset 112. TODO: Get this from tablegen.
if (NumRegs == 1) {
O << Type << RegIdx;
return;
Expand Down
26 changes: 25 additions & 1 deletion lib/Target/AMDGPU/SIRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,16 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
reserveRegisterTuples(Reserved, AMDGPU::EXEC);
reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);

// Reserve Trap Handler registers - support is not implemented in Codegen.
reserveRegisterTuples(Reserved, AMDGPU::TBA);
reserveRegisterTuples(Reserved, AMDGPU::TMA);
reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);

// Reserve the last 2 registers so we will always have at least 2 more that
// will physically contain VCC.
reserveRegisterTuples(Reserved, AMDGPU::SGPR102_SGPR103);
Expand Down Expand Up @@ -640,7 +650,21 @@ unsigned SIRegisterInfo::getPhysRegSubReg(unsigned Reg,
switch(Channel) {
case 0: return AMDGPU::VCC_LO;
case 1: return AMDGPU::VCC_HI;
default: llvm_unreachable("Invalid SubIdx for VCC");
default: llvm_unreachable("Invalid SubIdx for VCC"); break;
}

case AMDGPU::TBA:
switch(Channel) {
case 0: return AMDGPU::TBA_LO;
case 1: return AMDGPU::TBA_HI;
default: llvm_unreachable("Invalid SubIdx for TBA"); break;
}

case AMDGPU::TMA:
switch(Channel) {
case 0: return AMDGPU::TMA_LO;
case 1: return AMDGPU::TMA_HI;
default: llvm_unreachable("Invalid SubIdx for TMA"); break;
}

case AMDGPU::FLAT_SCR:
Expand Down
61 changes: 59 additions & 2 deletions lib/Target/AMDGPU/SIRegisterInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,40 @@ def EXEC : RegisterWithSubRegs<"EXEC", [EXEC_LO, EXEC_HI]>,
def SCC : SIReg<"scc", 253>;
def M0 : SIReg <"m0", 124>;

// Trap handler registers.
// TBA_LO / TBA_HI are declared with the values 108 and 109; 108 is also the
// HWEncoding assigned to the combined TBA register below.
def TBA_LO : SIReg<"tba_lo", 108>;
def TBA_HI : SIReg<"tba_hi", 109>;

// TBA combines TBA_LO (sub0) and TBA_HI (sub1) into a single register. It
// carries the same hardware encoding as its low half and is declared as a
// DWARF alias of TBA_LO.
def TBA : RegisterWithSubRegs<"tba", [TBA_LO, TBA_HI]>,
DwarfRegAlias<TBA_LO> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
let HWEncoding = 108;
}

// TMA_LO / TMA_HI are declared with the values 110 and 111; 110 is also the
// HWEncoding assigned to the combined TMA register below.
def TMA_LO : SIReg<"tma_lo", 110>;
def TMA_HI : SIReg<"tma_hi", 111>;

// TMA combines TMA_LO (sub0) and TMA_HI (sub1) into a single register. It
// carries the same hardware encoding as its low half and is declared as a
// DWARF alias of TMA_LO.
def TMA : RegisterWithSubRegs<"tma", [TMA_LO, TMA_HI]>,
DwarfRegAlias<TMA_LO> {
let Namespace = "AMDGPU";
let SubRegIndices = [sub0, sub1];
let HWEncoding = 110;
}

// Trap handler TMP registers TTMP0..TTMP11, numbered consecutively from 112
// to 123. Register N is declared with the value 112 + N, so the register
// index can be recovered by subtracting 112.
def TTMP0 : SIReg <"ttmp0", 112>;
def TTMP1 : SIReg <"ttmp1", 113>;
def TTMP2 : SIReg <"ttmp2", 114>;
def TTMP3 : SIReg <"ttmp3", 115>;
def TTMP4 : SIReg <"ttmp4", 116>;
def TTMP5 : SIReg <"ttmp5", 117>;
def TTMP6 : SIReg <"ttmp6", 118>;
def TTMP7 : SIReg <"ttmp7", 119>;
def TTMP8 : SIReg <"ttmp8", 120>;
def TTMP9 : SIReg <"ttmp9", 121>;
def TTMP10 : SIReg <"ttmp10", 122>;
def TTMP11 : SIReg <"ttmp11", 123>;

multiclass FLAT_SCR_LOHI_m <string n, bits<16> ci_e, bits<16> vi_e> {
def _ci : SIReg<n, ci_e>;
def _vi : SIReg<n, vi_e>;
Expand Down Expand Up @@ -135,6 +169,24 @@ def SGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
(add (decimate (shl SGPR_32, 14), 4)),
(add (decimate (shl SGPR_32, 15), 4))]>;

// Trap handler TMP 32-bit registers.
// Register class containing TTMP0..TTMP11 with value types i32/f32.
// isAllocatable = 0 keeps these registers out of register allocation.
def TTMP_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
(add (sequence "TTMP%u", 0, 11))> {
let isAllocatable = 0;
}

// Trap handler TMP 64-bit registers.
// Two-register tuples over TTMP_32: sub0 takes every 2nd register starting at
// TTMP0, and sub1 takes every 2nd register of the list shifted left by one
// (i.e. starting at TTMP1). The result is the aligned pairs
// (TTMP0, TTMP1), (TTMP2, TTMP3), ...
def TTMP_64Regs : RegisterTuples<[sub0, sub1],
[(add (decimate TTMP_32, 2)),
(add (decimate (shl TTMP_32, 1), 2))]>;

// Trap handler TMP 128-bit registers.
// Four-register tuples over TTMP_32: sub-register K (sub0..sub3) takes every
// 4th register of the list shifted left by K, giving the aligned quads
// (TTMP0..TTMP3), (TTMP4..TTMP7), ...
// NOTE(review): no register class visible in this change adds these tuples;
// the commit message lists TTMP quad support as a TODO.
def TTMP_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3],
[(add (decimate TTMP_32, 4)),
(add (decimate (shl TTMP_32, 1), 4)),
(add (decimate (shl TTMP_32, 2), 4)),
(add (decimate (shl TTMP_32, 3), 4))]>;

// VGPR 32-bit registers
def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
(add (sequence "VGPR%u", 0, 255))>;
Expand Down Expand Up @@ -199,13 +251,18 @@ class RegImmMatcher<string name> : AsmOperandClass {

// Register class for all scalar registers (SGPRs + Special Registers)
def SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
(add SGPR_32, M0, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI)
(add SGPR_32, M0, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI,
TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI)
>;

def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add SGPR_64Regs)>;

// Register class over the trap handler TMP 64-bit tuples (TTMP_64Regs), using
// the same value types as SGPR_64. isAllocatable = 0 keeps these registers
// out of register allocation.
def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add TTMP_64Regs)> {
let isAllocatable = 0;
}

def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 32,
(add SGPR_64, VCC, EXEC, FLAT_SCR)
(add SGPR_64, VCC, EXEC, FLAT_SCR, TTMP_64)
>;

def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128)> {
Expand Down
24 changes: 8 additions & 16 deletions test/CodeGen/AMDGPU/salu-to-valu.ll
Original file line number Diff line number Diff line change
Expand Up @@ -201,22 +201,14 @@ entry:

; GCN-LABEL: {{^}}smrd_valu_ci_offset_x16:

; GCN-NOHSA-NOT: v_add
; GCN-NOHSA: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x13480{{$}}
; GCN-NOHSA-NOT: v_add
; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}}
; GCN-NOHSA-NOT: v_add
; GCN-NOHSA: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x13490{{$}}
; GCN-NOHSA-NOT: v_add
; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}}
; GCN-NOHSA-NOT: v_add
; GCN-NOHSA: s_mov_b32 [[OFFSET2:s[0-9]+]], 0x134a0{{$}}
; GCN-NOHSA-NOT: v_add
; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET2]] addr64{{$}}
; GCN-NOHSA-NOT: v_add
; GCN-NOHSA: s_mov_b32 [[OFFSET3:s[0-9]+]], 0x134b0{{$}}
; GCN-NOHSA-NOT: v_add
; GCN-NOHSA: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET3]] addr64{{$}}
; GCN-NOHSA-DAG: s_mov_b32 [[OFFSET0:s[0-9]+]], 0x13480{{$}}
; GCN-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET0]] addr64{{$}}
; GCN-NOHSA-DAG: s_mov_b32 [[OFFSET1:s[0-9]+]], 0x13490{{$}}
; GCN-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET1]] addr64{{$}}
; GCN-NOHSA-DAG: s_mov_b32 [[OFFSET2:s[0-9]+]], 0x134a0{{$}}
; GCN-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET2]] addr64{{$}}
; GCN-NOHSA-DAG: s_mov_b32 [[OFFSET3:s[0-9]+]], 0x134b0{{$}}
; GCN-NOHSA-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}], [[OFFSET3]] addr64{{$}}

; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GCN-NOHSA: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
Expand Down
Loading

0 comments on commit 075abcb

Please sign in to comment.