Skip to content

Commit

Permalink
AMDGPU: Select 64-bit {ADD,SUB}{C,E} nodes
Browse files Browse the repository at this point in the history
Summary:
This will be used for 64-bit MULHU, which is in turn used for the 64-bit
divide-by-constant optimization (see D24822).

Reviewers: arsenm, tstellarAMD

Subscribers: kzhuravl, wdng, yaxunl, llvm-commits, tony-tye

Differential Revision: https://reviews.llvm.org/D25289

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@284224 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
nhaehnle committed Oct 14, 2016
1 parent d1a990d commit 877e3be
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 10 deletions.
47 changes: 37 additions & 10 deletions lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,11 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
// DAG legalization, so we can fold some i64 ADDs used for address
// calculation into the LOAD and STORE instructions.
case ISD::ADD:
case ISD::SUB: {
case ISD::ADDC:
case ISD::ADDE:
case ISD::SUB:
case ISD::SUBC:
case ISD::SUBE: {
if (N->getValueType(0) != MVT::i64 ||
Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
break;
Expand Down Expand Up @@ -576,7 +580,12 @@ void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);

bool IsAdd = (N->getOpcode() == ISD::ADD);
unsigned Opcode = N->getOpcode();
bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
bool ProduceCarry =
ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
bool IsAdd =
(Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE);

SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
Expand All @@ -592,25 +601,43 @@ void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
DL, MVT::i32, RHS, Sub1);

SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };

unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

SDNode *AddLo = CurDAG->getMachineNode( Opc, DL, VTList, AddLoArgs);
SDValue Carry(AddLo, 1);
SDNode *AddHi
= CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);
SDNode *AddLo;
if (!ConsumeCarry) {
SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
} else {
SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
}
SDValue AddHiArgs[] = {
SDValue(Hi0, 0),
SDValue(Hi1, 0),
SDValue(AddLo, 1)
};
SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

SDValue Args[5] = {
SDValue RegSequenceArgs[] = {
CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
SDValue(AddLo,0),
Sub0,
SDValue(AddHi,0),
Sub1,
};
CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
MVT::i64, RegSequenceArgs);

if (ProduceCarry) {
// Replace the carry-use
CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(AddHi, 1));
}

// Replace the remaining uses.
CurDAG->ReplaceAllUsesWith(N, RegSequence);
CurDAG->RemoveDeadNode(N);
}

// We need to handle this here because tablegen doesn't support matching
Expand Down
56 changes: 56 additions & 0 deletions test/CodeGen/AMDGPU/add_i128.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; GCN-LABEL: {{^}}test_i128_vreg:
; GCN: v_add_i32_e32 v[[LO:[0-9]+]], vcc,
; GCN-NEXT: v_addc_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}, vcc
; GCN-NEXT: v_addc_u32_e32 v{{[0-9]+}}, vcc, v{{[0-9]+}}, v{{[0-9]+}}, vcc
; GCN-NEXT: v_addc_u32_e32 v[[HI:[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}, vcc
; GCN: buffer_store_dwordx4 v{{\[}}[[LO]]:[[HI]]],
define void @test_i128_vreg(i128 addrspace(1)* noalias %out, i128 addrspace(1)* noalias %inA, i128 addrspace(1)* noalias %inB) {
%tid = call i32 @llvm.amdgcn.workitem.id.x() readnone
%a_ptr = getelementptr i128, i128 addrspace(1)* %inA, i32 %tid
%b_ptr = getelementptr i128, i128 addrspace(1)* %inB, i32 %tid
%a = load i128, i128 addrspace(1)* %a_ptr
%b = load i128, i128 addrspace(1)* %b_ptr
%result = add i128 %a, %b
store i128 %result, i128 addrspace(1)* %out
ret void
}

; Check that the SGPR add operand is correctly moved to a VGPR.
; GCN-LABEL: {{^}}sgpr_operand:
; GCN: v_add_i32
; GCN: v_addc_u32
; GCN: v_addc_u32
; GCN: v_addc_u32
define void @sgpr_operand(i128 addrspace(1)* noalias %out, i128 addrspace(1)* noalias %in, i128 %a) {
%foo = load i128, i128 addrspace(1)* %in, align 8
%result = add i128 %foo, %a
store i128 %result, i128 addrspace(1)* %out
ret void
}

; GCN-LABEL: {{^}}sgpr_operand_reversed:
; GCN: v_add_i32
; GCN: v_addc_u32
; GCN: v_addc_u32
; GCN: v_addc_u32
define void @sgpr_operand_reversed(i128 addrspace(1)* noalias %out, i128 addrspace(1)* noalias %in, i128 %a) {
%foo = load i128, i128 addrspace(1)* %in, align 8
%result = add i128 %a, %foo
store i128 %result, i128 addrspace(1)* %out
ret void
}

; GCN-LABEL: {{^}}test_sreg:
; GCN: s_add_u32
; GCN: s_addc_u32
; GCN: s_addc_u32
; GCN: s_addc_u32
define void @test_sreg(i128 addrspace(1)* noalias %out, i128 %a, i128 %b) {
%result = add i128 %a, %b
store i128 %result, i128 addrspace(1)* %out
ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() readnone

0 comments on commit 877e3be

Please sign in to comment.