From 35eab1db2f21aee9678fe946a5d983a67285e7e4 Mon Sep 17 00:00:00 2001
From: Joey Gouly
Date: Thu, 22 Aug 2013 15:29:11 +0000
Subject: [PATCH] [ARMv8] Add CodeGen support for VSEL.

This uses the ARMcmov pattern that Tim cleaned up in r188995.

Thanks to Simon Tatham for his floating point help!

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189024 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMISelLowering.cpp |  94 ++++++++-
 lib/Target/ARM/ARMInstrVFP.td      |  20 +-
 test/CodeGen/ARM/vsel.ll           | 309 +++++++++++++++++++++++++++++
 3 files changed, 414 insertions(+), 9 deletions(-)
 create mode 100644 test/CodeGen/ARM/vsel.ll

diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index ebfa1b118eb7..7021941076f6 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -3178,6 +3178,61 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
                          SelectTrue, SelectFalse, ISD::SETNE);
 }
 
+static ISD::CondCode getInverseCCForVSEL(ISD::CondCode CC) {
+  // Integer-only. Return the logical inverse (LT->GE, LE->GT, NE->EQ) so that
+  // swapping TrueVal/FalseVal in the caller stays correct when LHS == RHS.
+  return ISD::getSetCCInverse(CC, /*isInteger=*/true);
+}
+
+static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
+                                 bool &swpCmpOps, bool &swpVselOps) {
+  // Start by selecting the GE condition code for opcodes that return true for
+  // 'equality'
+  if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
+      CC == ISD::SETULE)
+    CondCode = ARMCC::GE;
+
+  // and GT for opcodes that return false for 'equality'.
+  else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
+           CC == ISD::SETULT)
+    CondCode = ARMCC::GT;
+
+  // Since we are constrained to GE/GT, if the opcode contains 'less', we need
+  // to swap the compare operands.
+  if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
+      CC == ISD::SETULT)
+    swpCmpOps = true;
+
+  // Both GT and GE are ordered comparisons, and return false for 'unordered'.
+  // If we have an unordered opcode, we need to swap the operands to the VSEL
+  // instruction (effectively negating the condition).
+  //
+  // This also has the effect of swapping which one of 'less' or 'greater'
+  // returns true, so we also swap the compare operands. It also switches
+  // whether we return true for 'equality', so we compensate by picking the
+  // opposite condition code to our original choice.
+  if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
+      CC == ISD::SETUGT) {
+    swpCmpOps = !swpCmpOps;
+    swpVselOps = !swpVselOps;
+    CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
+  }
+
+  // 'ordered' is 'anything but unordered', so use the VS condition code and
+  // swap the VSEL operands.
+  if (CC == ISD::SETO) {
+    CondCode = ARMCC::VS;
+    swpVselOps = true;
+  }
+
+  // 'unordered or not equal' is 'anything but equal', so use the EQ condition
+  // code and swap the VSEL operands.
+  if (CC == ISD::SETUNE) {
+    CondCode = ARMCC::EQ;
+    swpVselOps = true;
+  }
+}
+
 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
   SDValue LHS = Op.getOperand(0);
@@ -3188,15 +3243,52 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   SDLoc dl(Op);
 
   if (LHS.getValueType() == MVT::i32) {
+    // Try to generate VSEL on ARMv8.
+    // The VSEL instruction can't use all the usual ARM condition
+    // codes: it only has two bits to select the condition code, so it's
+    // constrained to use only GE, GT, VS and EQ.
+    //
+    // To implement all the various ISD::SETXXX opcodes, we sometimes need to
+    // swap the operands of the previous compare instruction (effectively
+    // inverting the compare condition, swapping 'less' and 'greater') and
+    // sometimes need to swap the operands to the VSEL (which inverts the
+    // condition in the sense of firing whenever the previous condition didn't)
+    if (getSubtarget()->hasV8FP() && (TrueVal.getValueType() == MVT::f32 ||
+                                      TrueVal.getValueType() == MVT::f64)) {
+      ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+      if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
+          CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
+        CC = getInverseCCForVSEL(CC);
+        std::swap(TrueVal, FalseVal);
+      }
+    }
+
     SDValue ARMcc;
     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
-    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
+    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
+                       Cmp);
   }
 
   ARMCC::CondCodes CondCode, CondCode2;
   FPCCToARMCC(CC, CondCode, CondCode2);
 
+  // Try to generate VSEL on ARMv8.
+  if (getSubtarget()->hasV8FP() && (TrueVal.getValueType() == MVT::f32 ||
+                                    TrueVal.getValueType() == MVT::f64)) {
+    bool swpCmpOps = false;
+    bool swpVselOps = false;
+    checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
+
+    if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
+        CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
+      if (swpCmpOps)
+        std::swap(LHS, RHS);
+      if (swpVselOps)
+        std::swap(TrueVal, FalseVal);
+    }
+  }
+
   SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index c6b8bc3f929f..b4df4d787e3a 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -333,24 +333,28 @@ def VNMULS : ASbI<0b11100, 0b10, 1, 0,
   let D = VFPNeonA8Domain;
 }
 
-multiclass vsel_inst<string op, bits<2> opc> {
-  let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in {
+multiclass vsel_inst<string op, bits<2> opc, int CC> {
+  let DecoderNamespace = "VFPV8", PostEncoderMethod = "",
+    Uses = [CPSR], AddedComplexity = 4 in {
     def S : ASbInp<0b11100, opc, 0,
                    (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
                    NoItinerary, !strconcat("vsel", op, ".f32\t$Sd, $Sn, $Sm"),
-                   []>, Requires<[HasV8FP]>;
+                   [(set SPR:$Sd, (ARMcmov SPR:$Sm, SPR:$Sn, CC))]>,
+                   Requires<[HasV8FP]>;
 
     def D : ADbInp<0b11100, opc, 0,
                    (outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm),
                    NoItinerary, !strconcat("vsel", op, ".f64\t$Dd, $Dn, $Dm"),
-                   []>, Requires<[HasV8FP]>;
+                   [(set DPR:$Dd, (ARMcmov (f64 DPR:$Dm), (f64 DPR:$Dn), CC))]>,
+                   Requires<[HasV8FP]>;
   }
 }
 
-defm VSELGT : vsel_inst<"gt", 0b11>;
-defm VSELGE : vsel_inst<"ge", 0b10>;
-defm VSELEQ : vsel_inst<"eq", 0b00>;
-defm VSELVS : vsel_inst<"vs", 0b01>;
+// The CC constants here match ARMCC::CondCodes.
+defm VSELGT : vsel_inst<"gt", 0b11, 12>;
+defm VSELGE : vsel_inst<"ge", 0b10, 10>;
+defm VSELEQ : vsel_inst<"eq", 0b00, 0>;
+defm VSELVS : vsel_inst<"vs", 0b01, 6>;
 
 multiclass vmaxmin_inst<string op, bit opc> {
   let DecoderNamespace = "VFPV8", PostEncoderMethod = "" in {
diff --git a/test/CodeGen/ARM/vsel.ll b/test/CodeGen/ARM/vsel.ll
new file mode 100644
index 000000000000..f4ee800f6fd8
--- /dev/null
+++ b/test/CodeGen/ARM/vsel.ll
@@ -0,0 +1,309 @@
+; RUN: llc < %s -mtriple=armv8-linux-gnueabihf -mattr=+v8fp -float-abi=hard | FileCheck %s
+@varfloat = global float 0.0
+@vardouble = global double 0.0
+define void @test_vsel32sgt(i32 %lhs32, i32 %rhs32, float %a, float %b) {
+; CHECK: test_vsel32sgt
+  %tst1 = icmp sgt i32 %lhs32, %rhs32
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: cmp r0, r1
+; CHECK: vselgt.f32 s0, s0, s1
+  ret void
+}
+define void @test_vsel64sgt(i32 %lhs32, i32 %rhs32, double %a, double %b) {
+; CHECK: test_vsel64sgt
+  %tst1 = icmp sgt i32 %lhs32, %rhs32
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: cmp r0, r1
+; CHECK: vselgt.f64 d16, d0, d1
+  ret void
+}
+define void @test_vsel32sge(i32 %lhs32, i32 %rhs32, float %a, float %b) {
+; CHECK: test_vsel32sge
+  %tst1 = icmp sge i32 %lhs32, %rhs32
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: cmp r0, r1
+; CHECK: vselge.f32 s0, s0, s1
+  ret void
+}
+define void @test_vsel64sge(i32 %lhs32, i32 %rhs32, double %a, double %b) {
+; CHECK: test_vsel64sge
+  %tst1 = icmp sge i32 %lhs32, %rhs32
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: cmp r0, r1
+; CHECK: vselge.f64 d16, d0, d1
+  ret void
+}
+define void @test_vsel32eq(i32 %lhs32, i32 %rhs32, float %a, float %b) {
+; CHECK: test_vsel32eq
+  %tst1 = icmp eq i32 %lhs32, %rhs32
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: cmp r0, r1
+; CHECK: vseleq.f32 s0, s0, s1
+  ret void
+}
+define void @test_vsel64eq(i32 %lhs32, i32 %rhs32, double %a, double %b) {
+; CHECK: test_vsel64eq
+  %tst1 = icmp eq i32 %lhs32, %rhs32
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: cmp r0, r1
+; CHECK: vseleq.f64 d16, d0, d1
+  ret void
+}
+define void @test_vsel32slt(i32 %lhs32, i32 %rhs32, float %a, float %b) {
+; CHECK: test_vsel32slt
+  %tst1 = icmp slt i32 %lhs32, %rhs32
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: cmp r0, r1
+; CHECK: vselge.f32 s0, s1, s0
+  ret void
+}
+define void @test_vsel64slt(i32 %lhs32, i32 %rhs32, double %a, double %b) {
+; CHECK: test_vsel64slt
+  %tst1 = icmp slt i32 %lhs32, %rhs32
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: cmp r0, r1
+; CHECK: vselge.f64 d16, d1, d0
+  ret void
+}
+define void @test_vsel32sle(i32 %lhs32, i32 %rhs32, float %a, float %b) {
+; CHECK: test_vsel32sle
+  %tst1 = icmp sle i32 %lhs32, %rhs32
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: cmp r0, r1
+; CHECK: vselgt.f32 s0, s1, s0
+  ret void
+}
+define void @test_vsel64sle(i32 %lhs32, i32 %rhs32, double %a, double %b) {
+; CHECK: test_vsel64sle
+  %tst1 = icmp sle i32 %lhs32, %rhs32
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: cmp r0, r1
+; CHECK: vselgt.f64 d16, d1, d0
+  ret void
+}
+define void @test_vsel32ogt(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK: test_vsel32ogt
+  %tst1 = fcmp ogt float %lhs32, %rhs32
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselgt.f32 s0, s2, s3
+  ret void
+}
+define void @test_vsel64ogt(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK: test_vsel64ogt
+  %tst1 = fcmp ogt float %lhs32, %rhs32
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselgt.f64 d16, d1, d2
+  ret void
+}
+define void @test_vsel32oge(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK: test_vsel32oge
+  %tst1 = fcmp oge float %lhs32, %rhs32
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselge.f32 s0, s2, s3
+  ret void
+}
+define void @test_vsel64oge(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK: test_vsel64oge
+  %tst1 = fcmp oge float %lhs32, %rhs32
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselge.f64 d16, d1, d2
+  ret void
+}
+define void @test_vsel32oeq(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK: test_vsel32oeq
+  %tst1 = fcmp oeq float %lhs32, %rhs32
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vseleq.f32 s0, s2, s3
+  ret void
+}
+define void @test_vsel64oeq(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK: test_vsel64oeq
+  %tst1 = fcmp oeq float %lhs32, %rhs32
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vseleq.f64 d16, d1, d2
+  ret void
+}
+define void @test_vsel32ugt(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK: test_vsel32ugt
+  %tst1 = fcmp ugt float %lhs32, %rhs32
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s1, s0
+; CHECK: vselge.f32 s0, s3, s2
+  ret void
+}
+define void @test_vsel64ugt(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK: test_vsel64ugt
+  %tst1 = fcmp ugt float %lhs32, %rhs32
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s1, s0
+; CHECK: vselge.f64 d16, d2, d1
+  ret void
+}
+define void @test_vsel32uge(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK: test_vsel32uge
+  %tst1 = fcmp uge float %lhs32, %rhs32
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s1, s0
+; CHECK: vselgt.f32 s0, s3, s2
+  ret void
+}
+define void @test_vsel64uge(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK: test_vsel64uge
+  %tst1 = fcmp uge float %lhs32, %rhs32
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s1, s0
+; CHECK: vselgt.f64 d16, d2, d1
+  ret void
+}
+define void @test_vsel32olt(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK: test_vsel32olt
+  %tst1 = fcmp olt float %lhs32, %rhs32
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s1, s0
+; CHECK: vselgt.f32 s0, s2, s3
+  ret void
+}
+define void @test_vsel64olt(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK: test_vsel64olt
+  %tst1 = fcmp olt float %lhs32, %rhs32
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s1, s0
+; CHECK: vselgt.f64 d16, d1, d2
+  ret void
+}
+define void @test_vsel32ult(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK: test_vsel32ult
+  %tst1 = fcmp ult float %lhs32, %rhs32
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselge.f32 s0, s3, s2
+  ret void
+}
+define void @test_vsel64ult(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK: test_vsel64ult
+  %tst1 = fcmp ult float %lhs32, %rhs32
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselge.f64 d16, d2, d1
+  ret void
+}
+define void @test_vsel32ole(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK: test_vsel32ole
+  %tst1 = fcmp ole float %lhs32, %rhs32
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s1, s0
+; CHECK: vselge.f32 s0, s2, s3
+  ret void
+}
+define void @test_vsel64ole(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK: test_vsel64ole
+  %tst1 = fcmp ole float %lhs32, %rhs32
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s1, s0
+; CHECK: vselge.f64 d16, d1, d2
+  ret void
+}
+define void @test_vsel32ule(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK: test_vsel32ule
+  %tst1 = fcmp ule float %lhs32, %rhs32
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselgt.f32 s0, s3, s2
+  ret void
+}
+define void @test_vsel64ule(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK: test_vsel64ule
+  %tst1 = fcmp ule float %lhs32, %rhs32
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselgt.f64 d16, d2, d1
+  ret void
+}
+define void @test_vsel32ord(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK: test_vsel32ord
+  %tst1 = fcmp ord float %lhs32, %rhs32
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselvs.f32 s0, s3, s2
+  ret void
+}
+define void @test_vsel64ord(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK: test_vsel64ord
+  %tst1 = fcmp ord float %lhs32, %rhs32
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselvs.f64 d16, d2, d1
+  ret void
+}
+define void @test_vsel32une(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK: test_vsel32une
+  %tst1 = fcmp une float %lhs32, %rhs32
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vseleq.f32 s0, s3, s2
+  ret void
+}
+define void @test_vsel64une(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK: test_vsel64une
+  %tst1 = fcmp une float %lhs32, %rhs32
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vseleq.f64 d16, d2, d1
+  ret void
+}
+define void @test_vsel32uno(float %lhs32, float %rhs32, float %a, float %b) {
+; CHECK: test_vsel32uno
+  %tst1 = fcmp uno float %lhs32, %rhs32
+  %val1 = select i1 %tst1, float %a, float %b
+  store float %val1, float* @varfloat
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselvs.f32 s0, s2, s3
+  ret void
+}
+define void @test_vsel64uno(float %lhs32, float %rhs32, double %a, double %b) {
+; CHECK: test_vsel64uno
+  %tst1 = fcmp uno float %lhs32, %rhs32
+  %val1 = select i1 %tst1, double %a, double %b
+  store double %val1, double* @vardouble
+; CHECK: vcmpe.f32 s0, s1
+; CHECK: vselvs.f64 d16, d1, d2
+  ret void
+}