[mips] Add CCValAssign::[ASZ]ExtUpper and CCPromoteToUpperBitsInType …

…and handle struct's correctly on big-endian N32/N64 return values. Summary: The N32/N64 ABI's require that structs passed in registers are laid out such that spilling the register with 'sd' places the struct at the lowest address. For little endian this is trivial but for big-endian it requires that structs are shifted into the upper bits of the register. We also require that structs passed in registers have the 'inreg' attribute for big-endian N32/N64 to work correctly. This is because the tablegen-erated calling convention implementation only has access to the lowered form of struct arguments (one or more integers of up to 64-bits each) and is unable to determine the original type. Reviewers: vmedic Reviewed By: vmedic Subscribers: llvm-commits Differential Revision: http://reviews.llvm.org/D5286 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218451 91177308-0d34-0410-b5e6-96231b3b80d8
atgreen · Sep 25, 2014 · 03fe69e · 03fe69e
1 parent 6765c34
commit 03fe69e
Show file tree

Hide file tree

Showing 6 changed files with 207 additions and 14 deletions.
diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h
@@ -31,18 +31,25 @@ class TargetRegisterInfo;
 class CCValAssign {
 public:
   enum LocInfo {
-    Full,   // The value fills the full location.
-    SExt,   // The value is sign extended in the location.
-    ZExt,   // The value is zero extended in the location.
-    AExt,   // The value is extended with undefined upper bits.
-    BCvt,   // The value is bit-converted in the location.
-    VExt,   // The value is vector-widened in the location.
-            // FIXME: Not implemented yet. Code that uses AExt to mean
-            // vector-widen should be fixed to use VExt instead.
-    FPExt,  // The floating-point value is fp-extended in the location.
-    Indirect // The location contains pointer to the value.
+    Full,      // The value fills the full location.
+    SExt,      // The value is sign extended in the location.
+    ZExt,      // The value is zero extended in the location.
+    AExt,      // The value is extended with undefined upper bits.
+    SExtUpper, // The value is in the upper bits of the location and should be
+               // sign extended when retrieved.
+    ZExtUpper, // The value is in the upper bits of the location and should be
+               // zero extended when retrieved.
+    AExtUpper, // The value is in the upper bits of the location and should be
+               // extended with undefined upper bits when retrieved.
+    BCvt,      // The value is bit-converted in the location.
+    VExt,      // The value is vector-widened in the location.
+               // FIXME: Not implemented yet. Code that uses AExt to mean
+               // vector-widen should be fixed to use VExt instead.
+    FPExt,     // The floating-point value is fp-extended in the location.
+    Indirect   // The location contains pointer to the value.
     // TODO: a subset of the value is in the location.
   };
+
 private:
   /// ValNo - This is the value number begin assigned (e.g. an argument number).
   unsigned ValNo;
@@ -146,6 +153,9 @@ class CCValAssign {
     return (HTP == AExt || HTP == SExt || HTP == ZExt);
   }
 
+  bool isUpperBitsInLoc() const {
+    return HTP == AExtUpper || HTP == SExtUpper || HTP == ZExtUpper;
+  }
 };
 
 /// CCAssignFn - This function assigns a location for Val, updating State to

diff --git a/include/llvm/Target/TargetCallingConv.td b/include/llvm/Target/TargetCallingConv.td
@@ -119,6 +119,12 @@ class CCPromoteToType<ValueType destTy> : CCAction {
   ValueType DestTy = destTy;
 }
 
+/// CCPromoteToUpperBitsInType - If applied, this promotes the specified current
+/// value to the specified type and shifts the value into the upper bits.
+class CCPromoteToUpperBitsInType<ValueType destTy> : CCAction {
+  ValueType DestTy = destTy;
+}
+
 /// CCBitConvertToType - If applied, this bitconverts the specified current
 /// value to the specified type.
 class CCBitConvertToType<ValueType destTy> : CCAction {

diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
@@ -92,6 +92,14 @@ def CC_MipsN_VarArg : CallingConv<[
 ]>;
 
 def RetCC_MipsN : CallingConv<[
+  // Aggregate returns are positioned at the lowest address in the slot for
+  // both little and big-endian targets. When passing in registers, this
+  // requires that big-endian targets shift the value into the upper bits.
+  CCIfSubtarget<"isLittle()",
+      CCIfType<[i8, i16, i32], CCIfInReg<CCPromoteToType<i64>>>>,
+  CCIfSubtargetNot<"isLittle()",
+      CCIfType<[i8, i16, i32], CCIfInReg<CCPromoteToUpperBitsInType<i64>>>>,
+
   // i32 are returned in registers V0, V1
   CCIfType<[i32], CCAssignToReg<[V0, V1]>>,
 

diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
@@ -2696,13 +2696,49 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
 
   // Copy all of the result registers out of their specified physreg.
   for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    CCValAssign &VA = RVLocs[i];
+    assert(VA.isRegLoc() && "Can only return in registers!");
+
     SDValue Val = DAG.getCopyFromReg(Chain, DL, RVLocs[i].getLocReg(),
                                      RVLocs[i].getLocVT(), InFlag);
     Chain = Val.getValue(1);
     InFlag = Val.getValue(2);
 
-    if (RVLocs[i].getValVT() != RVLocs[i].getLocVT())
-      Val = DAG.getNode(ISD::BITCAST, DL, RVLocs[i].getValVT(), Val);
+    if (VA.isUpperBitsInLoc()) {
+      unsigned ValSizeInBits = Ins[i].ArgVT.getSizeInBits();
+      unsigned LocSizeInBits = VA.getLocVT().getSizeInBits();
+      unsigned Shift =
+          VA.getLocInfo() == CCValAssign::ZExtUpper ? ISD::SRL : ISD::SRA;
+      Val = DAG.getNode(
+          Shift, DL, VA.getLocVT(), Val,
+          DAG.getConstant(LocSizeInBits - ValSizeInBits, VA.getLocVT()));
+    }
+
+    switch (VA.getLocInfo()) {
+    default:
+      llvm_unreachable("Unknown loc info!");
+    case CCValAssign::Full:
+      break;
+    case CCValAssign::BCvt:
+      Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
+      break;
+    case CCValAssign::AExt:
+    case CCValAssign::AExtUpper:
+      Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val);
+      break;
+    case CCValAssign::ZExt:
+    case CCValAssign::ZExtUpper:
+      Val = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Val,
+                        DAG.getValueType(VA.getValVT()));
+      Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val);
+      break;
+    case CCValAssign::SExt:
+    case CCValAssign::SExtUpper:
+      Val = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Val,
+                        DAG.getValueType(VA.getValVT()));
+      Val = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Val);
+      break;
+    }
 
     InVals.push_back(Val);
   }
@@ -2902,9 +2938,43 @@ MipsTargetLowering::LowerReturn(SDValue Chain,
     SDValue Val = OutVals[i];
     CCValAssign &VA = RVLocs[i];
     assert(VA.isRegLoc() && "Can only return in registers!");
+    bool UseUpperBits = false;
+
+    switch (VA.getLocInfo()) {
+    default:
+      llvm_unreachable("Unknown loc info!");
+    case CCValAssign::Full:
+      break;
+    case CCValAssign::BCvt:
+      Val = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Val);
+      break;
+    case CCValAssign::AExtUpper:
+      UseUpperBits = true;
+      // Fallthrough
+    case CCValAssign::AExt:
+      Val = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Val);
+      break;
+    case CCValAssign::ZExtUpper:
+      UseUpperBits = true;
+      // Fallthrough
+    case CCValAssign::ZExt:
+      Val = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Val);
+      break;
+    case CCValAssign::SExtUpper:
+      UseUpperBits = true;
+      // Fallthrough
+    case CCValAssign::SExt:
+      Val = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Val);
+      break;
+    }
 
-    if (RVLocs[i].getValVT() != RVLocs[i].getLocVT())
-      Val = DAG.getNode(ISD::BITCAST, DL, RVLocs[i].getLocVT(), Val);
+    if (UseUpperBits) {
+      unsigned ValSizeInBits = Outs[i].ArgVT.getSizeInBits();
+      unsigned LocSizeInBits = VA.getLocVT().getSizeInBits();
+      Val = DAG.getNode(
+          ISD::SHL, DL, VA.getLocVT(), Val,
+          DAG.getConstant(LocSizeInBits - ValSizeInBits, VA.getLocVT()));
+    }
 
     Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Flag);
 

diff --git a/test/CodeGen/Mips/cconv/return-struct.ll b/test/CodeGen/Mips/cconv/return-struct.ll
@@ -0,0 +1,84 @@
+; RUN: llc -mtriple=mips-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 --check-prefix=O32-BE %s
+; RUN: llc -mtriple=mipsel-linux-gnu -relocation-model=static < %s | FileCheck --check-prefix=ALL --check-prefix=O32 --check-prefix=O32-LE %s
+
+; RUN-TODO: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+; RUN-TODO: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+o32 < %s | FileCheck --check-prefix=ALL --check-prefix=O32 %s
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 --check-prefix=N32-BE %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n32 < %s | FileCheck --check-prefix=ALL --check-prefix=N32 --check-prefix=N32-LE %s
+
+; RUN: llc -mtriple=mips64-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 --check-prefix=N64-BE %s
+; RUN: llc -mtriple=mips64el-linux-gnu -relocation-model=static -mattr=-n64,+n64 < %s | FileCheck --check-prefix=ALL --check-prefix=N64 --check-prefix=N64-LE %s
+
+; Test struct returns for all ABI's and byte orders.
+
+@struct_byte = global {i8} zeroinitializer
+@struct_2byte = global {i8,i8} zeroinitializer
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1)
+
+define inreg {i8} @ret_struct_i8() nounwind {
+entry:
+        %0 = load volatile {i8}* @struct_byte
+        ret {i8} %0
+}
+
+; ALL-LABEL: ret_struct_i8:
+; O32-DAG:           lui [[R1:\$[0-9]+]], %hi(struct_byte)
+; O32-DAG:           lbu $2, %lo(struct_byte)([[R1]])
+
+; N32-LE-DAG:        lui [[R1:\$[0-9]+]], %hi(struct_byte)
+; N32-LE-DAG:        lb $2, %lo(struct_byte)([[R1]])
+
+; N32-BE-DAG:        lui [[R1:\$[0-9]+]], %hi(struct_byte)
+; N32-BE-DAG:        lb [[R2:\$[0-9]+]], %lo(struct_byte)([[R1]])
+; N32-BE-DAG:        dsll $2, [[R2]], 56
+
+; N64-LE-DAG:        ld  [[R1:\$[0-9]+]], %got_disp(struct_byte)($1)
+; N64-LE-DAG:        lb $2, 0([[R1]])
+
+; N64-BE-DAG:        ld  [[R1:\$[0-9]+]], %got_disp(struct_byte)($1)
+; N64-BE-DAG:        lb [[R2:\$[0-9]+]], 0([[R1]])
+; N64-BE-DAG:        dsll $2, [[R2]], 56
+
+; This test is based on the way clang currently lowers {i8,i8} to {i16}.
+; FIXME: It should probably work for without any lowering too but this doesn't
+;        work as expected. Each member gets mapped to a register rather than
+;        packed into a single register.
+define inreg {i16} @ret_struct_i16() nounwind {
+entry:
+        %retval = alloca {i8,i8}, align 1
+        %0 = bitcast {i8,i8}* %retval to i8*
+        call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* getelementptr inbounds ({i8,i8}* @struct_2byte, i32 0, i32 0), i64 2, i32 1, i1 false)
+        %1 = bitcast {i8,i8}* %retval to {i16}*
+        %2 = load volatile {i16}* %1
+        ret {i16} %2
+}
+
+; ALL-LABEL: ret_struct_i16:
+; O32-DAG:           lui [[R1:\$[0-9]+]], %hi(struct_2byte)
+; O32-DAG:           lhu [[R2:\$[0-9]+]], %lo(struct_2byte)([[R1]])
+; O32-DAG:           sh  [[R2]], 0([[SP:\$sp]])
+; O32-DAG:           lhu $2, 0([[SP:\$sp]])
+
+; N32-LE-DAG:        lui [[R1:\$[0-9]+]], %hi(struct_2byte)
+; N32-LE-DAG:        lhu [[R2:\$[0-9]+]], %lo(struct_2byte)([[R1]])
+; N32-LE-DAG:        sh  [[R2]], 8([[SP:\$sp]])
+; N32-LE-DAG:        lh  $2, 8([[SP:\$sp]])
+
+; N32-BE-DAG:        lui [[R1:\$[0-9]+]], %hi(struct_2byte)
+; N32-BE-DAG:        lhu [[R2:\$[0-9]+]], %lo(struct_2byte)([[R1]])
+; N32-BE-DAG:        sh  [[R2]], 8([[SP:\$sp]])
+; N32-BE-DAG:        lh  [[R3:\$[0-9]+]], 8([[SP:\$sp]])
+; N32-BE-DAG:        dsll $2, [[R3]], 48
+
+; N64-LE-DAG:        ld  [[R1:\$[0-9]+]], %got_disp(struct_2byte)($1)
+; N64-LE-DAG:        lhu [[R2:\$[0-9]+]], 0([[R1]])
+; N64-LE-DAG:        sh  [[R2]], 8([[SP:\$sp]])
+; N64-LE-DAG:        lh  $2, 8([[SP:\$sp]])
+
+; N64-BE-DAG:        ld  [[R1:\$[0-9]+]], %got_disp(struct_2byte)($1)
+; N64-BE-DAG:        lhu [[R2:\$[0-9]+]], 0([[R1]])
+; N64-BE-DAG:        sh  [[R2]], 8([[SP:\$sp]])
+; N64-BE-DAG:        lh  [[R3:\$[0-9]+]], 8([[SP:\$sp]])
+; N64-BE-DAG:        dsll $2, [[R3]], 48
diff --git a/utils/TableGen/CallingConvEmitter.cpp b/utils/TableGen/CallingConvEmitter.cpp
@@ -231,6 +231,21 @@ void CallingConvEmitter::EmitAction(Record *Action,
           << IndentStr << "else\n"
           << IndentStr << IndentStr << "LocInfo = CCValAssign::AExt;\n";
       }
+    } else if (Action->isSubClassOf("CCPromoteToUpperBitsInType")) {
+      Record *DestTy = Action->getValueAsDef("DestTy");
+      MVT::SimpleValueType DestVT = getValueType(DestTy);
+      O << IndentStr << "LocVT = " << getEnumName(DestVT) << ";\n";
+      if (MVT(DestVT).isFloatingPoint()) {
+        PrintFatalError("CCPromoteToUpperBitsInType does not handle floating "
+                        "point");
+      } else {
+        O << IndentStr << "if (ArgFlags.isSExt())\n"
+          << IndentStr << IndentStr << "LocInfo = CCValAssign::SExtUpper;\n"
+          << IndentStr << "else if (ArgFlags.isZExt())\n"
+          << IndentStr << IndentStr << "LocInfo = CCValAssign::ZExtUpper;\n"
+          << IndentStr << "else\n"
+          << IndentStr << IndentStr << "LocInfo = CCValAssign::AExtUpper;\n";
+      }
     } else if (Action->isSubClassOf("CCBitConvertToType")) {
       Record *DestTy = Action->getValueAsDef("DestTy");
       O << IndentStr << "LocVT = " << getEnumName(getValueType(DestTy)) <<";\n";