Skip to content

Commit

Permalink
ARM: Simplify PCS handling.
Browse files Browse the repository at this point in the history
The backend should now be able to handle all AAPCS rules based on argument
type, which means Clang no longer has to duplicate the register-counting logic
and the CodeGen can be significantly simplified.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@230349 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
TNorthover committed Feb 24, 2015
1 parent f89960b commit 2e8ec42
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 183 deletions.
188 changes: 9 additions & 179 deletions lib/CodeGen/TargetInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4351,17 +4351,10 @@ class ARMABIInfo : public ABIInfo {

private:
ABIKind Kind;
mutable int VFPRegs[16];
const unsigned NumVFPs;
const unsigned NumGPRs;
mutable unsigned AllocatedGPRs;
mutable unsigned AllocatedVFPs;

public:
ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) : ABIInfo(CGT), Kind(_Kind),
NumVFPs(16), NumGPRs(4) {
ARMABIInfo(CodeGenTypes &CGT, ABIKind _Kind) : ABIInfo(CGT), Kind(_Kind) {
setCCs();
resetAllocatedRegs();
}

bool isEABI() const {
Expand Down Expand Up @@ -4391,8 +4384,7 @@ class ARMABIInfo : public ABIInfo {

private:
ABIArgInfo classifyReturnType(QualType RetTy, bool isVariadic) const;
ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic,
bool &IsCPRC) const;
ABIArgInfo classifyArgumentType(QualType RetTy, bool isVariadic) const;
bool isIllegalVectorType(QualType Ty) const;

bool isHomogeneousAggregateBaseType(QualType Ty) const override;
Expand All @@ -4407,10 +4399,6 @@ class ARMABIInfo : public ABIInfo {
llvm::CallingConv::ID getLLVMDefaultCC() const;
llvm::CallingConv::ID getABIDefaultCC() const;
void setCCs();

void markAllocatedGPRs(unsigned Alignment, unsigned NumRequired) const;
void markAllocatedVFPs(unsigned Alignment, unsigned NumRequired) const;
void resetAllocatedRegs(void) const;
};

class ARMTargetCodeGenInfo : public TargetCodeGenInfo {
Expand Down Expand Up @@ -4521,60 +4509,19 @@ void WindowsARMTargetCodeGenInfo::SetTargetAttributes(
}

void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const {
// To correctly handle Homogeneous Aggregate, we need to keep track of the
// VFP registers allocated so far.
// C.1.vfp If the argument is a VFP CPRC and there are sufficient consecutive
// VFP registers of the appropriate type unallocated then the argument is
// allocated to the lowest-numbered sequence of such registers.
// C.2.vfp If the argument is a VFP CPRC then any VFP registers that are
// unallocated are marked as unavailable.
resetAllocatedRegs();

if (getCXXABI().classifyReturnType(FI)) {
if (FI.getReturnInfo().isIndirect())
markAllocatedGPRs(1, 1);
} else {
if (!getCXXABI().classifyReturnType(FI))
FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), FI.isVariadic());
}
for (auto &I : FI.arguments()) {
unsigned PreAllocationVFPs = AllocatedVFPs;
unsigned PreAllocationGPRs = AllocatedGPRs;
bool IsCPRC = false;
// 6.1.2.3 There is one VFP co-processor register class using registers
// s0-s15 (d0-d7) for passing arguments.
I.info = classifyArgumentType(I.type, FI.isVariadic(), IsCPRC);

// If we have allocated some arguments onto the stack (due to running
// out of VFP registers), we cannot split an argument between GPRs and
// the stack. If this situation occurs, we add padding to prevent the
// GPRs from being used. In this situation, the current argument could
// only be allocated by rule C.8, so rule C.6 would mark these GPRs as
// unusable anyway.
// We do not have to do this if the argument is being passed ByVal, as the
// backend can handle that situation correctly.
const bool StackUsed = PreAllocationGPRs > NumGPRs || PreAllocationVFPs > NumVFPs;
const bool IsByVal = I.info.isIndirect() && I.info.getIndirectByVal();
if (!IsCPRC && PreAllocationGPRs < NumGPRs && AllocatedGPRs > NumGPRs &&
StackUsed && !IsByVal) {
llvm::Type *PaddingTy = llvm::ArrayType::get(
llvm::Type::getInt32Ty(getVMContext()), NumGPRs - PreAllocationGPRs);
if (I.info.canHaveCoerceToType()) {
I.info = ABIArgInfo::getDirect(I.info.getCoerceToType() /* type */,
0 /* offset */, PaddingTy, true);
} else {
I.info = ABIArgInfo::getDirect(nullptr /* type */, 0 /* offset */,
PaddingTy, true);
}
}
}

for (auto &I : FI.arguments())
I.info = classifyArgumentType(I.type, FI.isVariadic());

// Always honor user-specified calling convention.
if (FI.getCallingConvention() != llvm::CallingConv::C)
return;

llvm::CallingConv::ID cc = getRuntimeCC();
if (cc != llvm::CallingConv::C)
FI.setEffectiveCallingConvention(cc);
FI.setEffectiveCallingConvention(cc);
}

/// Return the default calling convention that LLVM will use.
Expand Down Expand Up @@ -4612,64 +4559,8 @@ void ARMABIInfo::setCCs() {
llvm::CallingConv::ARM_APCS : llvm::CallingConv::ARM_AAPCS);
}

/// markAllocatedVFPs - claim \p NumRequired consecutive VFP registers (counted
/// in S registers) starting at a register index that is a multiple of
/// \p Alignment, and record the claim in VFPRegs / AllocatedVFPs.
///
/// When no suitable run of free registers exists, every VFP register is
/// marked as used and AllocatedVFPs is set to 17; a value above 16 is the
/// signal that some CPRC had to be allocated on the stack.
void ARMABIInfo::markAllocatedVFPs(unsigned Alignment,
                                   unsigned NumRequired) const {
  // Nothing left to hand out: just make sure the "spilled to the stack"
  // marker (> 16) is set and leave.
  if (AllocatedVFPs >= 16) {
    AllocatedVFPs = 17;
    return;
  }

  // C.1.vfp If the argument is a VFP CPRC and there are sufficient consecutive
  // VFP registers of the appropriate type unallocated then the argument is
  // allocated to the lowest-numbered sequence of such registers.
  for (unsigned Start = 0; Start < 16; Start += Alignment) {
    const unsigned End = Start + NumRequired;

    // A candidate run is unusable as soon as it leaves the register file or
    // touches a register that has already been handed out.
    bool Blocked = false;
    for (unsigned Probe = Start; Probe < End && !Blocked; ++Probe)
      Blocked = Probe >= 16 || VFPRegs[Probe];
    if (Blocked)
      continue;

    for (unsigned Reg = Start; Reg < End; ++Reg)
      VFPRegs[Reg] = 1;
    AllocatedVFPs += NumRequired;
    return;
  }

  // C.2.vfp If the argument is a VFP CPRC then any VFP registers that are
  // unallocated are marked as unavailable.
  for (unsigned Reg = 0; Reg < 16; ++Reg)
    VFPRegs[Reg] = 1;
  AllocatedVFPs = 17; // Not enough VFP registers were available.
}

/// Bump AllocatedGPRs by \p NumRequired general purpose registers, first
/// rounding the running count up to an even value when \p Alignment is 2.
///
/// The count is allowed to grow past 4; a value above 4 represents arguments
/// that are stored on the stack rather than in registers.
void ARMABIInfo::markAllocatedGPRs(unsigned Alignment,
                                   unsigned NumRequired) const {
  assert((Alignment == 1 || Alignment == 2) && "Alignment must be 4 or 8 bytes");

  // An Alignment of 2 cannot start on an odd register count, so skip one
  // register when the running count is currently odd.
  const bool NeedsEvenStart = Alignment == 2;
  const bool CountIsOdd = (AllocatedGPRs & 0x1) != 0;
  if (NeedsEvenStart && CountIsOdd)
    ++AllocatedGPRs;

  AllocatedGPRs += NumRequired;
}

/// Return the register bookkeeping to its initial state: every VFPRegs entry
/// cleared and both allocation counters set back to zero.
void ARMABIInfo::resetAllocatedRegs(void) const {
  for (unsigned Reg = 0; Reg != NumVFPs; ++Reg)
    VFPRegs[Reg] = 0;
  AllocatedVFPs = 0;
  AllocatedGPRs = 0;
}

ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
bool &IsCPRC) const {
// We update number of allocated VFPs according to
ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty,
bool isVariadic) const {
// 6.1.2.1 The following argument types are VFP CPRCs:
// A single-precision floating-point type (including promoted
// half-precision types); A double-precision floating-point type;
Expand All @@ -4687,74 +4578,32 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
if (Size <= 32) {
llvm::Type *ResType =
llvm::Type::getInt32Ty(getVMContext());
markAllocatedGPRs(1, 1);
return ABIArgInfo::getDirect(ResType);
}
if (Size == 64) {
llvm::Type *ResType = llvm::VectorType::get(
llvm::Type::getInt32Ty(getVMContext()), 2);
if (getABIKind() == ARMABIInfo::AAPCS || isVariadic){
markAllocatedGPRs(2, 2);
} else {
markAllocatedVFPs(2, 2);
IsCPRC = true;
}
return ABIArgInfo::getDirect(ResType);
}
if (Size == 128) {
llvm::Type *ResType = llvm::VectorType::get(
llvm::Type::getInt32Ty(getVMContext()), 4);
if (getABIKind() == ARMABIInfo::AAPCS || isVariadic) {
markAllocatedGPRs(2, 4);
} else {
markAllocatedVFPs(4, 4);
IsCPRC = true;
}
return ABIArgInfo::getDirect(ResType);
}
markAllocatedGPRs(1, 1);
return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
}
// Update VFPRegs for legal vector types.
if (getABIKind() == ARMABIInfo::AAPCS_VFP && !isVariadic) {
if (const VectorType *VT = Ty->getAs<VectorType>()) {
uint64_t Size = getContext().getTypeSize(VT);
// Size of a legal vector should be power of 2 and above 64.
markAllocatedVFPs(Size >= 128 ? 4 : 2, Size / 32);
IsCPRC = true;
}
}
// Update VFPRegs for floating point types.
if (getABIKind() == ARMABIInfo::AAPCS_VFP && !isVariadic) {
if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
if (BT->getKind() == BuiltinType::Half ||
BT->getKind() == BuiltinType::Float) {
markAllocatedVFPs(1, 1);
IsCPRC = true;
}
if (BT->getKind() == BuiltinType::Double ||
BT->getKind() == BuiltinType::LongDouble) {
markAllocatedVFPs(2, 2);
IsCPRC = true;
}
}
}

if (!isAggregateTypeForABI(Ty)) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>()) {
Ty = EnumTy->getDecl()->getIntegerType();
}

unsigned Size = getContext().getTypeSize(Ty);
if (!IsCPRC)
markAllocatedGPRs(Size > 32 ? 2 : 1, (Size + 31) / 32);
return (Ty->isPromotableIntegerType() ? ABIArgInfo::getExtend()
: ABIArgInfo::getDirect());
}

if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
markAllocatedGPRs(1, 1);
return ABIArgInfo::getIndirect(0, RAA == CGCXXABI::RAA_DirectInMemory);
}

Expand All @@ -4770,19 +4619,6 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
if (isHomogeneousAggregate(Ty, Base, Members)) {
assert(Base && "Base class should be set for homogeneous aggregate");
// Base can be a floating-point or a vector.
if (Base->isVectorType()) {
// ElementSize is in number of floats.
unsigned ElementSize = getContext().getTypeSize(Base) == 64 ? 2 : 4;
markAllocatedVFPs(ElementSize,
Members * ElementSize);
} else if (Base->isSpecificBuiltinType(BuiltinType::Float))
markAllocatedVFPs(1, Members);
else {
assert(Base->isSpecificBuiltinType(BuiltinType::Double) ||
Base->isSpecificBuiltinType(BuiltinType::LongDouble));
markAllocatedVFPs(2, Members * 2);
}
IsCPRC = true;
return ABIArgInfo::getDirect(nullptr, 0, nullptr, false);
}
}
Expand All @@ -4801,7 +4637,6 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
// argument is greater than 64 bytes, this will always use up any available
// registers (of which there are 4). We also don't care about getting the
// alignment right, because general-purpose registers cannot be back-filled.
markAllocatedGPRs(1, 4);
return ABIArgInfo::getIndirect(TyAlign, /*ByVal=*/true,
/*Realign=*/TyAlign > ABIAlign);
}
Expand All @@ -4814,11 +4649,9 @@ ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, bool isVariadic,
if (getContext().getTypeAlign(Ty) <= 32) {
ElemTy = llvm::Type::getInt32Ty(getVMContext());
SizeRegs = (getContext().getTypeSize(Ty) + 31) / 32;
markAllocatedGPRs(1, SizeRegs);
} else {
ElemTy = llvm::Type::getInt64Ty(getVMContext());
SizeRegs = (getContext().getTypeSize(Ty) + 63) / 64;
markAllocatedGPRs(2, SizeRegs * 2);
}

return ABIArgInfo::getDirect(llvm::ArrayType::get(ElemTy, SizeRegs));
Expand Down Expand Up @@ -4918,7 +4751,6 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy,

// Large vector types should be returned via memory.
if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128) {
markAllocatedGPRs(1, 1);
return ABIArgInfo::getIndirect(0);
}

Expand Down Expand Up @@ -4956,7 +4788,6 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy,
}

// Otherwise return in memory.
markAllocatedGPRs(1, 1);
return ABIArgInfo::getIndirect(0);
}

Expand Down Expand Up @@ -4992,7 +4823,6 @@ ABIArgInfo ARMABIInfo::classifyReturnType(QualType RetTy,
return ABIArgInfo::getDirect(llvm::Type::getInt32Ty(getVMContext()));
}

markAllocatedGPRs(1, 1);
return ABIArgInfo::getIndirect(0);
}

Expand Down
8 changes: 4 additions & 4 deletions test/CodeGen/arm-aapcs-vfp.c
Original file line number Diff line number Diff line change
Expand Up @@ -126,19 +126,19 @@ typedef struct { long long x; int y; } struct_long_long_int;
// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_1(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, i64 %k, i32 %l)
void test_vfp_stack_gpr_split_1(double a, double b, double c, double d, double e, double f, double g, double h, double i, int j, long long k, int l) {}

// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_2(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, [3 x i32], [2 x i64] %k.coerce)
// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_2(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, [2 x i64] %k.coerce)
void test_vfp_stack_gpr_split_2(double a, double b, double c, double d, double e, double f, double g, double h, double i, int j, struct_long_long_int k) {}

// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_3(%struct.struct_long_long_int* noalias sret %agg.result, double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, [3 x i32], [2 x i64] %k.coerce)
// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_3(%struct.struct_long_long_int* noalias sret %agg.result, double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, [2 x i64] %k.coerce)
struct_long_long_int test_vfp_stack_gpr_split_3(double a, double b, double c, double d, double e, double f, double g, double h, double i, struct_long_long_int k) {}

typedef struct { int a; int b:4; int c; } struct_int_bitfield_int;
// CHECK: define arm_aapcs_vfpcc void @test_test_vfp_stack_gpr_split_bitfield(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, i32 %k, [2 x i32], [3 x i32] %l.coerce)
// CHECK: define arm_aapcs_vfpcc void @test_test_vfp_stack_gpr_split_bitfield(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, i32 %k, [3 x i32] %l.coerce)
void test_test_vfp_stack_gpr_split_bitfield(double a, double b, double c, double d, double e, double f, double g, double h, double i, int j, int k, struct_int_bitfield_int l) {}

// Note: this struct requires internal padding
typedef struct { int x; long long y; } struct_int_long_long;
// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_4(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, [3 x i32], [2 x i64] %k.coerce)
// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_4(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, [2 x i64] %k.coerce)
void test_vfp_stack_gpr_split_4(double a, double b, double c, double d, double e, double f, double g, double h, double i, int j, struct_int_long_long k) {}

// This very large struct (passed byval) uses up the GPRs, so no padding is needed
Expand Down

0 comments on commit 2e8ec42

Please sign in to comment.