Skip to content

Commit

Permalink
Fix several issues with ELT profiler hooks and tests to cover these c…
Browse files Browse the repository at this point in the history
…ases (dotnet#54912)

* Add ELT profiler tests for HFA/HVA arguments/return values on Arm64.

* Fix an issue with `ProfileArgIterator::GetNextArgAddr` and `ProfileArgIterator::GetReturnBufferAddr` not properly handling HFA/HVAs on Arm64.

* Add tests to cover different ways of returning a struct on Linux x64 and macOS x64 (i.e. one register, two registers, stack).

* Fix an issue with `ProfileArgIterator::GetReturnBufferAddr` not properly handling struct returned in registers on Linux x64 and macOS x64.

* Fix an issue with not passing an actual value of `rdx` down to a callback in `ProfileLeaveNaked` and `ProfileTailcallNaked` helpers.
  • Loading branch information
echesakov authored Sep 29, 2021
1 parent b3c592f commit cd1cd1d
Show file tree
Hide file tree
Showing 9 changed files with 1,093 additions and 156 deletions.
4 changes: 2 additions & 2 deletions src/coreclr/vm/amd64/asmhelpers.S
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ NESTED_ENTRY ProfileLeaveNaked, _TEXT, NoHandler
movsd real8 ptr [rsp + 0x70], xmm7 # -- struct flt7 field
mov [rsp + 0x78], r11 # -- struct rdi field
mov [rsp + 0x80], r11 # -- struct rsi field
mov [rsp + 0x88], r11 # -- struct rdx field
mov [rsp + 0x88], rdx # -- struct rdx field
mov [rsp + 0x90], r11 # -- struct rcx field
mov [rsp + 0x98], r11 # -- struct r8 field
mov [rsp + 0xa0], r11 # -- struct r9 field
Expand Down Expand Up @@ -273,7 +273,7 @@ NESTED_ENTRY ProfileTailcallNaked, _TEXT, NoHandler
movsd real8 ptr [rsp + 0x70], xmm7 # -- struct flt7 field
mov [rsp + 0x78], r11 # -- struct rdi field
mov [rsp + 0x80], r11 # -- struct rsi field
mov [rsp + 0x88], r11 # -- struct rdx field
mov [rsp + 0x88], rdx # -- struct rdx field
mov [rsp + 0x90], r11 # -- struct rcx field
mov [rsp + 0x98], r11 # -- struct r8 field
mov [rsp + 0xa0], r11 # -- struct r9 field
Expand Down
59 changes: 40 additions & 19 deletions src/coreclr/vm/amd64/profiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ ProfileArgIterator::ProfileArgIterator(MetaSig * pSig, void * platformSpecificHa
PROFILE_PLATFORM_SPECIFIC_DATA* pData = (PROFILE_PLATFORM_SPECIFIC_DATA*)m_handle;
#ifdef UNIX_AMD64_ABI
m_bufferPos = 0;
ZeroMemory(pData->buffer, PROFILE_PLATFORM_SPECIFIC_DATA_BUFFER_SIZE * sizeof(UINT64));
ZeroMemory(pData->buffer, PROFILE_PLATFORM_SPECIFIC_DATA_BUFFER_SIZE * sizeof(UINT64));
#endif // UNIX_AMD64_ABI

// unwind a frame and get the Rsp for the profiled method to make sure it matches
Expand Down Expand Up @@ -484,45 +484,66 @@ LPVOID ProfileArgIterator::GetReturnBufferAddr(void)
// by our calling convention, but is required by our profiler spec.
return (LPVOID)pData->rax;
}

CorElementType t = m_argIterator.GetSig()->GetReturnType();

TypeHandle thReturnType;
CorElementType t = m_argIterator.GetSig()->GetReturnTypeNormalized(&thReturnType);

if (ELEMENT_TYPE_VOID == t)
{
return NULL;
}

#ifdef UNIX_AMD64_ABI
if (m_argIterator.GetSig()->GetReturnTypeSize() == 16)
if (ELEMENT_TYPE_VALUETYPE == t)
{
_ASSERTE(m_bufferPos == 0 && "Nothing else should be using the scratch space during a return");
// The Unix x64 ABI has a special case where a struct that is no larger than 16 bytes is returned in registers
// and if the classes of each eightbyte in the struct are different (i.e. INTEGER and SSE or SSE and INTEGER)
// they will be passed in rax/rdx and xmm0/xmm1, which means the values are noncontiguous.
// Just like the argument passing above we copy it in to the buffer to fake it being contiguous.

MethodTable* pMT = thReturnType.AsMethodTable();
_ASSERTE(pMT->IsRegPassedStruct());

// The unix x64 ABI has a special case where a 16 byte struct will be passed in registers
// and if there are integer and float args it will be passed in rax/etc and xmm/etc, respectively
// which means the values are noncontiguous. Just like the argument passing above
// we copy it in to the buffer to fake it being contiguous.
UINT flags = m_argIterator.GetFPReturnSize();
EEClass* eeClass = pMT->GetClass();
UINT fpReturnSize = m_argIterator.GetFPReturnSize();

// The lower two bits are used to indicate whether struct args are floating point or integer
if (flags & 1)
if (eeClass->GetNumberEightBytes() == 1)
{
pData->buffer[0] = pData->flt0;
pData->buffer[1] = (flags & 2) ? pData->flt1 : pData->rax;
if (fpReturnSize != 0)
{
return &pData->flt0;
}
else
{
return &pData->rax;
}
}
else
{
pData->buffer[0] = pData->rax;
pData->buffer[1] = (flags & 2) ? pData->flt0 : pData->rdx;
}
_ASSERTE(m_bufferPos == 0 && "Nothing else should be using the scratch space during a return");

return pData->buffer;
// The lower two bits are used to indicate whether struct args are floating point or integer
if (fpReturnSize & 1)
{
pData->buffer[0] = pData->flt0;
pData->buffer[1] = (fpReturnSize & 2) ? pData->flt1 : pData->rax;
}
else
{
pData->buffer[0] = pData->rax;
pData->buffer[1] = (fpReturnSize & 2) ? pData->flt0 : pData->rdx;
}

return pData->buffer;
}
}
#endif // UNIX_AMD64_ABI

if (ELEMENT_TYPE_R4 == t || ELEMENT_TYPE_R8 == t)
{
pData->rax = pData->flt0;
}

return &(pData->rax);
}

Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/vm/arm64/asmhelpers.S
Original file line number Diff line number Diff line change
Expand Up @@ -1141,7 +1141,7 @@ LEAF_END JIT_ProfilerEnterLeaveTailcallStub, _TEXT
#define PROFILE_ENTER 1
#define PROFILE_LEAVE 2
#define PROFILE_TAILCALL 4
#define SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA 272
#define SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA 320

// ------------------------------------------------------------------
.macro GenerateProfileHelper helper, flags
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/vm/arm64/asmhelpers.asm
Original file line number Diff line number Diff line change
Expand Up @@ -1366,7 +1366,7 @@ CallHelper2
#define PROFILE_ENTER 1
#define PROFILE_LEAVE 2
#define PROFILE_TAILCALL 4
#define SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA 272
#define SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA 320

; ------------------------------------------------------------------
MACRO
Expand Down
107 changes: 76 additions & 31 deletions src/coreclr/vm/arm64/profiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@
#define PROFILE_LEAVE 2
#define PROFILE_TAILCALL 4

// Scratch space to store HFA return values (max 16 bytes)
#define PROFILE_PLATFORM_SPECIFIC_DATA_BUFFER_SIZE 16
#define PROFILE_PLATFORM_SPECIFIC_DATA_BUFFER_SIZE (NUM_FLOAT_ARGUMENT_REGISTERS * sizeof(double))

typedef struct _PROFILE_PLATFORM_SPECIFIC_DATA
{
Expand Down Expand Up @@ -49,8 +48,7 @@ void ProfileSetFunctionIDInPlatformSpecificHandle(void* pPlatformSpecificHandle,
}

ProfileArgIterator::ProfileArgIterator(MetaSig* pSig, void* pPlatformSpecificHandle)
: m_argIterator(pSig),
m_bufferPos(0)
: m_argIterator(pSig), m_bufferPos(0)
{
WRAPPER_NO_CONTRACT;

Expand All @@ -60,6 +58,8 @@ ProfileArgIterator::ProfileArgIterator(MetaSig* pSig, void* pPlatformSpecificHan
m_handle = pPlatformSpecificHandle;

PROFILE_PLATFORM_SPECIFIC_DATA* pData = reinterpret_cast<PROFILE_PLATFORM_SPECIFIC_DATA*>(pPlatformSpecificHandle);
ZeroMemory(pData->buffer, PROFILE_PLATFORM_SPECIFIC_DATA_BUFFER_SIZE);

#ifdef _DEBUG
// Unwind a frame and get the SP for the profiled method to make sure it matches
// what the JIT gave us
Expand Down Expand Up @@ -119,6 +119,42 @@ ProfileArgIterator::~ProfileArgIterator()
m_handle = nullptr;
}

LPVOID ProfileArgIterator::CopyStructFromFPRegs(int firstFPReg, int numFPRegs, int hfaFieldSize)
{
WRAPPER_NO_CONTRACT;

PROFILE_PLATFORM_SPECIFIC_DATA* pData = reinterpret_cast<PROFILE_PLATFORM_SPECIFIC_DATA*>(m_handle);

if (hfaFieldSize == 8)
{
UINT64* pDest = (UINT64*)&pData->buffer[m_bufferPos];

for (int i = 0; i < numFPRegs; ++i)
{
pDest[i] = (UINT64)pData->floatArgumentRegisters.q[firstFPReg + i].Low;
}

m_bufferPos += numFPRegs * sizeof(UINT64);

return pDest;
}
else
{
_ASSERTE(hfaFieldSize == 4);

UINT32* pDest = (UINT32*)&pData->buffer[m_bufferPos];

for (int i = 0; i < numFPRegs; ++i)
{
pDest[i] = (UINT32)pData->floatArgumentRegisters.q[firstFPReg + i].Low;
}

m_bufferPos += numFPRegs * sizeof(UINT32);

return pDest;
}
}

LPVOID ProfileArgIterator::GetNextArgAddr()
{
WRAPPER_NO_CONTRACT;
Expand All @@ -142,7 +178,23 @@ LPVOID ProfileArgIterator::GetNextArgAddr()

if (TransitionBlock::IsFloatArgumentRegisterOffset(argOffset))
{
return (LPBYTE)&pData->floatArgumentRegisters + (argOffset - TransitionBlock::GetOffsetOfFloatArgumentRegisters());
ArgLocDesc argLocDesc;
m_argIterator.GetArgLoc(argOffset, &argLocDesc);

if (argLocDesc.m_cFloatReg > 1)
{
if (argLocDesc.m_hfaFieldSize != 16)
{
return CopyStructFromFPRegs(argLocDesc.m_idxFloatReg, argLocDesc.m_cFloatReg, argLocDesc.m_hfaFieldSize);
}
}
#ifdef _DEBUG
else
{
_ASSERTE(argLocDesc.m_cFloatReg == 1);
}
#endif
return (LPBYTE)&pData->floatArgumentRegisters.q[argLocDesc.m_idxFloatReg];
}

LPVOID pArg = nullptr;
Expand Down Expand Up @@ -241,46 +293,39 @@ LPVOID ProfileArgIterator::GetReturnBufferAddr(void)
}

UINT fpReturnSize = m_argIterator.GetFPReturnSize();

if (fpReturnSize != 0)
{
{
TypeHandle thReturnValueType;
m_argIterator.GetSig()->GetReturnTypeNormalized(&thReturnValueType);

if (!thReturnValueType.IsNull() && thReturnValueType.IsHFA())
{
UINT hfaFieldSize = fpReturnSize / 4;
UINT totalSize = m_argIterator.GetSig()->GetReturnTypeSize();
_ASSERTE(totalSize % hfaFieldSize == 0);
_ASSERTE(totalSize <= 16);
CorInfoHFAElemType hfaElemType = thReturnValueType.GetHFAType();

BYTE *dest = pData->buffer;
for (UINT floatRegIdx = 0; floatRegIdx < totalSize / hfaFieldSize; ++floatRegIdx)
if (hfaElemType == CORINFO_HFA_ELEM_VECTOR128)
{
if (hfaFieldSize == 4)
{
*(UINT32*)dest = *(UINT32*)&pData->floatArgumentRegisters.q[floatRegIdx];
dest += 4;
}
else if (hfaFieldSize == 8)
return &pData->floatArgumentRegisters.q[0];
}
else
{
int hfaFieldSize = 8;

if (hfaElemType == CORINFO_HFA_ELEM_FLOAT)
{
*(UINT64*)dest = *(UINT64*)&pData->floatArgumentRegisters.q[floatRegIdx];
dest += 8;
hfaFieldSize = 4;
}
#ifdef _DEBUG
else
{
_ASSERTE(hfaFieldSize == 16);
*(NEON128*)dest = pData->floatArgumentRegisters.q[floatRegIdx];
dest += 16;
_ASSERTE((hfaElemType == CORINFO_HFA_ELEM_DOUBLE) || (hfaElemType == CORINFO_HFA_ELEM_VECTOR64));
}
#endif
const int cntFPRegs = thReturnValueType.GetSize() / hfaFieldSize;

if (floatRegIdx > 8)
{
// There's only space for 8 arguments in buffer
_ASSERTE(FALSE);
break;
}
// On Arm64 HFA and HVA values are returned in s0-s3, d0-d3, or v0-v3.
return CopyStructFromFPRegs(0, cntFPRegs, hfaFieldSize);
}

return pData->buffer;
}

return &pData->floatArgumentRegisters.q[0];
Expand Down
23 changes: 15 additions & 8 deletions src/coreclr/vm/proftoeeinterfaceimpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,21 @@ class ProfileArgIterator
ArgIterator m_argIterator;
#if defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64)
UINT64 m_bufferPos;
#endif // defined(UNIX_AMD64_ABI) || defined(TARGET_ARM64)

#if defined(UNIX_AMD64_ABI)
// On certain architectures we can pass args in non-sequential registers,
// this function will copy the struct so it is laid out as it would be in memory
// so it can be passed to the profiler
LPVOID CopyStructFromRegisters();
#endif

#if defined(TARGET_ARM64)
// On Arm64 the fields an HFA or an HVA argument will need to be copied
// to a temporary buffer in memory, so they are laid out contiguously in memory.
LPVOID CopyStructFromFPRegs(int idxFPReg, int cntFPRegs, int hfaFieldSize);
#endif

#endif // UNIX_AMD64_ABI || TARGET_ARM64

public:
ProfileArgIterator(MetaSig * pMetaSig, void* platformSpecificHandle);
Expand All @@ -74,13 +88,6 @@ class ProfileArgIterator
return m_argIterator.NumFixedArgs();
}

#if defined(UNIX_AMD64_ABI)
// On certain architectures we can pass args in non-sequential registers,
// this function will copy the struct so it is laid out as it would be in memory
// so it can be passed to the profiler
LPVOID CopyStructFromRegisters();
#endif // defined(UNIX_AMD64_ABI)

//
// After initialization, this method is called repeatedly until it
// returns NULL to get the address of each arg.
Expand Down
Loading

0 comments on commit cd1cd1d

Please sign in to comment.