Skip to content

Commit

Permalink
Add profiler ELT test (dotnet#39550)
Browse files Browse the repository at this point in the history
Fix the following issues:

    On amd64 Linux we didn't save and restore the xmm registers, and didn't handle enregistered 16-byte structs as return values
    On arm we didn't save and restore the floating point registers (I made the Linux arm helpers match the Windows arm helpers)
    On arm64 we didn't handle 16-byte enregistered structs as return values

And add tests
  • Loading branch information
davmason authored Jul 29, 2020
1 parent 90f0c93 commit f0ede2b
Show file tree
Hide file tree
Showing 23 changed files with 1,128 additions and 202 deletions.
70 changes: 25 additions & 45 deletions src/coreclr/src/vm/amd64/asmhelpers.S
Original file line number Diff line number Diff line change
Expand Up @@ -52,17 +52,12 @@
# we can align to 16 and be guaranteed to not exceed the frame size
.equ STACK_FUDGE_FACTOR, 0x8

# Space to keep xmm0 and xmm1
.equ SIZEOF_FP_ARG_SPILL, 0x10*2

.equ OFFSETOF_FP_ARG_SPILL, SIZEOF_PROFILE_PLATFORM_SPECIFIC_DATA + STACK_FUDGE_FACTOR

# SIZEOF_STACK_FRAME is how many bytes we reserve in our ELT helpers below
# There are three components, the first is space for profiler platform specific
# data struct that we spill the general purpose registers to, then space to
# spill xmm0 and xmm1, then finally 8 bytes of padding to ensure that the xmm
# register reads/writes are aligned on 16 bytes.
.equ SIZEOF_STACK_FRAME, SIZEOF_PROFILE_PLATFORM_SPECIFIC_DATA + SIZEOF_FP_ARG_SPILL + STACK_FUDGE_FACTOR
.equ SIZEOF_STACK_FRAME, SIZEOF_PROFILE_PLATFORM_SPECIFIC_DATA + STACK_FUDGE_FACTOR

.equ PROFILE_ENTER, 0x1
.equ PROFILE_LEAVE, 0x2
Expand Down Expand Up @@ -131,15 +126,6 @@ NESTED_ENTRY ProfileEnterNaked, _TEXT, NoHandler
mov r10, 0x1 # PROFILE_ENTER
mov [rsp + 0xa8], r10d # -- struct flags field

# get aligned stack ptr (rsp + OFFSETOF_FP_ARG_SPILL) & (-16)
lea rax, [rsp + OFFSETOF_FP_ARG_SPILL]
and rax, -16

# we need to be able to restore the fp return register
# save fp return registers
movdqa [rax + 0x00], xmm0
movdqa [rax + 0x10], xmm1

END_PROLOGUE

# rdi already contains the clientInfo
Expand All @@ -148,10 +134,14 @@ NESTED_ENTRY ProfileEnterNaked, _TEXT, NoHandler
call C_FUNC(ProfileEnter)

# restore fp return registers
lea rax, [rsp + OFFSETOF_FP_ARG_SPILL]
and rax, -16
movdqa xmm0, [rax + 0x00]
movdqa xmm1, [rax + 0x10]
movsd xmm0, real8 ptr [rsp + 0x38] # -- struct flt0 field
movsd xmm1, real8 ptr [rsp + 0x40] # -- struct flt1 field
movsd xmm2, real8 ptr [rsp + 0x48] # -- struct flt2 field
movsd xmm3, real8 ptr [rsp + 0x50] # -- struct flt3 field
movsd xmm4, real8 ptr [rsp + 0x58] # -- struct flt4 field
movsd xmm5, real8 ptr [rsp + 0x60] # -- struct flt5 field
movsd xmm6, real8 ptr [rsp + 0x68] # -- struct flt6 field
movsd xmm7, real8 ptr [rsp + 0x70] # -- struct flt7 field

# restore arg registers
mov rdi, [rsp + 0x78]
Expand Down Expand Up @@ -216,26 +206,21 @@ NESTED_ENTRY ProfileLeaveNaked, _TEXT, NoHandler
mov r10, 0x2 # PROFILE_LEAVE
mov [rsp + 0xa8], r10d # flags -- struct flags field

# get aligned stack ptr (rsp + OFFSETOF_FP_ARG_SPILL) & (-16)
lea rax, [rsp + OFFSETOF_FP_ARG_SPILL]
and rax, -16

# we need to be able to restore the fp return register
# save fp return registers
movdqa [rax + 0x00], xmm0
movdqa [rax + 0x10], xmm1

END_PROLOGUE

# rdi already contains the clientInfo
lea rsi, [rsp + 0x0]
call C_FUNC(ProfileLeave)

# restore fp return registers
lea rax, [rsp + OFFSETOF_FP_ARG_SPILL]
and rax, -16
movdqa xmm0, [rax + 0x00]
movdqa xmm1, [rax + 0x10]
movsd xmm0, real8 ptr [rsp + 0x38] # -- struct flt0 field
movsd xmm1, real8 ptr [rsp + 0x40] # -- struct flt1 field
movsd xmm2, real8 ptr [rsp + 0x48] # -- struct flt2 field
movsd xmm3, real8 ptr [rsp + 0x50] # -- struct flt3 field
movsd xmm4, real8 ptr [rsp + 0x58] # -- struct flt4 field
movsd xmm5, real8 ptr [rsp + 0x60] # -- struct flt5 field
movsd xmm6, real8 ptr [rsp + 0x68] # -- struct flt6 field
movsd xmm7, real8 ptr [rsp + 0x70] # -- struct flt7 field

# restore int return register
mov rax, [rsp + 0x28]
Expand Down Expand Up @@ -295,26 +280,21 @@ NESTED_ENTRY ProfileTailcallNaked, _TEXT, NoHandler
mov r10, 0x2 # PROFILE_LEAVE
mov [rsp + 0xa8], r10d # flags -- struct flags field

# get aligned stack ptr (rsp + OFFSETOF_FP_ARG_SPILL) & (-16)
lea rax, [rsp + OFFSETOF_FP_ARG_SPILL]
and rax, -16

# we need to be able to restore the fp return register
# save fp return registers
movdqa [rax + 0x00], xmm0
movdqa [rax + 0x10], xmm1

END_PROLOGUE

# rdi already contains the clientInfo
lea rsi, [rsp + 0x0]
call C_FUNC(ProfileTailcall)

# restore fp return registers
lea rax, [rsp + OFFSETOF_FP_ARG_SPILL]
and rax, -16
movdqa xmm0, [rax + 0x00]
movdqa xmm1, [rax + 0x10]
movsd xmm0, real8 ptr [rsp + 0x38] # -- struct flt0 field
movsd xmm1, real8 ptr [rsp + 0x40] # -- struct flt1 field
movsd xmm2, real8 ptr [rsp + 0x48] # -- struct flt2 field
movsd xmm3, real8 ptr [rsp + 0x50] # -- struct flt3 field
movsd xmm4, real8 ptr [rsp + 0x58] # -- struct flt4 field
movsd xmm5, real8 ptr [rsp + 0x60] # -- struct flt5 field
movsd xmm6, real8 ptr [rsp + 0x68] # -- struct flt6 field
movsd xmm7, real8 ptr [rsp + 0x70] # -- struct flt7 field

# restore int return register
mov rax, [rsp + 0x28]
Expand Down
44 changes: 37 additions & 7 deletions src/coreclr/src/vm/amd64/profiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ ProfileArgIterator::ProfileArgIterator(MetaSig * pSig, void * platformSpecificHa
PROFILE_PLATFORM_SPECIFIC_DATA* pData = (PROFILE_PLATFORM_SPECIFIC_DATA*)m_handle;
#ifdef UNIX_AMD64_ABI
m_bufferPos = 0;
ZeroMemory(pData->buffer, PROFILE_PLATFORM_SPECIFIC_DATA_BUFFER_SIZE * sizeof(UINT64));
#endif // UNIX_AMD64_ABI

// unwind a frame and get the Rsp for the profiled method to make sure it matches
Expand Down Expand Up @@ -483,17 +484,46 @@ LPVOID ProfileArgIterator::GetReturnBufferAddr(void)
// by our calling convention, but is required by our profiler spec.
return (LPVOID)pData->rax;
}

CorElementType t = m_argIterator.GetSig()->GetReturnType();
if (ELEMENT_TYPE_VOID != t)
if (ELEMENT_TYPE_VOID == t)
{
return NULL;
}

#ifdef UNIX_AMD64_ABI
if (m_argIterator.GetSig()->GetReturnTypeSize() == 16)
{
if (ELEMENT_TYPE_R4 == t || ELEMENT_TYPE_R8 == t)
pData->rax = pData->flt0;
_ASSERTE(m_bufferPos == 0 && "Nothing else should be using the scratch space during a return");

// The unix x64 ABI has a special case where a 16 byte struct will be passed in registers
// and if there are integer and float args it will be passed in rax/etc and xmm/etc, respectively
// which means the values are noncontiguous. Just like the argument passing above
// we copy it in to the buffer to fake it being contiguous.
UINT flags = m_argIterator.GetFPReturnSize();

return &(pData->rax);
// The lower two bits are used to indicate whether struct args are floating point or integer
if (flags & 1)
{
pData->buffer[0] = pData->flt0;
pData->buffer[1] = (flags & 2) ? pData->flt1 : pData->rax;
}
else
{
pData->buffer[0] = pData->rax;
pData->buffer[1] = (flags & 2) ? pData->flt0 : pData->rdx;
}

return pData->buffer;
}
else
return NULL;
#endif // UNIX_AMD64_ABI

if (ELEMENT_TYPE_R4 == t || ELEMENT_TYPE_R8 == t)
{
pData->rax = pData->flt0;
}

return &(pData->rax);
}

#undef PROFILE_ENTER
Expand Down
201 changes: 77 additions & 124 deletions src/coreclr/src/vm/arm/asmhelpers.S
Original file line number Diff line number Diff line change
Expand Up @@ -366,134 +366,87 @@ LEAF_ENTRY JIT_ProfilerEnterLeaveTailcallStub, _TEXT
bx lr
LEAF_END JIT_ProfilerEnterLeaveTailcallStub, _TEXT

//
// EXTERN_C void ProfileEnterNaked(FunctionIDOrClientID functionIDOrClientID);
//
NESTED_ENTRY ProfileEnterNaked, _TEXT, NoHandler
PROLOG_PUSH "{r4, r5, r7, r11, lr}"
PROLOG_STACK_SAVE_OFFSET r7, #8

// fields of PROFILE_PLATFORM_SPECIFIC_DATA, in reverse order

// UINT32 r0; // Keep r0 & r1 contiguous to make returning 64-bit results easier
// UINT32 r1;
// void *r11;
// void *Pc;
// union // Float arg registers as 32-bit (s0-s15) and 64-bit (d0-d7)
// {
// UINT32 s[16];
// UINT64 d[8];
// };
// FunctionID functionId;
// void *probeSp; // stack pointer of managed function
// void *profiledSp; // location of arguments on stack
// LPVOID hiddenArg;
// UINT32 flags;
movw r4, #1
push { /* flags */ r4 }
movw r4, #0
push { /* hiddenArg */ r4 }
add r5, r11, #8
push { /* profiledSp */ r5 }
add r5, sp, #32
push { /* probeSp */ r5 }
push { /* functionId */ r0 }
#define PROFILE_ENTER 1
#define PROFILE_LEAVE 2
#define PROFILE_TAILCALL 4
// size of profiler data structure plus alignment padding
#define SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA 104+4

// typedef struct _PROFILE_PLATFORM_SPECIFIC_DATA
// {
// UINT32 r0; // Keep r0 & r1 contiguous to make returning 64-bit results easier
// UINT32 r1;
// void *R11;
// void *Pc;
// union // Float arg registers as 32-bit (s0-s15) and 64-bit (d0-d7)
// {
// UINT32 s[16];
// UINT64 d[8];
// };
// FunctionID functionId;
// void *probeSp; // stack pointer of managed function
// void *profiledSp; // location of arguments on stack
// LPVOID hiddenArg;
// UINT32 flags;
// } PROFILE_PLATFORM_SPECIFIC_DATA, *PPROFILE_PLATFORM_SPECIFIC_DATA;

.macro GenerateProfileHelper helper, flags
NESTED_ENTRY \helper\()Naked, _TEXT, NoHandler
PROLOG_PUSH "{r0,r3,r9,r12}"

// for the 5 arguments that do not need popped plus 4 bytes of alignment
alloc_stack 6*4

// push fp regs
vpush.64 { d0 - d7 }
push { lr }
push { r11 }
push { /* return value, r4 is NULL */ r4 }
push { /* return value, r4 is NULL */ r4 }
mov r1, sp
bl C_FUNC(ProfileEnter)
EPILOG_STACK_RESTORE_OFFSET r7, #8
EPILOG_POP "{r4, r5, r7, r11, pc}"
NESTED_END ProfileEnterNaked, _TEXT

//
// EXTERN_C void ProfileLeaveNaked(FunctionIDOrClientID functionIDOrClientID);
//
NESTED_ENTRY ProfileLeaveNaked, _TEXT, NoHandler
PROLOG_PUSH "{r1, r2, r4, r5, r7, r11, lr}"
PROLOG_STACK_SAVE_OFFSET r7, #16

// fields of PROFILE_PLATFORM_SPECIFIC_DATA, in reverse order

// UINT32 r0; // Keep r0 & r1 contiguous to make returning 64-bit results easier
// UINT32 r1;
// void *r11;
// void *Pc;
// union // Float arg registers as 32-bit (s0-s15) and 64-bit (d0-d7)
// {
// UINT32 s[16];
// UINT64 d[8];
// };
// FunctionID functionId;
// void *probeSp; // stack pointer of managed function
// void *profiledSp; // location of arguments on stack
// LPVOID hiddenArg;
// UINT32 flags;
movw r4, #2
push { /* flags */ r4 }
movw r4, #0
push { /* hiddenArg */ r4 }
add r5, r11, #8
push { /* profiledSp */ r5 }
add r5, sp, #40
push { /* probeSp */ r5 }
push { /* functionId */ r0 }
vpush.64 { d0 - d7 }
push { lr }
push { r11 }
push { r1 }
push { r0 }
mov r1, sp
bl C_FUNC(ProfileLeave)
EPILOG_STACK_RESTORE_OFFSET r7, #16
EPILOG_POP "{r1, r2, r4, r5, r7, r11, pc}"
NESTED_END ProfileLeaveNaked, _TEXT
// next three fields pc, r11, r1
push { r1, r11, lr}

//
// EXTERN_C void ProfileTailcallNaked(FunctionIDOrClientID functionIDOrClientID);
//
NESTED_ENTRY ProfileTailcallNaked, _TEXT, NoHandler
PROLOG_PUSH "{r1, r2, r4, r5, r7, r11, lr}"
PROLOG_STACK_SAVE_OFFSET r7, #16

// fields of PROFILE_PLATFORM_SPECIFIC_DATA, in reverse order

// UINT32 r0; // Keep r0 & r1 contiguous to make returning 64-bit results easier
// UINT32 r1;
// void *r11;
// void *Pc;
// union // Float arg registers as 32-bit (s0-s15) and 64-bit (d0-d7)
// {
// UINT32 s[16];
// UINT64 d[8];
// };
// FunctionID functionId;
// void *probeSp; // stack pointer of managed function
// void *profiledSp; // location of arguments on stack
// LPVOID hiddenArg;
// UINT32 flags;
movw r4, #2
push { /* flags */ r4 }
movw r4, #0
push { /* hiddenArg */ r4 }
add r5, r11, #8
push { /* profiledSp */ r5 }
add r5, sp, #40
push { /* probeSp */ r5 }
push { /* functionId */ r0 }
vpush.64 { d0 - d7 }
push { lr }
push { r11 }
push { r1 }
push { r0 }
// return value is in r2 instead of r0 because functionID is passed in r0
push { r2 }

CHECK_STACK_ALIGNMENT

// set the other args, starting with functionID
str r0, [sp, #80]

// probeSp is the original sp when this stub was called
add r2, sp, SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA+20
str r2, [sp, #84]

// get the address of the arguments from the frame pointer, store in profiledSp
add r2, r11, #8
str r2, [sp, #88]

// clear hiddenArg
movw r2, #0
str r2, [sp, #92]

// set the flag to indicate what hook this is
movw r2, \flags
str r2, [sp, #96]

// sp is the address of PROFILE_PLATFORM_SPECIFIC_DATA, then call to C++
mov r1, sp
bl C_FUNC(ProfileTailcall)
EPILOG_STACK_RESTORE_OFFSET r7, #16
EPILOG_POP "{r1, r2, r4, r5, r7, r11, pc}"
NESTED_END ProfileTailcallNaked, _TEXT
bl C_FUNC(\helper)

// restore all our regs
pop { r2 }
pop { r1, r11, lr}
vpop.64 { d0 - d7 }

free_stack 6*4

EPILOG_POP "{r0,r3,r9,r12}"

bx lr
NESTED_END \helper\()Naked, _TEXT
.endm

GenerateProfileHelper ProfileEnter, PROFILE_ENTER
GenerateProfileHelper ProfileLeave, PROFILE_LEAVE
GenerateProfileHelper ProfileTailcall, PROFILE_TAILCALL

#endif

Expand Down
Loading

0 comments on commit f0ede2b

Please sign in to comment.