diff --git a/Source/SysCTR/DynaRec/arm/CodeGeneratorARM.cpp b/Source/SysCTR/DynaRec/arm/CodeGeneratorARM.cpp index 34a97ba..ae5be8f 100644 --- a/Source/SysCTR/DynaRec/arm/CodeGeneratorARM.cpp +++ b/Source/SysCTR/DynaRec/arm/CodeGeneratorARM.cpp @@ -64,6 +64,11 @@ static const EArmReg gRegistersToUseForCaching[] = { // XX this optimisation works very well on the PSP, option to disable it was removed static const bool gDynarecStackOptimisation = true; +// function stubs from assembly +extern "C" { void _DirectExitCheckNoDelay( u32 instructions_executed, u32 exit_pc ); } +extern "C" { void _DirectExitCheckDelay( u32 instructions_executed, u32 exit_pc, u32 target_pc ); } +extern "C" { void _IndirectExitCheck( u32 instructions_executed, CIndirectExitMap* map, u32 exit_pc ); } + //Helper functions used for slow loads s32 Read8Bits_Signed ( u32 address ) { return (s8) Read8Bits(address); }; s32 Read16Bits_Signed( u32 address ) { return (s16)Read16Bits(address); }; @@ -824,33 +829,25 @@ CJumpLocation CCodeGeneratorARM::GenerateExitCode( u32 exit_address, u32 jump_ad } #endif FlushAllRegisters(mRegisterCache, true); + MOV32(ArmReg_R0, num_instructions); - - //Call CPU_UpdateCounter - CALL(CCodeLabel( (void*)CPU_UpdateCounter )); - - // This jump may be NULL, in which case we patch it below - // This gets patched with a jump to the next fragment if the target is later found - CJumpLocation jump_to_next_fragment( GenerateBranchIfNotSet( const_cast< u32 * >( &gCPUState.StuffToDo ), next_fragment ) ); - - // If the flag was set, we need in initialise the pc/delay to exit with - CCodeLabel interpret_next_fragment( GetAssemblyBuffer()->GetLabel() ); - - u8 exit_delay; - - if( jump_address != 0 ) + MOV32(ArmReg_R1, exit_address); + if (jump_address != 0) { - SetVar( &gCPUState.TargetPC, jump_address ); - exit_delay = EXEC_DELAY; + MOV32(ArmReg_R2, jump_address); + MOV32(ArmReg_R3, (u32)&_DirectExitCheckDelay); } else { - exit_delay = NO_DELAY; + MOV32(ArmReg_R3, 
(u32)&_DirectExitCheckNoDelay); } - - SetVar( &gCPUState.Delay, exit_delay ); - SetVar( &gCPUState.CurrentPC, exit_address ); - + + BLX(ArmReg_R3); + +// If the flag was set, we need to initialise the pc/delay to exit with + CJumpLocation jump_to_next_fragment = BX_IMM( CCodeLabel { nullptr } ); + + CCodeLabel interpret_next_fragment( GetAssemblyBuffer()->GetLabel() ); // No need to call CPU_SetPC(), as this is handled by CFragment when we exit RET(); @@ -869,22 +866,15 @@ CJumpLocation CCodeGeneratorARM::GenerateExitCode( u32 exit_address, u32 jump_ad void CCodeGeneratorARM::GenerateEretExitCode( u32 num_instructions, CIndirectExitMap * p_map ) { FlushAllRegisters(mRegisterCache, true); + MOV32(ArmReg_R0, num_instructions); - - //Call CPU_UpdateCounter - CALL(CCodeLabel( (void*)CPU_UpdateCounter )); - - // We always exit to the interpreter, regardless of the state of gCPUState.StuffToDo - + MOV32(ArmReg_R1, reinterpret_cast<u32>(p_map)); + LDR(ArmReg_R2, ArmReg_R12, offsetof(SCPUState, CurrentPC)); // Eret is a bit bodged so we exit at PC + 4 - LDR(ArmReg_R0, ArmReg_R12, offsetof(SCPUState, CurrentPC)); - ADD_IMM(ArmReg_R0, ArmReg_R0, 4); - STR(ArmReg_R0, ArmReg_R12, offsetof(SCPUState, CurrentPC)); - - SetVar( &gCPUState.Delay, NO_DELAY ); - - // No need to call CPU_SetPC(), as this is handled by CFragment when we exit - RET(); + ADD_IMM(ArmReg_R2, ArmReg_R2, 4); + + MOV32(ArmReg_R4, (u32)&_IndirectExitCheck); + BLX(ArmReg_R4); } //***************************************************************************** @@ -893,37 +883,13 @@ void CCodeGeneratorARM::GenerateEretExitCode( u32 num_instructions, CIndirectExi void CCodeGeneratorARM::GenerateIndirectExitCode( u32 num_instructions, CIndirectExitMap * p_map ) { FlushAllRegisters(mRegisterCache, true); - MOV32(ArmReg_R0, num_instructions); - - //Call CPU_UpdateCounter - CALL(CCodeLabel( (void*)CPU_UpdateCounter )); - - CCodeLabel no_target( NULL ); - CJumpLocation jump_to_next_fragment( GenerateBranchIfNotSet( const_cast< 
u32 * >( &gCPUState.StuffToDo ), no_target ) ); - - CCodeLabel exit_dynarec( GetAssemblyBuffer()->GetLabel() ); - - // New return address is in gCPUState.TargetPC - LDR(ArmReg_R0, ArmReg_R12, offsetof(SCPUState, TargetPC)); - STR(ArmReg_R0, ArmReg_R12, offsetof(SCPUState, CurrentPC)); - SetVar( &gCPUState.Delay, NO_DELAY ); - // No need to call CPU_SetPC(), as this is handled by CFragment when we exit - RET(); - - // gCPUState.StuffToDo == 0, try to jump to the indirect target - PatchJumpLong( jump_to_next_fragment, GetAssemblyBuffer()->GetLabel() ); - - MOV32( ArmReg_R0, reinterpret_cast< u32 >( p_map ) ); - LDR(ArmReg_R1, ArmReg_R12, offsetof(SCPUState, TargetPC)); - - CALL(CCodeLabel( (void*)IndirectExitMap_Lookup )); - - // If the target was not found, exit - TST( ArmReg_R0, ArmReg_R0 ); - BX_IMM( exit_dynarec, EQ ); - - BX( ArmReg_R0 ); + MOV32(ArmReg_R0, num_instructions); + MOV32(ArmReg_R1, reinterpret_cast<u32>(p_map)); + LDR(ArmReg_R2, ArmReg_R12, offsetof(SCPUState, TargetPC)); + + MOV32(ArmReg_R4, (u32)&_IndirectExitCheck); + BLX(ArmReg_R4); } //***************************************************************************** diff --git a/Source/SysCTR/DynaRec/arm/DynarecStubARM.S b/Source/SysCTR/DynaRec/arm/DynarecStubARM.S index fb1b09a..4ae4f8d 100644 --- a/Source/SysCTR/DynaRec/arm/DynarecStubARM.S +++ b/Source/SysCTR/DynaRec/arm/DynarecStubARM.S @@ -1,8 +1,105 @@ .arm +//The top two defines need to be adjusted depending on how gCPUState struct is formatted in CPU.h!! 
//Corn +// +#define _C0_Count (0x100 + 9 * 4) //CPU_Control_base + 9*8(64bit regs) or 9*4(32bit regs) +#define _AuxBase 0x280 //Base pointer to Aux regs +#define _CurrentPC (_AuxBase + 0x00) +#define _TargetPC (_AuxBase + 0x04) +#define _Delay (_AuxBase + 0x08) +#define _StuffToDo (_AuxBase + 0x0c) +#define _Events (_AuxBase + 0x30) + +.extern CPU_UpdateCounter +.extern CPU_HANDLE_COUNT_INTERRUPT .align 4 .global _EnterDynaRec +.global _DirectExitCheckNoDelay +.global _DirectExitCheckDelay +.global _IndirectExitCheck .type _EnterDynaRec, %function +.type _DirectExitCheckNoDelay, %function +.type _DirectExitCheckDelay, %function +.type _IndirectExitCheck, %function + +_DirectExitCheckNoDelay: + ldr r4, [r12, #_C0_Count] // COUNT register + ldr r5, [r12, #_Events] // Events[0].mCount + + add r4, r4, r0 // COUNT + ops_executed + str r4, [r12, #_C0_Count] // COUNT = COUNT + ops_executed + + str r1, [r12, #_CurrentPC] // Current PC + mov r1, #0 + str r1, [r12, #_Delay] // Delay = NO_DELAY + + sub r5, r5, r0 // Events[0].mCount - ops_executed + cmp r5, #0 + str r5, [r12, #_Events] + ble _DirectExitCheckCheckCount + bx lr + +_DirectExitCheckDelay: + ldr r4, [r12, #_C0_Count] // COUNT register + ldr r5, [r12, #_Events] // Events[0].mCount + + add r4, r4, r0 // COUNT + ops_executed + str r4, [r12, #_C0_Count] // COUNT = COUNT + ops_executed + + str r1, [r12, #_CurrentPC] // Current PC + str r2, [r12, #_TargetPC] // Target PC + mov r1, #1 // EXEC_DELAY + str r1, [r12, #_Delay] // Delay = EXEC_DELAY + + sub r5, r5, r0 // Events[0].mCount - ops_executed + cmp r5, #0 + str r5, [r12, #_Events] + ble _DirectExitCheckCheckCount + bx lr + +####################################################################################### +# Utility routine for _DirectExitCheckXX. 
+# +_DirectExitCheckCheckCount: + mov r4, lr // Keep track of return address + mov r5, r12 + bl CPU_HANDLE_COUNT_INTERRUPT + mov lr, r4 + mov r12, r5 + ldr r0, [r12, #_StuffToDo] // StuffToDo + cmp r0, #0 + popne {r4-r11, pc} // Exit the DynaRec + bx lr // Return back to caller + +####################################################################################### +# Update counter. If the StuffToDo flags are clear on return, look up the indirect exit target and branch to it if found. +# r0 - instructions executed +# r1 - CIndirectExitMap pointer +# r2 - exit pc (exit delay is always NO_DELAY) +_IndirectExitCheck: + mov r4, r1 // Keep track of map pointer + mov r5, r2 // and the exit pc + mov r6, r12 + # Can avoid these until Return From DynaRec (pop)? + str r2, [r12,#_CurrentPC] // CurrentPC + bl CPU_UpdateCounter // r0 holds instructions executed + mov r0, #0 + str r0, [r6, #_Delay] // Delay (NO_DELAY) + + ldr r0, [r6, #_StuffToDo] // StuffToDo + cmp r0, #0 + popne {r4-r11,pc} // Exit the DynaRec + + mov r0, r4 // p_map + mov r1, r5 // exit_pc + bl IndirectExitMap_Lookup + + + # r0 holds pointer to indirect target. If it's 0, it means it's not compiled yet + cmp r0, #0 + popeq {r4-r11,pc} // Exit the DynaRec + mov r12,r6 // Restore the CPUState pointer + bx r0 // branch to the looked up fragment _EnterDynaRec: push {r4-r11, lr}