Skip to content

Commit

Permalink
Small tuning for write-barrier on arm64 (dotnet#106191)
Browse files Browse the repository at this point in the history
* Small tunning for write-barrier on arm64

* Clean up windows version as well

* NativeAOT
  • Loading branch information
EgorBo authored Aug 18, 2024
1 parent 38bcd31 commit 6b7bfb9
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 104 deletions.
15 changes: 6 additions & 9 deletions src/coreclr/nativeaot/Runtime/arm64/WriteBarriers.S
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,10 @@
2:
// We can skip the card table write if the reference is to
// an object not on the epehemeral segment.
PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_low, x12
cmp \refReg, x12
blo 0f

PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_high, x12
PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_low, x12
PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_high, x17
cmp \refReg, x12
ccmp \refReg, x17, #0x2, hs
bhs 0f

// Set this objects card, if it has not already been set.
Expand Down Expand Up @@ -222,11 +220,10 @@ LEAF_END RhpByRefAssignRefArm64, _TEXT
LEAF_ENTRY RhpCheckedAssignRefArm64, _TEXT

// is destReg within the heap?
PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, x12
PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, x12
PREPARE_EXTERNAL_VAR_INDIRECT g_highest_address, x17
cmp x14, x12

PREPARE_EXTERNAL_VAR_INDIRECT g_highest_address, x12
ccmp x14, x12, #0x2, hs
ccmp x14, x17, #0x2, hs
bhs LOCAL_LABEL(NotInHeap)

b C_FUNC(RhpAssignRefArm64)
Expand Down
25 changes: 9 additions & 16 deletions src/coreclr/nativeaot/Runtime/arm64/WriteBarriers.asm
Original file line number Diff line number Diff line change
Expand Up @@ -130,12 +130,10 @@ INVALIDGCVALUE EQU 0xCCCCCCCD
2
;; We can skip the card table write if the reference is to
;; an object not on the epehemeral segment.
PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_low, x12
cmp $refReg, x12
blo %ft0

PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_high, x12
PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_low, x12
PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_high, x17
cmp $refReg, x12
ccmp $refReg, x17, #0x2, hs
bhs %ft0

;; Set this object's card, if it hasn't already been set.
Expand Down Expand Up @@ -179,14 +177,10 @@ INVALIDGCVALUE EQU 0xCCCCCCCD

;; The "check" of this checked write barrier - is $destReg
;; within the heap? if no, early out.
PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, x12
PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, x12
PREPARE_EXTERNAL_VAR_INDIRECT g_highest_address, x17
cmp $destReg, x12

PREPARE_EXTERNAL_VAR_INDIRECT g_highest_address, x12

;; If $destReg >= g_lowest_address, compare $destReg to g_highest_address.
;; Otherwise, set the C flag (0x2) to take the next branch.
ccmp $destReg, x12, #0x2, hs
ccmp $destReg, x17, #0x2, hs
bhs %ft0

INSERT_UNCHECKED_WRITE_BARRIER_CORE $destReg, $refReg
Expand Down Expand Up @@ -237,11 +231,10 @@ INVALIDGCVALUE EQU 0xCCCCCCCD
LEAF_ENTRY RhpCheckedAssignRefArm64

;; is destReg within the heap?
PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, x12
PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, x12
PREPARE_EXTERNAL_VAR_INDIRECT g_highest_address, x17
cmp x14, x12

PREPARE_EXTERNAL_VAR_INDIRECT g_highest_address, x12
ccmp x14, x12, #0x2, hs
ccmp x14, x17, #0x2, hs
blo RhpAssignRefArm64

NotInHeap
Expand Down
75 changes: 32 additions & 43 deletions src/coreclr/vm/arm64/patchedcode.S
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ LEAF_ENTRY JIT_PatchedCodeStart, _TEXT
ret lr
LEAF_END JIT_PatchedCodeStart, _TEXT

//-----------------------------------------------------------------------------
// void JIT_ByRefWriteBarrier
// On entry:
// x13 : the source address (points to object reference to write)
Expand All @@ -40,7 +41,7 @@ LEAF_END JIT_PatchedCodeStart, _TEXT
// x13 : incremented by 8
// x14 : incremented by 8
// x15 : trashed
// x17 : trashed (ip1) if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
// x17 : trashed (ip1)
//
// NOTE: Keep in sync with RBM_CALLEE_TRASH_WRITEBARRIER_BYREF and RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF
// if you add more trashed registers.
Expand All @@ -63,20 +64,16 @@ WRITE_BARRIER_END JIT_ByRefWriteBarrier
// x12 : trashed
// x14 : trashed (incremented by 8 to implement JIT_ByRefWriteBarrier contract)
// x15 : trashed
// x17 : trashed (ip1) if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
// x17 : trashed (ip1)
//
// NOTE: Keep in sync with RBM_CALLEE_TRASH_WRITEBARRIER_BYREF and RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF
// if you add more trashed registers.
//
WRITE_BARRIER_ENTRY JIT_CheckedWriteBarrier
ldr x12, LOCAL_LABEL(wbs_lowest_address)
ldr x17, LOCAL_LABEL(wbs_highest_address)
cmp x14, x12

ldr x12, LOCAL_LABEL(wbs_highest_address)

// Compare against the upper bound if the previous comparison indicated
// that the destination address is greater than or equal to the lower
// bound. Otherwise, set the C flag (specified by the 0x2) so that the
// branch below is not taken.
ccmp x14, x12, #0x2, hs

ccmp x14, x17, #0x2, hs
bhs LOCAL_LABEL(NotInHeap)

b C_FUNC(JIT_WriteBarrier)
Expand All @@ -86,6 +83,7 @@ LOCAL_LABEL(NotInHeap):
ret lr
WRITE_BARRIER_END JIT_CheckedWriteBarrier

//-----------------------------------------------------------------------------
// void JIT_WriteBarrier(Object** dst, Object* src)
// On entry:
// x14 : the destination address (LHS of the assignment)
Expand All @@ -95,7 +93,10 @@ WRITE_BARRIER_END JIT_CheckedWriteBarrier
// x12 : trashed
// x14 : trashed (incremented by 8 to implement JIT_ByRefWriteBarrier contract)
// x15 : trashed
// x17 : trashed (ip1) if FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
// x17 : trashed (ip1)
//
// NOTE: Keep in sync with RBM_CALLEE_TRASH_WRITEBARRIER_BYREF and RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF
// if you add more trashed registers.
//
WRITE_BARRIER_ENTRY JIT_WriteBarrier
stlr x15, [x14]
Expand All @@ -105,20 +106,17 @@ WRITE_BARRIER_ENTRY JIT_WriteBarrier

// Do not perform the work if g_GCShadow is 0
ldr x12, LOCAL_LABEL(wbs_GCShadow)
cbz x12, LOCAL_LABEL(ShadowUpdateDisabled)

// need temporary register. Save before using.
str x13, [sp, #-16]!
cbz x12, LOCAL_LABEL(ShadowUpdateEnd)

// Compute address of shadow heap location:
// pShadow = g_GCShadow + (x14 - g_lowest_address)
ldr x13, LOCAL_LABEL(wbs_lowest_address)
sub x13, x14, x13
add x12, x13, x12
ldr x17, LOCAL_LABEL(wbs_lowest_address)
sub x17, x14, x17
add x12, x17, x12

// if (pShadow >= g_GCShadowEnd) goto end
ldr x13, LOCAL_LABEL(wbs_GCShadowEnd)
cmp x12, x13
ldr x17, LOCAL_LABEL(wbs_GCShadowEnd)
cmp x12, x17
bhs LOCAL_LABEL(ShadowUpdateEnd)

// *pShadow = x15
Expand All @@ -129,18 +127,16 @@ WRITE_BARRIER_ENTRY JIT_WriteBarrier
dmb ish

// if ([x14] == x15) goto end
ldr x13, [x14]
cmp x13, x15
ldr x17, [x14]
cmp x17, x15
beq LOCAL_LABEL(ShadowUpdateEnd)

// *pShadow = INVALIDGCVALUE (0xcccccccd)
movz x13, #0xcccd
movk x13, #0xcccc, LSL #16
str x13, [x12]
movz x17, #0xcccd
movk x17, #0xcccc, LSL #16
str x17, [x12]

LOCAL_LABEL(ShadowUpdateEnd):
ldr x13, [sp], #16
LOCAL_LABEL(ShadowUpdateDisabled):
#endif

#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
Expand All @@ -156,30 +152,20 @@ LOCAL_LABEL(ShadowUpdateDisabled):

LOCAL_LABEL(CheckCardTable):
// Branch to Exit if the reference is not in the Gen0 heap
//
ldr x12, LOCAL_LABEL(wbs_ephemeral_low)
cbz x12, LOCAL_LABEL(SkipEphemeralCheck)
cmp x15, x12

ldr x12, LOCAL_LABEL(wbs_ephemeral_high)

// Compare against the upper bound if the previous comparison indicated
// that the destination address is greater than or equal to the lower
// bound. Otherwise, set the C flag (specified by the 0x2) so that the
// branch to exit is taken.
ccmp x15, x12, #0x2, hs

ldr x17, LOCAL_LABEL(wbs_ephemeral_high)
cmp x15, x12
ccmp x15, x17, #0x2, hs
bhs LOCAL_LABEL(Exit)

LOCAL_LABEL(SkipEphemeralCheck):
// Check if we need to update the card table
ldr x12, LOCAL_LABEL(wbs_card_table)
add x15, x12, x14, lsr #11
ldrb w12, [x15]
cmp x12, 0xFF
beq LOCAL_LABEL(Exit)

LOCAL_LABEL(UpdateCardTable):
// Update the card table
mov x12, 0xFF
strb w12, [x15]

Expand All @@ -191,12 +177,15 @@ LOCAL_LABEL(UpdateCardTable):
cmp x12, 0xFF
beq LOCAL_LABEL(Exit)

LOCAL_LABEL(UpdateCardBundle):
// Update the card bundle
mov x12, 0xFF
strb w12, [x15]
#endif

LOCAL_LABEL(Exit):
// Increment by 8 to implement JIT_ByRefWriteBarrier contract.
// TODO: Consider duplicating the logic to get rid of this redundant 'add'
// for JIT_WriteBarrier/JIT_CheckedWriteBarrier
add x14, x14, 8
ret lr
WRITE_BARRIER_END JIT_WriteBarrier
Expand Down
Loading

0 comments on commit 6b7bfb9

Please sign in to comment.