Skip to content

Commit

Permalink
ARM: add and use macros for 64-bit arithmetic over register pairs
Browse files Browse the repository at this point in the history
  • Loading branch information
xavierleroy committed Sep 29, 2024
1 parent 8a3a2c0 commit 59831be
Show file tree
Hide file tree
Showing 9 changed files with 126 additions and 140 deletions.
20 changes: 3 additions & 17 deletions runtime/arm/i64_dtos.S
Original file line number Diff line number Diff line change
Expand Up @@ -63,27 +63,13 @@ FUNCTION(__compcert_i64_dtos)
cmp r2, #0
blt 3f
@ EXP >= 0: shift left by EXP. Note that EXP < 12
rsb r3, r2, #32 @ r3 = 32 - amount
LSL Reg0HI, Reg0HI, r2
LSR r3, Reg0LO, r3
ORR Reg0HI, Reg0HI, r3
LSL Reg0LO, Reg0LO, r2
LSHL_small(Reg0, Reg0, r2, r3)
b 4f
@ EXP < 0: shift right by -EXP. Note that -EXP <= 52 but can be >= 32
3: RSB r2, r2, #0 @ r2 = -EXP = shift amount
RSB r3, r2, #32 @ r3 = 32 - amount
LSR Reg0LO, Reg0LO, r2
LSL r3, Reg0HI, r3
ORR Reg0LO, Reg0LO, r3
SUB r3, r2, #32 @ r3 = amount - 32 (see i64_shr.s)
LSR r3, Reg0HI, r3
ORR Reg0LO, Reg0LO, r3
LSR Reg0HI, Reg0HI, r2
LSHR(Reg0, Reg0, r2, r3)
@ apply sign to result
4: EOR Reg0LO, Reg0LO, r12
EOR Reg0HI, Reg0HI, r12
subs Reg0LO, Reg0LO, r12
sbc Reg0HI, Reg0HI, r12
4: LCONDOPP(Reg0, Reg0, r12)
bx lr
@ special cases
1: MOV Reg0LO, #0 @ result is 0
Expand Down
15 changes: 2 additions & 13 deletions runtime/arm/i64_dtou.S
Original file line number Diff line number Diff line change
Expand Up @@ -64,22 +64,11 @@ FUNCTION(__compcert_i64_dtou)
cmp r2, #0
blt 3f
@ EXP >= 0: shift left by EXP. Note that EXP < 12
rsb r3, r2, #32 @ r3 = 32 - amount
LSL Reg0HI, Reg0HI, r2
LSR r3, Reg0LO, r3
ORR Reg0HI, Reg0HI, r3
LSL Reg0LO, Reg0LO, r2
LSHL_small(Reg0, Reg0, r2, r3)
bx lr
@ EXP < 0: shift right by -EXP. Note that -EXP <= 52 but can be >= 32
3: RSB r2, r2, #0 @ r2 = -EXP = shift amount
RSB r3, r2, #32 @ r3 = 32 - amount
LSR Reg0LO, Reg0LO, r2
LSL r3, Reg0HI, r3
ORR Reg0LO, Reg0LO, r3
SUB r3, r2, #32 @ r3 = amount - 32 (see i64_shr.s)
LSR r3, Reg0HI, r3
ORR Reg0LO, Reg0LO, r3
LSR Reg0HI, Reg0HI, r2
LSHR(Reg0, Reg0, r2, r3)
bx lr
@ special cases
1: MOV Reg0LO, #0 @ result is 0
Expand Down
23 changes: 7 additions & 16 deletions runtime/arm/i64_sdiv.S
Original file line number Diff line number Diff line change
Expand Up @@ -40,22 +40,13 @@

FUNCTION(__compcert_i64_sdiv)
push {r4, r5, r6, r7, r8, r10, lr}
ASR r4, Reg0HI, #31 @ r4 = sign of N
ASR r5, Reg1HI, #31 @ r5 = sign of D
EOR r10, r4, r5 @ r10 = sign of result
EOR Reg0LO, Reg0LO, r4 @ take absolute value of N
EOR Reg0HI, Reg0HI, r4 @ N = (N ^ (N >>s 31)) - (N >>s 31)
subs Reg0LO, Reg0LO, r4
sbc Reg0HI, Reg0HI, r4
EOR Reg1LO, Reg1LO, r5 @ take absolute value of D
EOR Reg1HI, Reg1HI, r5
subs Reg1LO, Reg1LO, r5
sbc Reg1HI, Reg1HI, r5
bl __compcert_i64_udivmod @ do unsigned division
EOR Reg0LO, Reg2LO, r10 @ apply expected sign
EOR Reg0HI, Reg2HI, r10
subs Reg0LO, Reg0LO, r10
sbc Reg0HI, Reg0HI, r10
ASR r4, Reg0HI, #31 @ r4 = sign of N
ASR r5, Reg1HI, #31 @ r5 = sign of D
EOR r10, r4, r5 @ r10 = sign of result
LCONDOPP(Reg0, Reg0, r4) @ take absolute value of N
LCONDOPP(Reg1, Reg1, r5) @ take absolute value of D
bl __compcert_i64_udivmod @ do unsigned division
LCONDOPP(Reg0, Reg2, r10) @ apply expected sign
pop {r4, r5, r6, r7, r8, r10, lr}
bx lr
ENDFUNCTION(__compcert_i64_sdiv)
28 changes: 1 addition & 27 deletions runtime/arm/i64_shl.S
Original file line number Diff line number Diff line change
Expand Up @@ -38,34 +38,8 @@

@@@ Shift left

@ Note on ARM shifts: the shift amount is taken modulo 256.
@ If shift amount mod 256 >= 32, the shift produces 0.

@ Algorithm:
@ RH = (XH << N) | (XL >> (32-N)) | (XL << (N-32))
@ RL = XL << N
@ If N = 0:
@ RH = XH | 0 | 0
@ RL = XL
@ If 1 <= N <= 31: 1 <= 32-N <= 31 and 225 <= N-32 mod 256 <= 255
@ RH = (XH << N) | (XL >> (32-N)) | 0
@ RL = XL << N
@ If N = 32:
@ RH = 0 | XL | XL
@ RL = 0
@ If 33 <= N <= 63: 225 <= 32-N mod 256 <= 255 and 1 <= N-32 <= 31
@ RH = 0 | 0 | (XL << (N-32))
@ RL = 0

FUNCTION(__compcert_i64_shl)
AND r2, r2, #63 @ normalize amount to 0...63
RSB r3, r2, #32 @ r3 = 32 - amount
LSL Reg0HI, Reg0HI, r2
LSR r3, Reg0LO, r3
ORR Reg0HI, Reg0HI, r3
SUB r3, r2, #32 @ r3 = amount - 32
LSL r3, Reg0LO, r3
ORR Reg0HI, Reg0HI, r3
LSL Reg0LO, Reg0LO, r2
LSHL(Reg0, Reg0, r2, r3)
bx lr
ENDFUNCTION(__compcert_i64_shl)
28 changes: 1 addition & 27 deletions runtime/arm/i64_shr.S
Original file line number Diff line number Diff line change
Expand Up @@ -38,34 +38,8 @@

@@@ Shift right unsigned

@ Note on ARM shifts: the shift amount is taken modulo 256.
@ If shift amount mod 256 >= 32, the shift produces 0.

@ Algorithm:
@ RL = (XL >> N) | (XH << (32-N)) | (XH >> (N-32))
@ RH = XH >> N
@ If N = 0:
@ RL = XL | 0 | 0
@ RH = XH
@ If 1 <= N <= 31: 1 <= 32-N <= 31 and 225 <= N-32 mod 256 <= 255
@ RL = (XL >> N) | (XH << (32-N)) | 0
@ RH = XH >> N
@ If N = 32:
@ RL = 0 | XH | XH
@ RH = 0
@ If 33 <= N <= 63: 225 <= 32-N mod 256 <= 255 and 1 <= N-32 <= 31
@ RL = 0 | 0 | (XH >> (N-32))
@ RH = 0

FUNCTION(__compcert_i64_shr)
AND r2, r2, #63 @ normalize amount to 0...63
RSB r3, r2, #32 @ r3 = 32 - amount
LSR Reg0LO, Reg0LO, r2
LSL r3, Reg0HI, r3
ORR Reg0LO, Reg0LO, r3
SUB r3, r2, #32 @ r3 = amount - 32
LSR r3, Reg0HI, r3
ORR Reg0LO, Reg0LO, r3
LSR Reg0HI, Reg0HI, r2
LSHR(Reg0, Reg0, r2, r3)
bx lr
ENDFUNCTION(__compcert_i64_shr)
22 changes: 6 additions & 16 deletions runtime/arm/i64_smod.S
Original file line number Diff line number Diff line change
Expand Up @@ -40,22 +40,12 @@

FUNCTION(__compcert_i64_smod)
push {r4, r5, r6, r7, r8, r10, lr}
ASR r4, Reg0HI, #31 @ r4 = sign of N
ASR r5, Reg1HI, #31 @ r5 = sign of D
MOV r10, r4 @ r10 = sign of result
EOR Reg0LO, Reg0LO, r4 @ take absolute value of N
EOR Reg0HI, Reg0HI, r4 @ N = (N ^ (N >>s 31)) - (N >>s 31)
subs Reg0LO, Reg0LO, r4
sbc Reg0HI, Reg0HI, r4
EOR Reg1LO, Reg1LO, r5 @ take absolute value of D
EOR Reg1HI, Reg1HI, r5
subs Reg1LO, Reg1LO, r5
sbc Reg1HI, Reg1HI, r5
bl __compcert_i64_udivmod @ do unsigned division
EOR Reg0LO, Reg0LO, r10 @ apply expected sign
EOR Reg0HI, Reg0HI, r10
subs Reg0LO, Reg0LO, r10
sbc Reg0HI, Reg0HI, r10
ASR r10, Reg0HI, #31 @ r10 = sign of N = sign of result
ASR r5, Reg1HI, #31 @ r5 = sign of D
LCONDOPP(Reg0, Reg0, r10) @ take absolute value of N
LCONDOPP(Reg1, Reg1, r5) @ take absolute value of D
bl __compcert_i64_udivmod @ do unsigned division
LCONDOPP(Reg0, Reg0, r10) @ apply expected sign
pop {r4, r5, r6, r7, r8, r10, lr}
bx lr
ENDFUNCTION(__compcert_i64_smod)
3 changes: 1 addition & 2 deletions runtime/arm/i64_udiv.S
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,7 @@
FUNCTION(__compcert_i64_udiv)
push {r4, r5, r6, r7, r8, lr}
bl __compcert_i64_udivmod
MOV Reg0LO, Reg2LO
MOV Reg0HI, Reg2HI
LMOV(Reg0, Reg2)
pop {r4, r5, r6, r7, r8, lr}
bx lr
ENDFUNCTION(__compcert_i64_udiv)
39 changes: 17 additions & 22 deletions runtime/arm/i64_udivmod.S
Original file line number Diff line number Diff line change
Expand Up @@ -38,38 +38,33 @@

@@@ Auxiliary function for division and modulus. Don't call from C

@ On entry: N = (r0, r1) numerator D = (r2, r3) divisor
@ On exit: Q = (r4, r5) quotient R = (r0, r1) remainder
@ Locals: COUNT = r6 round counter
@ TMP = r7, r8 temporary
@ On entry: N = Reg0 (r0, r1) numerator D = Reg1 (r2, r3) divisor
@ On exit: Q = Reg2 (r4, r5) quotient R = Reg0 (r0, r1) remainder
@ Locals: TMP = Reg3 (r6, r7) temporary
@ COUNT = r8 round counter

FUNCTION(__compcert_i64_udivmod)
orrs r7, Reg1LO, Reg1HI @ is D == 0?
orrs r6, Reg1LO, Reg1HI @ is D == 0?
it eq
bxeq lr @ if so, return with unspecified results
MOV Reg2LO, #0 @ Q = 0
MOV Reg2HI, #0
MOV r6, #1 @ round = 1
MOV r8, #1 @ round = 1
1: cmp Reg1HI, #0 @ while ((signed) D >= 0)
blt 3f
adds Reg1LO, Reg1LO, Reg1LO @ D = D << 1
adc Reg1HI, Reg1HI, Reg1HI
subs r7, Reg0LO, Reg1LO @ if N < D
sbcs r8, Reg0HI, Reg1HI
LSHL1(Reg1, Reg1) @ D = D << 1
LSUBS(Reg3, Reg0, Reg1) @ if N < D
blo 2f @ break and restore D to previous value
ADD r6, r6, #1 @ increment count
ADD r8, r8, #1 @ increment count
b 1b
2: lsrs Reg1HI, Reg1HI, #1 @ D = D >> 1
rrx Reg1LO, Reg1LO
3: adds Reg2LO, Reg2LO, Reg2LO @ Q = Q << 1
adc Reg2HI, Reg2HI, Reg2HI
subs r7, Reg0LO, Reg1LO @ TMP = N - D
sbcs r8, Reg0HI, Reg1HI
2: LSHR1(Reg1, Reg1) @ D = D >> 1
3: LSHL1(Reg2, Reg2) @ Q = Q << 1
LSUBS(Reg3, Reg0, Reg1) @ TMP = N - D
blo 4f @ if N < D, leave N and Q unchanged
MOV Reg0LO, r7 @ N = N - D
MOV Reg0HI, r8
orr Reg2LO, Reg2LO, #1 @ Q = Q | 1
4: subs r6, r6, #1 @ decrement count
bne 2b @ repeat until count = 0
LMOV(Reg0, Reg3) @ N = N - D
ORR Reg2LO, Reg2LO, #1 @ Q = Q | 1
4: LSHR1(Reg1, Reg1) @ D = D >> 1
subs r8, r8, #1 @ decrement count
bne 3b @ repeat until count = 0
bx lr
ENDFUNCTION(__compcert_i64_udivmod)
88 changes: 88 additions & 0 deletions runtime/arm/sysdeps.h
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,94 @@
#define Reg3LO r6
#endif

// HI(r) / LO(r): name of the high / low 32-bit half of register pair r,
// formed by token-pasting the HI or LO suffix onto the pair name.
// For example LO(Reg3) becomes Reg3LO, which this header in turn #defines
// to a core register.
#define HI(r) r##HI
#define LO(r) r##LO

// Useful operations over register pairs

// Move: copy the 64-bit value held in register pair x into register pair r.
// The low half of r is written before the high half of x is read, so
// LO(r) must not be the same register as HI(x).
#define LMOV(r,x) \
MOV LO(r), LO(x); MOV HI(r), HI(x)

// Left shift by 1: r = x << 1 over a 64-bit register pair.
// adds doubles the low word and leaves the bit shifted out of it in the
// carry flag; adc doubles the high word and adds that carry in.
// Clobbers the condition flags.  LO(r) is written before HI(x) is read,
// so LO(r) must not be the same register as HI(x).
#define LSHL1(r,x) \
adds LO(r), LO(x), LO(x); adc HI(r), HI(x), HI(x)

// Right shift (logical) by 1: r = x >> 1 over a 64-bit register pair.
// lsrs shifts the high word and moves its bit 0 into the carry flag;
// rrx then shifts the low word right by one with that carry entering
// at bit 31.
// Clobbers the condition flags.  HI(r) is written before LO(x) is read,
// so HI(r) must not be the same register as LO(x).
#define LSHR1(r,x) \
lsrs HI(r), HI(x), #1; rrx LO(r), LO(x)

// Subtract and set carry flag: r = x - y over 64-bit register pairs.
// subs/sbcs propagate the borrow from the low word into the high word.
// On exit the carry flag is clear exactly when x < y as unsigned 64-bit
// values, so the macro can be followed directly by blo / bhs.
// The Z flag reflects only the high-word result, not the full 64 bits.
// LO(r) is written before HI(x) and HI(y) are read, so it must not be
// the same register as either of them.
#define LSUBS(r,x,y) \
subs LO(r), LO(x), LO(y); sbcs HI(r), HI(x), HI(y)

// Conditional change sign
// Set r = x if sgn = 0 and r = -x if sgn = -1
// Computed as (x ^ sgn) - sgn over the 64-bit pair: the register sgn is
// applied to both halves, and the borrow of the low-word subtraction is
// carried into the high word by sbc.
// sgn must hold 0 or -1 (all ones); other values give a meaningless result.
// Clobbers the condition flags (subs).  Both halves of r are written
// before the last read of sgn, so sgn must not be one of r's registers.
#define LCONDOPP(r,x,sgn) \
EOR LO(r), LO(x), sgn; EOR HI(r), HI(x), sgn; \
subs LO(r), LO(r), sgn; sbc HI(r), HI(r), sgn

// Note on ARM shifts: the shift amount is taken modulo 256.
// If shift amount mod 256 >= 32, the shift produces 0.

// General left shift by N bits
//   r = destination pair, x = source pair (may be the same pair as r),
//   n = register holding the shift amount (the table covers 0 <= n <= 63),
//   t = scratch register, clobbered.
//   n is only read.  t must be distinct from n and from the registers
//   of x and r.
// Branchless algorithm:
//   rh = (xh << n) | (xl >> (32-n)) | (xl << (n-32))
//   rl = xl << n
// What happens:
//   n                 0     1      ...  31      32    33     ...  63
//   (32-n) mod 256    32    31          1       0     255         225
//   (n-32) mod 256    224   225         255     0     1           31
//   xl << n           xl    xl<<1       xl<<31  0     0           0
//   xh << n           xh    xh<<1       xh<<31  0     0           0
//   xl >> (32-n)      0     xl>>31      xl>>1   xl    0           0
//   xl << (n-32)      0     0           0       xl    xl<<1       xl<<31

#define LSHL(r,x,n,t) \
RSB t, n, #32; \
LSL HI(r), HI(x), n; \
LSR t, LO(x), t; \
ORR HI(r), HI(r), t; \
SUB t, n, #32; \
LSL t, LO(x), t; \
ORR HI(r), HI(r), t; \
LSL LO(r), LO(x), n

// Special case of LSHL when the shift amount n is between 0 and 32
// No need to compute the (xl << (n-32)) term: for n <= 31 that term is 0,
// and for n = 32 it equals the (xl >> (32-n)) term, which is still computed.
//   r = destination pair, x = source pair (may be the same pair as r),
//   n = register holding the shift amount (only read),
//   t = scratch register, clobbered; must be distinct from n and from
//       the registers of x and r.
// For n > 32 the high word of the result is wrong (it comes out as 0).

#define LSHL_small(r,x,n,t) \
RSB t, n, #32; \
LSL HI(r), HI(x), n; \
LSR t, LO(x), t; \
ORR HI(r), HI(r), t; \
LSL LO(r), LO(x), n

// General right shift by N bits (logical: zeros are shifted in)
//   r = destination pair, x = source pair (may be the same pair as r),
//   n = register holding the shift amount (the table covers 0 <= n <= 63),
//   t = scratch register, clobbered.
//   n is only read.  t must be distinct from n and from the registers
//   of x and r.
// Register-specified ARM shifts use the amount modulo 256 and produce 0
// when that value is >= 32; the table relies on this.
// Branchless algorithm:
//   rl = (xl >> n) | (xh << (32-n)) | (xh >> (n-32))
//   rh = xh >> n
// What happens:
//   n                 0     1      ...  31      32    33     ...  63
//   (32-n) mod 256    32    31          1       0     255         225
//   (n-32) mod 256    224   225         255     0     1           31
//   xh >> n           xh    xh>>1       xh>>31  0     0           0
//   xl >> n           xl    xl>>1       xl>>31  0     0           0
//   xh << (32-n)      0     xh<<31      xh<<1   xh    0           0
//   xh >> (n-32)      0     0           0       xh    xh>>1       xh>>31

#define LSHR(r,x,n,t) \
RSB t, n, #32; \
LSR LO(r), LO(x), n; \
LSL t, HI(x), t; \
ORR LO(r), LO(r), t; \
SUB t, n, #32; \
LSR t, HI(x), t; \
ORR LO(r), LO(r), t; \
LSR HI(r), HI(x), n

// Stack is not executable

#if defined(SYS_linux) || defined(SYS_bsd)
.section .note.GNU-stack,"",%progbits
#endif

0 comments on commit 59831be

Please sign in to comment.