Skip to content

Commit

Permalink
runtime: tail call into memeq/cmp body implementations
Browse files Browse the repository at this point in the history
There's no need to call/ret to the body implementation: the body
can write the result to the right place itself. Just jump to the
body and have it return directly to our caller.

Old:
  call body implementation
  compute result
  put result in a register
  return
  write register to result location
  return

New:
  load address of result location into a register
  jump to body implementation
  compute result
  write result to passed-in address
  return

It's a bit tricky on 386 because there is no free register
with which to pass the result location.  Free up a register
by keeping around blen-alen instead of both alen and blen.

Change-Id: If2cf0682a5bf1cc592bdda7c126ed4eee8944fba
Reviewed-on: https://go-review.googlesource.com/9202
Reviewed-by: Josh Bleecher Snyder <[email protected]>
  • Loading branch information
randall77 committed Apr 29, 2015
1 parent 7e49c81 commit c526f3a
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 113 deletions.
93 changes: 46 additions & 47 deletions src/runtime/asm_386.s
Original file line number Diff line number Diff line change
Expand Up @@ -1296,9 +1296,8 @@ TEXT runtime·memeq(SB),NOSPLIT,$0-13
MOVL a+0(FP), SI
MOVL b+4(FP), DI
MOVL size+8(FP), BX
CALL runtime·memeqbody(SB)
MOVB AX, ret+12(FP)
RET
LEAL ret+12(FP), AX
JMP runtime·memeqbody(SB)

// memequal_varlen(a, b unsafe.Pointer) bool
TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
Expand All @@ -1307,9 +1306,8 @@ TEXT runtime·memequal_varlen(SB),NOSPLIT,$0-9
CMPL SI, DI
JEQ eq
MOVL 4(DX), BX // compiler stores size at offset 4 in the closure
CALL runtime·memeqbody(SB)
MOVB AX, ret+8(FP)
RET
LEAL ret+8(FP), AX
JMP runtime·memeqbody(SB)
eq:
MOVB $1, ret+8(FP)
RET
Expand All @@ -1325,32 +1323,30 @@ TEXT runtime·eqstring(SB),NOSPLIT,$0-17
CMPL SI, DI
JEQ same
MOVL s1len+4(FP), BX
CALL runtime·memeqbody(SB)
MOVB AX, v+16(FP)
RET
LEAL v+16(FP), AX
JMP runtime·memeqbody(SB)
same:
MOVB $1, v+16(FP)
RET

// bytes·Equal compares the two lengths first; equal-length inputs are handed
// to runtime·memeqbody with a in SI, b in DI and the byte count in BX, while a
// length mismatch stores 0 in the result byte at ret+24(FP).
//
// NOTE(review): this listing appears to be a diff rendering with the +/-
// markers stripped, so instructions from the pre-change version (CALL, then
// store AX) and the post-change version (LEAL + JMP tail call) are
// interleaved. The old/new tags below are inferred from the commit
// description — confirm against the raw diff before assembling this.
TEXT bytes·Equal(SB),NOSPLIT,$0-25
MOVL a_len+4(FP), BX // BX = length of a; also the count passed to memeqbody
MOVL b_len+16(FP), CX // CX = length of b
XORL AX, AX // old: preset AX to 0 so the mismatch path stores false
CMPL BX, CX
JNE eqret // lengths differ: skip the byte comparison
MOVL a+0(FP), SI
MOVL b+12(FP), DI
CALL runtime·memeqbody(SB) // old: call the body; result comes back in AX
LEAL ret+24(FP), AX // new: AX = address of the result byte
JMP runtime·memeqbody(SB) // new: tail call; the body writes through AX and returns to our caller
eqret:
MOVB AX, ret+24(FP) // old: store the result held in AX
MOVB $0, ret+24(FP) // new: lengths differ, store false directly
RET

// a in SI
// b in DI
// count in BX
// address of result byte in AX
TEXT runtime·memeqbody(SB),NOSPLIT,$0-0
XORL AX, AX

CMPL BX, $4
JB small

Expand Down Expand Up @@ -1381,6 +1377,7 @@ hugeloop:
SUBL $64, BX
CMPL DX, $0xffff
JEQ hugeloop
MOVB $0, (AX)
RET

// 4 bytes at a time using 32-bit register
Expand All @@ -1394,14 +1391,15 @@ bigloop:
SUBL $4, BX
CMPL CX, DX
JEQ bigloop
MOVB $0, (AX)
RET

// remaining 0-4 bytes
leftover:
MOVL -4(SI)(BX*1), CX
MOVL -4(DI)(BX*1), DX
CMPL CX, DX
SETEQ AX
SETEQ (AX)
RET

small:
Expand Down Expand Up @@ -1438,26 +1436,24 @@ di_finish:
SUBL SI, DI
SHLL CX, DI
equal:
SETEQ AX
SETEQ (AX)
RET

// runtime·cmpstring loads the base pointer and length of both strings into
// the registers runtime·cmpbody takes as input (SI/BX for s1, DI/DX for s2)
// and lets cmpbody produce the 1/0/-1 result for ret+16(FP).
//
// NOTE(review): this listing appears to be a diff rendering with the +/-
// markers stripped, so the pre-change sequence (CALL, store AX, RET) and the
// post-change sequence (LEAL + JMP tail call) are interleaved. The old/new
// tags below are inferred from the commit description — confirm against the
// raw diff before assembling this.
TEXT runtime·cmpstring(SB),NOSPLIT,$0-20
MOVL s1_base+0(FP), SI
MOVL s1_len+4(FP), BX
MOVL s2_base+8(FP), DI
MOVL s2_len+12(FP), DX
CALL runtime·cmpbody(SB) // old: call the body; result comes back in AX
MOVL AX, ret+16(FP) // old: copy AX into the result word
RET // old: return after storing the result
LEAL ret+16(FP), AX // new: AX = address of the result word
JMP runtime·cmpbody(SB) // new: tail call; cmpbody writes through AX and returns to our caller

// bytes·Compare loads the words at offsets 0 and 4 of s1 and at offsets 12
// and 16 of s2 into SI/BX and DI/DX, the registers runtime·cmpbody takes as
// input (BX = alen, DI = b, DX = blen), and lets cmpbody produce the 1/0/-1
// result for ret+24(FP).
//
// NOTE(review): this listing appears to be a diff rendering with the +/-
// markers stripped, so the pre-change sequence (CALL, store AX, RET) and the
// post-change sequence (LEAL + JMP tail call) are interleaved. The old/new
// tags below are inferred from the commit description — confirm against the
// raw diff before assembling this.
TEXT bytes·Compare(SB),NOSPLIT,$0-28
MOVL s1+0(FP), SI // presumably the data pointer of s1 — confirm against cmpbody's header
MOVL s1+4(FP), BX // BX is cmpbody's alen input
MOVL s2+12(FP), DI // DI is cmpbody's b input
MOVL s2+16(FP), DX // DX is cmpbody's blen input
CALL runtime·cmpbody(SB) // old: call the body; result comes back in AX
MOVL AX, ret+24(FP) // old: copy AX into the result word
RET // old: return after storing the result
LEAL ret+24(FP), AX // new: AX = address of the result word
JMP runtime·cmpbody(SB) // new: tail call; cmpbody writes through AX and returns to our caller

TEXT bytes·IndexByte(SB),NOSPLIT,$0-20
MOVL s+0(FP), SI
Expand Down Expand Up @@ -1492,14 +1488,13 @@ TEXT strings·IndexByte(SB),NOSPLIT,$0-16
// DI = b
// BX = alen
// DX = blen
// output:
// AX = 1/0/-1
// AX = address of return word (set to 1/0/-1)
TEXT runtime·cmpbody(SB),NOSPLIT,$0-0
MOVL DX, BP
SUBL BX, DX // DX = blen-alen
CMOVLGT BX, BP // BP = min(alen, blen)
CMPL SI, DI
JEQ allsame
CMPL BX, DX
MOVL DX, BP
CMOVLLT BX, BP // BP = min(alen, blen)
CMPL BP, $4
JB small
TESTL $0x4000000, runtime·cpuid_edx(SB) // check for sse2
Expand All @@ -1510,49 +1505,51 @@ largeloop:
MOVOU (SI), X0
MOVOU (DI), X1
PCMPEQB X0, X1
PMOVMSKB X1, AX
XORL $0xffff, AX // convert EQ to NE
PMOVMSKB X1, BX
XORL $0xffff, BX // convert EQ to NE
JNE diff16 // branch if at least one byte is not equal
ADDL $16, SI
ADDL $16, DI
SUBL $16, BP
JMP largeloop

diff16:
BSFL AX, BX // index of first byte that differs
XORL AX, AX
BSFL BX, BX // index of first byte that differs
XORL DX, DX
MOVB (SI)(BX*1), CX
CMPB CX, (DI)(BX*1)
SETHI AX
LEAL -1(AX*2), AX // convert 1/0 to +1/-1
SETHI DX
LEAL -1(DX*2), DX // convert 1/0 to +1/-1
MOVL DX, (AX)
RET

mediumloop:
CMPL BP, $4
JBE _0through4
MOVL (SI), AX
MOVL (SI), BX
MOVL (DI), CX
CMPL AX, CX
CMPL BX, CX
JNE diff4
ADDL $4, SI
ADDL $4, DI
SUBL $4, BP
JMP mediumloop

_0through4:
MOVL -4(SI)(BP*1), AX
MOVL -4(SI)(BP*1), BX
MOVL -4(DI)(BP*1), CX
CMPL AX, CX
CMPL BX, CX
JEQ allsame

diff4:
BSWAPL AX // reverse order of bytes
BSWAPL BX // reverse order of bytes
BSWAPL CX
XORL AX, CX // find bit differences
XORL BX, CX // find bit differences
BSRL CX, CX // index of highest bit difference
SHRL CX, AX // move a's bit to bottom
ANDL $1, AX // mask bit
LEAL -1(AX*2), AX // 1/0 => +1/-1
SHRL CX, BX // move a's bit to bottom
ANDL $1, BX // mask bit
LEAL -1(BX*2), BX // 1/0 => +1/-1
MOVL BX, (AX)
RET

// 0-3 bytes in common
Expand Down Expand Up @@ -1590,18 +1587,20 @@ di_finish:
BSRL DI, CX // index of highest bit difference
SHRL CX, SI // move a's bit to bottom
ANDL $1, SI // mask bit
LEAL -1(SI*2), AX // 1/0 => +1/-1
LEAL -1(SI*2), BX // 1/0 => +1/-1
MOVL BX, (AX)
RET

// all the bytes in common are the same, so we just need
// to compare the lengths.
allsame:
XORL AX, AX
XORL BX, BX
XORL CX, CX
CMPL BX, DX
SETGT AX // 1 if alen > blen
TESTL DX, DX
SETLT BX // 1 if alen > blen
SETEQ CX // 1 if alen == blen
LEAL -1(CX)(AX*2), AX // 1,0,-1 result
LEAL -1(CX)(BX*2), BX // 1,0,-1 result
MOVL BX, (AX)
RET

TEXT runtime·fastrand1(SB), NOSPLIT, $0-4
Expand Down
Loading

0 comments on commit c526f3a

Please sign in to comment.