diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S
index b623fba277db..4c0f7da87ef8 100644
--- a/sys/amd64/amd64/support.S
+++ b/sys/amd64/amd64/support.S
@@ -697,6 +697,72 @@ ENTRY(fillw)
 	ret
 END(fillw)
 
+/*
+ * strlen(string)
+ *	  %rdi
+ *
+ * Returns the length in %rax; also writes %rcx, %rdx, %rdi and %r8-%r11.
+ *
+ * Uses the ((x - 0x01....01) & ~x & 0x80....80) trick, with 0x01....01 replaced
+ * by 0x0 - 0x01....01 so that it can be added with leaq.
+ *
+ * For a description see either:
+ * - "Hacker's Delight" by Henry S. Warren, Jr.
+ * - "Optimizing subroutines in assembly language: An optimization guide for x86 platforms"
+ *   by Agner Fog
+ *
+ * The latter contains a 32-bit variant of the same algorithm coded in assembly for i386.
+ */
+ENTRY(strlen)
+	PUSH_FRAME_POINTER
+	movabsq	$0xfefefefefefefeff,%r8	/* r8 = 0x0 - 0x0101010101010101 */
+	movabsq	$0x8080808080808080,%r9	/* r9 = top bit of every byte */
+
+	movq	%rdi,%r10		/* r10 = start of string, kept for the final subtraction */
+	movq	%rdi,%rcx		/* rcx = pointer copy, turned into a shift count below */
+	testb	$7,%dil			/* is the string 8-byte aligned? */
+	jz	2f			/* yes: enter the loop at its first load */
+
+	/*
+	 * Handle misaligned reads: align down to 8 and fill the spurious
+	 * leading bytes with 0xff so that they can never look like a NUL.
+	 */
+	andq	$~7,%rdi		/* rdi = string rounded down to a multiple of 8 */
+	movq	(%rdi),%r11		/* r11 = aligned quadword containing the string start */
+	shlq	$3,%rcx			/* rcx = string * 8; shlq %cl only uses the low 6 bits */
+	movq	$-1,%rdx		/* rdx = all ones */
+	shlq	%cl,%rdx		/* zero the low 8 * (string & 7) bits */
+	notq	%rdx			/* rdx = 0xff in each byte that precedes the string */
+	orq	%rdx,%r11		/* force those bytes to be non-zero */
+
+	leaq	(%r11,%r8),%rcx		/* rcx = x - 0x01....01 */
+	notq	%r11			/* r11 = ~x */
+	andq	%r11,%rcx		/* rcx = (x - 0x01....01) & ~x */
+	andq	%r9,%rcx		/* ... & 0x80....80: non-zero iff x has a zero byte */
+	jnz	3f			/* the NUL is within the first quadword */
+
+	/*
+	 * Main loop: test one aligned quadword per iteration.
+	 */
+	ALIGN_TEXT
+1:
+	leaq	8(%rdi),%rdi		/* step to the next aligned quadword */
+2:
+	movq	(%rdi),%r11		/* r11 = x, the next 8 bytes of the string */
+	leaq	(%r11,%r8),%rcx		/* rcx = x - 0x01....01 */
+	notq	%r11			/* r11 = ~x */
+	andq	%r11,%rcx		/* rcx = (x - 0x01....01) & ~x */
+	andq	%r9,%rcx		/* ... & 0x80....80: non-zero iff x has a zero byte */
+	jz	1b			/* no zero byte in this quadword: keep scanning */
+3:
+	bsfq	%rcx,%rcx		/* rcx = index of the lowest set bit */
+	shrq	$3,%rcx			/* bit index -> byte offset of the first NUL */
+	leaq	(%rcx,%rdi),%rax	/* rax = address of the first NUL */
+	subq	%r10,%rax		/* rax = NUL address - string start = length */
+	POP_FRAME_POINTER
+	ret
+END(strlen)
+
 /*****************************************************************************/
 /* copyout and fubyte family                                                 */
 /*****************************************************************************/
diff --git a/sys/conf/files b/sys/conf/files
index e68aa2118791..9ec7292a741b 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -4086,7 +4086,6 @@ libkern/strdup.c		standard
 libkern/strndup.c		standard
 libkern/strlcat.c		standard
 libkern/strlcpy.c		standard
-libkern/strlen.c		standard
 libkern/strncat.c		standard
 libkern/strncmp.c		standard
 libkern/strncpy.c		standard
diff --git a/sys/conf/files.arm b/sys/conf/files.arm
index eb3a23b5fc21..69986585bdf6 100644
--- a/sys/conf/files.arm
+++ b/sys/conf/files.arm
@@ -127,6 +127,7 @@ libkern/lshrdi3.c		standard
 libkern/memcmp.c		standard
 libkern/moddi3.c		standard
 libkern/qdivrem.c		standard
+libkern/strlen.c		standard
 libkern/ucmpdi2.c		standard
 libkern/udivdi3.c		standard
 libkern/umoddi3.c		standard
diff --git a/sys/conf/files.i386 b/sys/conf/files.i386
index b5192e47a738..de759a9f7c83 100644
--- a/sys/conf/files.i386
+++ b/sys/conf/files.i386
@@ -219,6 +219,7 @@ libkern/memcmp.c		standard
 libkern/memset.c		standard
 libkern/moddi3.c		standard
 libkern/qdivrem.c		standard
+libkern/strlen.c		standard
 libkern/ucmpdi2.c		standard
 libkern/udivdi3.c		standard
 libkern/umoddi3.c		standard
diff --git a/sys/conf/files.mips b/sys/conf/files.mips
index c18f0a5c69be..7ee5b0019bd7 100644
--- a/sys/conf/files.mips
+++ b/sys/conf/files.mips
@@ -66,6 +66,7 @@ libkern/ucmpdi2.c		optional	mips | mipshf | mipsel | mipselhf
 libkern/ashldi3.c		standard
 libkern/ashrdi3.c		standard
 libkern/memcmp.c		standard
+libkern/strlen.c		standard
 
 # cfe support
 dev/cfe/cfe_api.c		optional	cfe
diff --git a/sys/conf/files.powerpc b/sys/conf/files.powerpc
index 3022fd6f6e39..347abee153d2 100644
--- a/sys/conf/files.powerpc
+++ b/sys/conf/files.powerpc
@@ -129,6 +129,7 @@ libkern/memcmp.c		standard
 libkern/memset.c		standard
 libkern/moddi3.c		optional	powerpc | powerpcspe
 libkern/qdivrem.c		optional	powerpc | powerpcspe
+libkern/strlen.c		standard
 libkern/ucmpdi2.c		optional	powerpc | powerpcspe
 libkern/udivdi3.c		optional	powerpc | powerpcspe
 libkern/umoddi3.c		optional	powerpc | powerpcspe
diff --git a/sys/conf/files.riscv b/sys/conf/files.riscv
index 3969528db07e..7ecea016b9a3 100644
--- a/sys/conf/files.riscv
+++ b/sys/conf/files.riscv
@@ -29,6 +29,7 @@ libkern/flsl.c			standard
 libkern/flsll.c			standard
 libkern/memcmp.c		standard
 libkern/memset.c		standard
+libkern/strlen.c		standard
 riscv/riscv/autoconf.c		standard
 riscv/riscv/bus_machdep.c	standard
 riscv/riscv/bus_space_asm.S	standard