x86/hweight: Get rid of the special calling convention
People complained about ARCH_HWEIGHT_CFLAGS and how it throws a wrench
into experimentation with kcov, LTO, etc.

Add asm versions for __sw_hweight{32,64}() and do explicit saving and
restoring of clobbered registers. This gets rid of the special calling
convention. We get to call those functions on !X86_FEATURE_POPCNT CPUs.

We still need to hardcode POPCNT and the register operands, as some old
gas versions we support do not know about POPCNT.

Btw, remove redundant REX prefix from 32-bit POPCNT because alternatives
can do padding now.

Suggested-by: H. Peter Anvin <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Acked-by: Peter Zijlstra (Intel) <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Brian Gerst <[email protected]>
Cc: Denys Vlasenko <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
suryasaimadhu authored and Ingo Molnar committed Jun 8, 2016
1 parent 08dd8cd commit f596710
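
For context, the new arch/x86/lib/hweight.S added below implements the classic SWAR (parallel bit-count) algorithm. A minimal C sketch of the 32-bit steps, for illustration only (the function name is made up; the real implementations are the asm routines in the diff):

	/* Illustrative sketch -- mirrors the steps of __sw_hweight32 below. */
	static unsigned int hweight32_sketch(unsigned int w)
	{
		unsigned int t = (w >> 1) & 0x55555555;	/* high bit of each 2-bit pair */

		w -= t;						/* 2-bit partial sums */
		w = (w & 0x33333333) + ((w >> 2) & 0x33333333);	/* 4-bit sums */
		w = (w + (w >> 4)) & 0x0f0f0f0f;		/* per-byte sums */
		return (w * 0x01010101) >> 24;			/* add bytes, keep top one */
	}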
Showing 8 changed files with 97 additions and 25 deletions.
5 changes: 0 additions & 5 deletions arch/x86/Kconfig
@@ -294,11 +294,6 @@ config X86_32_LAZY_GS
 	def_bool y
 	depends on X86_32 && !CC_STACKPROTECTOR
 
-config ARCH_HWEIGHT_CFLAGS
-	string
-	default "-fcall-saved-ecx -fcall-saved-edx" if X86_32
-	default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64
-
 config ARCH_SUPPORTS_UPROBES
 	def_bool y
 
24 changes: 10 additions & 14 deletions arch/x86/include/asm/arch_hweight.h
@@ -4,8 +4,8 @@
 #include <asm/cpufeatures.h>
 
 #ifdef CONFIG_64BIT
-/* popcnt %edi, %eax -- redundant REX prefix for alignment */
-#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7"
+/* popcnt %edi, %eax */
+#define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc7"
 /* popcnt %rdi, %rax */
 #define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7"
 #define REG_IN "D"
@@ -17,19 +17,15 @@
 #define REG_OUT "a"
 #endif
 
-/*
- * __sw_hweightXX are called from within the alternatives below
- * and callee-clobbered registers need to be taken care of. See
- * ARCH_HWEIGHT_CFLAGS in <arch/x86/Kconfig> for the respective
- * compiler switches.
- */
+#define __HAVE_ARCH_SW_HWEIGHT
+
 static __always_inline unsigned int __arch_hweight32(unsigned int w)
 {
-	unsigned int res = 0;
+	unsigned int res;
 
 	asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT)
-		     : "="REG_OUT (res)
-		     : REG_IN (w));
+			 : "="REG_OUT (res)
+			 : REG_IN (w));
 
 	return res;
 }
@@ -53,11 +49,11 @@ static inline unsigned long __arch_hweight64(__u64 w)
 #else
 static __always_inline unsigned long __arch_hweight64(__u64 w)
 {
-	unsigned long res = 0;
+	unsigned long res;
 
 	asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT)
-		     : "="REG_OUT (res)
-		     : REG_IN (w));
+			 : "="REG_OUT (res)
+			 : REG_IN (w));
 
 	return res;
 }
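
In other words, callers keep using the regular hweight API; at patch time the ALTERNATIVE either inlines POPCNT or leaves a plain call to the asm routine, which now preserves all caller-clobbered registers itself. A hedged usage sketch (the caller is hypothetical; hweight32() is the normal <linux/bitops.h> interface):

	#include <linux/bitops.h>

	/* Hypothetical caller: the hweight32() below becomes either a
	 * POPCNT instruction or "call __sw_hweight32", chosen when
	 * alternatives are applied -- no special compiler flags needed. */
	static unsigned int online_count(u32 mask)
	{
		return hweight32(mask);
	}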
2 changes: 2 additions & 0 deletions arch/x86/kernel/i386_ksyms_32.c
@@ -42,3 +42,5 @@ EXPORT_SYMBOL(empty_zero_page);
 EXPORT_SYMBOL(___preempt_schedule);
 EXPORT_SYMBOL(___preempt_schedule_notrace);
 #endif
+
+EXPORT_SYMBOL(__sw_hweight32);
3 changes: 3 additions & 0 deletions arch/x86/kernel/x8664_ksyms_64.c
@@ -44,6 +44,9 @@ EXPORT_SYMBOL(clear_page);
 
 EXPORT_SYMBOL(csum_partial);
 
+EXPORT_SYMBOL(__sw_hweight32);
+EXPORT_SYMBOL(__sw_hweight64);
+
 /*
  * Export string functions. We normally rely on gcc builtin for most of these,
  * but gcc sometimes decides not to inline them.
2 changes: 1 addition & 1 deletion arch/x86/lib/Makefile
@@ -25,7 +25,7 @@ lib-y += memcpy_$(BITS).o
 lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
 lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o
 
-obj-y += msr.o msr-reg.o msr-reg-export.o
+obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
 
 ifeq ($(CONFIG_X86_32),y)
         obj-y += atomic64_32.o
77 changes: 77 additions & 0 deletions arch/x86/lib/hweight.S
@@ -0,0 +1,77 @@
#include <linux/linkage.h>

#include <asm/asm.h>

/*
* unsigned int __sw_hweight32(unsigned int w)
* %rdi: w
*/
ENTRY(__sw_hweight32)

#ifdef CONFIG_X86_64
movl %edi, %eax # w
#endif
__ASM_SIZE(push,) %__ASM_REG(dx)
movl %eax, %edx # w -> t
shrl %edx # t >>= 1
andl $0x55555555, %edx # t &= 0x55555555
subl %edx, %eax # w -= t

movl %eax, %edx # w -> t
shrl $2, %eax # w_tmp >>= 2
andl $0x33333333, %edx # t &= 0x33333333
andl $0x33333333, %eax # w_tmp &= 0x33333333
addl %edx, %eax # w = w_tmp + t

movl %eax, %edx # w -> t
shrl $4, %edx # t >>= 4
addl %edx, %eax # w_tmp += t
andl $0x0f0f0f0f, %eax # w_tmp &= 0x0f0f0f0f
imull $0x01010101, %eax, %eax # w_tmp *= 0x01010101
shrl $24, %eax # w = w_tmp >> 24
__ASM_SIZE(pop,) %__ASM_REG(dx)
ret
ENDPROC(__sw_hweight32)

ENTRY(__sw_hweight64)
#ifdef CONFIG_X86_64
pushq %rdx

movq %rdi, %rdx # w -> t
movabsq $0x5555555555555555, %rax
shrq %rdx # t >>= 1
andq %rdx, %rax # t &= 0x5555555555555555
movabsq $0x3333333333333333, %rdx
subq %rax, %rdi # w -= t

movq %rdi, %rax # w -> t
shrq $2, %rdi # w_tmp >>= 2
andq %rdx, %rax # t &= 0x3333333333333333
andq %rdi, %rdx # w_tmp &= 0x3333333333333333
addq %rdx, %rax # w = w_tmp + t

movq %rax, %rdx # w -> t
shrq $4, %rdx # t >>= 4
addq %rdx, %rax # w_tmp += t
movabsq $0x0f0f0f0f0f0f0f0f, %rdx
andq %rdx, %rax # w_tmp &= 0x0f0f0f0f0f0f0f0f
movabsq $0x0101010101010101, %rdx
imulq %rdx, %rax # w_tmp *= 0x0101010101010101
shrq $56, %rax # w = w_tmp >> 56

popq %rdx
ret
#else /* CONFIG_X86_32 */
/* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */
pushl %ecx

call __sw_hweight32
movl %eax, %ecx # stash away result
movl %edx, %eax # second part of input
call __sw_hweight32
addl %ecx, %eax # result

popl %ecx
ret
#endif
ENDPROC(__sw_hweight64)
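
As a sanity check of the bit-twiddling above (worked example, not part of the commit): for w = 0xdeadbeef, the 2-bit step leaves 0x9959699a, the 4-bit step 0x33233334 (each nibble now holds the popcount of the corresponding input nibble: 3,3,2,3,3,3,3,4), the byte step 0x06050607, and the final multiply-and-shift adds the bytes: 6+5+6+7 = 24, the Hamming weight of 0xdeadbeef. Note also how the 32-bit variant of __sw_hweight64 simply runs __sw_hweight32 on each half of the (%eax,%edx) input pair and sums the two results.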
5 changes: 0 additions & 5 deletions lib/Makefile
@@ -15,9 +15,6 @@ KCOV_INSTRUMENT_rbtree.o := n
 KCOV_INSTRUMENT_list_debug.o := n
 KCOV_INSTRUMENT_debugobjects.o := n
 KCOV_INSTRUMENT_dynamic_debug.o := n
-# Kernel does not boot if we instrument this file as it uses custom calling
-# convention (see CONFIG_ARCH_HWEIGHT_CFLAGS).
-KCOV_INSTRUMENT_hweight.o := n
 
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
@@ -74,8 +71,6 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
 obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o
 obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
 
-GCOV_PROFILE_hweight.o := n
-CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
 
 obj-$(CONFIG_BTREE) += btree.o
4 changes: 4 additions & 0 deletions lib/hweight.c
@@ -9,6 +9,7 @@
  * The Hamming Weight of a number is the total number of bits set in it.
  */
 
+#ifndef __HAVE_ARCH_SW_HWEIGHT
 unsigned int __sw_hweight32(unsigned int w)
 {
 #ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER
@@ -25,6 +26,7 @@ unsigned int __sw_hweight32(unsigned int w)
 #endif
 }
 EXPORT_SYMBOL(__sw_hweight32);
+#endif
 
 unsigned int __sw_hweight16(unsigned int w)
 {
@@ -43,6 +45,7 @@ unsigned int __sw_hweight8(unsigned int w)
 }
 EXPORT_SYMBOL(__sw_hweight8);
 
+#ifndef __HAVE_ARCH_SW_HWEIGHT
 unsigned long __sw_hweight64(__u64 w)
 {
 #if BITS_PER_LONG == 32
@@ -65,3 +68,4 @@ unsigned long __sw_hweight64(__u64 w)
 #endif
 }
 EXPORT_SYMBOL(__sw_hweight64);
+#endif
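
Net effect of these guards (summary, not part of the diff): once an architecture defines __HAVE_ARCH_SW_HWEIGHT, as x86 now does in arch_hweight.h, the generic C __sw_hweight32()/__sw_hweight64() and their EXPORT_SYMBOLs compile out of lib/hweight.o and the asm versions from arch/x86/lib/hweight.S take over, while __sw_hweight16() and __sw_hweight8() still come from the generic file.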
