Skip to content

Commit

Permalink
x86/build: Mostly disable '-maccumulate-outgoing-args'
Browse files Browse the repository at this point in the history
The GCC '-maccumulate-outgoing-args' flag is enabled for most configs,
mostly because of issues which are no longer relevant.  For most
configs, and with most recent versions of GCC, it's no longer needed.

Clarify which cases need it, and only enable it for those cases.  Also
produce a compile-time error for the ftrace graph + mcount + '-Os' case,
which will otherwise cause runtime failures.

The main benefit of '-maccumulate-outgoing-args' is that it prevents an
ugly prologue for functions which have aligned stacks.  But removing the
option also has some benefits: more readable argument saves, smaller
text size, and (presumably) slightly improved performance.

Here are the object size savings for 32-bit and 64-bit defconfig
kernels:

      text	   data	    bss	     dec	    hex	filename
  10006710	3543328	1773568	15323606	 e9d1d6	vmlinux.x86-32.before
   9706358	3547424	1773568	15027350	 e54c96	vmlinux.x86-32.after

      text	   data	    bss	     dec	    hex	filename
  10652105	4537576	 843776	16033457	 f4a6b1	vmlinux.x86-64.before
  10639629	4537576	 843776	16020981	 f475f5	vmlinux.x86-64.after

That comes out to a 3% text size improvement on x86-32 and a 0.1% text
size improvement on x86-64.

Signed-off-by: Josh Poimboeuf <[email protected]>
Cc: Andrew Lutomirski <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Brian Gerst <[email protected]>
Cc: Denys Vlasenko <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Pavel Machek <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Steven Rostedt <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Link: http://lkml.kernel.org/r/20170316193133.zrj6gug53766m6nn@treble
Signed-off-by: Ingo Molnar <[email protected]>
  • Loading branch information
jpoimboe authored and Ingo Molnar committed Mar 30, 2017
1 parent a46f60d commit 3f135e5
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 22 deletions.
35 changes: 31 additions & 4 deletions arch/x86/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -120,10 +120,6 @@ else
# -funit-at-a-time shrinks the kernel .text considerably
# unfortunately it makes reading oopses harder.
KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time)

# this works around some issues with generating unwind tables in older gccs
# newer gccs do it by default
KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args)
endif

ifdef CONFIG_X86_X32
Expand All @@ -147,6 +143,37 @@ ifeq ($(CONFIG_KMEMCHECK),y)
KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy)
endif

#
# If the function graph tracer is used with mcount instead of fentry,
# '-maccumulate-outgoing-args' is needed to prevent a GCC bug
# (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=42109)
#
ifdef CONFIG_FUNCTION_GRAPH_TRACER
ifndef CONFIG_HAVE_FENTRY
ACCUMULATE_OUTGOING_ARGS := 1
else
ifeq ($(call cc-option-yn, -mfentry), n)
ACCUMULATE_OUTGOING_ARGS := 1
endif
endif
endif

#
# Jump labels need '-maccumulate-outgoing-args' for gcc < 4.5.2 to prevent a
# GCC bug (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=46226). There's no way
# to test for this bug at compile-time because the test case needs to execute,
# which is a no-go for cross compilers. So check the GCC version instead.
#
ifdef CONFIG_JUMP_LABEL
ifneq ($(ACCUMULATE_OUTGOING_ARGS), 1)
ACCUMULATE_OUTGOING_ARGS = $(call cc-if-fullversion, -lt, 040502, 1)
endif
endif

ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1)
KBUILD_CFLAGS += -maccumulate-outgoing-args
endif

# Stackpointer is addressed different for 32 bit and 64 bit x86
sp-$(CONFIG_X86_32) := esp
sp-$(CONFIG_X86_64) := rsp
Expand Down
18 changes: 0 additions & 18 deletions arch/x86/Makefile_32.cpu
Original file line number Diff line number Diff line change
Expand Up @@ -45,24 +45,6 @@ cflags-$(CONFIG_MGEODE_LX) += $(call cc-option,-march=geode,-march=pentium-mmx)
# cpu entries
cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686))

# Work around the pentium-mmx code generator madness of gcc4.4.x which
# does stack alignment by generating horrible code _before_ the mcount
# prologue (push %ebp, mov %esp, %ebp) which breaks the function graph
# tracer assumptions. For i686, generic, core2 this is set by the
# compiler anyway
ifeq ($(CONFIG_FUNCTION_GRAPH_TRACER), y)
ADD_ACCUMULATE_OUTGOING_ARGS := y
endif

# Work around to a bug with asm goto with first implementations of it
# in gcc causing gcc to mess up the push and pop of the stack in some
# uses of asm goto.
ifeq ($(CONFIG_JUMP_LABEL), y)
ADD_ACCUMULATE_OUTGOING_ARGS := y
endif

cflags-$(ADD_ACCUMULATE_OUTGOING_ARGS) += $(call cc-option,-maccumulate-outgoing-args)

# Bug fix for binutils: this option is required in order to keep
# binutils from generating NOPL instructions against our will.
ifneq ($(CONFIG_X86_P6_NOP),y)
Expand Down
6 changes: 6 additions & 0 deletions arch/x86/kernel/ftrace.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@
#include <asm/ftrace.h>
#include <asm/nops.h>

#if defined(CONFIG_FUNCTION_GRAPH_TRACER) && \
!defined(CC_USING_FENTRY) && \
!defined(CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE)
# error The following combination is not supported: ((compiler missing -mfentry) || (CONFIG_X86_32 and !CONFIG_DYNAMIC_FTRACE)) && CONFIG_FUNCTION_GRAPH_TRACER && CONFIG_CC_OPTIMIZE_FOR_SIZE
#endif

#ifdef CONFIG_DYNAMIC_FTRACE

int ftrace_arch_code_modify_prepare(void)
Expand Down
4 changes: 4 additions & 0 deletions scripts/Kbuild.include
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,10 @@ cc-fullversion = $(shell $(CONFIG_SHELL) \
# Usage: EXTRA_CFLAGS += $(call cc-ifversion, -lt, 0402, -O1)
cc-ifversion = $(shell [ $(cc-version) $(1) $(2) ] && echo $(3) || echo $(4))

# cc-if-fullversion
# Usage: EXTRA_CFLAGS += $(call cc-if-fullversion, -lt, 040502, -O1)
cc-if-fullversion = $(shell [ $(cc-fullversion) $(1) $(2) ] && echo $(3) || echo $(4))

# cc-ldoption
# Usage: ldflags += $(call cc-ldoption, -Wl$(comma)--hash-style=both)
cc-ldoption = $(call try-run,\
Expand Down

0 comments on commit 3f135e5

Please sign in to comment.