include/linux/unaligned: force inlining of byteswap operations
Sometimes gcc mysteriously doesn't inline very small functions we
expect to be inlined.  See

    https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66122
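
For reference, __always_inline expands to roughly the GCC idiom
sketched below; the kernel's real definition lives in
<linux/compiler.h>, and this one-liner is shown for illustration only:

	/* Sketch only: plain `inline' is merely a hint that gcc is
	 * free to ignore, while the always_inline attribute forces
	 * inlining (or errors out if inlining is impossible). */
	#define __always_inline	inline __attribute__((always_inline))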

With this .config:
http://busybox.net/~vda/kernel_config_OPTIMIZE_INLINING_and_Os,
the following functions get deinlined many times.
Examples of disassembly:

<get_unaligned_be16> (24 copies, 108 calls):
       66 8b 07                mov    (%rdi),%ax
       55                      push   %rbp
       48 89 e5                mov    %rsp,%rbp
       86 e0                   xchg   %ah,%al
       5d                      pop    %rbp
       c3                      retq

<get_unaligned_be32> (25 copies, 181 calls):
       8b 07                   mov    (%rdi),%eax
       55                      push   %rbp
       48 89 e5                mov    %rsp,%rbp
       0f c8                   bswap  %eax
       5d                      pop    %rbp
       c3                      retq

<get_unaligned_be64> (23 copies, 94 calls):
       48 8b 07                mov    (%rdi),%rax
       55                      push   %rbp
       48 89 e5                mov    %rsp,%rbp
       48 0f c8                bswap  %rax
       5d                      pop    %rbp
       c3                      retq

<put_unaligned_be16> (2 copies, 11 calls):
       89 f8                   mov    %edi,%eax
       55                      push   %rbp
       c1 ef 08                shr    $0x8,%edi
       c1 e0 08                shl    $0x8,%eax
       09 c7                   or     %eax,%edi
       48 89 e5                mov    %rsp,%rbp
       66 89 3e                mov    %di,(%rsi)

<put_unaligned_be32> (8 copies, 43 calls):
       55                      push   %rbp
       0f cf                   bswap  %edi
       89 3e                   mov    %edi,(%rsi)
       48 89 e5                mov    %rsp,%rbp
       5d                      pop    %rbp
       c3                      retq

<put_unaligned_be64> (26 copies, 157 calls):
       55                      push   %rbp
       48 0f cf                bswap  %rdi
       48 89 3e                mov    %rdi,(%rsi)
       48 89 e5                mov    %rsp,%rbp
       5d                      pop    %rbp
       c3                      retq

This patch fixes this via s/inline/__always_inline/.
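
For example, a caller like the sketch below (read_be32_field() is a
made-up helper, not part of this patch) now compiles down to a single
mov + bswap on x86 instead of a call:

	/* Hypothetical caller, for illustration only.  With
	 * __always_inline, gcc emits `mov (%rdi),%eax; bswap %eax'
	 * here rather than calling an out-of-line copy. */
	#include <asm/unaligned.h>

	static u32 read_be32_field(const u8 *buf)
	{
		return get_unaligned_be32(buf);
	}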

It only affects arches with efficient unaligned access insns, such as x86.
(Arches which lack such insns do not include linux/unaligned/access_ok.h.)
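
A sketch of how such an arch opts in (x86 is the canonical case; the
exact include list in asm/unaligned.h varies per arch):

	/* Sketch of arch/x86/include/asm/unaligned.h: x86 handles
	 * unaligned accesses in hardware, so it uses the direct
	 * load/store helpers fixed by this patch. */
	#include <linux/unaligned/access_ok.h>
	#include <linux/unaligned/generic.h>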

Code size decrease after the patch is ~8.5k (text shrinks by 8617
bytes; data grows by 32 bytes):

    text     data      bss       dec     hex filename
92197848 20826112 36417536 149441496 8e84bd8 vmlinux
92189231 20826144 36417536 149432911 8e82a4f vmlinux6_unaligned_be_after

Signed-off-by: Denys Vlasenko <[email protected]>
Acked-by: Ingo Molnar <[email protected]>
Cc: Thomas Graf <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: David Rientjes <[email protected]>
Cc: Arnd Bergmann <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
dvlasenk authored and torvalds committed Mar 17, 2016
1 parent bc27fb6 commit e3bde95
Showing 1 changed file with 12 additions and 12 deletions.

include/linux/unaligned/access_ok.h
@@ -4,62 +4,62 @@
 #include <linux/kernel.h>
 #include <asm/byteorder.h>
 
-static inline u16 get_unaligned_le16(const void *p)
+static __always_inline u16 get_unaligned_le16(const void *p)
 {
 	return le16_to_cpup((__le16 *)p);
 }
 
-static inline u32 get_unaligned_le32(const void *p)
+static __always_inline u32 get_unaligned_le32(const void *p)
 {
 	return le32_to_cpup((__le32 *)p);
 }
 
-static inline u64 get_unaligned_le64(const void *p)
+static __always_inline u64 get_unaligned_le64(const void *p)
 {
 	return le64_to_cpup((__le64 *)p);
 }
 
-static inline u16 get_unaligned_be16(const void *p)
+static __always_inline u16 get_unaligned_be16(const void *p)
 {
 	return be16_to_cpup((__be16 *)p);
 }
 
-static inline u32 get_unaligned_be32(const void *p)
+static __always_inline u32 get_unaligned_be32(const void *p)
 {
 	return be32_to_cpup((__be32 *)p);
 }
 
-static inline u64 get_unaligned_be64(const void *p)
+static __always_inline u64 get_unaligned_be64(const void *p)
 {
 	return be64_to_cpup((__be64 *)p);
 }
 
-static inline void put_unaligned_le16(u16 val, void *p)
+static __always_inline void put_unaligned_le16(u16 val, void *p)
 {
 	*((__le16 *)p) = cpu_to_le16(val);
 }
 
-static inline void put_unaligned_le32(u32 val, void *p)
+static __always_inline void put_unaligned_le32(u32 val, void *p)
 {
 	*((__le32 *)p) = cpu_to_le32(val);
 }
 
-static inline void put_unaligned_le64(u64 val, void *p)
+static __always_inline void put_unaligned_le64(u64 val, void *p)
 {
 	*((__le64 *)p) = cpu_to_le64(val);
 }
 
-static inline void put_unaligned_be16(u16 val, void *p)
+static __always_inline void put_unaligned_be16(u16 val, void *p)
 {
 	*((__be16 *)p) = cpu_to_be16(val);
 }
 
-static inline void put_unaligned_be32(u32 val, void *p)
+static __always_inline void put_unaligned_be32(u32 val, void *p)
 {
 	*((__be32 *)p) = cpu_to_be32(val);
 }
 
-static inline void put_unaligned_be64(u64 val, void *p)
+static __always_inline void put_unaligned_be64(u64 val, void *p)
 {
 	*((__be64 *)p) = cpu_to_be64(val);
 }
