Skip to content

Commit

Permalink
x86/stackprotector/32: Make the canary into a regular percpu variable
Browse files Browse the repository at this point in the history
On 32-bit kernels, the stackprotector canary is quite nasty -- it is
stored at %gs:(20), which is nasty because 32-bit kernels use %fs for
percpu storage.  It's even nastier because it means that whether %gs
contains userspace state or kernel state while running kernel code
depends on whether stackprotector is enabled (this is
CONFIG_X86_32_LAZY_GS), and this setting radically changes the way
that segment selectors work.  Supporting both variants is a
maintenance and testing mess.

Merely rearranging so that percpu and the stack canary
share the same segment would be messy as the 32-bit percpu address
layout isn't currently compatible with putting a variable at a fixed
offset.

Fortunately, GCC 8.1 added options that allow the stack canary to be
accessed as %fs:__stack_chk_guard, effectively turning it into an ordinary
percpu variable.  This lets us get rid of all of the code to manage the
stack canary GDT descriptor and the CONFIG_X86_32_LAZY_GS mess.

(That name is special.  We could use any symbol we want for the
 %fs-relative mode, but for CONFIG_SMP=n, gcc refuses to let us use any
 name other than __stack_chk_guard.)

Forcibly disable stackprotector on older compilers that don't support
the new options and turn the stack canary into a percpu variable. The
"lazy GS" approach is now used for all 32-bit configurations.

Also makes load_gs_index() work on 32-bit kernels. On 64-bit kernels,
it loads the GS selector and updates the user GSBASE accordingly. (This
is unchanged.) On 32-bit kernels, it loads the GS selector and updates
GSBASE, which is now always the user base. This means that the overall
effect is the same on 32-bit and 64-bit, which avoids some ifdeffery.

 [ bp: Massage commit message. ]

Signed-off-by: Andy Lutomirski <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
Link: https://lkml.kernel.org/r/c0ff7dba14041c7e5d1cae5d4df052f03759bef3.1613243844.git.luto@kernel.org
  • Loading branch information
amluto authored and suryasaimadhu committed Mar 8, 2021
1 parent a38fd87 commit 3fb0fdb
Show file tree
Hide file tree
Showing 19 changed files with 60 additions and 218 deletions.
7 changes: 2 additions & 5 deletions arch/x86/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -360,10 +360,6 @@ config X86_64_SMP
def_bool y
depends on X86_64 && SMP

config X86_32_LAZY_GS
def_bool y
depends on X86_32 && !STACKPROTECTOR

config ARCH_SUPPORTS_UPROBES
def_bool y

Expand All @@ -386,7 +382,8 @@ config CC_HAS_SANE_STACKPROTECTOR
default $(success,$(srctree)/scripts/gcc-x86_32-has-stack-protector.sh $(CC))
help
We have to make sure stack protector is unconditionally disabled if
the compiler produces broken code.
the compiler produces broken code or if it does not let us control
the segment on 32-bit kernels.

menu "Processor type and features"

Expand Down
8 changes: 8 additions & 0 deletions arch/x86/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,14 @@ ifeq ($(CONFIG_X86_32),y)

# temporary until string.h is fixed
KBUILD_CFLAGS += -ffreestanding

ifeq ($(CONFIG_STACKPROTECTOR),y)
ifeq ($(CONFIG_SMP),y)
KBUILD_CFLAGS += -mstack-protector-guard-reg=fs -mstack-protector-guard-symbol=__stack_chk_guard
else
KBUILD_CFLAGS += -mstack-protector-guard=global
endif
endif
else
BITS := 64
UTS_MACHINE := x86_64
Expand Down
56 changes: 4 additions & 52 deletions arch/x86/entry/entry_32.S
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
* 1C(%esp) - %ds
* 20(%esp) - %es
* 24(%esp) - %fs
* 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS
* 28(%esp) - unused -- was %gs on old stackprotector kernels
* 2C(%esp) - orig_eax
* 30(%esp) - %eip
* 34(%esp) - %cs
Expand Down Expand Up @@ -56,14 +56,9 @@
/*
* User gs save/restore
*
* %gs is used for userland TLS and kernel only uses it for stack
* canary which is required to be at %gs:20 by gcc. Read the comment
* at the top of stackprotector.h for more info.
*
* Local labels 98 and 99 are used.
* This is leftover junk from CONFIG_X86_32_LAZY_GS. A subsequent patch
* will remove it entirely.
*/
#ifdef CONFIG_X86_32_LAZY_GS

/* unfortunately push/pop can't be no-op */
.macro PUSH_GS
pushl $0
Expand All @@ -86,49 +81,6 @@
.macro SET_KERNEL_GS reg
.endm

#else /* CONFIG_X86_32_LAZY_GS */

.macro PUSH_GS
pushl %gs
.endm

.macro POP_GS pop=0
98: popl %gs
.if \pop <> 0
add $\pop, %esp
.endif
.endm
.macro POP_GS_EX
.pushsection .fixup, "ax"
99: movl $0, (%esp)
jmp 98b
.popsection
_ASM_EXTABLE(98b, 99b)
.endm

.macro PTGS_TO_GS
98: mov PT_GS(%esp), %gs
.endm
.macro PTGS_TO_GS_EX
.pushsection .fixup, "ax"
99: movl $0, PT_GS(%esp)
jmp 98b
.popsection
_ASM_EXTABLE(98b, 99b)
.endm

.macro GS_TO_REG reg
movl %gs, \reg
.endm
.macro REG_TO_PTGS reg
movl \reg, PT_GS(%esp)
.endm
.macro SET_KERNEL_GS reg
movl $(__KERNEL_STACK_CANARY), \reg
movl \reg, %gs
.endm

#endif /* CONFIG_X86_32_LAZY_GS */

/* Unconditionally switch to user cr3 */
.macro SWITCH_TO_USER_CR3 scratch_reg:req
Expand Down Expand Up @@ -779,7 +731,7 @@ SYM_CODE_START(__switch_to_asm)

#ifdef CONFIG_STACKPROTECTOR
movl TASK_stack_canary(%edx), %ebx
movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
movl %ebx, PER_CPU_VAR(__stack_chk_guard)
#endif

#ifdef CONFIG_RETPOLINE
Expand Down
15 changes: 4 additions & 11 deletions arch/x86/include/asm/processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,9 @@ struct fixed_percpu_data {
* GCC hardcodes the stack canary as %gs:40. Since the
* irq_stack is the object at %gs:0, we reserve the bottom
* 48 bytes of the irq stack for the canary.
*
* Once we are willing to require -mstack-protector-guard-symbol=
* support for x86_64 stackprotector, we can get rid of this.
*/
char gs_base[40];
unsigned long stack_canary;
Expand All @@ -460,17 +463,7 @@ extern asmlinkage void ignore_sysret(void);
void current_save_fsgs(void);
#else /* X86_64 */
#ifdef CONFIG_STACKPROTECTOR
/*
* Make sure stack canary segment base is cached-aligned:
* "For Intel Atom processors, avoid non zero segment base address
* that is not aligned to cache line boundary at all cost."
* (Optim Ref Manual Assembly/Compiler Coding Rule 15.)
*/
struct stack_canary {
char __pad[20]; /* canary at %gs:20 */
unsigned long canary;
};
DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
DECLARE_PER_CPU(unsigned long, __stack_chk_guard);
#endif
DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
Expand Down
5 changes: 4 additions & 1 deletion arch/x86/include/asm/ptrace.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,10 @@ struct pt_regs {
unsigned short __esh;
unsigned short fs;
unsigned short __fsh;
/* On interrupt, gs and __gsh store the vector number. */
/*
* On interrupt, gs and __gsh store the vector number. They never
* store gs any more.
*/
unsigned short gs;
unsigned short __gsh;
/* On interrupt, this is the error code. */
Expand Down
30 changes: 8 additions & 22 deletions arch/x86/include/asm/segment.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@
*
* 26 - ESPFIX small SS
* 27 - per-cpu [ offset to per-cpu data area ]
* 28 - stack_canary-20 [ for stack protector ] <=== cacheline #8
* 28 - unused
* 29 - unused
* 30 - unused
* 31 - TSS for double fault handler
Expand All @@ -118,7 +118,6 @@

#define GDT_ENTRY_ESPFIX_SS 26
#define GDT_ENTRY_PERCPU 27
#define GDT_ENTRY_STACK_CANARY 28

#define GDT_ENTRY_DOUBLEFAULT_TSS 31

Expand Down Expand Up @@ -158,12 +157,6 @@
# define __KERNEL_PERCPU 0
#endif

#ifdef CONFIG_STACKPROTECTOR
# define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY*8)
#else
# define __KERNEL_STACK_CANARY 0
#endif

#else /* 64-bit: */

#include <asm/cache.h>
Expand Down Expand Up @@ -364,22 +357,15 @@ static inline void __loadsegment_fs(unsigned short value)
asm("mov %%" #seg ",%0":"=r" (value) : : "memory")

/*
* x86-32 user GS accessors:
* x86-32 user GS accessors. This is ugly and could do with some cleaning up.
*/
#ifdef CONFIG_X86_32
# ifdef CONFIG_X86_32_LAZY_GS
# define get_user_gs(regs) (u16)({ unsigned long v; savesegment(gs, v); v; })
# define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
# define task_user_gs(tsk) ((tsk)->thread.gs)
# define lazy_save_gs(v) savesegment(gs, (v))
# define lazy_load_gs(v) loadsegment(gs, (v))
# else /* X86_32_LAZY_GS */
# define get_user_gs(regs) (u16)((regs)->gs)
# define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0)
# define task_user_gs(tsk) (task_pt_regs(tsk)->gs)
# define lazy_save_gs(v) do { } while (0)
# define lazy_load_gs(v) do { } while (0)
# endif /* X86_32_LAZY_GS */
# define get_user_gs(regs) (u16)({ unsigned long v; savesegment(gs, v); v; })
# define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
# define task_user_gs(tsk) ((tsk)->thread.gs)
# define lazy_save_gs(v) savesegment(gs, (v))
# define lazy_load_gs(v) loadsegment(gs, (v))
# define load_gs_index(v) loadsegment(gs, (v))
#endif /* X86_32 */

#endif /* !__ASSEMBLY__ */
Expand Down
79 changes: 16 additions & 63 deletions arch/x86/include/asm/stackprotector.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,23 @@
* Stack protector works by putting predefined pattern at the start of
* the stack frame and verifying that it hasn't been overwritten when
* returning from the function. The pattern is called stack canary
* and unfortunately gcc requires it to be at a fixed offset from %gs.
* On x86_64, the offset is 40 bytes and on x86_32 20 bytes. x86_64
* and x86_32 use segment registers differently and thus handles this
* requirement differently.
* and unfortunately gcc historically required it to be at a fixed offset
* from the percpu segment base. On x86_64, the offset is 40 bytes.
*
* On x86_64, %gs is shared by percpu area and stack canary. All
* percpu symbols are zero based and %gs points to the base of percpu
* area. The first occupant of the percpu area is always
* fixed_percpu_data which contains stack_canary at offset 40. Userland
* %gs is always saved and restored on kernel entry and exit using
* swapgs, so stack protector doesn't add any complexity there.
* The same segment is shared by percpu area and stack canary. On
* x86_64, percpu symbols are zero based and %gs (64-bit) points to the
* base of percpu area. The first occupant of the percpu area is always
* fixed_percpu_data which contains stack_canary at the approproate
* offset. On x86_32, the stack canary is just a regular percpu
* variable.
*
* On x86_32, it's slightly more complicated. As in x86_64, %gs is
* used for userland TLS. Unfortunately, some processors are much
* slower at loading segment registers with different value when
* entering and leaving the kernel, so the kernel uses %fs for percpu
* area and manages %gs lazily so that %gs is switched only when
* necessary, usually during task switch.
* Putting percpu data in %fs on 32-bit is a minor optimization compared to
* using %gs. Since 32-bit userspace normally has %fs == 0, we are likely
* to load 0 into %fs on exit to usermode, whereas with percpu data in
* %gs, we are likely to load a non-null %gs on return to user mode.
*
* As gcc requires the stack canary at %gs:20, %gs can't be managed
* lazily if stack protector is enabled, so the kernel saves and
* restores userland %gs on kernel entry and exit. This behavior is
* controlled by CONFIG_X86_32_LAZY_GS and accessors are defined in
* system.h to hide the details.
* Once we are willing to require GCC 8.1 or better for 64-bit stackprotector
* support, we can remove some of this complexity.
*/

#ifndef _ASM_STACKPROTECTOR_H
Expand All @@ -44,14 +37,6 @@
#include <linux/random.h>
#include <linux/sched.h>

/*
* 24 byte read-only segment initializer for stack canary. Linker
* can't handle the address bit shifting. Address will be set in
* head_32 for boot CPU and setup_per_cpu_areas() for others.
*/
#define GDT_STACK_CANARY_INIT \
[GDT_ENTRY_STACK_CANARY] = GDT_ENTRY_INIT(0x4090, 0, 0x18),

/*
* Initialize the stackprotector canary value.
*
Expand Down Expand Up @@ -86,7 +71,7 @@ static __always_inline void boot_init_stack_canary(void)
#ifdef CONFIG_X86_64
this_cpu_write(fixed_percpu_data.stack_canary, canary);
#else
this_cpu_write(stack_canary.canary, canary);
this_cpu_write(__stack_chk_guard, canary);
#endif
}

Expand All @@ -95,48 +80,16 @@ static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle)
#ifdef CONFIG_X86_64
per_cpu(fixed_percpu_data.stack_canary, cpu) = idle->stack_canary;
#else
per_cpu(stack_canary.canary, cpu) = idle->stack_canary;
#endif
}

static inline void setup_stack_canary_segment(int cpu)
{
#ifdef CONFIG_X86_32
unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu);
struct desc_struct *gdt_table = get_cpu_gdt_rw(cpu);
struct desc_struct desc;

desc = gdt_table[GDT_ENTRY_STACK_CANARY];
set_desc_base(&desc, canary);
write_gdt_entry(gdt_table, GDT_ENTRY_STACK_CANARY, &desc, DESCTYPE_S);
#endif
}

static inline void load_stack_canary_segment(void)
{
#ifdef CONFIG_X86_32
asm("mov %0, %%gs" : : "r" (__KERNEL_STACK_CANARY) : "memory");
per_cpu(__stack_chk_guard, cpu) = idle->stack_canary;
#endif
}

#else /* STACKPROTECTOR */

#define GDT_STACK_CANARY_INIT

/* dummy boot_init_stack_canary() is defined in linux/stackprotector.h */

static inline void setup_stack_canary_segment(int cpu)
{ }

static inline void cpu_init_stack_canary(int cpu, struct task_struct *idle)
{ }

static inline void load_stack_canary_segment(void)
{
#ifdef CONFIG_X86_32
asm volatile ("mov %0, %%gs" : : "r" (0));
#endif
}

#endif /* STACKPROTECTOR */
#endif /* _ASM_STACKPROTECTOR_H */
6 changes: 2 additions & 4 deletions arch/x86/include/asm/suspend_32.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,10 @@
/* image of the saved processor state */
struct saved_context {
/*
* On x86_32, all segment registers, with the possible exception of
* gs, are saved at kernel entry in pt_regs.
* On x86_32, all segment registers except gs are saved at kernel
* entry in pt_regs.
*/
#ifdef CONFIG_X86_32_LAZY_GS
u16 gs;
#endif
unsigned long cr0, cr2, cr3, cr4;
u64 misc_enable;
bool misc_enable_saved;
Expand Down
5 changes: 0 additions & 5 deletions arch/x86/kernel/asm-offsets_32.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,6 @@ void foo(void)
offsetof(struct cpu_entry_area, tss.x86_tss.sp1) -
offsetofend(struct cpu_entry_area, entry_stack_page.stack));

#ifdef CONFIG_STACKPROTECTOR
BLANK();
OFFSET(stack_canary_offset, stack_canary, canary);
#endif

BLANK();
DEFINE(EFI_svam, offsetof(efi_runtime_services_t, set_virtual_address_map));
}
5 changes: 2 additions & 3 deletions arch/x86/kernel/cpu/common.c
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,6 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {

[GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
[GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
GDT_STACK_CANARY_INIT
#endif
} };
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
Expand Down Expand Up @@ -599,7 +598,6 @@ void load_percpu_segment(int cpu)
__loadsegment_simple(gs, 0);
wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu));
#endif
load_stack_canary_segment();
}

#ifdef CONFIG_X86_32
Expand Down Expand Up @@ -1796,7 +1794,8 @@ DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);

#ifdef CONFIG_STACKPROTECTOR
DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
DEFINE_PER_CPU(unsigned long, __stack_chk_guard);
EXPORT_PER_CPU_SYMBOL(__stack_chk_guard);
#endif

#endif /* CONFIG_X86_64 */
Expand Down
Loading

0 comments on commit 3fb0fdb

Please sign in to comment.