Skip to content

Commit

Permalink
Merge tag 'x86_urgent_for_v5.12_rc3' of git://git.kernel.org/pub/scm/…
Browse files Browse the repository at this point in the history
…linux/kernel/git/tip/tip

Pull x86 fixes from Borislav Petkov:

 - A couple of SEV-ES fixes and robustifications: verify usermode stack
   pointer in NMI is not coming from the syscall gap, correctly track
   IRQ states in the #VC handler and access user insn bytes atomically
   in same handler as latter cannot sleep.

 - Balance 32-bit fast syscall exit path to do the proper work on exit
   and thus not confuse audit and ptrace frameworks.

 - Two fixes for the ORC unwinder going "off the rails" into KASAN
   redzones and when ORC data is missing.

* tag 'x86_urgent_for_v5.12_rc3' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/sev-es: Use __copy_from_user_inatomic()
  x86/sev-es: Correctly track IRQ states in runtime #VC handler
  x86/sev-es: Check regs->sp is trusted before adjusting #VC IST stack
  x86/sev-es: Introduce ip_within_syscall_gap() helper
  x86/entry: Fix entry/exit mismatch on failed fast 32-bit syscalls
  x86/unwind/orc: Silence warnings caused by missing ORC data
  x86/unwind/orc: Disable KASAN checking in the ORC unwinder, part 2
  • Loading branch information
torvalds committed Mar 14, 2021
2 parents c3c7579 + bffe30d commit 0a7c10d
Show file tree
Hide file tree
Showing 9 changed files with 99 additions and 29 deletions.
3 changes: 2 additions & 1 deletion arch/x86/entry/common.c
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,8 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
regs->ax = -EFAULT;

instrumentation_end();
syscall_exit_to_user_mode(regs);
local_irq_disable();
irqentry_exit_to_user_mode(regs);
return false;
}

Expand Down
2 changes: 2 additions & 0 deletions arch/x86/entry/entry_64_compat.S
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,8 @@ SYM_CODE_START(entry_SYSCALL_compat)
/* Switch to the kernel stack */
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp

SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)

/* Construct struct pt_regs on stack */
pushq $__USER32_DS /* pt_regs->ss */
pushq %r8 /* pt_regs->sp */
Expand Down
2 changes: 2 additions & 0 deletions arch/x86/include/asm/insn-eval.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx);
int insn_get_code_seg_params(struct pt_regs *regs);
int insn_fetch_from_user(struct pt_regs *regs,
unsigned char buf[MAX_INSN_SIZE]);
int insn_fetch_from_user_inatomic(struct pt_regs *regs,
unsigned char buf[MAX_INSN_SIZE]);
bool insn_decode(struct insn *insn, struct pt_regs *regs,
unsigned char buf[MAX_INSN_SIZE], int buf_size);

Expand Down
1 change: 1 addition & 0 deletions arch/x86/include/asm/proto.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ void __end_SYSENTER_singlestep_region(void);
void entry_SYSENTER_compat(void);
void __end_entry_SYSENTER_compat(void);
void entry_SYSCALL_compat(void);
void entry_SYSCALL_compat_safe_stack(void);
void entry_INT80_compat(void);
#ifdef CONFIG_XEN_PV
void xen_entry_INT80_compat(void);
Expand Down
15 changes: 15 additions & 0 deletions arch/x86/include/asm/ptrace.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ struct pt_regs {
#include <asm/paravirt_types.h>
#endif

#include <asm/proto.h>

struct cpuinfo_x86;
struct task_struct;

Expand Down Expand Up @@ -175,6 +177,19 @@ static inline bool any_64bit_mode(struct pt_regs *regs)
#ifdef CONFIG_X86_64
#define current_user_stack_pointer() current_pt_regs()->sp
#define compat_user_stack_pointer() current_pt_regs()->sp

static inline bool ip_within_syscall_gap(struct pt_regs *regs)
{
bool ret = (regs->ip >= (unsigned long)entry_SYSCALL_64 &&
regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack);

#ifdef CONFIG_IA32_EMULATION
ret = ret || (regs->ip >= (unsigned long)entry_SYSCALL_compat &&
regs->ip < (unsigned long)entry_SYSCALL_compat_safe_stack);
#endif

return ret;
}
#endif

static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
Expand Down
22 changes: 17 additions & 5 deletions arch/x86/kernel/sev-es.c
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,18 @@ static void __init setup_vc_stacks(int cpu)
cea_set_pte((void *)vaddr, pa, PAGE_KERNEL);
}

static __always_inline bool on_vc_stack(unsigned long sp)
static __always_inline bool on_vc_stack(struct pt_regs *regs)
{
unsigned long sp = regs->sp;

/* User-mode RSP is not trusted */
if (user_mode(regs))
return false;

/* SYSCALL gap still has user-mode RSP */
if (ip_within_syscall_gap(regs))
return false;

return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC)));
}

Expand All @@ -144,7 +154,7 @@ void noinstr __sev_es_ist_enter(struct pt_regs *regs)
old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);

/* Make room on the IST stack */
if (on_vc_stack(regs->sp))
if (on_vc_stack(regs))
new_ist = ALIGN_DOWN(regs->sp, 8) - sizeof(old_ist);
else
new_ist = old_ist - sizeof(old_ist);
Expand Down Expand Up @@ -248,7 +258,7 @@ static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
int res;

if (user_mode(ctxt->regs)) {
res = insn_fetch_from_user(ctxt->regs, buffer);
res = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
if (!res) {
ctxt->fi.vector = X86_TRAP_PF;
ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
Expand Down Expand Up @@ -1248,13 +1258,12 @@ static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs)
DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
{
struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
irqentry_state_t irq_state;
struct ghcb_state state;
struct es_em_ctxt ctxt;
enum es_result result;
struct ghcb *ghcb;

lockdep_assert_irqs_disabled();

/*
* Handle #DB before calling into !noinstr code to avoid recursive #DB.
*/
Expand All @@ -1263,6 +1272,8 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
return;
}

irq_state = irqentry_nmi_enter(regs);
lockdep_assert_irqs_disabled();
instrumentation_begin();

/*
Expand Down Expand Up @@ -1325,6 +1336,7 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)

out:
instrumentation_end();
irqentry_nmi_exit(regs, irq_state);

return;

Expand Down
3 changes: 1 addition & 2 deletions arch/x86/kernel/traps.c
Original file line number Diff line number Diff line change
Expand Up @@ -694,8 +694,7 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r
* In the SYSCALL entry path the RSP value comes from user-space - don't
* trust it and switch to the current kernel stack
*/
if (regs->ip >= (unsigned long)entry_SYSCALL_64 &&
regs->ip < (unsigned long)entry_SYSCALL_64_safe_stack) {
if (ip_within_syscall_gap(regs)) {
sp = this_cpu_read(cpu_current_top_of_stack);
goto sync;
}
Expand Down
14 changes: 7 additions & 7 deletions arch/x86/kernel/unwind_orc.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

#define orc_warn_current(args...) \
({ \
if (state->task == current) \
if (state->task == current && !state->error) \
orc_warn(args); \
})

Expand Down Expand Up @@ -367,8 +367,8 @@ static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
if (!stack_access_ok(state, addr, sizeof(struct pt_regs)))
return false;

*ip = regs->ip;
*sp = regs->sp;
*ip = READ_ONCE_NOCHECK(regs->ip);
*sp = READ_ONCE_NOCHECK(regs->sp);
return true;
}

Expand All @@ -380,8 +380,8 @@ static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr
if (!stack_access_ok(state, addr, IRET_FRAME_SIZE))
return false;

*ip = regs->ip;
*sp = regs->sp;
*ip = READ_ONCE_NOCHECK(regs->ip);
*sp = READ_ONCE_NOCHECK(regs->sp);
return true;
}

Expand All @@ -402,12 +402,12 @@ static bool get_reg(struct unwind_state *state, unsigned int reg_off,
return false;

if (state->full_regs) {
*val = ((unsigned long *)state->regs)[reg];
*val = READ_ONCE_NOCHECK(((unsigned long *)state->regs)[reg]);
return true;
}

if (state->prev_regs) {
*val = ((unsigned long *)state->prev_regs)[reg];
*val = READ_ONCE_NOCHECK(((unsigned long *)state->prev_regs)[reg]);
return true;
}

Expand Down
66 changes: 52 additions & 14 deletions arch/x86/lib/insn-eval.c
Original file line number Diff line number Diff line change
Expand Up @@ -1415,6 +1415,25 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs)
}
}

static unsigned long insn_get_effective_ip(struct pt_regs *regs)
{
unsigned long seg_base = 0;

/*
* If not in user-space long mode, a custom code segment could be in
* use. This is true in protected mode (if the process defined a local
* descriptor table), or virtual-8086 mode. In most of the cases
* seg_base will be zero as in USER_CS.
*/
if (!user_64bit_mode(regs)) {
seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS);
if (seg_base == -1L)
return 0;
}

return seg_base + regs->ip;
}

/**
* insn_fetch_from_user() - Copy instruction bytes from user-space memory
* @regs: Structure with register values as seen when entering kernel mode
Expand All @@ -1431,24 +1450,43 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs)
*/
int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE])
{
unsigned long seg_base = 0;
unsigned long ip;
int not_copied;

/*
* If not in user-space long mode, a custom code segment could be in
* use. This is true in protected mode (if the process defined a local
* descriptor table), or virtual-8086 mode. In most of the cases
* seg_base will be zero as in USER_CS.
*/
if (!user_64bit_mode(regs)) {
seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS);
if (seg_base == -1L)
return 0;
}
ip = insn_get_effective_ip(regs);
if (!ip)
return 0;

not_copied = copy_from_user(buf, (void __user *)ip, MAX_INSN_SIZE);

return MAX_INSN_SIZE - not_copied;
}

/**
* insn_fetch_from_user_inatomic() - Copy instruction bytes from user-space memory
* while in atomic code
* @regs: Structure with register values as seen when entering kernel mode
* @buf: Array to store the fetched instruction
*
* Gets the linear address of the instruction and copies the instruction bytes
* to the buf. This function must be used in atomic context.
*
* Returns:
*
* Number of instruction bytes copied.
*
* 0 if nothing was copied.
*/
int insn_fetch_from_user_inatomic(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE])
{
unsigned long ip;
int not_copied;

ip = insn_get_effective_ip(regs);
if (!ip)
return 0;

not_copied = copy_from_user(buf, (void __user *)(seg_base + regs->ip),
MAX_INSN_SIZE);
not_copied = __copy_from_user_inatomic(buf, (void __user *)ip, MAX_INSN_SIZE);

return MAX_INSN_SIZE - not_copied;
}
Expand Down

0 comments on commit 0a7c10d

Please sign in to comment.