Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fpu changes from Ingo Molnar:
 "Various x86 FPU handling cleanups, refactorings and fixes (Borislav
  Petkov, Oleg Nesterov, Rik van Riel)"

* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits)
  x86/fpu: Kill eager_fpu_init_bp()
  x86/fpu: Don't allocate fpu->state for swapper/0
  x86/fpu: Rename drop_init_fpu() to fpu_reset_state()
  x86/fpu: Fold __drop_fpu() into its sole user
  x86/fpu: Don't abuse drop_init_fpu() in flush_thread()
  x86/fpu: Use restore_init_xstate() instead of math_state_restore() on kthread exec
  x86/fpu: Introduce restore_init_xstate()
  x86/fpu: Document user_fpu_begin()
  x86/fpu: Factor out memset(xstate, 0) in fpu_finit() paths
  x86/fpu: Change xstateregs_get()/set() to use ->xsave.i387 rather than ->fxsave
  x86/fpu: Don't abuse FPU in kernel threads if use_eager_fpu()
  x86/fpu: Always allow FPU in interrupt if use_eager_fpu()
  x86/fpu: __kernel_fpu_begin() should clear fpu_owner_task even if use_eager_fpu()
  x86/fpu: Also check fpu_lazy_restore() when use_eager_fpu()
  x86/fpu: Use task_disable_lazy_fpu_restore() helper
  x86/fpu: Use an explicit if/else in switch_fpu_prepare()
  x86/fpu: Introduce task_disable_lazy_fpu_restore() helper
  x86/fpu: Move lazy restore functions up a few lines
  x86/fpu: Change math_error() to use unlazy_fpu(), kill (now) unused save_init_fpu()
  x86/fpu: Don't do __thread_fpu_end() if use_eager_fpu()
  ...
torvalds committed Apr 13, 2015
2 parents 64f004a + 7fc253e commit 421ec90
Showing 6 changed files with 117 additions and 131 deletions.
130 changes: 66 additions & 64 deletions arch/x86/include/asm/fpu-internal.h
@@ -67,6 +67,34 @@ extern void finit_soft_fpu(struct i387_soft_struct *soft);
static inline void finit_soft_fpu(struct i387_soft_struct *soft) {}
#endif

/*
* Must be run with preemption disabled: this clears the fpu_owner_task
* on this CPU.
*
* This will disable any lazy FPU state restore of the current FPU state,
* but if the current thread owns the FPU, its state will still be saved.
*/
static inline void __cpu_disable_lazy_restore(unsigned int cpu)
{
per_cpu(fpu_owner_task, cpu) = NULL;
}

/*
* Used to indicate that the FPU state in memory is newer than the FPU
* state in registers, and the FPU state should be reloaded next time the
* task is run. Only safe on the current task, or non-running tasks.
*/
static inline void task_disable_lazy_fpu_restore(struct task_struct *tsk)
{
tsk->thread.fpu.last_cpu = ~0;
}

static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
{
return new == this_cpu_read_stable(fpu_owner_task) &&
cpu == new->thread.fpu.last_cpu;
}
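
Taken together, these helpers implement a two-sided handshake: a task's FPU registers may be reused lazily only while the CPU still names the task in fpu_owner_task and the task still names the CPU in fpu.last_cpu; clearing either side forces a full restore on the next switch. A minimal standalone sketch of the invariant (the helper names, NR_CPUS and the plain array are simplified stand-ins for the kernel's per-CPU machinery):

#define NR_CPUS 8

struct task { struct { int last_cpu; } fpu; };

static struct task *fpu_owner_task_sketch[NR_CPUS];	/* per-CPU in the kernel */

/* CPU side: forget the owner (what __cpu_disable_lazy_restore() does). */
static void cpu_forget_owner(int cpu)
{
	fpu_owner_task_sketch[cpu] = NULL;
}

/* Task side: the memory copy is now newest (task_disable_lazy_fpu_restore()). */
static void task_forget_cpu(struct task *tsk)
{
	tsk->fpu.last_cpu = -1;
}

/* Lazy reuse is legal only if both sides still point at each other. */
static int lazy_restore_ok(struct task *new, int cpu)
{
	return fpu_owner_task_sketch[cpu] == new && new->fpu.last_cpu == cpu;
}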

static inline int is_ia32_compat_frame(void)
{
return config_enabled(CONFIG_IA32_EMULATION) &&
@@ -107,7 +135,6 @@ static __always_inline __pure bool use_fxsr(void)

static inline void fx_finit(struct i387_fxsave_struct *fx)
{
memset(fx, 0, xstate_size);
fx->cwd = 0x37f;
fx->mxcsr = MXCSR_DEFAULT;
}
@@ -351,39 +378,44 @@ static inline void __thread_fpu_begin(struct task_struct *tsk)
__thread_set_has_fpu(tsk);
}

static inline void __drop_fpu(struct task_struct *tsk)
static inline void drop_fpu(struct task_struct *tsk)
{
/*
* Forget coprocessor state..
*/
preempt_disable();
tsk->thread.fpu_counter = 0;

if (__thread_has_fpu(tsk)) {
/* Ignore delayed exceptions from user space */
asm volatile("1: fwait\n"
"2:\n"
_ASM_EXTABLE(1b, 2b));
__thread_fpu_end(tsk);
}
}

static inline void drop_fpu(struct task_struct *tsk)
{
/*
* Forget coprocessor state..
*/
preempt_disable();
tsk->thread.fpu_counter = 0;
__drop_fpu(tsk);
clear_stopped_child_used_math(tsk);
preempt_enable();
}

static inline void drop_init_fpu(struct task_struct *tsk)
static inline void restore_init_xstate(void)
{
if (use_xsave())
xrstor_state(init_xstate_buf, -1);
else
fxrstor_checking(&init_xstate_buf->i387);
}

/*
* Reset the FPU state in the eager case and drop it in the lazy case (later use
* will reinit it).
*/
static inline void fpu_reset_state(struct task_struct *tsk)
{
if (!use_eager_fpu())
drop_fpu(tsk);
else {
if (use_xsave())
xrstor_state(init_xstate_buf, -1);
else
fxrstor_checking(&init_xstate_buf->i387);
}
else
restore_init_xstate();
}

/*
@@ -400,24 +432,6 @@ static inline void drop_init_fpu(struct task_struct *tsk)
*/
typedef struct { int preload; } fpu_switch_t;

/*
* Must be run with preemption disabled: this clears the fpu_owner_task
* on this CPU.
*
* This will disable any lazy FPU state restore of the current FPU state,
* but if the current thread owns the FPU, its state will still be saved.
*/
static inline void __cpu_disable_lazy_restore(unsigned int cpu)
{
per_cpu(fpu_owner_task, cpu) = NULL;
}

static inline int fpu_lazy_restore(struct task_struct *new, unsigned int cpu)
{
return new == this_cpu_read_stable(fpu_owner_task) &&
cpu == new->thread.fpu.last_cpu;
}

static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
{
fpu_switch_t fpu;
@@ -426,13 +440,17 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
* If the task has used the math, pre-load the FPU on xsave processors
* or if the past 5 consecutive context-switches used math.
*/
fpu.preload = tsk_used_math(new) && (use_eager_fpu() ||
new->thread.fpu_counter > 5);
fpu.preload = tsk_used_math(new) &&
(use_eager_fpu() || new->thread.fpu_counter > 5);

if (__thread_has_fpu(old)) {
if (!__save_init_fpu(old))
cpu = ~0;
old->thread.fpu.last_cpu = cpu;
old->thread.fpu.has_fpu = 0; /* But leave fpu_owner_task! */
task_disable_lazy_fpu_restore(old);
else
old->thread.fpu.last_cpu = cpu;

/* But leave fpu_owner_task! */
old->thread.fpu.has_fpu = 0;

/* Don't change CR0.TS if we just switch! */
if (fpu.preload) {
@@ -443,10 +461,10 @@ static inline fpu_switch_t switch_fpu_prepare(struct task_struct *old, struct task_struct *new, int cpu)
stts();
} else {
old->thread.fpu_counter = 0;
old->thread.fpu.last_cpu = ~0;
task_disable_lazy_fpu_restore(old);
if (fpu.preload) {
new->thread.fpu_counter++;
if (!use_eager_fpu() && fpu_lazy_restore(new, cpu))
if (fpu_lazy_restore(new, cpu))
fpu.preload = 0;
else
prefetch(new->thread.fpu.state);
Expand All @@ -466,7 +484,7 @@ static inline void switch_fpu_finish(struct task_struct *new, fpu_switch_t fpu)
{
if (fpu.preload) {
if (unlikely(restore_fpu_checking(new)))
drop_init_fpu(new);
fpu_reset_state(new);
}
}
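
For context, switch_fpu_prepare() and switch_fpu_finish() are paired around the actual task switch. A condensed sketch of the calling pattern, modeled on __switch_to() in arch/x86/kernel/process_64.c of this era, with everything unrelated to the FPU elided:

__visible struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	int cpu = smp_processor_id();
	fpu_switch_t fpu;

	/* Save prev's live FPU state (if it owns the FPU) and decide
	   whether next's state should be preloaded. */
	fpu = switch_fpu_prepare(prev_p, next_p, cpu);

	/* ... switch stacks, TLS, segment and debug registers ... */

	/* Restore next's registers if preload was chosen; a failed
	   restore falls back to fpu_reset_state(). */
	switch_fpu_finish(next_p, fpu);

	return prev_p;
}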

@@ -495,10 +513,12 @@ static inline int restore_xstate_sig(void __user *buf, int ia32_frame)
}

/*
* Need to be preemption-safe.
* Needs to be preemption-safe.
*
* NOTE! user_fpu_begin() must be used only immediately before restoring
* it. This function does not do any save/restore on their own.
* the save state. It does not do any saving/restoring on its own. In
* lazy FPU mode, it is just an optimization to avoid a #NM exception,
* the task can lose the FPU right after preempt_enable().
*/
static inline void user_fpu_begin(void)
{
@@ -519,24 +539,6 @@ static inline void __save_fpu(struct task_struct *tsk)
fpu_fxsave(&tsk->thread.fpu);
}

/*
* These disable preemption on their own and are safe
*/
static inline void save_init_fpu(struct task_struct *tsk)
{
WARN_ON_ONCE(!__thread_has_fpu(tsk));

if (use_eager_fpu()) {
__save_fpu(tsk);
return;
}

preempt_disable();
__save_init_fpu(tsk);
__thread_fpu_end(tsk);
preempt_enable();
}

/*
* i387 state interaction
*/
54 changes: 26 additions & 28 deletions arch/x86/kernel/i387.c
@@ -42,16 +42,16 @@ void kernel_fpu_enable(void)
* be set (so that the clts/stts pair does nothing that is
* visible in the interrupted kernel thread).
*
* Except for the eagerfpu case when we return 1 unless we've already
* been eager and saved the state in kernel_fpu_begin().
* Except for the eagerfpu case when we return true; in the likely case
* the thread has FPU but we are not going to set/clear TS.
*/
static inline bool interrupted_kernel_fpu_idle(void)
{
if (this_cpu_read(in_kernel_fpu))
return false;

if (use_eager_fpu())
return __thread_has_fpu(current);
return true;

return !__thread_has_fpu(current) &&
(read_cr0() & X86_CR0_TS);
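
interrupted_kernel_fpu_idle() is what irq_fpu_usable() consults before kernel code may touch FPU/SIMD registers. A sketch of the standard caller pattern (irq_fpu_usable(), kernel_fpu_begin() and kernel_fpu_end() are the real API; the two xor helpers are hypothetical stand-ins):

static void xor_buffers(unsigned long *dst, const unsigned long *src,
			unsigned int words)
{
	if (!irq_fpu_usable()) {
		/* Hypothetical integer fallback path. */
		xor_buffers_generic(dst, src, words);
		return;
	}

	kernel_fpu_begin();	/* disables preemption, saves the owner's state */
	xor_buffers_sse(dst, src, words);	/* hypothetical SIMD body */
	kernel_fpu_end();	/* restores/resets state, re-enables preemption */
}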
@@ -94,9 +94,10 @@ void __kernel_fpu_begin(void)

if (__thread_has_fpu(me)) {
__save_init_fpu(me);
} else if (!use_eager_fpu()) {
} else {
this_cpu_write(fpu_owner_task, NULL);
clts();
if (!use_eager_fpu())
clts();
}
}
EXPORT_SYMBOL(__kernel_fpu_begin);
@@ -107,7 +108,7 @@ void __kernel_fpu_end(void)

if (__thread_has_fpu(me)) {
if (WARN_ON(restore_fpu_checking(me)))
drop_init_fpu(me);
fpu_reset_state(me);
} else if (!use_eager_fpu()) {
stts();
}
@@ -120,10 +121,13 @@ void unlazy_fpu(struct task_struct *tsk)
{
preempt_disable();
if (__thread_has_fpu(tsk)) {
__save_init_fpu(tsk);
__thread_fpu_end(tsk);
} else
tsk->thread.fpu_counter = 0;
if (use_eager_fpu()) {
__save_fpu(tsk);
} else {
__save_init_fpu(tsk);
__thread_fpu_end(tsk);
}
}
preempt_enable();
}
EXPORT_SYMBOL(unlazy_fpu);
@@ -221,11 +225,12 @@ void fpu_finit(struct fpu *fpu)
return;
}

memset(fpu->state, 0, xstate_size);

if (cpu_has_fxsr) {
fx_finit(&fpu->state->fxsave);
} else {
struct i387_fsave_struct *fp = &fpu->state->fsave;
memset(fp, 0, xstate_size);
fp->cwd = 0xffff037fu;
fp->swd = 0xffff0000u;
fp->twd = 0xffffffffu;
@@ -247,7 +252,7 @@ int init_fpu(struct task_struct *tsk)
if (tsk_used_math(tsk)) {
if (cpu_has_fpu && tsk == current)
unlazy_fpu(tsk);
tsk->thread.fpu.last_cpu = ~0;
task_disable_lazy_fpu_restore(tsk);
return 0;
}

@@ -336,6 +341,7 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
struct xsave_struct *xsave = &target->thread.fpu.state->xsave;
int ret;

if (!cpu_has_xsave)
@@ -350,23 +356,21 @@ int xstateregs_get(struct task_struct *target, const struct user_regset *regset,
* memory layout in the thread struct, so that we can copy the entire
* xstateregs to the user using one user_regset_copyout().
*/
memcpy(&target->thread.fpu.state->fxsave.sw_reserved,
xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));

memcpy(&xsave->i387.sw_reserved,
xstate_fx_sw_bytes, sizeof(xstate_fx_sw_bytes));
/*
* Copy the xstate memory layout.
*/
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&target->thread.fpu.state->xsave, 0, -1);
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
return ret;
}

int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
unsigned int pos, unsigned int count,
const void *kbuf, const void __user *ubuf)
{
struct xsave_struct *xsave = &target->thread.fpu.state->xsave;
int ret;
struct xsave_hdr_struct *xsave_hdr;

if (!cpu_has_xsave)
return -ENODEV;
@@ -375,22 +379,16 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
if (ret)
return ret;

ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&target->thread.fpu.state->xsave, 0, -1);

ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
/*
* mxcsr reserved bits must be masked to zero for security reasons.
*/
target->thread.fpu.state->fxsave.mxcsr &= mxcsr_feature_mask;

xsave_hdr = &target->thread.fpu.state->xsave.xsave_hdr;

xsave_hdr->xstate_bv &= pcntxt_mask;
xsave->i387.mxcsr &= mxcsr_feature_mask;
xsave->xsave_hdr.xstate_bv &= pcntxt_mask;
/*
* These bits must be zero.
*/
memset(xsave_hdr->reserved, 0, 48);

memset(&xsave->xsave_hdr.reserved, 0, 48);
return ret;
}
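
The switch from ->fxsave to ->xsave.i387 in the two functions above is safe because the two names alias the same bytes: the legacy FXSAVE image forms the first 512 bytes of the XSAVE area, and the state field is a union. Condensed from this era's arch/x86/include/asm/processor.h:

union thread_xstate {
	struct i387_fsave_struct	fsave;
	struct i387_fxsave_struct	fxsave;
	struct i387_soft_struct		soft;
	struct xsave_struct		xsave;	/* starts with struct i387_fxsave_struct i387 */
};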

19 changes: 12 additions & 7 deletions arch/x86/kernel/process.c
@@ -89,8 +89,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)

dst->thread.fpu_counter = 0;
dst->thread.fpu.has_fpu = 0;
dst->thread.fpu.last_cpu = ~0;
dst->thread.fpu.state = NULL;
task_disable_lazy_fpu_restore(dst);
if (tsk_used_math(src)) {
int err = fpu_alloc(&dst->thread.fpu);
if (err)
@@ -151,13 +151,18 @@ void flush_thread(void)

flush_ptrace_hw_breakpoint(tsk);
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
drop_init_fpu(tsk);
/*
* Free the FPU state for non xsave platforms. They get reallocated
* lazily at the first use.
*/
if (!use_eager_fpu())

if (!use_eager_fpu()) {
/* FPU state will be reallocated lazily at the first use. */
drop_fpu(tsk);
free_thread_xstate(tsk);
} else if (!used_math()) {
/* kthread execs. TODO: cleanup this horror. */
if (WARN_ON(init_fpu(tsk)))
force_sig(SIGKILL, tsk);
user_fpu_begin();
restore_init_xstate();
}
}

static void hard_disable_TSC(void)