x86/fpu: Remove use_eager_fpu()
This removes all the obvious code paths that depend on lazy FPU mode.
It shouldn't change the generated code at all.
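use_eager_fpu() was already hardwired to return true, so every branch on it was resolved at compile time; removing those branches deletes source text, not instructions. A minimal stand-alone sketch (plain C, not kernel code) of why such branches fold away:

#include <stdbool.h>

/* Stand-in for the helper this patch deletes: a pure compile-time constant. */
static inline bool use_eager_fpu(void)
{
        return true;
}

/* Hypothetical stand-ins for the kernel's eager and lazy code paths. */
static int eager_path(void) { return 1; }
static int lazy_path(void)  { return 0; }

int choose_path(void)
{
        /*
         * The condition is a compile-time constant, so the compiler already
         * kept eager_path() and discarded lazy_path() as dead code; deleting
         * the branch from the source changes nothing in the object code.
         */
        if (use_eager_fpu())
                return eager_path();
        return lazy_path();
}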

Signed-off-by: Andy Lutomirski <[email protected]>
Signed-off-by: Rik van Riel <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Brian Gerst <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Denys Vlasenko <[email protected]>
Cc: Fenghua Yu <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Josh Poimboeuf <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Oleg Nesterov <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Quentin Casasnovas <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Ingo Molnar <[email protected]>
amluto authored and Ingo Molnar committed Oct 7, 2016
1 parent 2f7fada commit c592b57
Showing 7 changed files with 14 additions and 106 deletions.
17 changes: 4 additions & 13 deletions arch/x86/crypto/crc32c-intel_glue.c
@@ -48,21 +48,13 @@
 #ifdef CONFIG_X86_64
 /*
  * use carryless multiply version of crc32c when buffer
- * size is >= 512 (when eager fpu is enabled) or
- * >= 1024 (when eager fpu is disabled) to account
+ * size is >= 512 to account
  * for fpu state save/restore overhead.
  */
-#define CRC32C_PCL_BREAKEVEN_EAGERFPU	512
-#define CRC32C_PCL_BREAKEVEN_NOEAGERFPU	1024
+#define CRC32C_PCL_BREAKEVEN		512
 
 asmlinkage unsigned int crc_pcl(const u8 *buffer, int len,
 				unsigned int crc_init);
-static int crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_EAGERFPU;
-#define set_pcl_breakeven_point()					\
-do {									\
-	if (!use_eager_fpu())						\
-		crc32c_pcl_breakeven = CRC32C_PCL_BREAKEVEN_NOEAGERFPU;	\
-} while (0)
 #endif /* CONFIG_X86_64 */
 
 static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, size_t length)
@@ -185,7 +177,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
 	 * use faster PCL version if datasize is large enough to
 	 * overcome kernel fpu state save/restore overhead
 	 */
-	if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) {
+	if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) {
 		kernel_fpu_begin();
 		*crcp = crc_pcl(data, len, *crcp);
 		kernel_fpu_end();
@@ -197,7 +189,7 @@ static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data,
 static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned int len,
 				u8 *out)
 {
-	if (len >= crc32c_pcl_breakeven && irq_fpu_usable()) {
+	if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) {
 		kernel_fpu_begin();
 		*(__le32 *)out = ~cpu_to_le32(crc_pcl(data, len, *crcp));
 		kernel_fpu_end();
@@ -256,7 +248,6 @@ static int __init crc32c_intel_mod_init(void)
 		alg.update = crc32c_pcl_intel_update;
 		alg.finup = crc32c_pcl_intel_finup;
 		alg.digest = crc32c_pcl_intel_digest;
-		set_pcl_breakeven_point();
 	}
 #endif
 	return crypto_register_shash(&alg);
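The driver's guarded fast path is easier to read in one piece. A condensed sketch follows; crc_pcl(), crc32c_intel_le_hw() and CRC32C_PCL_BREAKEVEN are the names from this file, but the function itself is illustrative only, not a verbatim excerpt:

/* Condensed sketch of the guarded fast path, in the context of this file. */
static u32 crc32c_update_sketch(u32 crc, const u8 *data, unsigned int len)
{
	/*
	 * Take the PCLMULQDQ path only when the buffer is long enough to
	 * amortize the kernel FPU save/restore and FPU use is legal in
	 * the current context:
	 */
	if (len >= CRC32C_PCL_BREAKEVEN && irq_fpu_usable()) {
		kernel_fpu_begin();
		crc = crc_pcl(data, len, crc);
		kernel_fpu_end();
		return crc;
	}
	/* Otherwise fall back to the CRC32 instruction path (no XMM state). */
	return crc32c_intel_le_hw(crc, data, len);
}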
34 changes: 1 addition & 33 deletions arch/x86/include/asm/fpu/internal.h
@@ -60,11 +60,6 @@ extern u64 fpu__get_supported_xfeatures_mask(void);
 /*
  * FPU related CPU feature flag helper routines:
  */
-static __always_inline __pure bool use_eager_fpu(void)
-{
-	return true;
-}
-
 static __always_inline __pure bool use_xsaveopt(void)
 {
 	return static_cpu_has(X86_FEATURE_XSAVEOPT);
@@ -501,24 +496,6 @@ static inline int fpu_want_lazy_restore(struct fpu *fpu, unsigned int cpu)
 }
 
 
-/*
- * Wrap lazy FPU TS handling in a 'hw fpregs activation/deactivation'
- * idiom, which is then paired with the sw-flag (fpregs_active) later on:
- */
-
-static inline void __fpregs_activate_hw(void)
-{
-	if (!use_eager_fpu())
-		clts();
-}
-
-static inline void __fpregs_deactivate_hw(void)
-{
-	if (!use_eager_fpu())
-		stts();
-}
-
-/* Must be paired with an 'stts' (fpregs_deactivate_hw()) after! */
 static inline void __fpregs_deactivate(struct fpu *fpu)
 {
 	WARN_ON_FPU(!fpu->fpregs_active);
@@ -528,7 +505,6 @@ static inline void __fpregs_deactivate(struct fpu *fpu)
 	trace_x86_fpu_regs_deactivated(fpu);
 }
 
-/* Must be paired with a 'clts' (fpregs_activate_hw()) before! */
 static inline void __fpregs_activate(struct fpu *fpu)
 {
 	WARN_ON_FPU(fpu->fpregs_active);
@@ -554,22 +530,17 @@ static inline int fpregs_active(void)
 }
 
 /*
- * Encapsulate the CR0.TS handling together with the
- * software flag.
- *
  * These generally need preemption protection to work,
  * do try to avoid using these on their own.
  */
 static inline void fpregs_activate(struct fpu *fpu)
 {
-	__fpregs_activate_hw();
 	__fpregs_activate(fpu);
 }
 
 static inline void fpregs_deactivate(struct fpu *fpu)
 {
 	__fpregs_deactivate(fpu);
-	__fpregs_deactivate_hw();
 }
 
 /*
@@ -596,8 +567,7 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
 	 * or if the past 5 consecutive context-switches used math.
 	 */
 	fpu.preload = static_cpu_has(X86_FEATURE_FPU) &&
-		      new_fpu->fpstate_active &&
-		      (use_eager_fpu() || new_fpu->counter > 5);
+		      new_fpu->fpstate_active;
 
 	if (old_fpu->fpregs_active) {
 		if (!copy_fpregs_to_fpstate(old_fpu))
@@ -615,8 +585,6 @@ switch_fpu_prepare(struct fpu *old_fpu, struct fpu *new_fpu, int cpu)
 			__fpregs_activate(new_fpu);
 			trace_x86_fpu_regs_activated(new_fpu);
 			prefetch(&new_fpu->state);
-		} else {
-			__fpregs_deactivate_hw();
 		}
 	} else {
 		old_fpu->counter = 0;
38 changes: 5 additions & 33 deletions arch/x86/kernel/fpu/core.c
@@ -57,27 +57,9 @@ static bool kernel_fpu_disabled(void)
 	return this_cpu_read(in_kernel_fpu);
 }
 
-/*
- * Were we in an interrupt that interrupted kernel mode?
- *
- * On others, we can do a kernel_fpu_begin/end() pair *ONLY* if that
- * pair does nothing at all: the thread must not have fpu (so
- * that we don't try to save the FPU state), and TS must
- * be set (so that the clts/stts pair does nothing that is
- * visible in the interrupted kernel thread).
- *
- * Except for the eagerfpu case when we return true; in the likely case
- * the thread has FPU but we are not going to set/clear TS.
- */
 static bool interrupted_kernel_fpu_idle(void)
 {
-	if (kernel_fpu_disabled())
-		return false;
-
-	if (use_eager_fpu())
-		return true;
-
-	return !current->thread.fpu.fpregs_active && (read_cr0() & X86_CR0_TS);
+	return !kernel_fpu_disabled();
 }
 
 /*
@@ -125,7 +107,6 @@ void __kernel_fpu_begin(void)
 		copy_fpregs_to_fpstate(fpu);
 	} else {
 		this_cpu_write(fpu_fpregs_owner_ctx, NULL);
-		__fpregs_activate_hw();
 	}
 }
 EXPORT_SYMBOL(__kernel_fpu_begin);
@@ -136,8 +117,6 @@ void __kernel_fpu_end(void)
 
 	if (fpu->fpregs_active)
 		copy_kernel_to_fpregs(&fpu->state);
-	else
-		__fpregs_deactivate_hw();
 
 	kernel_fpu_enable();
 }
@@ -199,10 +178,7 @@ void fpu__save(struct fpu *fpu)
 	trace_x86_fpu_before_save(fpu);
 	if (fpu->fpregs_active) {
 		if (!copy_fpregs_to_fpstate(fpu)) {
-			if (use_eager_fpu())
-				copy_kernel_to_fpregs(&fpu->state);
-			else
-				fpregs_deactivate(fpu);
+			copy_kernel_to_fpregs(&fpu->state);
 		}
 	}
 	trace_x86_fpu_after_save(fpu);
@@ -259,8 +235,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 	 * Don't let 'init optimized' areas of the XSAVE area
 	 * leak into the child task:
 	 */
-	if (use_eager_fpu())
-		memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
+	memset(&dst_fpu->state.xsave, 0, fpu_kernel_xstate_size);
 
 	/*
 	 * Save current FPU registers directly into the child
@@ -282,10 +257,7 @@ int fpu__copy(struct fpu *dst_fpu, struct fpu *src_fpu)
 		memcpy(&src_fpu->state, &dst_fpu->state,
 		       fpu_kernel_xstate_size);
 
-		if (use_eager_fpu())
-			copy_kernel_to_fpregs(&src_fpu->state);
-		else
-			fpregs_deactivate(src_fpu);
+		copy_kernel_to_fpregs(&src_fpu->state);
 	}
 	preempt_enable();
 
@@ -517,7 +489,7 @@ void fpu__clear(struct fpu *fpu)
 {
 	WARN_ON_FPU(fpu != &current->thread.fpu); /* Almost certainly an anomaly */
 
-	if (!use_eager_fpu() || !static_cpu_has(X86_FEATURE_FPU)) {
+	if (!static_cpu_has(X86_FEATURE_FPU)) {
 		/* FPU state will be reallocated lazily at the first use. */
 		fpu__drop(fpu);
 	} else {
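With eager FPU, the only thing interrupted_kernel_fpu_idle() still has to check is whether a kernel_fpu_begin() section was interrupted; there is no CR0.TS state left to consider. For context, its sole caller in the same-era fpu/core.c (quoted from memory, unchanged by this patch) composes it as:

/*
 * An interrupt may use the FPU if it interrupted user mode, or if the
 * interrupted kernel code was not itself inside a kernel_fpu_begin()
 * section:
 */
bool irq_fpu_usable(void)
{
	return !in_interrupt() ||
		interrupted_user_mode() ||
		interrupted_kernel_fpu_idle();
}
EXPORT_SYMBOL(irq_fpu_usable);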
8 changes: 3 additions & 5 deletions arch/x86/kernel/fpu/signal.c
@@ -340,11 +340,9 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
 		}
 
 		fpu->fpstate_active = 1;
-		if (use_eager_fpu()) {
-			preempt_disable();
-			fpu__restore(fpu);
-			preempt_enable();
-		}
+		preempt_disable();
+		fpu__restore(fpu);
+		preempt_enable();
 
 		return err;
 	} else {
9 changes: 0 additions & 9 deletions arch/x86/kernel/fpu/xstate.c
@@ -886,15 +886,6 @@ int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 	 */
 	if (!boot_cpu_has(X86_FEATURE_OSPKE))
 		return -EINVAL;
-	/*
-	 * For most XSAVE components, this would be an arduous task:
-	 * brining fpstate up to date with fpregs, updating fpstate,
-	 * then re-populating fpregs. But, for components that are
-	 * never lazily managed, we can just access the fpregs
-	 * directly. PKRU is never managed lazily, so we can just
-	 * manipulate it directly. Make sure it stays that way.
-	 */
-	WARN_ON_ONCE(!use_eager_fpu());
 
 	/* Set the bits we need in PKRU: */
 	if (init_val & PKEY_DISABLE_ACCESS)
4 changes: 1 addition & 3 deletions arch/x86/kvm/cpuid.c
@@ -16,7 +16,6 @@
 #include <linux/export.h>
 #include <linux/vmalloc.h>
 #include <linux/uaccess.h>
-#include <asm/fpu/internal.h> /* For use_eager_fpu.  Ugh! */
 #include <asm/user.h>
 #include <asm/fpu/xstate.h>
 #include "cpuid.h"
@@ -114,8 +113,7 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
 	if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
 		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
 
-	if (use_eager_fpu())
-		kvm_x86_ops->fpu_activate(vcpu);
+	kvm_x86_ops->fpu_activate(vcpu);
 
 	/*
 	 * The existing code assumes virtual address is 48-bit in the canonical
10 changes: 0 additions & 10 deletions arch/x86/kvm/x86.c
@@ -7357,16 +7357,6 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 	copy_fpregs_to_fpstate(&vcpu->arch.guest_fpu);
 	__kernel_fpu_end();
 	++vcpu->stat.fpu_reload;
-	/*
-	 * If using eager FPU mode, or if the guest is a frequent user
-	 * of the FPU, just leave the FPU active for next time.
-	 * Every 255 times fpu_counter rolls over to 0; a guest that uses
-	 * the FPU in bursts will revert to loading it on demand.
-	 */
-	if (!use_eager_fpu()) {
-		if (++vcpu->fpu_counter < 5)
-			kvm_make_request(KVM_REQ_DEACTIVATE_FPU, vcpu);
-	}
 	trace_kvm_fpu(0);
 }

Expand Down
