Skip to content

Commit

Permalink
Merge branch 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linu…
Browse files Browse the repository at this point in the history
…x/kernel/git/tip/tip

Pull x86 fpu changes from Ingo Molnar:
 "There are two main areas of changes:

   - Rework of the extended FPU state code to robustify the kernel's
     usage of cpuid provided xstate sizes - and related changes (Dave
     Hansen)

   - math emulation enhancements: new modern FPU instructions support,
     with testcases, plus cleanups (Denys Vlasenko)"

* 'x86-fpu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (23 commits)
  x86/fpu: Fixup uninitialized feature_name warning
  x86/fpu/math-emu: Add support for FISTTP instructions
  x86/fpu/math-emu, selftests: Add test for FISTTP instructions
  x86/fpu/math-emu: Add support for FCMOVcc insns
  x86/fpu/math-emu: Add support for F[U]COMI[P] insns
  x86/fpu/math-emu: Remove define layer for undocumented opcodes
  x86/fpu/math-emu, selftests: Add tests for FCMOV and FCOMI insns
  x86/fpu/math-emu: Remove !NO_UNDOC_CODE
  x86/fpu: Check CPU-provided sizes against struct declarations
  x86/fpu: Check to ensure increasing-offset xstate offsets
  x86/fpu: Correct and check XSAVE xstate size calculations
  x86/fpu: Add C structures for AVX-512 state components
  x86/fpu: Rework YMM definition
  x86/fpu/mpx: Rework MPX 'xstate' types
  x86/fpu: Add xfeature_enabled() helper instead of test_bit()
  x86/fpu: Remove 'xfeature_nr'
  x86/fpu: Rework XSTATE_* macros to remove magic '2'
  x86/fpu: Rename XFEATURES_NR_MAX
  x86/fpu: Rename XSAVE macros
  x86/fpu: Remove partial LWP support definitions
  ...
  • Loading branch information
torvalds committed Nov 4, 2015
2 parents 0f25f2c + 158ecc3 commit ce4d72f
Show file tree
Hide file tree
Showing 33 changed files with 1,374 additions and 243 deletions.
3 changes: 2 additions & 1 deletion arch/x86/crypto/camellia_aesni_avx2_glue.c
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,8 @@ static int __init camellia_aesni_init(void)
return -ENODEV;
}

if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
&feature_name)) {
pr_info("CPU feature '%s' is not supported.\n", feature_name);
return -ENODEV;
}
Expand Down
3 changes: 2 additions & 1 deletion arch/x86/crypto/camellia_aesni_avx_glue.c
Original file line number Diff line number Diff line change
Expand Up @@ -559,7 +559,8 @@ static int __init camellia_aesni_init(void)
return -ENODEV;
}

if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
&feature_name)) {
pr_info("CPU feature '%s' is not supported.\n", feature_name);
return -ENODEV;
}
Expand Down
3 changes: 2 additions & 1 deletion arch/x86/crypto/cast5_avx_glue.c
Original file line number Diff line number Diff line change
Expand Up @@ -469,7 +469,8 @@ static int __init cast5_init(void)
{
const char *feature_name;

if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
&feature_name)) {
pr_info("CPU feature '%s' is not supported.\n", feature_name);
return -ENODEV;
}
Expand Down
3 changes: 2 additions & 1 deletion arch/x86/crypto/cast6_avx_glue.c
Original file line number Diff line number Diff line change
Expand Up @@ -591,7 +591,8 @@ static int __init cast6_init(void)
{
const char *feature_name;

if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
&feature_name)) {
pr_info("CPU feature '%s' is not supported.\n", feature_name);
return -ENODEV;
}
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/crypto/chacha20_glue.c
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ static int __init chacha20_simd_mod_init(void)

#ifdef CONFIG_AS_AVX2
chacha20_use_avx2 = cpu_has_avx && cpu_has_avx2 &&
cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL);
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
#endif
return crypto_register_alg(&alg);
}
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/crypto/poly1305_glue.c
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ static int __init poly1305_simd_mod_init(void)

#ifdef CONFIG_AS_AVX2
poly1305_use_avx2 = cpu_has_avx && cpu_has_avx2 &&
cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL);
cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
alg.descsize = sizeof(struct poly1305_simd_desc_ctx);
if (poly1305_use_avx2)
alg.descsize += 10 * sizeof(u32);
Expand Down
3 changes: 2 additions & 1 deletion arch/x86/crypto/serpent_avx2_glue.c
Original file line number Diff line number Diff line change
Expand Up @@ -542,7 +542,8 @@ static int __init init(void)
pr_info("AVX2 instructions are not detected.\n");
return -ENODEV;
}
if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
&feature_name)) {
pr_info("CPU feature '%s' is not supported.\n", feature_name);
return -ENODEV;
}
Expand Down
3 changes: 2 additions & 1 deletion arch/x86/crypto/serpent_avx_glue.c
Original file line number Diff line number Diff line change
Expand Up @@ -597,7 +597,8 @@ static int __init serpent_init(void)
{
const char *feature_name;

if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
&feature_name)) {
pr_info("CPU feature '%s' is not supported.\n", feature_name);
return -ENODEV;
}
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/crypto/sha1_ssse3_glue.c
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ static struct shash_alg alg = {
#ifdef CONFIG_AS_AVX
static bool __init avx_usable(void)
{
if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) {
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
if (cpu_has_avx)
pr_info("AVX detected but unusable.\n");
return false;
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/crypto/sha256_ssse3_glue.c
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ static struct shash_alg algs[] = { {
#ifdef CONFIG_AS_AVX
static bool __init avx_usable(void)
{
if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) {
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
if (cpu_has_avx)
pr_info("AVX detected but unusable.\n");
return false;
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/crypto/sha512_ssse3_glue.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ static struct shash_alg algs[] = { {
#ifdef CONFIG_AS_AVX
static bool __init avx_usable(void)
{
if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) {
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
if (cpu_has_avx)
pr_info("AVX detected but unusable.\n");
return false;
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/crypto/twofish_avx_glue.c
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,7 @@ static int __init twofish_init(void)
{
const char *feature_name;

if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, &feature_name)) {
if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, &feature_name)) {
pr_info("CPU feature '%s' is not supported.\n", feature_name);
return -ENODEV;
}
Expand Down
148 changes: 103 additions & 45 deletions arch/x86/include/asm/fpu/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,94 +95,152 @@ struct swregs_state {
/*
* List of XSAVE features Linux knows about:
*/
enum xfeature_bit {
XSTATE_BIT_FP,
XSTATE_BIT_SSE,
XSTATE_BIT_YMM,
XSTATE_BIT_BNDREGS,
XSTATE_BIT_BNDCSR,
XSTATE_BIT_OPMASK,
XSTATE_BIT_ZMM_Hi256,
XSTATE_BIT_Hi16_ZMM,

XFEATURES_NR_MAX,
enum xfeature {
XFEATURE_FP,
XFEATURE_SSE,
/*
* Values above here are "legacy states".
* Those below are "extended states".
*/
XFEATURE_YMM,
XFEATURE_BNDREGS,
XFEATURE_BNDCSR,
XFEATURE_OPMASK,
XFEATURE_ZMM_Hi256,
XFEATURE_Hi16_ZMM,

XFEATURE_MAX,
};

#define XSTATE_FP (1 << XSTATE_BIT_FP)
#define XSTATE_SSE (1 << XSTATE_BIT_SSE)
#define XSTATE_YMM (1 << XSTATE_BIT_YMM)
#define XSTATE_BNDREGS (1 << XSTATE_BIT_BNDREGS)
#define XSTATE_BNDCSR (1 << XSTATE_BIT_BNDCSR)
#define XSTATE_OPMASK (1 << XSTATE_BIT_OPMASK)
#define XSTATE_ZMM_Hi256 (1 << XSTATE_BIT_ZMM_Hi256)
#define XSTATE_Hi16_ZMM (1 << XSTATE_BIT_Hi16_ZMM)
#define XFEATURE_MASK_FP (1 << XFEATURE_FP)
#define XFEATURE_MASK_SSE (1 << XFEATURE_SSE)
#define XFEATURE_MASK_YMM (1 << XFEATURE_YMM)
#define XFEATURE_MASK_BNDREGS (1 << XFEATURE_BNDREGS)
#define XFEATURE_MASK_BNDCSR (1 << XFEATURE_BNDCSR)
#define XFEATURE_MASK_OPMASK (1 << XFEATURE_OPMASK)
#define XFEATURE_MASK_ZMM_Hi256 (1 << XFEATURE_ZMM_Hi256)
#define XFEATURE_MASK_Hi16_ZMM (1 << XFEATURE_Hi16_ZMM)

#define XFEATURE_MASK_FPSSE (XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
#define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK \
| XFEATURE_MASK_ZMM_Hi256 \
| XFEATURE_MASK_Hi16_ZMM)

#define FIRST_EXTENDED_XFEATURE XFEATURE_YMM

#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE)
#define XSTATE_AVX512 (XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
struct reg_128_bit {
u8 regbytes[128/8];
};
struct reg_256_bit {
u8 regbytes[256/8];
};
struct reg_512_bit {
u8 regbytes[512/8];
};

/*
* State component 2:
*
* There are 16x 256-bit AVX registers named YMM0-YMM15.
* The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15)
* and are stored in 'struct fxregs_state::xmm_space[]'.
* and are stored in 'struct fxregs_state::xmm_space[]' in the
* "legacy" area.
*
* The high 128 bits are stored here:
* 16x 128 bits == 256 bytes.
* The high 128 bits are stored here.
*/
struct ymmh_struct {
u8 ymmh_space[256];
};

/* We don't support LWP yet: */
struct lwp_struct {
u8 reserved[128];
};
struct reg_128_bit hi_ymm[16];
} __packed;

/* Intel MPX support: */
struct bndreg {

struct mpx_bndreg {
u64 lower_bound;
u64 upper_bound;
} __packed;
/*
* State component 3 is used for the 4 128-bit bounds registers
*/
struct mpx_bndreg_state {
struct mpx_bndreg bndreg[4];
} __packed;

struct bndcsr {
/*
* State component 4 is used for the 64-bit user-mode MPX
* configuration register BNDCFGU and the 64-bit MPX status
* register BNDSTATUS. We call the pair "BNDCSR".
*/
struct mpx_bndcsr {
u64 bndcfgu;
u64 bndstatus;
} __packed;

struct mpx_struct {
struct bndreg bndreg[4];
struct bndcsr bndcsr;
};
/*
* The BNDCSR state is padded out to be 64 bytes in size.
*/
struct mpx_bndcsr_state {
union {
struct mpx_bndcsr bndcsr;
u8 pad_to_64_bytes[64];
};
} __packed;

/* AVX-512 Components: */

/*
* State component 5 is used for the 8 64-bit opmask registers
* k0-k7 (opmask state).
*/
struct avx_512_opmask_state {
u64 opmask_reg[8];
} __packed;

/*
* State component 6 is used for the upper 256 bits of the
* registers ZMM0-ZMM15. These 16 256-bit values are denoted
* ZMM0_H-ZMM15_H (ZMM_Hi256 state).
*/
struct avx_512_zmm_uppers_state {
struct reg_256_bit zmm_upper[16];
} __packed;

/*
* State component 7 is used for the 16 512-bit registers
* ZMM16-ZMM31 (Hi16_ZMM state).
*/
struct avx_512_hi16_state {
struct reg_512_bit hi16_zmm[16];
} __packed;

struct xstate_header {
u64 xfeatures;
u64 xcomp_bv;
u64 reserved[6];
} __attribute__((packed));

/* New processor state extensions should be added here: */
#define XSTATE_RESERVE (sizeof(struct ymmh_struct) + \
sizeof(struct lwp_struct) + \
sizeof(struct mpx_struct) )
/*
* This is our most modern FPU state format, as saved by the XSAVE
* and restored by the XRSTOR instructions.
*
* It consists of a legacy fxregs portion, an xstate header and
* subsequent fixed size areas as defined by the xstate header.
* Not all CPUs support all the extensions.
* subsequent areas as defined by the xstate header. Not all CPUs
* support all the extensions, so the size of the extended area
* can vary quite a bit between CPUs.
*/
struct xregs_state {
struct fxregs_state i387;
struct xstate_header header;
u8 __reserved[XSTATE_RESERVE];
u8 extended_state_area[0];
} __attribute__ ((packed, aligned (64)));

/*
* This is a union of all the possible FPU state formats
* put together, so that we can pick the right one at runtime.
*
* The size of the structure is determined by the largest
* member - which is the xsave area:
* member - which is the xsave area. The padding is there
* to ensure that statically-allocated task_structs (just
* the init_task today) have enough space.
*/
union fpregs_state {
struct fregs_state fsave;
Expand Down
15 changes: 10 additions & 5 deletions arch/x86/include/asm/fpu/xstate.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include <linux/uaccess.h>

/* Bit 63 of XCR0 is reserved for future expansion */
#define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63)))
#define XFEATURE_MASK_EXTEND (~(XFEATURE_MASK_FPSSE | (1ULL << 63)))

#define XSTATE_CPUID 0x0000000d

Expand All @@ -19,14 +19,18 @@
#define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)

/* Supported features which support lazy state saving */
#define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
| XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
#define XFEATURE_MASK_LAZY (XFEATURE_MASK_FP | \
XFEATURE_MASK_SSE | \
XFEATURE_MASK_YMM | \
XFEATURE_MASK_OPMASK | \
XFEATURE_MASK_ZMM_Hi256 | \
XFEATURE_MASK_Hi16_ZMM)

/* Supported features which require eager state saving */
#define XSTATE_EAGER (XSTATE_BNDREGS | XSTATE_BNDCSR)
#define XFEATURE_MASK_EAGER (XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR)

/* All currently supported features */
#define XCNTXT_MASK (XSTATE_LAZY | XSTATE_EAGER)
#define XCNTXT_MASK (XFEATURE_MASK_LAZY | XFEATURE_MASK_EAGER)

#ifdef CONFIG_X86_64
#define REX_PREFIX "0x48, "
Expand All @@ -40,6 +44,7 @@ extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS];

extern void update_regset_xstate_info(unsigned int size, u64 xstate_mask);

void fpu__xstate_clear_all_cpu_caps(void);
void *get_xsave_addr(struct xregs_state *xsave, int xstate);
const void *get_xsave_field_ptr(int xstate_field);

Expand Down
7 changes: 4 additions & 3 deletions arch/x86/include/asm/trace/mpx.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
TRACE_EVENT(mpx_bounds_register_exception,

TP_PROTO(void *addr_referenced,
const struct bndreg *bndreg),
const struct mpx_bndreg *bndreg),
TP_ARGS(addr_referenced, bndreg),

TP_STRUCT__entry(
Expand Down Expand Up @@ -44,7 +44,7 @@ TRACE_EVENT(mpx_bounds_register_exception,

TRACE_EVENT(bounds_exception_mpx,

TP_PROTO(const struct bndcsr *bndcsr),
TP_PROTO(const struct mpx_bndcsr *bndcsr),
TP_ARGS(bndcsr),

TP_STRUCT__entry(
Expand Down Expand Up @@ -116,7 +116,8 @@ TRACE_EVENT(mpx_new_bounds_table,
/*
* This gets used outside of MPX-specific code, so we need a stub.
*/
static inline void trace_bounds_exception_mpx(const struct bndcsr *bndcsr)
static inline
void trace_bounds_exception_mpx(const struct mpx_bndcsr *bndcsr)
{
}

Expand Down
Loading

0 comments on commit ce4d72f

Please sign in to comment.