Skip to content

Commit

Permalink
Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/l…
Browse files Browse the repository at this point in the history
…inux/kernel/git/tip/tip

Pull x86 fixes from Ingo Molnar:
 "This is bigger than usual - the reason is partly a pent-up stream of
  fixes after the merge window and partly accidental.  The fixes are:

   - five patches to fix a boot failure on Andy Lutomirsky's laptop
   - four SGI UV platform fixes
   - KASAN fix
   - warning fix
   - documentation update
   - swap entry definition fix
   - pkeys fix
   - irq stats fix"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/apic/x2apic, smp/hotplug: Don't use before alloc in x2apic_cluster_probe()
  x86/efi: Allocate a trampoline if needed in efi_free_boot_services()
  x86/boot: Rework reserve_real_mode() to allow multiple tries
  x86/boot: Defer setup_real_mode() to early_initcall time
  x86/boot: Synchronize trampoline_cr4_features and mmu_cr4_features directly
  x86/boot: Run reserve_bios_regions() after we initialize the memory map
  x86/irq: Do not substract irq_tlb_count from irq_call_count
  x86/mm: Fix swap entry comment and macro
  x86/mm/kaslr: Fix -Wformat-security warning
  x86/mm/pkeys: Fix compact mode by removing protection keys' XSAVE buffer manipulation
  x86/build: Reduce the W=1 warnings noise when compiling x86 syscall tables
  x86/platform/UV: Fix kernel panic running RHEL kdump kernel on UV systems
  x86/platform/UV: Fix problem with UV4 BIOS providing incorrect PXM values
  x86/platform/UV: Fix bug with iounmap() of the UV4 EFI System Table causing a crash
  x86/platform/UV: Fix problem with UV4 Socket IDs not being contiguous
  x86/entry: Clarify the RF saving/restoring situation with SYSCALL/SYSRET
  x86/mm: Disable preemption during CR3 read+write
  x86/mm/KASLR: Increase BRK pages for KASLR memory randomization
  x86/mm/KASLR: Fix physical memory calculation on KASLR memory randomization
  x86, kasan, ftrace: Put APIC interrupt handlers into .irqentry.text
  • Loading branch information
torvalds committed Aug 12, 2016
2 parents 3bc6d8c + d52c056 commit 01ea443
Show file tree
Hide file tree
Showing 20 changed files with 182 additions and 195 deletions.
2 changes: 2 additions & 0 deletions arch/x86/entry/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
OBJECT_FILES_NON_STANDARD_entry_$(BITS).o := y
OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y

CFLAGS_syscall_64.o += -Wno-override-init
CFLAGS_syscall_32.o += -Wno-override-init
obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
obj-y += common.o

Expand Down
25 changes: 20 additions & 5 deletions arch/x86/entry/entry_64.S
Original file line number Diff line number Diff line change
Expand Up @@ -288,11 +288,15 @@ return_from_SYSCALL_64:
jne opportunistic_sysret_failed

/*
* SYSRET can't restore RF. SYSRET can restore TF, but unlike IRET,
* restoring TF results in a trap from userspace immediately after
* SYSRET. This would cause an infinite loop whenever #DB happens
* with register state that satisfies the opportunistic SYSRET
* conditions. For example, single-stepping this user code:
* SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
* restore RF properly. If the slowpath sets it for whatever reason, we
* need to restore it correctly.
*
* SYSRET can restore TF, but unlike IRET, restoring TF results in a
* trap from userspace immediately after SYSRET. This would cause an
* infinite loop whenever #DB happens with register state that satisfies
* the opportunistic SYSRET conditions. For example, single-stepping
* this user code:
*
* movq $stuck_here, %rcx
* pushfq
Expand Down Expand Up @@ -601,9 +605,20 @@ apicinterrupt3 \num trace(\sym) smp_trace(\sym)
.endm
#endif

/* Make sure APIC interrupt handlers end up in the irqentry section: */
#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
# define PUSH_SECTION_IRQENTRY .pushsection .irqentry.text, "ax"
# define POP_SECTION_IRQENTRY .popsection
#else
# define PUSH_SECTION_IRQENTRY
# define POP_SECTION_IRQENTRY
#endif

.macro apicinterrupt num sym do_sym
PUSH_SECTION_IRQENTRY
apicinterrupt3 \num \sym \do_sym
trace_apicinterrupt \num \sym
POP_SECTION_IRQENTRY
.endm

#ifdef CONFIG_SMP
Expand Down
4 changes: 0 additions & 4 deletions arch/x86/include/asm/hardirq.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,6 @@ typedef struct {
#ifdef CONFIG_SMP
unsigned int irq_resched_count;
unsigned int irq_call_count;
/*
* irq_tlb_count is double-counted in irq_call_count, so it must be
* subtracted from irq_call_count when displaying irq_call_count
*/
unsigned int irq_tlb_count;
#endif
#ifdef CONFIG_X86_THERMAL_VECTOR
Expand Down
4 changes: 2 additions & 2 deletions arch/x86/include/asm/pgtable_64.h
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
*
* | ... | 11| 10| 9|8|7|6|5| 4| 3|2|1|0| <- bit number
* | ... |SW3|SW2|SW1|G|L|D|A|CD|WT|U|W|P| <- bit names
* | OFFSET (14->63) | TYPE (10-13) |0|X|X|X| X| X|X|X|0| <- swp entry
* | OFFSET (14->63) | TYPE (9-13) |0|X|X|X| X| X|X|X|0| <- swp entry
*
* G (8) is aliased and used as a PROT_NONE indicator for
* !present ptes. We need to start storing swap entries above
Expand All @@ -156,7 +156,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
#define SWP_TYPE_FIRST_BIT (_PAGE_BIT_PROTNONE + 1)
#define SWP_TYPE_BITS 5
/* Place the offset above the type: */
#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS + 1)
#define SWP_OFFSET_FIRST_BIT (SWP_TYPE_FIRST_BIT + SWP_TYPE_BITS)

#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)

Expand Down
10 changes: 9 additions & 1 deletion arch/x86/include/asm/realmode.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,15 @@ extern unsigned char boot_gdt[];
extern unsigned char secondary_startup_64[];
#endif

static inline size_t real_mode_size_needed(void)
{
if (real_mode_header)
return 0; /* already allocated. */

return ALIGN(real_mode_blob_end - real_mode_blob, PAGE_SIZE);
}

void set_real_mode_mem(phys_addr_t mem, size_t size);
void reserve_real_mode(void);
void setup_real_mode(void);

#endif /* _ARCH_X86_REALMODE_H */
7 changes: 7 additions & 0 deletions arch/x86/include/asm/tlbflush.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,14 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)

static inline void __native_flush_tlb(void)
{
/*
* If current->mm == NULL then we borrow a mm which may change during a
* task switch and therefore we must not be preempted while we write CR3
* back:
*/
preempt_disable();
native_write_cr3(native_read_cr3());
preempt_enable();
}

static inline void __native_flush_tlb_global_irq_disabled(void)
Expand Down
5 changes: 3 additions & 2 deletions arch/x86/include/asm/uv/bios.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ struct uv_gam_range_entry {
u16 nasid; /* HNasid */
u16 sockid; /* Socket ID, high bits of APIC ID */
u16 pnode; /* Index to MMR and GRU spaces */
u32 pxm; /* ACPI proximity domain number */
u32 unused2;
u32 limit; /* PA bits 56:26 (UV_GAM_RANGE_SHFT) */
};

Expand All @@ -88,7 +88,8 @@ struct uv_gam_range_entry {
#define UV_SYSTAB_VERSION_UV4 0x400 /* UV4 BIOS base version */
#define UV_SYSTAB_VERSION_UV4_1 0x401 /* + gpa_shift */
#define UV_SYSTAB_VERSION_UV4_2 0x402 /* + TYPE_NVRAM/WINDOW/MBOX */
#define UV_SYSTAB_VERSION_UV4_LATEST UV_SYSTAB_VERSION_UV4_2
#define UV_SYSTAB_VERSION_UV4_3 0x403 /* - GAM Range PXM Value */
#define UV_SYSTAB_VERSION_UV4_LATEST UV_SYSTAB_VERSION_UV4_3

#define UV_SYSTAB_TYPE_UNUSED 0 /* End of table (offset == 0) */
#define UV_SYSTAB_TYPE_GAM_PARAMS 1 /* GAM PARAM conversions */
Expand Down
13 changes: 9 additions & 4 deletions arch/x86/kernel/apic/x2apic_cluster.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ static void init_x2apic_ldr(void)
/*
* At CPU state changes, update the x2apic cluster sibling info.
*/
int x2apic_prepare_cpu(unsigned int cpu)
static int x2apic_prepare_cpu(unsigned int cpu)
{
if (!zalloc_cpumask_var(&per_cpu(cpus_in_cluster, cpu), GFP_KERNEL))
return -ENOMEM;
Expand All @@ -168,7 +168,7 @@ int x2apic_prepare_cpu(unsigned int cpu)
return 0;
}

int x2apic_dead_cpu(unsigned int this_cpu)
static int x2apic_dead_cpu(unsigned int this_cpu)
{
int cpu;

Expand All @@ -186,13 +186,18 @@ int x2apic_dead_cpu(unsigned int this_cpu)
static int x2apic_cluster_probe(void)
{
int cpu = smp_processor_id();
int ret;

if (!x2apic_mode)
return 0;

ret = cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
x2apic_prepare_cpu, x2apic_dead_cpu);
if (ret < 0) {
pr_err("Failed to register X2APIC_PREPARE\n");
return 0;
}
cpumask_set_cpu(cpu, per_cpu(cpus_in_cluster, cpu));
cpuhp_setup_state(CPUHP_X2APIC_PREPARE, "X2APIC_PREPARE",
x2apic_prepare_cpu, x2apic_dead_cpu);
return 1;
}

Expand Down
42 changes: 20 additions & 22 deletions arch/x86/kernel/apic/x2apic_uv_x.c
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,11 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
if (strncmp(oem_id, "SGI", 3) != 0)
return 0;

if (numa_off) {
pr_err("UV: NUMA is off, disabling UV support\n");
return 0;
}

/* Setup early hub type field in uv_hub_info for Node 0 */
uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0;

Expand Down Expand Up @@ -325,7 +330,7 @@ static __init void build_uv_gr_table(void)
struct uv_gam_range_entry *gre = uv_gre_table;
struct uv_gam_range_s *grt;
unsigned long last_limit = 0, ram_limit = 0;
int bytes, i, sid, lsid = -1;
int bytes, i, sid, lsid = -1, indx = 0, lindx = -1;

if (!gre)
return;
Expand Down Expand Up @@ -356,11 +361,12 @@ static __init void build_uv_gr_table(void)
}
sid = gre->sockid - _min_socket;
if (lsid < sid) { /* new range */
grt = &_gr_table[sid];
grt->base = lsid;
grt = &_gr_table[indx];
grt->base = lindx;
grt->nasid = gre->nasid;
grt->limit = last_limit = gre->limit;
lsid = sid;
lindx = indx++;
continue;
}
if (lsid == sid && !ram_limit) { /* update range */
Expand All @@ -371,7 +377,7 @@ static __init void build_uv_gr_table(void)
}
if (!ram_limit) { /* non-contiguous ram range */
grt++;
grt->base = sid - 1;
grt->base = lindx;
grt->nasid = gre->nasid;
grt->limit = last_limit = gre->limit;
continue;
Expand Down Expand Up @@ -1155,19 +1161,18 @@ static void __init decode_gam_rng_tbl(unsigned long ptr)
for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
if (!index) {
pr_info("UV: GAM Range Table...\n");
pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s %3s\n",
pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s\n",
"Range", "", "Size", "Type", "NASID",
"SID", "PN", "PXM");
"SID", "PN");
}
pr_info(
"UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x %3d\n",
"UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x\n",
index++,
(unsigned long)lgre << UV_GAM_RANGE_SHFT,
(unsigned long)gre->limit << UV_GAM_RANGE_SHFT,
((unsigned long)(gre->limit - lgre)) >>
(30 - UV_GAM_RANGE_SHFT), /* 64M -> 1G */
gre->type, gre->nasid, gre->sockid,
gre->pnode, gre->pxm);
gre->type, gre->nasid, gre->sockid, gre->pnode);

lgre = gre->limit;
if (sock_min > gre->sockid)
Expand Down Expand Up @@ -1286,28 +1291,26 @@ static void __init build_socket_tables(void)
_pnode_to_socket[i] = SOCK_EMPTY;

/* fill in pnode/node/addr conversion list values */
pr_info("UV: GAM Building socket/pnode/pxm conversion tables\n");
pr_info("UV: GAM Building socket/pnode conversion tables\n");
for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
if (gre->type == UV_GAM_RANGE_TYPE_HOLE)
continue;
i = gre->sockid - minsock;
if (_socket_to_pnode[i] != SOCK_EMPTY)
continue; /* duplicate */
_socket_to_pnode[i] = gre->pnode;
_socket_to_node[i] = gre->pxm;

i = gre->pnode - minpnode;
_pnode_to_socket[i] = gre->sockid;

pr_info(
"UV: sid:%02x type:%d nasid:%04x pn:%02x pxm:%2d pn2s:%2x\n",
"UV: sid:%02x type:%d nasid:%04x pn:%02x pn2s:%2x\n",
gre->sockid, gre->type, gre->nasid,
_socket_to_pnode[gre->sockid - minsock],
_socket_to_node[gre->sockid - minsock],
_pnode_to_socket[gre->pnode - minpnode]);
}

/* check socket -> node values */
/* Set socket -> node values */
lnid = -1;
for_each_present_cpu(cpu) {
int nid = cpu_to_node(cpu);
Expand All @@ -1318,14 +1321,9 @@ static void __init build_socket_tables(void)
lnid = nid;
apicid = per_cpu(x86_cpu_to_apicid, cpu);
sockid = apicid >> uv_cpuid.socketid_shift;
i = sockid - minsock;

if (nid != _socket_to_node[i]) {
pr_warn(
"UV: %02x: type:%d socket:%02x PXM:%02x != node:%2d\n",
i, sockid, gre->type, _socket_to_node[i], nid);
_socket_to_node[i] = nid;
}
_socket_to_node[sockid - minsock] = nid;
pr_info("UV: sid:%02x: apicid:%04x node:%2d\n",
sockid, apicid, nid);
}

/* Setup physical blade to pnode translation from GAM Range Table */
Expand Down
Loading

0 comments on commit 01ea443

Please sign in to comment.