Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 asm updates from Ingo Molnar:
 "The main changes in this cycle were:

   - vDSO and asm entry improvements (Andy Lutomirski)

   - Xen paravirt entry enhancements (Boris Ostrovsky)

   - asm entry labels enhancement (Borislav Petkov)

   - and other misc changes (Thomas Gleixner, me)"

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/vsdo: Fix build on PARAVIRT_CLOCK=y, KVM_GUEST=n
  Revert "x86/kvm: On KVM re-enable (e.g. after suspend), update clocks"
  x86/entry/64_compat: Make labels local
  x86/platform/uv: Include clocksource.h for clocksource_touch_watchdog()
  x86/vdso: Enable vdso pvclock access on all vdso variants
  x86/vdso: Remove pvclock fixmap machinery
  x86/vdso: Get pvclock data from the vvar VMA instead of the fixmap
  x86, vdso, pvclock: Simplify and speed up the vdso pvclock reader
  x86/kvm: On KVM re-enable (e.g. after suspend), update clocks
  x86/entry/64: Bypass enter_from_user_mode on non-context-tracking boots
  x86/asm: Add asm macros for static keys/jump labels
  x86/asm: Error out if asm/jump_label.h is included inappropriately
  context_tracking: Switch to new static_branch API
  x86/entry, x86/paravirt: Remove the unused usergs_sysret32 PV op
  x86/paravirt: Remove the unused irq_enable_sysexit pv op
  x86/xen: Avoid fast syscall path for Xen PV guests
torvalds committed Jan 11, 2016
2 parents 4f19b88 + 8705d60 commit 88cbfd0
Showing 28 changed files with 194 additions and 244 deletions.
15 changes: 15 additions & 0 deletions arch/x86/entry/calling.h
@@ -1,3 +1,5 @@
#include <linux/jump_label.h>

/*
x86 function call convention, 64-bit:
@@ -232,3 +234,16 @@ For 32-bit we have the following conventions - kernel is built with

#endif /* CONFIG_X86_64 */

/*
* This does 'call enter_from_user_mode' unless we can avoid it based on
* kernel config or using the static jump infrastructure.
*/
.macro CALL_enter_from_user_mode
#ifdef CONFIG_CONTEXT_TRACKING
#ifdef HAVE_JUMP_LABEL
STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_enabled, def=0
#endif
call enter_from_user_mode
.Lafter_call_\@:
#endif
.endm
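
The CALL_enter_from_user_mode macro above is what lets entry_64.S drop its open-coded #ifdef CONFIG_CONTEXT_TRACKING block: when jump labels are available, the call sits behind a default-off static key, so boots that never enable context tracking pay only for a patched-out branch. A minimal user-space C sketch of that default-off gating pattern (the real kernel uses runtime-patched jump labels, not a tested flag; the flag and __builtin_expect below are purely illustrative):

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the context_tracking_enabled static key (default off). */
static bool context_tracking_enabled = false;

static void enter_from_user_mode(void)
{
        puts("context tracking: user -> kernel transition recorded");
}

/* Behavioral analogue of CALL_enter_from_user_mode: the call is skipped
 * unless the key has been switched on. */
static inline void call_enter_from_user_mode(void)
{
        if (__builtin_expect(context_tracking_enabled, 0))
                enter_from_user_mode();
}

int main(void)
{
        call_enter_from_user_mode();      /* no output: key is off */
        context_tracking_enabled = true;  /* e.g. a nohz_full= boot enables it */
        call_enter_from_user_mode();      /* now the slow path runs */
        return 0;
}
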
8 changes: 2 additions & 6 deletions arch/x86/entry/entry_32.S
@@ -329,7 +329,8 @@ sysenter_past_esp:
* Return back to the vDSO, which will pop ecx and edx.
* Don't bother with DS and ES (they already contain __USER_DS).
*/
ENABLE_INTERRUPTS_SYSEXIT
sti
sysexit

.pushsection .fixup, "ax"
2: movl $0, PT_FS(%esp)
@@ -552,11 +553,6 @@ ENTRY(native_iret)
iret
_ASM_EXTABLE(native_iret, iret_exc)
END(native_iret)

ENTRY(native_irq_enable_sysexit)
sti
sysexit
END(native_irq_enable_sysexit)
#endif

ENTRY(overflow)
8 changes: 2 additions & 6 deletions arch/x86/entry/entry_64.S
@@ -520,9 +520,7 @@ END(irq_entries_start)
*/
TRACE_IRQS_OFF

#ifdef CONFIG_CONTEXT_TRACKING
call enter_from_user_mode
#endif
CALL_enter_from_user_mode

1:
/*
@@ -1066,9 +1064,7 @@ ENTRY(error_entry)
* (which can take locks).
*/
TRACE_IRQS_OFF
#ifdef CONFIG_CONTEXT_TRACKING
call enter_from_user_mode
#endif
CALL_enter_from_user_mode
ret

.Lerror_entry_done:
20 changes: 7 additions & 13 deletions arch/x86/entry/entry_64_compat.S
@@ -18,13 +18,6 @@

.section .entry.text, "ax"

#ifdef CONFIG_PARAVIRT
ENTRY(native_usergs_sysret32)
swapgs
sysretl
ENDPROC(native_usergs_sysret32)
#endif

/*
* 32-bit SYSENTER instruction entry.
*
@@ -103,15 +96,15 @@ ENTRY(entry_SYSENTER_compat)
* This needs to happen before enabling interrupts so that
* we don't get preempted with NT set.
*
* NB.: sysenter_fix_flags is a label with the code under it moved
* NB.: .Lsysenter_fix_flags is a label with the code under it moved
* out-of-line as an optimization: NT is unlikely to be set in the
* majority of the cases and instead of polluting the I$ unnecessarily,
* we're keeping that code behind a branch which will predict as
* not-taken and therefore its instructions won't be fetched.
*/
testl $X86_EFLAGS_NT, EFLAGS(%rsp)
jnz sysenter_fix_flags
sysenter_flags_fixed:
jnz .Lsysenter_fix_flags
.Lsysenter_flags_fixed:

/*
* User mode is traced as though IRQs are on, and SYSENTER
@@ -126,10 +119,10 @@ sysenter_flags_fixed:
"jmp .Lsyscall_32_done", X86_FEATURE_XENPV
jmp sysret32_from_system_call

sysenter_fix_flags:
.Lsysenter_fix_flags:
pushq $X86_EFLAGS_FIXED
popfq
jmp sysenter_flags_fixed
jmp .Lsysenter_flags_fixed
ENDPROC(entry_SYSENTER_compat)

/*
@@ -238,7 +231,8 @@ sysret32_from_system_call:
xorq %r9, %r9
xorq %r10, %r10
movq RSP-ORIG_RAX(%rsp), %rsp
USERGS_SYSRET32
swapgs
sysretl
END(entry_SYSCALL_compat)

/*
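
The NB. comment in the SYSENTER hunk above explains why the NT fixup is kept behind a branch that predicts not-taken: the rare case stays out of the hot path's instruction cache. A small C sketch of the same idea (fix_flags() and the surrounding names are illustrative, not kernel code):

#include <stdint.h>
#include <stdio.h>

#define unlikely(x) __builtin_expect(!!(x), 0)

#define X86_EFLAGS_NT (1u << 14)   /* nested-task flag, bit 14 of EFLAGS */

/* The rare fixup is out-of-line and marked cold, so the common case keeps a
 * small I$ footprint. */
static void __attribute__((noinline, cold)) fix_flags(uint32_t *eflags)
{
        *eflags &= ~X86_EFLAGS_NT;
}

static void sysenter_path(uint32_t *eflags)
{
        if (unlikely(*eflags & X86_EFLAGS_NT))
                fix_flags(eflags);
        /* ... continue with the common case ... */
}

int main(void)
{
        uint32_t eflags = X86_EFLAGS_NT;
        sysenter_path(&eflags);
        printf("NT cleared: %s\n", (eflags & X86_EFLAGS_NT) ? "no" : "yes");
        return 0;
}
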
151 changes: 73 additions & 78 deletions arch/x86/entry/vdso/vclock_gettime.c
@@ -17,8 +17,10 @@
#include <asm/vvar.h>
#include <asm/unistd.h>
#include <asm/msr.h>
#include <asm/pvclock.h>
#include <linux/math64.h>
#include <linux/time.h>
#include <linux/kernel.h>

#define gtod (&VVAR(vsyscall_gtod_data))

@@ -36,12 +38,12 @@ static notrace cycle_t vread_hpet(void)
}
#endif

#ifndef BUILD_VDSO32
#ifdef CONFIG_PARAVIRT_CLOCK
extern u8 pvclock_page
__attribute__((visibility("hidden")));
#endif

#include <linux/kernel.h>
#include <asm/vsyscall.h>
#include <asm/fixmap.h>
#include <asm/pvclock.h>
#ifndef BUILD_VDSO32

notrace static long vdso_fallback_gettime(long clock, struct timespec *ts)
{
@@ -60,75 +62,6 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
return ret;
}

#ifdef CONFIG_PARAVIRT_CLOCK

static notrace const struct pvclock_vsyscall_time_info *get_pvti(int cpu)
{
const struct pvclock_vsyscall_time_info *pvti_base;
int idx = cpu / (PAGE_SIZE/PVTI_SIZE);
int offset = cpu % (PAGE_SIZE/PVTI_SIZE);

BUG_ON(PVCLOCK_FIXMAP_BEGIN + idx > PVCLOCK_FIXMAP_END);

pvti_base = (struct pvclock_vsyscall_time_info *)
__fix_to_virt(PVCLOCK_FIXMAP_BEGIN+idx);

return &pvti_base[offset];
}

static notrace cycle_t vread_pvclock(int *mode)
{
const struct pvclock_vsyscall_time_info *pvti;
cycle_t ret;
u64 last;
u32 version;
u8 flags;
unsigned cpu, cpu1;


/*
* Note: hypervisor must guarantee that:
* 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
* 2. that per-CPU pvclock time info is updated if the
* underlying CPU changes.
* 3. that version is increased whenever underlying CPU
* changes.
*
*/
do {
cpu = __getcpu() & VGETCPU_CPU_MASK;
/* TODO: We can put vcpu id into higher bits of pvti.version.
* This will save a couple of cycles by getting rid of
* __getcpu() calls (Gleb).
*/

pvti = get_pvti(cpu);

version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);

/*
* Test we're still on the cpu as well as the version.
* We could have been migrated just after the first
* vgetcpu but before fetching the version, so we
* wouldn't notice a version change.
*/
cpu1 = __getcpu() & VGETCPU_CPU_MASK;
} while (unlikely(cpu != cpu1 ||
(pvti->pvti.version & 1) ||
pvti->pvti.version != version));

if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
*mode = VCLOCK_NONE;

/* refer to tsc.c read_tsc() comment for rationale */
last = gtod->cycle_last;

if (likely(ret >= last))
return ret;

return last;
}
#endif

#else

@@ -162,15 +95,77 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
return ret;
}

#endif

#ifdef CONFIG_PARAVIRT_CLOCK
static notrace const struct pvclock_vsyscall_time_info *get_pvti0(void)
{
return (const struct pvclock_vsyscall_time_info *)&pvclock_page;
}

static notrace cycle_t vread_pvclock(int *mode)
{
*mode = VCLOCK_NONE;
return 0;
}
#endif
const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
cycle_t ret;
u64 tsc, pvti_tsc;
u64 last, delta, pvti_system_time;
u32 version, pvti_tsc_to_system_mul, pvti_tsc_shift;

/*
* Note: The kernel and hypervisor must guarantee that cpu ID
* number maps 1:1 to per-CPU pvclock time info.
*
* Because the hypervisor is entirely unaware of guest userspace
* preemption, it cannot guarantee that per-CPU pvclock time
* info is updated if the underlying CPU changes or that that
* version is increased whenever underlying CPU changes.
*
* On KVM, we are guaranteed that pvti updates for any vCPU are
* atomic as seen by *all* vCPUs. This is an even stronger
* guarantee than we get with a normal seqlock.
*
* On Xen, we don't appear to have that guarantee, but Xen still
* supplies a valid seqlock using the version field.
* We only do pvclock vdso timing at all if
* PVCLOCK_TSC_STABLE_BIT is set, and we interpret that bit to
* mean that all vCPUs have matching pvti and that the TSC is
* synced, so we can just look at vCPU 0's pvti.
*/

if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {
*mode = VCLOCK_NONE;
return 0;
}

do {
version = pvti->version;

smp_rmb();

tsc = rdtsc_ordered();
pvti_tsc_to_system_mul = pvti->tsc_to_system_mul;
pvti_tsc_shift = pvti->tsc_shift;
pvti_system_time = pvti->system_time;
pvti_tsc = pvti->tsc_timestamp;

/* Make sure that the version double-check is last. */
smp_rmb();
} while (unlikely((version & 1) || version != pvti->version));

delta = tsc - pvti_tsc;
ret = pvti_system_time +
pvclock_scale_delta(delta, pvti_tsc_to_system_mul,
pvti_tsc_shift);

/* refer to vread_tsc() comment for rationale */
last = gtod->cycle_last;

if (likely(ret >= last))
return ret;

return last;
}
#endif

notrace static cycle_t vread_tsc(void)
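
The new vread_pvclock() above converts a raw TSC delta into nanoseconds with the per-vCPU mul/shift pair the hypervisor publishes in the pvti page, via pvclock_scale_delta(). A simplified, self-contained sketch of that scaling step (the mul_frac/shift values in main() are invented for illustration; __uint128_t stands in for the kernel's 96-bit intermediate arithmetic):

#include <stdint.h>
#include <stdio.h>

/* nanoseconds = ((tsc_delta << tsc_shift) * tsc_to_system_mul) >> 32,
 * with a negative shift meaning a right shift. */
static uint64_t scale_delta(uint64_t delta, uint32_t mul_frac, int8_t shift)
{
        if (shift < 0)
                delta >>= -shift;
        else
                delta <<= shift;
        return (uint64_t)(((__uint128_t)delta * mul_frac) >> 32);
}

int main(void)
{
        /* Illustrative numbers only: a 2.0 GHz TSC gives mul_frac = 2^31
         * with shift = 0, i.e. 0.5 ns per cycle. */
        uint64_t tsc_delta = 4000000000ull;          /* 4e9 cycles */
        printf("%llu ns\n",
               (unsigned long long)scale_delta(tsc_delta, 1u << 31, 0));
        return 0;
}
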
3 changes: 2 additions & 1 deletion arch/x86/entry/vdso/vdso-layout.lds.S
@@ -25,7 +25,7 @@ SECTIONS
* segment.
*/

vvar_start = . - 2 * PAGE_SIZE;
vvar_start = . - 3 * PAGE_SIZE;
vvar_page = vvar_start;

/* Place all vvars at the offsets in asm/vvar.h. */
@@ -36,6 +36,7 @@ SECTIONS
#undef EMIT_VVAR

hpet_page = vvar_start + PAGE_SIZE;
pvclock_page = vvar_start + 2 * PAGE_SIZE;

. = SIZEOF_HEADERS;

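
For reference, the vvar area implied by the linker-script change above now spans three pages in front of the vdso text: the vvar page, the HPET page, and the new pvclock page. A tiny sketch that just prints those offsets relative to the start of the vdso image (4 KiB pages assumed):

#include <stdio.h>

int main(void)
{
        const long page = 4096;
        const long vvar_start = -3 * page;   /* vvar_start = . - 3 * PAGE_SIZE */
        printf("vvar_page    at %ld\n", vvar_start);
        printf("hpet_page    at %ld\n", vvar_start + page);
        printf("pvclock_page at %ld\n", vvar_start + 2 * page);
        return 0;
}
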
3 changes: 3 additions & 0 deletions arch/x86/entry/vdso/vdso2c.c
@@ -73,13 +73,15 @@ enum {
sym_vvar_start,
sym_vvar_page,
sym_hpet_page,
sym_pvclock_page,
sym_VDSO_FAKE_SECTION_TABLE_START,
sym_VDSO_FAKE_SECTION_TABLE_END,
};

const int special_pages[] = {
sym_vvar_page,
sym_hpet_page,
sym_pvclock_page,
};

struct vdso_sym {
@@ -91,6 +93,7 @@ struct vdso_sym required_syms[] = {
[sym_vvar_start] = {"vvar_start", true},
[sym_vvar_page] = {"vvar_page", true},
[sym_hpet_page] = {"hpet_page", true},
[sym_pvclock_page] = {"pvclock_page", true},
[sym_VDSO_FAKE_SECTION_TABLE_START] = {
"VDSO_FAKE_SECTION_TABLE_START", false
},
14 changes: 14 additions & 0 deletions arch/x86/entry/vdso/vma.c
@@ -12,6 +12,7 @@
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/cpu.h>
#include <asm/pvclock.h>
#include <asm/vgtod.h>
#include <asm/proto.h>
#include <asm/vdso.h>
@@ -100,6 +101,7 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
.name = "[vvar]",
.pages = no_pages,
};
struct pvclock_vsyscall_time_info *pvti;

if (calculate_addr) {
addr = vdso_addr(current->mm->start_stack,
@@ -169,6 +171,18 @@ static int map_vdso(const struct vdso_image *image, bool calculate_addr)
}
#endif

pvti = pvclock_pvti_cpu0_va();
if (pvti && image->sym_pvclock_page) {
ret = remap_pfn_range(vma,
text_start + image->sym_pvclock_page,
__pa(pvti) >> PAGE_SHIFT,
PAGE_SIZE,
PAGE_READONLY);

if (ret)
goto up_fail;
}

up_fail:
if (ret)
current->mm->context.vdso = NULL;
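
With the pvclock pvti page remapped read-only into the vvar VMA as above, an ordinary clock_gettime() call in a KVM guest using the kvm-clock clocksource (with PVCLOCK_TSC_STABLE_BIT set) can be satisfied entirely in the vDSO, without entering the kernel. A minimal usage sketch:

#include <stdio.h>
#include <time.h>

int main(void)
{
        struct timespec ts;

        /* On a suitable guest this resolves to the vdso fast path rather
         * than a real syscall. */
        if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0)
                printf("%lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
        return 0;
}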