From bee980d9e9642e96351fa3ca9077b853ecf62f57 Mon Sep 17 00:00:00 2001 From: Keir Fraser Date: Thu, 28 Mar 2013 10:03:36 -0400 Subject: [PATCH 1/2] xen/events: Handle VIRQ_TIMER before any other hardirq in event loop. This avoids any other hardirq handler seeing a very stale jiffies value immediately after wakeup from a long idle period. The one observable symptom of this was a USB keyboard, with software keyboard repeat, which would always repeat a key immediately that it was pressed. This is due to the key press waking the guest, the key handler immediately runs, sees an old jiffies value, and then that jiffies value significantly updated, before the key is unpressed. Reviewed-by: David Vrabel Signed-off-by: Keir Fraser Signed-off-by: Konrad Rzeszutek Wilk --- drivers/xen/events.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/xen/events.c b/drivers/xen/events.c index aa85881d17b23f..2647ad8e1f19c0 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -1316,7 +1316,7 @@ static void __xen_evtchn_do_upcall(void) { int start_word_idx, start_bit_idx; int word_idx, bit_idx; - int i; + int i, irq; int cpu = get_cpu(); struct shared_info *s = HYPERVISOR_shared_info; struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu); @@ -1324,6 +1324,8 @@ static void __xen_evtchn_do_upcall(void) do { xen_ulong_t pending_words; + xen_ulong_t pending_bits; + struct irq_desc *desc; vcpu_info->evtchn_upcall_pending = 0; @@ -1335,6 +1337,17 @@ static void __xen_evtchn_do_upcall(void) * selector flag. xchg_xen_ulong must contain an * appropriate barrier. */ + if ((irq = per_cpu(virq_to_irq, cpu)[VIRQ_TIMER]) != -1) { + int evtchn = evtchn_from_irq(irq); + word_idx = evtchn / BITS_PER_LONG; + pending_bits = evtchn % BITS_PER_LONG; + if (active_evtchns(cpu, s, word_idx) & (1ULL << pending_bits)) { + desc = irq_to_desc(irq); + if (desc) + generic_handle_irq_desc(irq, desc); + } + } + pending_words = xchg_xen_ulong(&vcpu_info->evtchn_pending_sel, 0); start_word_idx = __this_cpu_read(current_word_idx); @@ -1343,7 +1356,6 @@ static void __xen_evtchn_do_upcall(void) word_idx = start_word_idx; for (i = 0; pending_words != 0; i++) { - xen_ulong_t pending_bits; xen_ulong_t words; words = MASK_LSBS(pending_words, word_idx); @@ -1372,8 +1384,7 @@ static void __xen_evtchn_do_upcall(void) do { xen_ulong_t bits; - int port, irq; - struct irq_desc *desc; + int port; bits = MASK_LSBS(pending_bits, bit_idx); From b22227944b8fe92b19150b4c36421e37979d9a16 Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Fri, 29 Mar 2013 10:20:56 -0400 Subject: [PATCH 2/2] xen/mmu: On early bootup, flush the TLB when changing RO->RW bits Xen provided pagetables. Occassionaly on a DL380 G4 the guest would crash quite early with this: (XEN) d244:v0: unhandled page fault (ec=0003) (XEN) Pagetable walk from ffffffff84dc7000: (XEN) L4[0x1ff] = 00000000c3f18067 0000000000001789 (XEN) L3[0x1fe] = 00000000c3f14067 000000000000178d (XEN) L2[0x026] = 00000000dc8b2067 0000000000004def (XEN) L1[0x1c7] = 00100000dc8da067 0000000000004dc7 (XEN) domain_crash_sync called from entry.S (XEN) Domain 244 (vcpu#0) crashed on cpu#3: (XEN) ----[ Xen-4.1.3OVM x86_64 debug=n Not tainted ]---- (XEN) CPU: 3 (XEN) RIP: e033:[] (XEN) RFLAGS: 0000000000000216 EM: 1 CONTEXT: pv guest (XEN) rax: 0000000000000000 rbx: ffffffff81785f88 rcx: 000000000000003f (XEN) rdx: 0000000000000000 rsi: 00000000dc8da063 rdi: ffffffff84dc7000 The offending code shows it to be a loop writting the value zero (%rax) in the %rdi (the L4 provided by Xen) register: 0: 44 00 00 add %r8b,(%rax) 3: 31 c0 xor %eax,%eax 5: b9 40 00 00 00 mov $0x40,%ecx a: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1) 11: 00 00 13: ff c9 dec %ecx 15:* 48 89 07 mov %rax,(%rdi) <-- trapping instruction 18: 48 89 47 08 mov %rax,0x8(%rdi) 1c: 48 89 47 10 mov %rax,0x10(%rdi) which fails. xen_setup_kernel_pagetable recycles some of the Xen's page-table entries when it has switched over to its Linux page-tables. Right before try to clear the page, we make a hypercall to change it from _RO to _RW and that works (otherwise we would hit an BUG()). And the _RW flag is set for that page: (XEN) L1[0x1c7] = 001000004885f067 0000000000004dc7 The error code is 3, so PFEC_page_present and PFEC_write_access, so page is present (correct), and we tried to write to the page, but a violation occurred. The one theory is that the the page entries in hardware (which are cached) are not up to date with what we just set. Especially as we have just done an CR3 write and flushed the multicalls. This patch does solve the problem by flusing out the TLB page entry after changing it from _RO to _RW and we don't hit this issue anymore. Fixed-Oracle-Bug: 16243091 [ON OCCASIONS VM START GOES INTO 'CRASH' STATE: CLEAR_PAGE+0X12 ON HP DL380 G4] Reported-and-Tested-by: Saar Maoz Signed-off-by: Konrad Rzeszutek Wilk --- arch/x86/xen/mmu.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 6afbb2ca9a0ac4..a4ea92477e016e 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1748,14 +1748,18 @@ static void *m2v(phys_addr_t maddr) } /* Set the page permissions on an identity-mapped pages */ -static void set_page_prot(void *addr, pgprot_t prot) +static void set_page_prot_flags(void *addr, pgprot_t prot, unsigned long flags) { unsigned long pfn = __pa(addr) >> PAGE_SHIFT; pte_t pte = pfn_pte(pfn, prot); - if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) + if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags)) BUG(); } +static void set_page_prot(void *addr, pgprot_t prot) +{ + return set_page_prot_flags(addr, prot, UVMF_NONE); +} #ifdef CONFIG_X86_32 static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) { @@ -1839,12 +1843,12 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end, unsigned long addr) { if (*pt_base == PFN_DOWN(__pa(addr))) { - set_page_prot((void *)addr, PAGE_KERNEL); + set_page_prot_flags((void *)addr, PAGE_KERNEL, UVMF_INVLPG); clear_page((void *)addr); (*pt_base)++; } if (*pt_end == PFN_DOWN(__pa(addr))) { - set_page_prot((void *)addr, PAGE_KERNEL); + set_page_prot_flags((void *)addr, PAGE_KERNEL, UVMF_INVLPG); clear_page((void *)addr); (*pt_end)--; }