Skip to content

Commit

Permalink
xen: implement save/restore
Browse files Browse the repository at this point in the history
This patch implements Xen save/restore and migration.

Saving is triggered via xenbus, which is polled in
drivers/xen/manage.c.  When a suspend request comes in, the kernel
prepares itself for saving by:

1 - Freeze all processes.  This is primarily to prevent any
    partially-completed pagetable updates from confusing the suspend
    process.  If CONFIG_PREEMPT isn't defined, then this isn't necessary.

2 - Suspend xenbus and other devices

3 - Stop_machine, to make sure all the other vcpus are quiescent.  The
    Xen tools require the domain to run its save off vcpu0.

4 - Within the stop_machine state, it pins any unpinned pgds (under
    construction or destruction), canonicalizes various other
    pieces of state (mostly converting mfns to pfns), and finally

5 - Suspend the domain

Restore reverses the steps used to save the domain, ending when all
the frozen processes are thawed.

Signed-off-by: Jeremy Fitzhardinge <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
  • Loading branch information
jsgf authored and KAGA-KOKO committed May 27, 2008
1 parent 7d88d32 commit 0e91398
Show file tree
Hide file tree
Showing 14 changed files with 318 additions and 21 deletions.
2 changes: 1 addition & 1 deletion arch/x86/xen/Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
obj-y := enlighten.o setup.o multicalls.o mmu.o \
time.o xen-asm.o grant-table.o
time.o xen-asm.o grant-table.o suspend.o

obj-$(CONFIG_SMP) += smp.o
6 changes: 3 additions & 3 deletions arch/x86/xen/enlighten.c
Original file line number Diff line number Diff line change
Expand Up @@ -857,7 +857,7 @@ static __init void xen_pagetable_setup_start(pgd_t *base)
PFN_DOWN(__pa(xen_start_info->pt_base)));
}

static __init void setup_shared_info(void)
void xen_setup_shared_info(void)
{
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP);
Expand Down Expand Up @@ -894,15 +894,15 @@ static __init void xen_pagetable_setup_done(pgd_t *base)
pv_mmu_ops.release_pmd = xen_release_pmd;
pv_mmu_ops.set_pte = xen_set_pte;

setup_shared_info();
xen_setup_shared_info();

/* Actually pin the pagetable down, but we can't set PG_pinned
yet because the page structures don't exist yet. */
pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base)));
}

/* This is called once we have the cpu_possible_map */
void __init xen_setup_vcpu_info_placement(void)
void xen_setup_vcpu_info_placement(void)
{
int cpu;

Expand Down
46 changes: 46 additions & 0 deletions arch/x86/xen/mmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -560,6 +560,29 @@ void xen_pgd_pin(pgd_t *pgd)
xen_mc_issue(0);
}

/*
 * Before a save, every pagetable has to be pinned so that the mfns it
 * holds get turned into pfns.  Walk pgd_list under pgd_lock and pin
 * each pgd that is not pinned yet (an unpinned pgd is not currently in
 * use, probably because its process is under construction or
 * destruction).  Every pgd pinned here is also marked SavePinned.
 */
void xen_mm_pin_all(void)
{
	struct page *pg;
	unsigned long irqflags;

	spin_lock_irqsave(&pgd_lock, irqflags);

	list_for_each_entry(pg, &pgd_list, lru) {
		/* Already pinned: leave this pgd alone. */
		if (PagePinned(pg))
			continue;

		xen_pgd_pin((pgd_t *)page_address(pg));
		SetPageSavePinned(pg);
	}

	spin_unlock_irqrestore(&pgd_lock, irqflags);
}

/* The init_mm pagetable is really pinned as soon as its created, but
that's before we have page structures to store the bits. So do all
the book-keeping now. */
Expand Down Expand Up @@ -617,6 +640,29 @@ static void xen_pgd_unpin(pgd_t *pgd)
xen_mc_issue(0);
}

/*
 * On resume, undo any pinning done at save, so that the rest of the
 * kernel doesn't see any unexpected pinned pagetables.
 *
 * Walks pgd_list under pgd_lock; every pgd marked SavePinned is
 * unpinned and has its SavePinned mark cleared.  A SavePinned pgd
 * that is not also Pinned is treated as a fatal inconsistency (BUG).
 */
void xen_mm_unpin_all(void)
{
	unsigned long flags;
	struct page *page;

	spin_lock_irqsave(&pgd_lock, flags);

	list_for_each_entry(page, &pgd_list, lru) {
		if (PageSavePinned(page)) {
			BUG_ON(!PagePinned(page));
			/*
			 * Diagnostic only: give it an explicit debug level
			 * so it is not emitted at the default log level for
			 * every pgd unpinned on resume.
			 */
			printk(KERN_DEBUG "unpinning pinned %p\n",
			       page_address(page));
			xen_pgd_unpin((pgd_t *)page_address(page));
			ClearPageSavePinned(page);
		}
	}

	spin_unlock_irqrestore(&pgd_lock, flags);
}

void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
{
spin_lock(&next->page_table_lock);
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/xen/smp.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
#include "xen-ops.h"
#include "mmu.h"

static cpumask_t xen_cpu_initialized_map;
cpumask_t xen_cpu_initialized_map;
static DEFINE_PER_CPU(int, resched_irq) = -1;
static DEFINE_PER_CPU(int, callfunc_irq) = -1;
static DEFINE_PER_CPU(int, debug_irq) = -1;
Expand Down
42 changes: 42 additions & 0 deletions arch/x86/xen/suspend.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#include <linux/types.h>

#include <xen/interface/xen.h>
#include <xen/grant_table.h>
#include <xen/events.h>

#include <asm/xen/hypercall.h>
#include <asm/xen/page.h>

#include "xen-ops.h"
#include "mmu.h"

/*
 * Prepare Xen-specific state for suspend.
 *
 * Converts the xenstore and console frame numbers held in
 * xen_start_info from mfns to pfns, points HYPERVISOR_shared_info at
 * xen_dummy_shared_info and replaces the FIX_PARAVIRT_BOOTMAP mapping
 * with an empty pte.  Interrupts must be disabled on entry; both that
 * check and a failing hypercall end in BUG().
 */
void xen_pre_suspend(void)
{
	unsigned long bootmap_va;

	/* Canonicalize: machine frame numbers become pseudo-physical ones. */
	xen_start_info->console.domU.mfn =
		mfn_to_pfn(xen_start_info->console.domU.mfn);
	xen_start_info->store_mfn = mfn_to_pfn(xen_start_info->store_mfn);

	BUG_ON(!irqs_disabled());

	/* Switch to the dummy shared info, then drop the real mapping. */
	HYPERVISOR_shared_info = &xen_dummy_shared_info;

	bootmap_va = fix_to_virt(FIX_PARAVIRT_BOOTMAP);
	if (HYPERVISOR_update_va_mapping(bootmap_va, __pte_ma(0), 0) != 0)
		BUG();
}

/*
 * Fix up Xen-specific state after the suspend hypercall has returned.
 *
 * @suspend_cancelled: non-zero when the suspend was cancelled.  In
 *	that case the xenstore and console frame numbers in
 *	xen_start_info are converted back from pfns to mfns.  When it
 *	is zero and CONFIG_SMP is set, xen_cpu_initialized_map is reset
 *	to cpu_online_map instead.
 *
 * In both cases xen_setup_shared_info() is called last.
 */
void xen_post_suspend(int suspend_cancelled)
{
	if (!suspend_cancelled) {
#ifdef CONFIG_SMP
		xen_cpu_initialized_map = cpu_online_map;
#endif
	} else {
		/* Undo the mfn -> pfn conversion done before suspending. */
		xen_start_info->store_mfn =
			pfn_to_mfn(xen_start_info->store_mfn);
		xen_start_info->console.domU.mfn =
			pfn_to_mfn(xen_start_info->console.domU.mfn);
	}

	xen_setup_shared_info();
}

8 changes: 8 additions & 0 deletions arch/x86/xen/time.c
Original file line number Diff line number Diff line change
Expand Up @@ -572,6 +572,14 @@ void xen_setup_cpu_clockevents(void)
clockevents_register_device(&__get_cpu_var(xen_clock_events));
}

/*
 * Currently a no-op: the body is empty.
 * NOTE(review): presumably a placeholder for time-related work that has
 * to happen on suspend -- confirm against the callers.
 */
void xen_time_suspend(void)
{
}

/*
 * Currently a no-op: the body is empty.
 * NOTE(review): presumably a placeholder for time-related work that has
 * to happen on resume -- confirm against the callers.
 */
void xen_time_resume(void)
{
}

__init void xen_time_init(void)
{
int cpu = smp_processor_id();
Expand Down
4 changes: 4 additions & 0 deletions arch/x86/xen/xen-ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
extern const char xen_hypervisor_callback[];
extern const char xen_failsafe_callback[];

struct trap_info;
void xen_copy_trap_info(struct trap_info *traps);

DECLARE_PER_CPU(unsigned long, xen_cr3);
Expand All @@ -19,6 +20,7 @@ extern struct shared_info xen_dummy_shared_info;
extern struct shared_info *HYPERVISOR_shared_info;

void xen_setup_mfn_list_list(void);
void xen_setup_shared_info(void);

char * __init xen_memory_setup(void);
void __init xen_arch_setup(void);
Expand Down Expand Up @@ -59,6 +61,8 @@ int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info,
int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
void *info, int wait);

extern cpumask_t xen_cpu_initialized_map;


/* Declare an asm function, along with symbols needed to make it
inlineable */
Expand Down
83 changes: 83 additions & 0 deletions drivers/xen/events.c
Original file line number Diff line number Diff line change
Expand Up @@ -674,6 +674,89 @@ static int retrigger_dynirq(unsigned int irq)
return ret;
}

static void restore_cpu_virqs(unsigned int cpu)
{
struct evtchn_bind_virq bind_virq;
int virq, irq, evtchn;

for (virq = 0; virq < NR_VIRQS; virq++) {
if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
continue;

BUG_ON(irq_info[irq].type != IRQT_VIRQ);
BUG_ON(irq_info[irq].index != virq);

/* Get a new binding from Xen. */
bind_virq.virq = virq;
bind_virq.vcpu = cpu;
if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
&bind_virq) != 0)
BUG();
evtchn = bind_virq.port;

/* Record the new mapping. */
evtchn_to_irq[evtchn] = irq;
irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
bind_evtchn_to_cpu(evtchn, cpu);

/* Ready for use. */
unmask_evtchn(evtchn);
}
}

static void restore_cpu_ipis(unsigned int cpu)
{
struct evtchn_bind_ipi bind_ipi;
int ipi, irq, evtchn;

for (ipi = 0; ipi < XEN_NR_IPIS; ipi++) {
if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
continue;

BUG_ON(irq_info[irq].type != IRQT_IPI);
BUG_ON(irq_info[irq].index != ipi);

/* Get a new binding from Xen. */
bind_ipi.vcpu = cpu;
if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
&bind_ipi) != 0)
BUG();
evtchn = bind_ipi.port;

/* Record the new mapping. */
evtchn_to_irq[evtchn] = irq;
irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
bind_evtchn_to_cpu(evtchn, cpu);

/* Ready for use. */
unmask_evtchn(evtchn);

}
}

/*
 * Rebuild the irq <-> event-channel state.
 *
 * Re-initializes the event-channel cpu bindings, masks every event
 * channel, clears the event channel stored for every irq, resets
 * evtchn_to_irq to -1 throughout, and then restores the VIRQ and IPI
 * bindings of each possible cpu.
 */
void xen_irq_resume(void)
{
	unsigned int cpu, irq, port;

	init_evtchn_cpu_bindings();

	/* The new event-channel space is not 'live' yet: mask it all. */
	for (port = 0; port < NR_EVENT_CHANNELS; port++) {
		mask_evtchn(port);
	}

	/* Forget the event channel each irq was bound to. */
	for (irq = 0; irq < NR_IRQS; irq++) {
		irq_info[irq].evtchn = 0;
	}

	/* ... and the irq each event channel mapped to. */
	for (port = 0; port < NR_EVENT_CHANNELS; port++) {
		evtchn_to_irq[port] = -1;
	}

	for_each_possible_cpu(cpu) {
		restore_cpu_virqs(cpu);
		restore_cpu_ipis(cpu);
	}
}

static struct irq_chip xen_dynamic_chip __read_mostly = {
.name = "xen-dyn",
.mask = disable_dynirq,
Expand Down
4 changes: 2 additions & 2 deletions drivers/xen/grant-table.c
Original file line number Diff line number Diff line change
Expand Up @@ -471,14 +471,14 @@ static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
return 0;
}

static int gnttab_resume(void)
/*
 * Map the grant-table frames again.
 *
 * Returns -ENOSYS when nr_grant_frames exceeds max_nr_grant_frames();
 * otherwise returns the result of mapping frames 0 through
 * nr_grant_frames - 1 with gnttab_map().
 */
int gnttab_resume(void)
{
	if (nr_grant_frames > max_nr_grant_frames())
		return -ENOSYS;

	return gnttab_map(0, nr_grant_frames - 1);
}

static int gnttab_suspend(void)
int gnttab_suspend(void)
{
arch_gnttab_unmap_shared(shared, nr_grant_frames);
return 0;
Expand Down
Loading

0 comments on commit 0e91398

Please sign in to comment.