Skip to content

Commit

Permalink
Merge remote-tracking branch 'remotes/juanquintela/tags/migration/201…
Browse files Browse the repository at this point in the history
…71023' into staging

migration/next for 20171023

# gpg: Signature made Mon 23 Oct 2017 17:05:14 BST
# gpg:                using RSA key 0xF487EF185872D723
# gpg: Good signature from "Juan Quintela <[email protected]>"
# gpg:                 aka "Juan Quintela <[email protected]>"
# Primary key fingerprint: 1899 FF8E DEBF 58CC EE03  4B82 F487 EF18 5872 D723

* remotes/juanquintela/tags/migration/20171023: (21 commits)
  migration: Improve migration thread error handling
  qapi: Fix grammar in x-multifd-page-count descriptions
  migration: add bitmap for received page
  migration: introduce qemu_ufd_copy_ioctl helper
  migration: postcopy_place_page factoring out
  migration: new ram_init_bitmaps()
  migration: clean up xbzrle cache init/destroy
  migration: provide ram_state_cleanup
  migration: provide ram_state_init()
  migration: pause-before-switchover for postcopy
  migration: allow cancel to unpause
  migrate: HMP migate_continue
  migration: migrate-continue
  migration: Wait for semaphore before completing migration
  migration: Add 'pre-switchover' and 'device' statuses
  migration: Add 'pause-before-switchover' capability
  migration: Make cache_init() take an error parameter
  migration: Move xbzrle cache resize error handling to xbzrle_cache_resize
  migration: Make cache size elements use the right types
  migratiom: Remove max_item_age parameter
  ...

Signed-off-by: Peter Maydell <[email protected]>
  • Loading branch information
pm215 committed Oct 25, 2017
2 parents 328f6f7 + 87db1a7 commit 4e1b31d
Show file tree
Hide file tree
Showing 14 changed files with 431 additions and 138 deletions.
12 changes: 12 additions & 0 deletions hmp-commands.hx
Original file line number Diff line number Diff line change
Expand Up @@ -959,7 +959,19 @@ STEXI
@item migrate_cancel
@findex migrate_cancel
Cancel the current VM migration.
ETEXI

{
.name = "migrate_continue",
.args_type = "state:s",
.params = "state",
.help = "Continue migration from the given paused state",
.cmd = hmp_migrate_continue,
},
STEXI
@item migrate_continue @var{state}
@findex migrate_continue
Continue migration from the paused state @var{state}
ETEXI

{
Expand Down
13 changes: 13 additions & 0 deletions hmp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1495,6 +1495,19 @@ void hmp_migrate_cancel(Monitor *mon, const QDict *qdict)
qmp_migrate_cancel(NULL);
}

/*
 * HMP handler for the "migrate_continue" command.
 *
 * Fetches the "state" string argument from @qdict, looks it up in
 * MigrationStatus_lookup and, when the lookup yields a non-negative
 * value, passes that value on to qmp_migrate_continue().  Whatever error
 * either step stored in the local Error object is then handed to
 * hmp_handle_error() together with @mon.
 */
void hmp_migrate_continue(Monitor *mon, const QDict *qdict)
{
    Error *local_err = NULL;
    const char *state_name = qdict_get_str(qdict, "state");
    int status = qapi_enum_parse(&MigrationStatus_lookup, state_name, -1,
                                 &local_err);

    if (status < 0) {
        /* Lookup did not produce a usable state: only report the error. */
        hmp_handle_error(mon, &local_err);
        return;
    }

    qmp_migrate_continue(status, &local_err);
    hmp_handle_error(mon, &local_err);
}

void hmp_migrate_incoming(Monitor *mon, const QDict *qdict)
{
Error *err = NULL;
Expand Down
1 change: 1 addition & 0 deletions hmp.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ void hmp_savevm(Monitor *mon, const QDict *qdict);
void hmp_delvm(Monitor *mon, const QDict *qdict);
void hmp_info_snapshots(Monitor *mon, const QDict *qdict);
void hmp_migrate_cancel(Monitor *mon, const QDict *qdict);
void hmp_migrate_continue(Monitor *mon, const QDict *qdict);
void hmp_migrate_incoming(Monitor *mon, const QDict *qdict);
void hmp_migrate_set_downtime(Monitor *mon, const QDict *qdict);
void hmp_migrate_set_speed(Monitor *mon, const QDict *qdict);
Expand Down
10 changes: 10 additions & 0 deletions include/exec/ram_addr.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ struct RAMBlock {
* of the postcopy phase
*/
unsigned long *unsentmap;
/* bitmap of already received pages in postcopy */
unsigned long *receivedmap;
};

static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
Expand All @@ -60,6 +62,14 @@ static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
return (char *)block->host + offset;
}

/*
 * Translate a host address into a page index relative to the start of
 * a RAMBlock.
 *
 * @host_addr: host address to translate
 * @rb: RAMBlock whose ->host pointer is used as the base
 *
 * Returns the byte distance between @host_addr and rb->host, shifted
 * right by TARGET_PAGE_BITS.
 *
 * NOTE(review): no range check is done here; callers presumably pass an
 * address that lies inside @rb -- confirm at the call sites.
 */
static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
                                                            RAMBlock *rb)
{
    /*
     * Subtract as integers: arithmetic on "void *" is a GNU extension and
     * not valid ISO C.  The resulting value is the same.
     */
    uint64_t host_addr_offset =
        (uint64_t)((uintptr_t)host_addr - (uintptr_t)rb->host);

    return host_addr_offset >> TARGET_PAGE_BITS;
}

long qemu_getrampagesize(void);
unsigned long last_ram_page(void);
RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
Expand Down
129 changes: 108 additions & 21 deletions migration/migration.c
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,9 @@ enum mig_rp_message_type {
static MigrationState *current_migration;

static bool migration_object_check(MigrationState *ms, Error **errp);
static int migration_maybe_pause(MigrationState *s,
int *current_active_state,
int new_state);

void migration_object_init(void)
{
Expand Down Expand Up @@ -526,6 +529,8 @@ static bool migration_is_setup_or_active(int state)
case MIGRATION_STATUS_ACTIVE:
case MIGRATION_STATUS_POSTCOPY_ACTIVE:
case MIGRATION_STATUS_SETUP:
case MIGRATION_STATUS_PRE_SWITCHOVER:
case MIGRATION_STATUS_DEVICE:
return true;

default:
Expand Down Expand Up @@ -600,6 +605,8 @@ MigrationInfo *qmp_query_migrate(Error **errp)
case MIGRATION_STATUS_ACTIVE:
case MIGRATION_STATUS_CANCELLING:
case MIGRATION_STATUS_POSTCOPY_ACTIVE:
case MIGRATION_STATUS_PRE_SWITCHOVER:
case MIGRATION_STATUS_DEVICE:
/* TODO add some postcopy stats */
info->has_status = true;
info->has_total_time = true;
Expand Down Expand Up @@ -865,6 +872,12 @@ static void migrate_params_test_apply(MigrateSetParameters *params,
if (params->has_block_incremental) {
dest->block_incremental = params->block_incremental;
}
if (params->has_x_multifd_channels) {
dest->x_multifd_channels = params->x_multifd_channels;
}
if (params->has_x_multifd_page_count) {
dest->x_multifd_page_count = params->x_multifd_page_count;
}
}

static void migrate_params_apply(MigrateSetParameters *params)
Expand Down Expand Up @@ -1071,19 +1084,30 @@ static void migrate_fd_cleanup(void *opaque)
MIGRATION_STATUS_CANCELLED);
}

if (s->error) {
/* It is used on info migrate. We can't free it */
error_report_err(error_copy(s->error));
}
notifier_list_notify(&migration_state_notifiers, s);
block_cleanup_parameters(s);
}

/*
 * Record @error on the migration state @s, unless an error has already
 * been recorded.
 *
 * A copy of @error (made with error_copy()) is stored in s->error; the
 * check and the store are both performed while holding s->error_mutex.
 */
void migrate_set_error(MigrationState *s, const Error *error)
{
    qemu_mutex_lock(&s->error_mutex);

    /* Keep the first error: later ones are dropped. */
    if (s->error == NULL) {
        s->error = error_copy(error);
    }

    qemu_mutex_unlock(&s->error_mutex);
}

/*
 * Mark a migration that is still in MIGRATION_STATUS_SETUP as failed.
 *
 * @s: migration state; s->to_dst_file must be NULL (asserted)
 * @error: the error to trace and record
 *
 * Requests the SETUP -> FAILED state change, records @error, notifies
 * the migration state notifiers and calls block_cleanup_parameters().
 */
void migrate_fd_error(MigrationState *s, const Error *error)
{
    trace_migrate_fd_error(error_get_pretty(error));
    assert(s->to_dst_file == NULL);
    migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
                      MIGRATION_STATUS_FAILED);
    /*
     * Store the error only through migrate_set_error(), which does the
     * "first error wins" check under s->error_mutex.  Do not also assign
     * s->error directly here: that would bypass the mutex.
     */
    migrate_set_error(s, error);
    notifier_list_notify(&migration_state_notifiers, s);
    block_cleanup_parameters(s);
}
Expand All @@ -1104,6 +1128,10 @@ static void migrate_fd_cancel(MigrationState *s)
if (!migration_is_setup_or_active(old_state)) {
break;
}
/* If the migration is paused, kick it out of the pause */
if (old_state == MIGRATION_STATUS_PRE_SWITCHOVER) {
qemu_sem_post(&s->pause_sem);
}
migrate_set_state(&s->state, old_state, MIGRATION_STATUS_CANCELLING);
} while (s->state != MIGRATION_STATUS_CANCELLING);

Expand Down Expand Up @@ -1183,6 +1211,8 @@ bool migration_is_idle(void)
case MIGRATION_STATUS_ACTIVE:
case MIGRATION_STATUS_POSTCOPY_ACTIVE:
case MIGRATION_STATUS_COLO:
case MIGRATION_STATUS_PRE_SWITCHOVER:
case MIGRATION_STATUS_DEVICE:
return false;
case MIGRATION_STATUS__MAX:
g_assert_not_reached();
Expand Down Expand Up @@ -1362,29 +1392,24 @@ void qmp_migrate_cancel(Error **errp)
migrate_fd_cancel(migrate_get_current());
}

void qmp_migrate_set_cache_size(int64_t value, Error **errp)
void qmp_migrate_continue(MigrationStatus state, Error **errp)
{
MigrationState *s = migrate_get_current();
int64_t new_size;

/* Check for truncation */
if (value != (size_t)value) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
"exceeding address space");
if (s->state != state) {
error_setg(errp, "Migration not in expected state: %s",
MigrationStatus_str(s->state));
return;
}
qemu_sem_post(&s->pause_sem);
}

/* Cache should not be larger than guest ram size */
if (value > ram_bytes_total()) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
"exceeds guest ram size ");
return;
}
void qmp_migrate_set_cache_size(int64_t value, Error **errp)
{
MigrationState *s = migrate_get_current();
int64_t new_size;

new_size = xbzrle_cache_resize(value);
new_size = xbzrle_cache_resize(value, errp);
if (new_size < 0) {
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "cache size",
"is smaller than page size");
return;
}

Expand Down Expand Up @@ -1521,6 +1546,16 @@ bool migrate_use_multifd(void)
return s->enabled_capabilities[MIGRATION_CAPABILITY_X_MULTIFD];
}

/*
 * Report whether the pause-before-switchover capability is set on the
 * current migration state (as returned by migrate_get_current()).
 */
bool migrate_pause_before_switchover(void)
{
    MigrationState *ms = migrate_get_current();

    return ms->enabled_capabilities[
        MIGRATION_CAPABILITY_PAUSE_BEFORE_SWITCHOVER];
}

int migrate_multifd_channels(void)
{
MigrationState *s;
Expand Down Expand Up @@ -1799,8 +1834,11 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running)
QEMUFile *fb;
int64_t time_at_stop = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
bool restart_block = false;
migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_POSTCOPY_ACTIVE);
int cur_state = MIGRATION_STATUS_ACTIVE;
if (!migrate_pause_before_switchover()) {
migrate_set_state(&ms->state, MIGRATION_STATUS_ACTIVE,
MIGRATION_STATUS_POSTCOPY_ACTIVE);
}

trace_postcopy_start();
qemu_mutex_lock_iothread();
Expand All @@ -1814,6 +1852,12 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running)
goto fail;
}

ret = migration_maybe_pause(ms, &cur_state,
MIGRATION_STATUS_POSTCOPY_ACTIVE);
if (ret < 0) {
goto fail;
}

ret = bdrv_inactivate_all();
if (ret < 0) {
goto fail;
Expand Down Expand Up @@ -1951,6 +1995,41 @@ static int postcopy_start(MigrationState *ms, bool *old_vm_running)
return -1;
}

/**
 * migration_maybe_pause: Pause the migration before switchover when
 * migrate_pause_before_switchover() reports that this is required.
 *
 * Must be called with the iothread lock held.  The lock is released
 * while waiting on s->pause_sem and taken again before returning.
 *
 * @s: migration state
 * @current_active_state: in: the state to leave when entering
 *     MIGRATION_STATUS_PRE_SWITCHOVER; out: set to @new_state once the
 *     wait has finished (whether or not the final state change took
 *     effect)
 * @new_state: state to request after the pause has been released
 *
 * Returns: 0 if no pause was required, or if s->state equals @new_state
 * after the pause; -EINVAL otherwise.
 */
static int migration_maybe_pause(MigrationState *s,
int *current_active_state,
int new_state)
{
/* Capability not enabled: nothing to do, report success. */
if (!migrate_pause_before_switchover()) {
return 0;
}

/* Since leaving this state is not atomic with posting the semaphore
 * it's possible that someone could have issued multiple migrate_continue
 * and the semaphore is incorrectly positive at this point;
 * the docs say it's undefined to reinit a semaphore that's already
 * init'd, so use timedwait to eat up any existing posts.
 */
while (qemu_sem_timedwait(&s->pause_sem, 1) == 0) {
/* This block intentionally left blank */
}

/* Drop the iothread lock for the duration of the wait on pause_sem. */
qemu_mutex_unlock_iothread();
migrate_set_state(&s->state, *current_active_state,
MIGRATION_STATUS_PRE_SWITCHOVER);
/* Block here until something posts pause_sem. */
qemu_sem_wait(&s->pause_sem);
migrate_set_state(&s->state, MIGRATION_STATUS_PRE_SWITCHOVER,
new_state);
*current_active_state = new_state;
qemu_mutex_lock_iothread();

/*
 * NOTE(review): s->state can presumably differ from new_state here if
 * the state was changed by someone else while we were paused (e.g. a
 * cancel that posted pause_sem) -- confirm against migrate_set_state().
 */
return s->state == new_state ? 0 : -EINVAL;
}

/**
* migration_completion: Used by migration_thread when there's not much left.
* The caller 'breaks' the loop when this returns.
Expand All @@ -1976,6 +2055,10 @@ static void migration_completion(MigrationState *s, int current_active_state,
if (!ret) {
bool inactivate = !migrate_colo_enabled();
ret = vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
if (ret >= 0) {
ret = migration_maybe_pause(s, &current_active_state,
MIGRATION_STATUS_DEVICE);
}
if (ret >= 0) {
qemu_file_set_rate_limit(s->to_dst_file, INT64_MAX);
ret = qemu_savevm_state_complete_precopy(s->to_dst_file, false,
Expand Down Expand Up @@ -2355,8 +2438,10 @@ static void migration_instance_finalize(Object *obj)
MigrationState *ms = MIGRATION_OBJ(obj);
MigrationParameters *params = &ms->parameters;

qemu_mutex_destroy(&ms->error_mutex);
g_free(params->tls_hostname);
g_free(params->tls_creds);
qemu_sem_destroy(&ms->pause_sem);
}

static void migration_instance_init(Object *obj)
Expand All @@ -2367,6 +2452,8 @@ static void migration_instance_init(Object *obj)
ms->state = MIGRATION_STATUS_NONE;
ms->xbzrle_cache_size = DEFAULT_MIGRATE_CACHE_SIZE;
ms->mbps = -1;
qemu_sem_init(&ms->pause_sem, 0);
qemu_mutex_init(&ms->error_mutex);

params->tls_hostname = g_strdup("");
params->tls_creds = g_strdup("");
Expand Down
11 changes: 10 additions & 1 deletion migration/migration.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,9 @@ struct MigrationState
/* Flag set once the migration thread called bdrv_inactivate_all */
bool block_inactive;

/* Migration is paused due to pause-before-switchover */
QemuSemaphore pause_sem;

/* The semaphore is used to notify COLO thread that failover is finished */
QemuSemaphore colo_exit_sem;

Expand All @@ -129,8 +132,12 @@ struct MigrationState
int64_t colo_checkpoint_time;
QEMUTimer *colo_delay_timer;

/* The last error that occurred */
/* The first error that has occurred.
We use the mutex to be able to return the 1st error message */
Error *error;
/* mutex to protect error */
QemuMutex error_mutex;

/* Do we have to clean up -b/-i from old migrate parameters */
/* This feature is deprecated and will be removed */
bool must_remove_block_options;
Expand Down Expand Up @@ -159,6 +166,7 @@ bool migration_has_all_channels(void);

uint64_t migrate_max_downtime(void);

void migrate_set_error(MigrationState *s, const Error *error);
void migrate_fd_error(MigrationState *s, const Error *error);

void migrate_fd_connect(MigrationState *s);
Expand All @@ -177,6 +185,7 @@ bool migrate_zero_blocks(void);

bool migrate_auto_converge(void);
bool migrate_use_multifd(void);
bool migrate_pause_before_switchover(void);
int migrate_multifd_channels(void);
int migrate_multifd_page_count(void);

Expand Down
Loading

0 comments on commit 4e1b31d

Please sign in to comment.