Skip to content

Commit

Permalink
block/mirror: change the semantic of 'force' of block-job-cancel
Browse files Browse the repository at this point in the history
When doing drive mirror to a low speed shared storage, if there was heavy
BLK IO write workload in VM after the 'ready' event, drive mirror block job
can't be canceled immediately, it would keep running until the heavy BLK IO
workload stopped in the VM.

Libvirt depends on the current block-job-cancel semantics, which is that
when used without a flag after the 'ready' event, the command blocks
until data is in sync.  However, these semantics are awkward in other
situations, for example, people may use drive mirror for realtime
backups while still wanting to use block live migration.  Libvirt cannot
start a block live migration while another drive mirror is in progress,
but the user would rather abandon the backup attempt as broken and
proceed with the live migration than be stuck waiting for the current
drive mirror backup to finish.

The drive-mirror command already includes a 'force' flag, which libvirt
does not use, although it documented the flag as only being useful to
quit a job which is paused.  However, since quitting a paused job has
the same effect as abandoning a backup in a non-paused job (namely, the
destination file is not in sync, and the command completes immediately),
we can just improve the documentation to make the force flag obviously
useful.

Cc: Paolo Bonzini <[email protected]>
Cc: Jeff Cody <[email protected]>
Cc: Kevin Wolf <[email protected]>
Cc: Max Reitz <[email protected]>
Cc: Eric Blake <[email protected]>
Cc: John Snow <[email protected]>
Reported-by: Huaitong Han <[email protected]>
Signed-off-by: Huaitong Han <[email protected]>
Signed-off-by: Liang Li <[email protected]>
Signed-off-by: Jeff Cody <[email protected]>
Signed-off-by: Kevin Wolf <[email protected]>
  • Loading branch information
Liang Li authored and kevmw committed Mar 19, 2018
1 parent 1cfeaf3 commit b76e445
Show file tree
Hide file tree
Showing 7 changed files with 34 additions and 24 deletions.
10 changes: 4 additions & 6 deletions block/mirror.c
Original file line number Diff line number Diff line change
Expand Up @@ -869,11 +869,8 @@ static void coroutine_fn mirror_run(void *opaque)

ret = 0;
trace_mirror_before_sleep(s, cnt, s->synced, delay_ns);
if (!s->synced) {
block_job_sleep_ns(&s->common, delay_ns);
if (block_job_is_cancelled(&s->common)) {
break;
}
if (block_job_is_cancelled(&s->common) && s->common.force) {
break;
} else if (!should_complete) {
delay_ns = (s->in_flight == 0 && cnt == 0 ? SLICE_TIME : 0);
block_job_sleep_ns(&s->common, delay_ns);
Expand All @@ -887,7 +884,8 @@ static void coroutine_fn mirror_run(void *opaque)
* or it was cancelled prematurely so that we do not guarantee that
* the target is a copy of the source.
*/
assert(ret < 0 || (!s->synced && block_job_is_cancelled(&s->common)));
assert(ret < 0 || ((s->common.force || !s->synced) &&
block_job_is_cancelled(&s->common)));
assert(need_drain);
mirror_wait_for_all_io(s);
}
Expand Down
4 changes: 2 additions & 2 deletions blockdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ void blockdev_mark_auto_del(BlockBackend *blk)
aio_context_acquire(aio_context);

if (bs->job) {
block_job_cancel(bs->job);
block_job_cancel(bs->job, false);
}

aio_context_release(aio_context);
Expand Down Expand Up @@ -3850,7 +3850,7 @@ void qmp_block_job_cancel(const char *device,
}

trace_qmp_block_job_cancel(job);
block_job_user_cancel(job, errp);
block_job_user_cancel(job, force, errp);
out:
aio_context_release(aio_context);
}
Expand Down
16 changes: 9 additions & 7 deletions blockjob.c
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,7 @@ static int block_job_finalize_single(BlockJob *job)
return 0;
}

static void block_job_cancel_async(BlockJob *job)
static void block_job_cancel_async(BlockJob *job, bool force)
{
if (job->iostatus != BLOCK_DEVICE_IO_STATUS_OK) {
block_job_iostatus_reset(job);
Expand All @@ -498,6 +498,8 @@ static void block_job_cancel_async(BlockJob *job)
job->pause_count--;
}
job->cancelled = true;
/* To prevent 'force == false' overriding a previous 'force == true' */
job->force |= force;
}

static int block_job_txn_apply(BlockJobTxn *txn, int fn(BlockJob *), bool lock)
Expand Down Expand Up @@ -581,7 +583,7 @@ static void block_job_completed_txn_abort(BlockJob *job)
* on the caller, so leave it. */
QLIST_FOREACH(other_job, &txn->jobs, txn_list) {
if (other_job != job) {
block_job_cancel_async(other_job);
block_job_cancel_async(other_job, false);
}
}
while (!QLIST_EMPTY(&txn->jobs)) {
Expand Down Expand Up @@ -747,13 +749,13 @@ void block_job_user_resume(BlockJob *job, Error **errp)
block_job_resume(job);
}

void block_job_cancel(BlockJob *job)
void block_job_cancel(BlockJob *job, bool force)
{
if (job->status == BLOCK_JOB_STATUS_CONCLUDED) {
block_job_do_dismiss(job);
return;
}
block_job_cancel_async(job);
block_job_cancel_async(job, force);
if (!block_job_started(job)) {
block_job_completed(job, -ECANCELED);
} else if (job->deferred_to_main_loop) {
Expand All @@ -763,20 +765,20 @@ void block_job_cancel(BlockJob *job)
}
}

void block_job_user_cancel(BlockJob *job, Error **errp)
void block_job_user_cancel(BlockJob *job, bool force, Error **errp)
{
if (block_job_apply_verb(job, BLOCK_JOB_VERB_CANCEL, errp)) {
return;
}
block_job_cancel(job);
block_job_cancel(job, force);
}

/* A wrapper around block_job_cancel() taking an Error ** parameter so it may be
* used with block_job_finish_sync() without the need for (rather nasty)
* function pointer casts there. */
static void block_job_cancel_err(BlockJob *job, Error **errp)
{
block_job_cancel(job);
block_job_cancel(job, false);
}

int block_job_cancel_sync(BlockJob *job)
Expand Down
3 changes: 2 additions & 1 deletion hmp-commands.hx
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@ ETEXI
.args_type = "force:-f,device:B",
.params = "[-f] device",
.help = "stop an active background block operation (use -f"
"\n\t\t\t if the operation is currently paused)",
"\n\t\t\t if you want to abort the operation immediately"
"\n\t\t\t instead of keep running until data is in sync)",
.cmd = hmp_block_job_cancel,
},

Expand Down
12 changes: 10 additions & 2 deletions include/block/blockjob.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,12 @@ typedef struct BlockJob {
*/
bool cancelled;

/**
* Set to true if the job should abort immediately without waiting
* for data to be in sync.
*/
bool force;

/**
* Counter for pause request. If non-zero, the block job is either paused,
* or if busy == true will pause itself as soon as possible.
Expand Down Expand Up @@ -230,10 +236,11 @@ void block_job_start(BlockJob *job);
/**
* block_job_cancel:
* @job: The job to be canceled.
* @force: Quit a job without waiting for data to be in sync.
*
* Asynchronously cancel the specified job.
*/
void block_job_cancel(BlockJob *job);
void block_job_cancel(BlockJob *job, bool force);

/**
* block_job_complete:
Expand Down Expand Up @@ -307,11 +314,12 @@ void block_job_user_resume(BlockJob *job, Error **errp);
/**
* block_job_user_cancel:
* @job: The job to be cancelled.
* @force: Quit a job without waiting for data to be in sync.
*
* Cancels the specified job, but may refuse to do so if the
* operation isn't currently meaningful.
*/
void block_job_user_cancel(BlockJob *job, Error **errp);
void block_job_user_cancel(BlockJob *job, bool force, Error **errp);

/**
* block_job_cancel_sync:
Expand Down
5 changes: 3 additions & 2 deletions qapi/block-core.json
Original file line number Diff line number Diff line change
Expand Up @@ -2207,8 +2207,9 @@
# the name of the parameter), but since QEMU 2.7 it can have
# other values.
#
# @force: whether to allow cancellation of a paused job (default
# false). Since 1.3.
# @force: If true, and the job has already emitted the event BLOCK_JOB_READY,
# abandon the job immediately (even if it is paused) instead of waiting
# for the destination to complete its final synchronization (since 1.3)
#
# Returns: Nothing on success
# If no background operation is active on this device, DeviceNotActive
Expand Down
8 changes: 4 additions & 4 deletions tests/test-blockjob-txn.c
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ static void test_single_job(int expected)
block_job_start(job);

if (expected == -ECANCELED) {
block_job_cancel(job);
block_job_cancel(job, false);
}

while (result == -EINPROGRESS) {
Expand Down Expand Up @@ -170,10 +170,10 @@ static void test_pair_jobs(int expected1, int expected2)
block_job_txn_unref(txn);

if (expected1 == -ECANCELED) {
block_job_cancel(job1);
block_job_cancel(job1, false);
}
if (expected2 == -ECANCELED) {
block_job_cancel(job2);
block_job_cancel(job2, false);
}

while (result1 == -EINPROGRESS || result2 == -EINPROGRESS) {
Expand Down Expand Up @@ -226,7 +226,7 @@ static void test_pair_jobs_fail_cancel_race(void)
block_job_start(job1);
block_job_start(job2);

block_job_cancel(job1);
block_job_cancel(job1, false);

/* Now make job2 finish before the main loop kicks jobs. This simulates
* the race between a pending kick and another job completing.
Expand Down

0 comments on commit b76e445

Please sign in to comment.