Skip to content

Commit

Permalink
MFV r286682: 5765 add support for estimating send stream size with
Browse files Browse the repository at this point in the history
lzc_send_space when source is a bookmark

Reviewed by: Matthew Ahrens <[email protected]>
Reviewed by: Christopher Siden <[email protected]>
Reviewed by: Steven Hartland <[email protected]>
Reviewed by: Bayard Bell <[email protected]>
Approved by: Albert Lee <[email protected]>
Author: Max Grossman <[email protected]>

illumos/illumos-gate@643da46
  • Loading branch information
amotin committed Aug 12, 2015
1 parent 1bf9368 commit 53d48e6
Show file tree
Hide file tree
Showing 5 changed files with 147 additions and 40 deletions.
22 changes: 17 additions & 5 deletions cddl/contrib/opensolaris/lib/libzfs_core/common/libzfs_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
*/

/*
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
*/

Expand Down Expand Up @@ -532,18 +532,30 @@ lzc_send(const char *snapname, const char *from, int fd,
}

/*
* If fromsnap is NULL, a full (non-incremental) stream will be estimated.
* "from" can be NULL, a snapshot, or a bookmark.
*
* If from is NULL, a full (non-incremental) stream will be estimated. This
* is calculated very efficiently.
*
* If from is a snapshot, lzc_send_space uses the deadlists attached to
* each snapshot to efficiently estimate the stream size.
*
* If from is a bookmark, the indirect blocks in the destination snapshot
* are traversed, looking for blocks with a birth time since the creation TXG of
* the snapshot this bookmark was created from. This will result in
* significantly more I/O and be less efficient than a send space estimation on
* an equivalent snapshot.
*/
int
lzc_send_space(const char *snapname, const char *fromsnap, uint64_t *spacep)
lzc_send_space(const char *snapname, const char *from, uint64_t *spacep)
{
nvlist_t *args;
nvlist_t *result;
int err;

args = fnvlist_alloc();
if (fromsnap != NULL)
fnvlist_add_string(args, "fromsnap", fromsnap);
if (from != NULL)
fnvlist_add_string(args, "from", from);
err = lzc_ioctl(ZFS_IOC_SEND_SPACE, snapname, args, &result);
nvlist_free(args);
if (err == 0)
Expand Down
106 changes: 84 additions & 22 deletions sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dmu_send.c
Original file line number Diff line number Diff line change
Expand Up @@ -856,6 +856,40 @@ dmu_send(const char *tosnap, const char *fromsnap,
return (err);
}

static int
dmu_adjust_send_estimate_for_indirects(dsl_dataset_t *ds, uint64_t size,
uint64_t *sizep)
{
int err;
/*
* Assume that space (both on-disk and in-stream) is dominated by
* data. We will adjust for indirect blocks and the copies property,
* but ignore per-object space used (eg, dnodes and DRR_OBJECT records).
*/

/*
* Subtract out approximate space used by indirect blocks.
* Assume most space is used by data blocks (non-indirect, non-dnode).
* Assume all blocks are recordsize. Assume ditto blocks and
* internal fragmentation counter out compression.
*
* Therefore, space used by indirect blocks is sizeof(blkptr_t) per
* block, which we observe in practice.
*/
uint64_t recordsize;
err = dsl_prop_get_int_ds(ds, "recordsize", &recordsize);
if (err != 0)
return (err);
size -= size / recordsize * sizeof (blkptr_t);

/* Add in the space for the record associated with each block. */
size += size / recordsize * sizeof (dmu_replay_record_t);

*sizep = size;

return (0);
}

int
dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep)
{
Expand Down Expand Up @@ -891,33 +925,61 @@ dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep)
return (err);
}

/*
* Assume that space (both on-disk and in-stream) is dominated by
* data. We will adjust for indirect blocks and the copies property,
* but ignore per-object space used (eg, dnodes and DRR_OBJECT records).
*/
err = dmu_adjust_send_estimate_for_indirects(ds, size, sizep);
return (err);
}

/*
* Simple callback used to traverse the blocks of a snapshot and sum their
* uncompressed size
*/
/* ARGSUSED */
static int
dmu_calculate_send_traversal(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
{
uint64_t *spaceptr = arg;
if (bp != NULL && !BP_IS_HOLE(bp)) {
*spaceptr += BP_GET_UCSIZE(bp);
}
return (0);
}

/*
* Given a desination snapshot and a TXG, calculate the approximate size of a
* send stream sent from that TXG. from_txg may be zero, indicating that the
* whole snapshot will be sent.
*/
int
dmu_send_estimate_from_txg(dsl_dataset_t *ds, uint64_t from_txg,
uint64_t *sizep)
{
dsl_pool_t *dp = ds->ds_dir->dd_pool;
int err;
uint64_t size = 0;

ASSERT(dsl_pool_config_held(dp));

/* tosnap must be a snapshot */
if (!dsl_dataset_is_snapshot(ds))
return (SET_ERROR(EINVAL));

/* verify that from_txg is before the provided snapshot was taken */
if (from_txg >= dsl_dataset_phys(ds)->ds_creation_txg) {
return (SET_ERROR(EXDEV));
}

/*
* Subtract out approximate space used by indirect blocks.
* Assume most space is used by data blocks (non-indirect, non-dnode).
* Assume all blocks are recordsize. Assume ditto blocks and
* internal fragmentation counter out compression.
*
* Therefore, space used by indirect blocks is sizeof(blkptr_t) per
* block, which we observe in practice.
* traverse the blocks of the snapshot with birth times after
* from_txg, summing their uncompressed size
*/
uint64_t recordsize;
err = dsl_prop_get_int_ds(ds, "recordsize", &recordsize);
if (err != 0)
err = traverse_dataset(ds, from_txg, TRAVERSE_POST,
dmu_calculate_send_traversal, &size);
if (err)
return (err);
size -= size / recordsize * sizeof (blkptr_t);

/* Add in the space for the record associated with each block. */
size += size / recordsize * sizeof (dmu_replay_record_t);

*sizep = size;

return (0);
err = dmu_adjust_send_estimate_for_indirects(ds, size, sizep);
return (err);
}

typedef struct dmu_recv_begin_arg {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/
Expand All @@ -45,6 +45,8 @@ int dmu_send(const char *tosnap, const char *fromsnap,
#endif
int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds,
uint64_t *sizep);
int dmu_send_estimate_from_txg(struct dsl_dataset *ds, uint64_t fromtxg,
uint64_t *sizep);
int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
boolean_t embedok, boolean_t large_block_ok,
#ifdef illumos
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,9 @@ dsl_dataset_phys(dsl_dataset_t *ds)
*/
#define MAX_TAG_PREFIX_LEN 17

#define dsl_dataset_is_snapshot(ds) \
(dsl_dataset_phys(ds)->ds_num_children != 0)

#define DS_UNIQUE_IS_ACCURATE(ds) \
((dsl_dataset_phys(ds)->ds_flags & DS_FLAG_UNIQUE_ACCURATE) != 0)

Expand Down
52 changes: 40 additions & 12 deletions sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ioctl.c
Original file line number Diff line number Diff line change
Expand Up @@ -5489,7 +5489,8 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
* of bytes that will be written to the fd supplied to zfs_ioc_send_new().
*
* innvl: {
* (optional) "fromsnap" -> full snap name to send an incremental from
* (optional) "from" -> full snap or bookmark name to send an incremental
* from
* }
*
* outnvl: {
Expand All @@ -5500,7 +5501,6 @@ static int
zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
{
dsl_pool_t *dp;
dsl_dataset_t *fromsnap = NULL;
dsl_dataset_t *tosnap;
int error;
char *fromname;
Expand All @@ -5516,27 +5516,55 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
return (error);
}

error = nvlist_lookup_string(innvl, "fromsnap", &fromname);
error = nvlist_lookup_string(innvl, "from", &fromname);
if (error == 0) {
error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
if (error != 0) {
dsl_dataset_rele(tosnap, FTAG);
dsl_pool_rele(dp, FTAG);
return (error);
if (strchr(fromname, '@') != NULL) {
/*
* If from is a snapshot, hold it and use the more
* efficient dmu_send_estimate to estimate send space
* size using deadlists.
*/
dsl_dataset_t *fromsnap;
error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
if (error != 0)
goto out;
error = dmu_send_estimate(tosnap, fromsnap, &space);
dsl_dataset_rele(fromsnap, FTAG);
} else if (strchr(fromname, '#') != NULL) {
/*
* If from is a bookmark, fetch the creation TXG of the
* snapshot it was created from and use that to find
* blocks that were born after it.
*/
zfs_bookmark_phys_t frombm;

error = dsl_bookmark_lookup(dp, fromname, tosnap,
&frombm);
if (error != 0)
goto out;
error = dmu_send_estimate_from_txg(tosnap,
frombm.zbm_creation_txg, &space);
} else {
/*
* from is not properly formatted as a snapshot or
* bookmark
*/
error = SET_ERROR(EINVAL);
goto out;
}
} else {
// If estimating the size of a full send, use dmu_send_estimate
error = dmu_send_estimate(tosnap, NULL, &space);
}

error = dmu_send_estimate(tosnap, fromsnap, &space);
fnvlist_add_uint64(outnvl, "space", space);

if (fromsnap != NULL)
dsl_dataset_rele(fromsnap, FTAG);
out:
dsl_dataset_rele(tosnap, FTAG);
dsl_pool_rele(dp, FTAG);
return (error);
}


static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];

static void
Expand Down

0 comments on commit 53d48e6

Please sign in to comment.