Skip to content

Commit

Permalink
Allow to control failfast
Browse files Browse the repository at this point in the history
Linux defaults to setting "failfast" on BIOs, so that the OS will not
retry IOs that fail, and instead report the error to ZFS.

In some cases, such as errors reported by the HBA driver, not
the device itself, we would wish to retry rather than generating
vdev errors in ZFS. This new property allows that.

This introduces a per vdev option to disable the failfast option.
This also introduces a global module parameter to define the failfast
mask value.

Reviewed-by: Brian Behlendorf <[email protected]>
Co-authored-by: Allan Jude <[email protected]>
Signed-off-by: Allan Jude <[email protected]>
Signed-off-by: Mariusz Zaborski <[email protected]>
Sponsored-by: Seagate Technology LLC
Submitted-by: Klara, Inc.
Closes #14056
  • Loading branch information
oshogbo authored Nov 10, 2022
1 parent 945b407 commit 16f0fda
Show file tree
Hide file tree
Showing 9 changed files with 94 additions and 8 deletions.
10 changes: 8 additions & 2 deletions include/os/linux/kernel/linux/blkdev_compat.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,8 @@ typedef int bvec_iterator_t;
#endif

static inline void
bio_set_flags_failfast(struct block_device *bdev, int *flags)
bio_set_flags_failfast(struct block_device *bdev, int *flags, bool dev,
bool transport, bool driver)
{
#ifdef CONFIG_BUG
/*
Expand All @@ -148,7 +149,12 @@ bio_set_flags_failfast(struct block_device *bdev, int *flags)
#endif /* BLOCK_EXT_MAJOR */
#endif /* CONFIG_BUG */

*flags |= REQ_FAILFAST_MASK;
if (dev)
*flags |= REQ_FAILFAST_DEV;
if (transport)
*flags |= REQ_FAILFAST_TRANSPORT;
if (driver)
*flags |= REQ_FAILFAST_DRIVER;
}

/*
Expand Down
1 change: 1 addition & 0 deletions include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,7 @@ typedef enum {
VDEV_PROP_BYTES_TRIM,
VDEV_PROP_REMOVING,
VDEV_PROP_ALLOCATING,
VDEV_PROP_FAILFAST,
VDEV_NUM_PROPS
} vdev_prop_t;

Expand Down
1 change: 1 addition & 0 deletions include/sys/vdev_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,7 @@ struct vdev {
uint64_t vdev_islog; /* is an intent log device */
uint64_t vdev_noalloc; /* device is passivated? */
uint64_t vdev_removing; /* device is being removed? */
uint64_t vdev_failfast; /* device failfast setting */
boolean_t vdev_ishole; /* is a hole in the namespace */
uint64_t vdev_top_zap;
vdev_alloc_bias_t vdev_alloc_bias; /* metaslab allocation bias */
Expand Down
3 changes: 2 additions & 1 deletion lib/libzfs/libzfs.abi
Original file line number Diff line number Diff line change
Expand Up @@ -3214,7 +3214,8 @@
<enumerator name='VDEV_PROP_BYTES_TRIM' value='38'/>
<enumerator name='VDEV_PROP_REMOVING' value='39'/>
<enumerator name='VDEV_PROP_ALLOCATING' value='40'/>
<enumerator name='VDEV_NUM_PROPS' value='41'/>
<enumerator name='VDEV_PROP_FAILFAST' value='41'/>
<enumerator name='VDEV_NUM_PROPS' value='42'/>
</enum-decl>
<typedef-decl name='vdev_prop_t' type-id='1573bec8' id='5aa5c90c'/>
<enum-decl name='vdev_state' id='21566197'>
Expand Down
17 changes: 15 additions & 2 deletions man/man4/zfs.4
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
.\" own identifying information:
.\" Portions Copyright [yyyy] [name of copyright owner]
.\"
.Dd November 7, 2022
.Dd November 9, 2022
.Dt ZFS 4
.Os
.
Expand Down Expand Up @@ -1345,6 +1345,19 @@ as fuller devices will tend to be slower than empty devices.
Also see
.Sy zio_dva_throttle_enabled .
.
.It Sy zfs_vdev_failfast_mask Ns = Ns Sy 1 Pq uint
Defines if the driver should retire on a given error type.
The following options may be bitwise-ored together:
.TS
box;
lbz r l l .
Value Name Description
_
1 Device No driver retries on device errors
2 Transport No driver retries on transport errors.
4 Driver No driver retries on driver errors.
.TE
.
.It Sy zfs_expire_snapshot Ns = Ns Sy 300 Ns s Pq int
Time before expiring
.Pa .zfs/snapshot .
Expand All @@ -1364,7 +1377,7 @@ The following flags may be bitwise-ored together:
.TS
box;
lbz r l l .
Value Symbolic Name Description
Value Name Description
_
1 ZFS_DEBUG_DPRINTF Enable dprintf entries in the debug log.
* 2 ZFS_DEBUG_DBUF_VERIFY Enable extra dbuf verifications.
Expand Down
5 changes: 4 additions & 1 deletion man/man7/vdevprops.7
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
.\"
.\" Copyright (c) 2021 Klara, Inc.
.\"
.Dd November 27, 2021
.Dd October 30, 2022
.Dt VDEVPROPS 7
.Os
.
Expand Down Expand Up @@ -121,6 +121,9 @@ dataset.
A text comment up to 8192 characters long
.It Sy bootsize
The amount of space to reserve for the EFI system partition
.It Sy failfast
If this device should propage BIO errors back to ZFS, used to disable
failfast.
.It Sy path
The path to the device for this vdev
.It Sy allocating
Expand Down
16 changes: 14 additions & 2 deletions module/os/linux/zfs/vdev_disk.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,12 @@ typedef struct dio_request {
struct bio *dr_bio[0]; /* Attached bio's */
} dio_request_t;

/*
* BIO request failfast mask.
*/

static unsigned int zfs_vdev_failfast_mask = 1;

static fmode_t
vdev_bdev_mode(spa_mode_t spa_mode)
{
Expand Down Expand Up @@ -659,8 +665,11 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio,
retry:
dr = vdev_disk_dio_alloc(bio_count);

if (zio && !(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))
bio_set_flags_failfast(bdev, &flags);
if (zio && !(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)) &&
zio->io_vd->vdev_failfast == B_TRUE) {
bio_set_flags_failfast(bdev, &flags, zfs_vdev_failfast_mask & 1,
zfs_vdev_failfast_mask & 2, zfs_vdev_failfast_mask & 4);
}

dr->dr_zio = zio;

Expand Down Expand Up @@ -1045,3 +1054,6 @@ param_set_max_auto_ashift(const char *buf, zfs_kernel_param_t *kp)

ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, open_timeout_ms, UINT, ZMOD_RW,
"Timeout before determining that a device is missing");

ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, failfast_mask, UINT, ZMOD_RW,
"Defines failfast mask: 1 - device, 2 - transport, 4 - driver");
3 changes: 3 additions & 0 deletions module/zcommon/zpool_prop.c
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,9 @@ vdev_prop_init(void)
boolean_na_table, sfeatures);

/* default index properties */
zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE,
PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "FAILFAST", boolean_table,
sfeatures);

/* hidden properties */
zprop_register_hidden(VDEV_PROP_NAME, "name", PROP_TYPE_STRING,
Expand Down
46 changes: 46 additions & 0 deletions module/zfs/vdev.c
Original file line number Diff line number Diff line change
Expand Up @@ -3563,6 +3563,26 @@ vdev_load(vdev_t *vd)
}
}

if (vd == vd->vdev_top && vd->vdev_top_zap != 0) {
spa_t *spa = vd->vdev_spa;
uint64_t failfast;

error = zap_lookup(spa->spa_meta_objset, vd->vdev_top_zap,
vdev_prop_to_name(VDEV_PROP_FAILFAST), sizeof (failfast),
1, &failfast);
if (error == 0) {
vd->vdev_failfast = failfast & 1;
} else if (error == ENOENT) {
vd->vdev_failfast = vdev_prop_default_numeric(
VDEV_PROP_FAILFAST);
} else {
vdev_dbgmsg(vd,
"vdev_load: zap_lookup(top_zap=%llu) "
"failed [error=%d]",
(u_longlong_t)vd->vdev_top_zap, error);
}
}

/*
* Load any rebuild state from the top-level vdev zap.
*/
Expand Down Expand Up @@ -5709,6 +5729,13 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
else
error = spa_vdev_alloc(spa, vdev_guid);
break;
case VDEV_PROP_FAILFAST:
if (nvpair_value_uint64(elem, &intval) != 0) {
error = EINVAL;
break;
}
vd->vdev_failfast = intval & 1;
break;
default:
/* Most processing is done in vdev_props_set_sync */
break;
Expand Down Expand Up @@ -6019,6 +6046,25 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
intval = ZPROP_BOOLEAN_NA;
}

vdev_prop_add_list(outnvl, propname, strval,
intval, src);
break;
case VDEV_PROP_FAILFAST:
src = ZPROP_SRC_LOCAL;
strval = NULL;

err = zap_lookup(mos, objid, nvpair_name(elem),
sizeof (uint64_t), 1, &intval);
if (err == ENOENT) {
intval = vdev_prop_default_numeric(
prop);
err = 0;
} else if (err) {
break;
}
if (intval == vdev_prop_default_numeric(prop))
src = ZPROP_SRC_DEFAULT;

vdev_prop_add_list(outnvl, propname, strval,
intval, src);
break;
Expand Down

0 comments on commit 16f0fda

Please sign in to comment.