Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
…/git/song/md into for-5.19/drivers

Pull MD updates from Song:

"1. Improve annotation in raid5 code, by Logan Gunthorpe.
 2. Support MD_BROKEN flag in raid-1/5/10, by Mariusz Tkaczyk.
 3. Other small fixes/cleanups."

* 'md-next' of https://git.kernel.org/pub/scm/linux/kernel/git/song/md:
  md: Replace role magic numbers with defined constants
  md/raid0: Ignore RAID0 layout if the second zone has only one device
  md/raid5: Annotate functions that hold device_lock with __must_hold
  md/raid5-ppl: Annotate with rcu_dereference_protected()
  md/raid5: Annotate rdev/replacement access when mddev_lock is held
  md/raid5: Annotate rdev/replacement accesses when nr_pending is elevated
  md/raid5: Add __rcu annotation to struct disk_info
  md/raid5: Un-nest struct raid5_percpu definition
  md/raid5: Cleanup setup_conf() error returns
  md: replace deprecated strlcpy & remove duplicated line
  md/bitmap: don't set sb values if can't pass sanity check
  md: fix an incorrect NULL check in md_reload_sb
  md: fix an incorrect NULL check in does_sb_need_changing
  raid5: introduce MD_BROKEN
  md: Set MD_BROKEN for RAID1 and RAID10
  • Loading branch information
axboe committed Apr 28, 2022
2 parents 8ba816b + 9151ad5 commit f01e49f
Show file tree
Hide file tree
Showing 10 changed files with 321 additions and 226 deletions.
45 changes: 23 additions & 22 deletions drivers/md/md-bitmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -639,14 +639,6 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
write_behind = le32_to_cpu(sb->write_behind);
sectors_reserved = le32_to_cpu(sb->sectors_reserved);
/* Setup nodes/clustername only if bitmap version is
* cluster-compatible
*/
if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
nodes = le32_to_cpu(sb->nodes);
strlcpy(bitmap->mddev->bitmap_info.cluster_name,
sb->cluster_name, 64);
}

/* verify that the bitmap-specific fields are valid */
if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
Expand All @@ -668,6 +660,16 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
goto out;
}

/*
* Setup nodes/clustername only if bitmap version is
* cluster-compatible
*/
if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) {
nodes = le32_to_cpu(sb->nodes);
strscpy(bitmap->mddev->bitmap_info.cluster_name,
sb->cluster_name, 64);
}

/* keep the array size field of the bitmap superblock up to date */
sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors);

Expand Down Expand Up @@ -695,14 +697,13 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN)
set_bit(BITMAP_HOSTENDIAN, &bitmap->flags);
bitmap->events_cleared = le64_to_cpu(sb->events_cleared);
strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);
err = 0;

out:
kunmap_atomic(sb);
/* Assigning chunksize is required for "re_read" */
bitmap->mddev->bitmap_info.chunksize = chunksize;
if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
/* Assigning chunksize is required for "re_read" */
bitmap->mddev->bitmap_info.chunksize = chunksize;
err = md_setup_cluster(bitmap->mddev, nodes);
if (err) {
pr_warn("%s: Could not setup cluster service (%d)\n",
Expand All @@ -713,18 +714,18 @@ static int md_bitmap_read_sb(struct bitmap *bitmap)
goto re_read;
}


out_no_sb:
if (test_bit(BITMAP_STALE, &bitmap->flags))
bitmap->events_cleared = bitmap->mddev->events;
bitmap->mddev->bitmap_info.chunksize = chunksize;
bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
bitmap->mddev->bitmap_info.max_write_behind = write_behind;
bitmap->mddev->bitmap_info.nodes = nodes;
if (bitmap->mddev->bitmap_info.space == 0 ||
bitmap->mddev->bitmap_info.space > sectors_reserved)
bitmap->mddev->bitmap_info.space = sectors_reserved;
if (err) {
if (err == 0) {
if (test_bit(BITMAP_STALE, &bitmap->flags))
bitmap->events_cleared = bitmap->mddev->events;
bitmap->mddev->bitmap_info.chunksize = chunksize;
bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep;
bitmap->mddev->bitmap_info.max_write_behind = write_behind;
bitmap->mddev->bitmap_info.nodes = nodes;
if (bitmap->mddev->bitmap_info.space == 0 ||
bitmap->mddev->bitmap_info.space > sectors_reserved)
bitmap->mddev->bitmap_info.space = sectors_reserved;
} else {
md_bitmap_print_sb(bitmap);
if (bitmap->cluster_slot < 0)
md_cluster_stop(bitmap->mddev);
Expand Down
2 changes: 1 addition & 1 deletion drivers/md/md-cluster.c
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ static struct dlm_lock_resource *lockres_init(struct mddev *mddev,
pr_err("md-cluster: Unable to allocate resource name for resource %s\n", name);
goto out_err;
}
strlcpy(res->name, name, namelen + 1);
strscpy(res->name, name, namelen + 1);
if (with_lvb) {
res->lksb.sb_lvbptr = kzalloc(LVB_SIZE, GFP_KERNEL);
if (!res->lksb.sb_lvbptr) {
Expand Down
62 changes: 35 additions & 27 deletions drivers/md/md.c
Original file line number Diff line number Diff line change
Expand Up @@ -2627,14 +2627,16 @@ static void sync_sbs(struct mddev *mddev, int nospares)

static bool does_sb_need_changing(struct mddev *mddev)
{
struct md_rdev *rdev;
struct md_rdev *rdev = NULL, *iter;
struct mdp_superblock_1 *sb;
int role;

/* Find a good rdev */
rdev_for_each(rdev, mddev)
if ((rdev->raid_disk >= 0) && !test_bit(Faulty, &rdev->flags))
rdev_for_each(iter, mddev)
if ((iter->raid_disk >= 0) && !test_bit(Faulty, &iter->flags)) {
rdev = iter;
break;
}

/* No good device found. */
if (!rdev)
Expand All @@ -2645,11 +2647,11 @@ static bool does_sb_need_changing(struct mddev *mddev)
rdev_for_each(rdev, mddev) {
role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
/* Device activated? */
if (role == 0xffff && rdev->raid_disk >=0 &&
if (role == MD_DISK_ROLE_SPARE && rdev->raid_disk >= 0 &&
!test_bit(Faulty, &rdev->flags))
return true;
/* Device turned faulty? */
if (test_bit(Faulty, &rdev->flags) && (role < 0xfffd))
if (test_bit(Faulty, &rdev->flags) && (role < MD_DISK_ROLE_MAX))
return true;
}

Expand Down Expand Up @@ -2984,10 +2986,11 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)

if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
md_error(rdev->mddev, rdev);
if (test_bit(Faulty, &rdev->flags))
err = 0;
else

if (test_bit(MD_BROKEN, &rdev->mddev->flags))
err = -EBUSY;
else
err = 0;
} else if (cmd_match(buf, "remove")) {
if (rdev->mddev->pers) {
clear_bit(Blocked, &rdev->flags);
Expand Down Expand Up @@ -4028,7 +4031,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
oldpriv = mddev->private;
mddev->pers = pers;
mddev->private = priv;
strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
strscpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
mddev->level = mddev->new_level;
mddev->layout = mddev->new_layout;
mddev->chunk_sectors = mddev->new_chunk_sectors;
Expand Down Expand Up @@ -4353,10 +4356,9 @@ __ATTR_PREALLOC(resync_start, S_IRUGO|S_IWUSR,
* like active, but no writes have been seen for a while (100msec).
*
* broken
* RAID0/LINEAR-only: same as clean, but array is missing a member.
* It's useful because RAID0/LINEAR mounted-arrays aren't stopped
* when a member is gone, so this state will at least alert the
* user that something is wrong.
* The array has failed. This state is useful because mounted arrays
* aren't stopped when the array fails, so it will at least alert the
* user that something is wrong.
*/
enum array_state { clear, inactive, suspended, readonly, read_auto, clean, active,
write_pending, active_idle, broken, bad_word};
Expand Down Expand Up @@ -5763,7 +5765,7 @@ static int add_named_array(const char *val, const struct kernel_param *kp)
len--;
if (len >= DISK_NAME_LEN)
return -E2BIG;
strlcpy(buf, val, len+1);
strscpy(buf, val, len+1);
if (strncmp(buf, "md_", 3) == 0)
return md_alloc(0, buf);
if (strncmp(buf, "md", 2) == 0 &&
Expand Down Expand Up @@ -5896,7 +5898,7 @@ int md_run(struct mddev *mddev)
mddev->level = pers->level;
mddev->new_level = pers->level;
}
strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
strscpy(mddev->clevel, pers->name, sizeof(mddev->clevel));

if (mddev->reshape_position != MaxSector &&
pers->start_reshape == NULL) {
Expand Down Expand Up @@ -7443,7 +7445,7 @@ static int set_disk_faulty(struct mddev *mddev, dev_t dev)
err = -ENODEV;
else {
md_error(mddev, rdev);
if (!test_bit(Faulty, &rdev->flags))
if (test_bit(MD_BROKEN, &mddev->flags))
err = -EBUSY;
}
rcu_read_unlock();
Expand Down Expand Up @@ -7984,13 +7986,16 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)

if (!mddev->pers || !mddev->pers->error_handler)
return;
mddev->pers->error_handler(mddev,rdev);
if (mddev->degraded)
mddev->pers->error_handler(mddev, rdev);

if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags))
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
sysfs_notify_dirent_safe(rdev->sysfs_state);
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
if (!test_bit(MD_BROKEN, &mddev->flags)) {
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
}
if (mddev->event_work.func)
queue_work(md_misc_wq, &mddev->event_work);
md_new_event();
Expand Down Expand Up @@ -9670,7 +9675,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
role = le16_to_cpu(sb->dev_roles[rdev2->desc_nr]);

if (test_bit(Candidate, &rdev2->flags)) {
if (role == 0xfffe) {
if (role == MD_DISK_ROLE_FAULTY) {
pr_info("md: Removing Candidate device %s because add failed\n", bdevname(rdev2->bdev,b));
md_kick_rdev_from_array(rdev2);
continue;
Expand All @@ -9683,7 +9688,7 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
/*
* got activated except reshape is happening.
*/
if (rdev2->raid_disk == -1 && role != 0xffff &&
if (rdev2->raid_disk == -1 && role != MD_DISK_ROLE_SPARE &&
!(le32_to_cpu(sb->feature_map) &
MD_FEATURE_RESHAPE_ACTIVE)) {
rdev2->saved_raid_disk = role;
Expand All @@ -9700,7 +9705,8 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
* as faulty. The recovery is performed by the
* one who initiated the error.
*/
if ((role == 0xfffe) || (role == 0xfffd)) {
if (role == MD_DISK_ROLE_FAULTY ||
role == MD_DISK_ROLE_JOURNAL) {
md_error(mddev, rdev2);
clear_bit(Blocked, &rdev2->flags);
}
Expand Down Expand Up @@ -9790,16 +9796,18 @@ static int read_rdev(struct mddev *mddev, struct md_rdev *rdev)

void md_reload_sb(struct mddev *mddev, int nr)
{
struct md_rdev *rdev;
struct md_rdev *rdev = NULL, *iter;
int err;

/* Find the rdev */
rdev_for_each_rcu(rdev, mddev) {
if (rdev->desc_nr == nr)
rdev_for_each_rcu(iter, mddev) {
if (iter->desc_nr == nr) {
rdev = iter;
break;
}
}

if (!rdev || rdev->desc_nr != nr) {
if (!rdev) {
pr_warn("%s: %d Could not find rdev with nr %d\n", __func__, __LINE__, nr);
return;
}
Expand Down
62 changes: 35 additions & 27 deletions drivers/md/md.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,34 +234,42 @@ extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
int is_new);
struct md_cluster_info;

/* change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added */
/**
* enum mddev_flags - md device flags.
* @MD_ARRAY_FIRST_USE: First use of array, needs initialization.
* @MD_CLOSING: If set, we are closing the array, do not open it then.
* @MD_JOURNAL_CLEAN: A raid with journal is already clean.
* @MD_HAS_JOURNAL: The raid array has journal feature set.
* @MD_CLUSTER_RESYNC_LOCKED: cluster raid only, which means node already took
* resync lock, need to release the lock.
* @MD_FAILFAST_SUPPORTED: Using MD_FAILFAST on metadata writes is supported as
* calls to md_error() will never cause the array to
* become failed.
* @MD_HAS_PPL: The raid array has PPL feature set.
* @MD_HAS_MULTIPLE_PPLS: The raid array has multiple PPLs feature set.
* @MD_ALLOW_SB_UPDATE: md_check_recovery is allowed to update the metadata
* without taking reconfig_mutex.
* @MD_UPDATING_SB: md_check_recovery is updating the metadata without
* explicitly holding reconfig_mutex.
* @MD_NOT_READY: do_md_run() is active, so 'array_state' must not report that
* array is ready yet.
* @MD_BROKEN: This is used to stop writes and mark array as failed.
*
* change UNSUPPORTED_MDDEV_FLAGS for each array type if new flag is added
*/
enum mddev_flags {
MD_ARRAY_FIRST_USE, /* First use of array, needs initialization */
MD_CLOSING, /* If set, we are closing the array, do not open
* it then */
MD_JOURNAL_CLEAN, /* A raid with journal is already clean */
MD_HAS_JOURNAL, /* The raid array has journal feature set */
MD_CLUSTER_RESYNC_LOCKED, /* cluster raid only, which means node
* already took resync lock, need to
* release the lock */
MD_FAILFAST_SUPPORTED, /* Using MD_FAILFAST on metadata writes is
* supported as calls to md_error() will
* never cause the array to become failed.
*/
MD_HAS_PPL, /* The raid array has PPL feature set */
MD_HAS_MULTIPLE_PPLS, /* The raid array has multiple PPLs feature set */
MD_ALLOW_SB_UPDATE, /* md_check_recovery is allowed to update
* the metadata without taking reconfig_mutex.
*/
MD_UPDATING_SB, /* md_check_recovery is updating the metadata
* without explicitly holding reconfig_mutex.
*/
MD_NOT_READY, /* do_md_run() is active, so 'array_state'
* must not report that array is ready yet
*/
MD_BROKEN, /* This is used in RAID-0/LINEAR only, to stop
* I/O in case an array member is gone/failed.
*/
MD_ARRAY_FIRST_USE,
MD_CLOSING,
MD_JOURNAL_CLEAN,
MD_HAS_JOURNAL,
MD_CLUSTER_RESYNC_LOCKED,
MD_FAILFAST_SUPPORTED,
MD_HAS_PPL,
MD_HAS_MULTIPLE_PPLS,
MD_ALLOW_SB_UPDATE,
MD_UPDATING_SB,
MD_NOT_READY,
MD_BROKEN,
};

enum mddev_sb_flags {
Expand Down
31 changes: 16 additions & 15 deletions drivers/md/raid0.c
Original file line number Diff line number Diff line change
Expand Up @@ -128,21 +128,6 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
pr_debug("md/raid0:%s: FINAL %d zones\n",
mdname(mddev), conf->nr_strip_zones);

if (conf->nr_strip_zones == 1) {
conf->layout = RAID0_ORIG_LAYOUT;
} else if (mddev->layout == RAID0_ORIG_LAYOUT ||
mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
conf->layout = mddev->layout;
} else if (default_layout == RAID0_ORIG_LAYOUT ||
default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
conf->layout = default_layout;
} else {
pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
mdname(mddev));
pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
err = -ENOTSUPP;
goto abort;
}
/*
* now since we have the hard sector sizes, we can make sure
* chunk size is a multiple of that sector size
Expand Down Expand Up @@ -273,6 +258,22 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
(unsigned long long)smallest->sectors);
}

if (conf->nr_strip_zones == 1 || conf->strip_zone[1].nb_dev == 1) {
conf->layout = RAID0_ORIG_LAYOUT;
} else if (mddev->layout == RAID0_ORIG_LAYOUT ||
mddev->layout == RAID0_ALT_MULTIZONE_LAYOUT) {
conf->layout = mddev->layout;
} else if (default_layout == RAID0_ORIG_LAYOUT ||
default_layout == RAID0_ALT_MULTIZONE_LAYOUT) {
conf->layout = default_layout;
} else {
pr_err("md/raid0:%s: cannot assemble multi-zone RAID0 with default_layout setting\n",
mdname(mddev));
pr_err("md/raid0: please set raid0.default_layout to 1 or 2\n");
err = -EOPNOTSUPP;
goto abort;
}

pr_debug("md/raid0:%s: done.\n", mdname(mddev));
*private_conf = conf;

Expand Down
Loading

0 comments on commit f01e49f

Please sign in to comment.