Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md

Pull MD bugfixes from Shaohua Li:

 - fix raid5-ppl flush request handling hang from Artur

 - fix a potential deadlock in raid5/10 reshape from BingJing

 - fix a deadlock for dm-raid from Heinz

 - fix two md-cluster bugs in raid10 from Lidong and Guoqing

 - fix a NULL dereference problem in device removal from Neil

 - fix a NULL dereference problem in raid1/raid10 in a specific condition
   from Yufen

 - other cleanup and fixes

* 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
  md/raid1: fix NULL pointer dereference
  md: fix a potential deadlock of raid5/raid10 reshape
  md-cluster: choose correct label when clustered layout is not supported
  md: raid5: avoid string overflow warning
  raid5-ppl: fix handling flush requests
  md raid10: fix NULL dereference in handle_write_completed()
  md: only allow remove_and_add_spares when no sync_thread running.
  md: document lifetime of internal rdev pointer.
  md: fix md_write_start() deadlock w/o metadata devices
  MD: Free bioset when md_run fails
  raid10: change the size of resync window for clustered raid
  md-multipath: Use seq_putc() in multipath_status()
  md/raid1: Fix trailing semicolon
  md/raid5: simplify uninitialization of shrinker
torvalds committed Mar 1, 2018
2 parents 7bec4a9 + 3de59bb commit 7e30309
Showing 11 changed files with 124 additions and 31 deletions.
2 changes: 1 addition & 1 deletion drivers/md/md-multipath.c
@@ -157,7 +157,7 @@ static void multipath_status(struct seq_file *seq, struct mddev *mddev)
seq_printf (seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
}
rcu_read_unlock();
seq_printf (seq, "]");
seq_putc(seq, ']');
}

static int multipath_congested(struct mddev *mddev, int bits)
53 changes: 48 additions & 5 deletions drivers/md/md.c
@@ -801,6 +801,9 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
struct bio *bio;
int ff = 0;

if (!page)
return;

if (test_bit(Faulty, &rdev->flags))
return;

@@ -5452,6 +5455,7 @@ int md_run(struct mddev *mddev)
* the only valid external interface is through the md
* device.
*/
mddev->has_superblocks = false;
rdev_for_each(rdev, mddev) {
if (test_bit(Faulty, &rdev->flags))
continue;
@@ -5465,6 +5469,9 @@ int md_run(struct mddev *mddev)
set_disk_ro(mddev->gendisk, 1);
}

if (rdev->sb_page)
mddev->has_superblocks = true;

/* perform some consistency tests on the device.
* We don't want the data to overlap the metadata,
* Internal Bitmap issues have been handled elsewhere.
@@ -5497,8 +5504,10 @@ int md_run(struct mddev *mddev)
}
if (mddev->sync_set == NULL) {
mddev->sync_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
if (!mddev->sync_set)
return -ENOMEM;
if (!mddev->sync_set) {
err = -ENOMEM;
goto abort;
}
}

spin_lock(&pers_lock);
@@ -5511,7 +5520,8 @@ int md_run(struct mddev *mddev)
else
pr_warn("md: personality for level %s is not loaded!\n",
mddev->clevel);
return -EINVAL;
err = -EINVAL;
goto abort;
}
spin_unlock(&pers_lock);
if (mddev->level != pers->level) {
@@ -5524,7 +5534,8 @@ int md_run(struct mddev *mddev)
pers->start_reshape == NULL) {
/* This personality cannot handle reshaping... */
module_put(pers->owner);
return -EINVAL;
err = -EINVAL;
goto abort;
}

if (pers->sync_request) {
@@ -5593,7 +5604,7 @@ int md_run(struct mddev *mddev)
mddev->private = NULL;
module_put(pers->owner);
bitmap_destroy(mddev);
return err;
goto abort;
}
if (mddev->queue) {
bool nonrot = true;
@@ -5655,6 +5666,18 @@ int md_run(struct mddev *mddev)
sysfs_notify_dirent_safe(mddev->sysfs_action);
sysfs_notify(&mddev->kobj, NULL, "degraded");
return 0;

abort:
if (mddev->bio_set) {
bioset_free(mddev->bio_set);
mddev->bio_set = NULL;
}
if (mddev->sync_set) {
bioset_free(mddev->sync_set);
mddev->sync_set = NULL;
}

return err;
}
EXPORT_SYMBOL_GPL(md_run);

@@ -8049,6 +8072,7 @@ EXPORT_SYMBOL(md_done_sync);
bool md_write_start(struct mddev *mddev, struct bio *bi)
{
int did_change = 0;

if (bio_data_dir(bi) != WRITE)
return true;

@@ -8081,6 +8105,8 @@ bool md_write_start(struct mddev *mddev, struct bio *bi)
rcu_read_unlock();
if (did_change)
sysfs_notify_dirent_safe(mddev->sysfs_state);
if (!mddev->has_superblocks)
return true;
wait_event(mddev->sb_wait,
!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
mddev->suspended);
@@ -8543,6 +8569,19 @@ void md_do_sync(struct md_thread *thread)
set_mask_bits(&mddev->sb_flags, 0,
BIT(MD_SB_CHANGE_PENDING) | BIT(MD_SB_CHANGE_DEVS));

if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
mddev->delta_disks > 0 &&
mddev->pers->finish_reshape &&
mddev->pers->size &&
mddev->queue) {
mddev_lock_nointr(mddev);
md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
mddev_unlock(mddev);
set_capacity(mddev->gendisk, mddev->array_sectors);
revalidate_disk(mddev->gendisk);
}

spin_lock(&mddev->lock);
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
/* We completed so min/max setting can be forgotten if used. */
@@ -8569,6 +8608,10 @@ static int remove_and_add_spares(struct mddev *mddev,
int removed = 0;
bool remove_some = false;

if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
/* Mustn't remove devices when resync thread is running */
return 0;

rdev_for_each(rdev, mddev) {
if ((this == NULL || rdev == this) &&
rdev->raid_disk >= 0 &&
2 changes: 2 additions & 0 deletions drivers/md/md.h
@@ -468,6 +468,8 @@ struct mddev {
void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
struct md_cluster_info *cluster_info;
unsigned int good_device_nr; /* good device num within cluster raid */

bool has_superblocks:1;
};

enum recovery_flags {
11 changes: 11 additions & 0 deletions drivers/md/raid1.c
@@ -1809,6 +1809,17 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
struct md_rdev *repl =
conf->mirrors[conf->raid_disks + number].rdev;
freeze_array(conf, 0);
if (atomic_read(&repl->nr_pending)) {
/* This means some queued IO from retry_list still
* holds a reference on repl, so we must not set the
* replacement to NULL here; doing so could lead to an
* rdev NULL pointer dereference in sync_request_write
* and handle_write_finished.
*/
err = -EBUSY;
unfreeze_array(conf);
goto abort;
}
clear_bit(Replacement, &repl->flags);
p->rdev = repl;
conf->mirrors[conf->raid_disks + number].rdev = NULL;
12 changes: 12 additions & 0 deletions drivers/md/raid1.h
@@ -26,6 +26,18 @@
#define BARRIER_BUCKETS_NR_BITS (PAGE_SHIFT - ilog2(sizeof(atomic_t)))
#define BARRIER_BUCKETS_NR (1<<BARRIER_BUCKETS_NR_BITS)

/* Note: raid1_info.rdev can be set to NULL asynchronously by raid1_remove_disk.
* There are three safe ways to access raid1_info.rdev.
* 1/ when holding mddev->reconfig_mutex
* 2/ when resync/recovery is known to be happening - i.e. in code that is
* called as part of performing resync/recovery.
* 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer
* and if it is non-NULL, increment rdev->nr_pending before dropping the
* RCU lock.
* When .rdev is set to NULL, the nr_pending count is checked again and if it
* has been incremented, the pointer is put back in .rdev.
*/

struct raid1_info {
struct md_rdev *rdev;
sector_t head_position;
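The lifetime note added above (and repeated for raid10 and raid5 further down) documents three safe ways to reach an rdev pointer. As a rough caller-side illustration of pattern 3 only — the helper name pin_rdev is hypothetical and not part of this series — the access could look like:

static struct md_rdev *pin_rdev(struct raid1_info *mirror)
{
	struct md_rdev *rdev;

	rcu_read_lock();
	rdev = rcu_dereference(mirror->rdev);
	if (rdev && !test_bit(Faulty, &rdev->flags))
		atomic_inc(&rdev->nr_pending);	/* pin before dropping RCU */
	else
		rdev = NULL;
	rcu_read_unlock();
	return rdev;	/* caller releases with rdev_dec_pending() */
}

The -EBUSY path added to raid1_remove_disk() above is the other half of that contract: a non-zero nr_pending means some reader has pinned the pointer this way, so removal backs off instead of clearing it.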
18 changes: 7 additions & 11 deletions drivers/md/raid10.c
@@ -141,7 +141,7 @@ static void r10bio_pool_free(void *r10_bio, void *data)
#define RESYNC_WINDOW (1024*1024)
/* maximum number of concurrent requests, memory permitting */
#define RESYNC_DEPTH (32*1024*1024/RESYNC_BLOCK_SIZE)
#define CLUSTER_RESYNC_WINDOW (16 * RESYNC_WINDOW)
#define CLUSTER_RESYNC_WINDOW (32 * RESYNC_WINDOW)
#define CLUSTER_RESYNC_WINDOW_SECTORS (CLUSTER_RESYNC_WINDOW >> 9)

/*
@@ -2655,7 +2655,8 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
for (m = 0; m < conf->copies; m++) {
int dev = r10_bio->devs[m].devnum;
rdev = conf->mirrors[dev].rdev;
if (r10_bio->devs[m].bio == NULL)
if (r10_bio->devs[m].bio == NULL ||
r10_bio->devs[m].bio->bi_end_io == NULL)
continue;
if (!r10_bio->devs[m].bio->bi_status) {
rdev_clear_badblocks(
@@ -2670,7 +2671,8 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
md_error(conf->mddev, rdev);
}
rdev = conf->mirrors[dev].replacement;
if (r10_bio->devs[m].repl_bio == NULL)
if (r10_bio->devs[m].repl_bio == NULL ||
r10_bio->devs[m].repl_bio->bi_end_io == NULL)
continue;

if (!r10_bio->devs[m].repl_bio->bi_status) {
@@ -3782,7 +3784,7 @@ static int raid10_run(struct mddev *mddev)
if (fc > 1 || fo > 0) {
pr_err("only near layout is supported by clustered"
" raid10\n");
goto out;
goto out_free_conf;
}
}

@@ -4830,17 +4832,11 @@ static void raid10_finish_reshape(struct mddev *mddev)
return;

if (mddev->delta_disks > 0) {
sector_t size = raid10_size(mddev, 0, 0);
md_set_array_sectors(mddev, size);
if (mddev->recovery_cp > mddev->resync_max_sectors) {
mddev->recovery_cp = mddev->resync_max_sectors;
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
}
mddev->resync_max_sectors = size;
if (mddev->queue) {
set_capacity(mddev->gendisk, mddev->array_sectors);
revalidate_disk(mddev->gendisk);
}
mddev->resync_max_sectors = mddev->array_sectors;
} else {
int d;
rcu_read_lock();
13 changes: 13 additions & 0 deletions drivers/md/raid10.h
@@ -2,6 +2,19 @@
#ifndef _RAID10_H
#define _RAID10_H

/* Note: raid10_info.rdev can be set to NULL asynchronously by
* raid10_remove_disk.
* There are three safe ways to access raid10_info.rdev.
* 1/ when holding mddev->reconfig_mutex
* 2/ when resync/recovery/reshape is known to be happening - i.e. in code
* that is called as part of performing resync/recovery/reshape.
* 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer
* and if it is non-NULL, increment rdev->nr_pending before dropping the
* RCU lock.
* When .rdev is set to NULL, the nr_pending count is checked again and if it
* has been incremented, the pointer is put back in .rdev.
*/

struct raid10_info {
struct md_rdev *rdev, *replacement;
sector_t head_position;
3 changes: 2 additions & 1 deletion drivers/md/raid5-log.h
@@ -44,6 +44,7 @@ extern void ppl_write_stripe_run(struct r5conf *conf);
extern void ppl_stripe_write_finished(struct stripe_head *sh);
extern int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add);
extern void ppl_quiesce(struct r5conf *conf, int quiesce);
extern int ppl_handle_flush_request(struct r5l_log *log, struct bio *bio);

static inline bool raid5_has_ppl(struct r5conf *conf)
{
@@ -104,7 +105,7 @@ static inline int log_handle_flush_request(struct r5conf *conf, struct bio *bio)
if (conf->log)
ret = r5l_handle_flush_request(conf->log, bio);
else if (raid5_has_ppl(conf))
ret = 0;
ret = ppl_handle_flush_request(conf->log, bio);

return ret;
}
10 changes: 10 additions & 0 deletions drivers/md/raid5-ppl.c
@@ -693,6 +693,16 @@ void ppl_quiesce(struct r5conf *conf, int quiesce)
}
}

int ppl_handle_flush_request(struct r5l_log *log, struct bio *bio)
{
if (bio->bi_iter.bi_size == 0) {
bio_endio(bio);
return 0;
}
bio->bi_opf &= ~REQ_PREFLUSH;
return -EAGAIN;
}

void ppl_stripe_write_finished(struct stripe_head *sh)
{
struct ppl_io_unit *io;
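Taken together with the raid5-log.h hunk above, the new ppl_handle_flush_request() completes a zero-length flush immediately and otherwise clears REQ_PREFLUSH and returns -EAGAIN so the data portion proceeds as an ordinary write. A hedged sketch of how a submitter could act on log_handle_flush_request() — not the actual raid5 make_request path, with the no-journal case omitted and the helper name submit_flush_sketch purely illustrative:

static bool submit_flush_sketch(struct r5conf *conf, struct bio *bio)
{
	if (bio->bi_opf & REQ_PREFLUSH) {
		int ret = log_handle_flush_request(conf, bio);

		if (ret == 0)
			return true;	/* empty flush already completed */
		/*
		 * ret == -EAGAIN: REQ_PREFLUSH has been stripped, so the
		 * remaining data falls through to normal stripe handling.
		 */
	}
	/* ... hand the bio to the regular write path ... */
	return false;
}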
19 changes: 6 additions & 13 deletions drivers/md/raid5.c
@@ -2196,15 +2196,16 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
static int grow_stripes(struct r5conf *conf, int num)
{
struct kmem_cache *sc;
size_t namelen = sizeof(conf->cache_name[0]);
int devs = max(conf->raid_disks, conf->previous_raid_disks);

if (conf->mddev->gendisk)
sprintf(conf->cache_name[0],
snprintf(conf->cache_name[0], namelen,
"raid%d-%s", conf->level, mdname(conf->mddev));
else
sprintf(conf->cache_name[0],
snprintf(conf->cache_name[0], namelen,
"raid%d-%p", conf->level, conf->mddev);
sprintf(conf->cache_name[1], "%s-alt", conf->cache_name[0]);
snprintf(conf->cache_name[1], namelen, "%.27s-alt", conf->cache_name[0]);

conf->active_name = 0;
sc = kmem_cache_create(conf->cache_name[conf->active_name],
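The sprintf-to-snprintf conversion above bounds both cache names to sizeof(conf->cache_name[0]), and the "%.27s" precision guarantees the "-alt" suffix still fits even when the first name was truncated (assuming 32-byte cache_name entries, which is what the 27 + 4 + 1 arithmetic implies). A standalone userspace illustration of the precision specifier, not kernel code:

#include <stdio.h>

int main(void)
{
	char name[32];
	const char *base = "raid6-md127-with-a-very-long-name";

	/* at most 27 chars of base, then "-alt", then the NUL: 32 bytes max */
	snprintf(name, sizeof(name), "%.27s-alt", base);
	printf("%s\n", name);
	return 0;
}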
@@ -6764,9 +6765,7 @@ static void free_conf(struct r5conf *conf)

log_exit(conf);

if (conf->shrinker.nr_deferred)
unregister_shrinker(&conf->shrinker);

unregister_shrinker(&conf->shrinker);
free_thread_groups(conf);
shrink_stripes(conf);
raid5_free_percpu(conf);
@@ -8001,13 +8000,7 @@ static void raid5_finish_reshape(struct mddev *mddev)

if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {

if (mddev->delta_disks > 0) {
md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
if (mddev->queue) {
set_capacity(mddev->gendisk, mddev->array_sectors);
revalidate_disk(mddev->gendisk);
}
} else {
if (mddev->delta_disks <= 0) {
int d;
spin_lock_irq(&conf->device_lock);
mddev->degraded = raid5_calc_degraded(conf);
12 changes: 12 additions & 0 deletions drivers/md/raid5.h
@@ -450,6 +450,18 @@ enum {
* HANDLE gets cleared if stripe_handle leaves nothing locked.
*/

/* Note: disk_info.rdev can be set to NULL asynchronously by raid5_remove_disk.
* There are three safe ways to access disk_info.rdev.
* 1/ when holding mddev->reconfig_mutex
* 2/ when resync/recovery/reshape is known to be happening - i.e. in code that
* is called as part of performing resync/recovery/reshape.
* 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer
* and if it is non-NULL, increment rdev->nr_pending before dropping the RCU
* lock.
* When .rdev is set to NULL, the nr_pending count is checked again and if
* it has been incremented, the pointer is put back in .rdev.
*/

struct disk_info {
struct md_rdev *rdev, *replacement;
struct page *extra_page; /* extra page to use in prexor */