Skip to content

Commit

Permalink
Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/…
Browse files Browse the repository at this point in the history
…git/shli/md

Pull MD updates from Shaohua Li:
 "Some small fixes for MD:

   - fix raid5-cache potential problems if raid5 cache isn't fully
     recovered

   - fix a wait-within-wait warning in raid1/10

   - make raid5-PPL support disks with writeback cache enabled"

* 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
  raid5-ppl: PPL support for disks with write-back cache enabled
  md/r5cache: print more info of log recovery
  md/raid1,raid10: silence warning about wait-within-wait
  md: introduce new personality function start()
  • Loading branch information
torvalds committed Jan 31, 2018
2 parents 20c59c7 + 1532d9e commit 040639b
Show file tree
Hide file tree
Showing 10 changed files with 285 additions and 38 deletions.
7 changes: 4 additions & 3 deletions Documentation/md/raid5-ppl.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ case the behavior is the same as in plain raid5.
PPL is available for md version-1 metadata and external (specifically IMSM)
metadata arrays. It can be enabled using mdadm option --consistency-policy=ppl.

Currently, volatile write-back cache should be disabled on all member drives
when using PPL. Otherwise it cannot guarantee consistency in case of power
failure.
There is a limitation of maximum 64 disks in the array for PPL. It allows to
keep data structures and implementation simple. RAID5 arrays with so many disks
are not likely due to high risk of multiple disks failure. Such restriction
should not be a real life limitation.
9 changes: 9 additions & 0 deletions drivers/md/dm-raid.c
Original file line number Diff line number Diff line change
Expand Up @@ -3151,6 +3151,14 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad;
}

r = md_start(&rs->md);

if (r) {
ti->error = "Failed to start raid array";
mddev_unlock(&rs->md);
goto bad_md_start;
}

rs->callbacks.congested_fn = raid_is_congested;
dm_table_add_target_callbacks(ti->table, &rs->callbacks);

Expand Down Expand Up @@ -3198,6 +3206,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
mddev_unlock(&rs->md);
return 0;

bad_md_start:
bad_journal_mode_set:
bad_stripe_cache:
bad_check_reshape:
Expand Down
31 changes: 23 additions & 8 deletions drivers/md/md.c
Original file line number Diff line number Diff line change
Expand Up @@ -711,7 +711,7 @@ static struct md_rdev *find_rdev(struct mddev *mddev, dev_t dev)
return NULL;
}

static struct md_rdev *find_rdev_rcu(struct mddev *mddev, dev_t dev)
struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev)
{
struct md_rdev *rdev;

Expand All @@ -721,6 +721,7 @@ static struct md_rdev *find_rdev_rcu(struct mddev *mddev, dev_t dev)

return NULL;
}
EXPORT_SYMBOL_GPL(md_find_rdev_rcu);

static struct md_personality *find_pers(int level, char *clevel)
{
Expand Down Expand Up @@ -5560,11 +5561,6 @@ int md_run(struct mddev *mddev)
if (start_readonly && mddev->ro == 0)
mddev->ro = 2; /* read-only, but switch on first write */

/*
* NOTE: some pers->run(), for example r5l_recovery_log(), wakes
* up mddev->thread. It is important to initialize critical
* resources for mddev->thread BEFORE calling pers->run().
*/
err = pers->run(mddev);
if (err)
pr_warn("md: pers->run() failed ...\n");
Expand Down Expand Up @@ -5678,6 +5674,9 @@ static int do_md_run(struct mddev *mddev)
if (mddev_is_clustered(mddev))
md_allow_write(mddev);

/* run start up tasks that require md_thread */
md_start(mddev);

md_wakeup_thread(mddev->thread);
md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */

Expand All @@ -5689,6 +5688,21 @@ static int do_md_run(struct mddev *mddev)
return err;
}

/*
 * md_start - run personality start-up work that needs mddev->thread.
 *
 * If the personality provides a ->start() hook, run it while the
 * MD_RECOVERY_WAIT bit is held in mddev->recovery so that md_do_sync()
 * bails out instead of racing with the start-up work (see the
 * MD_RECOVERY_WAIT test added in md_do_sync()).  Once ->start() is done
 * the bit is cleared and the sync thread is kicked so any deferred
 * resync/reshape can begin.
 *
 * Returns 0 on success (including when no ->start() exists), otherwise
 * the error code from the personality's ->start().
 */
int md_start(struct mddev *mddev)
{
	int ret;

	/* Nothing to do for personalities without a start() hook. */
	if (!mddev->pers->start)
		return 0;

	/* Block md_do_sync() until start-up work has finished. */
	set_bit(MD_RECOVERY_WAIT, &mddev->recovery);
	md_wakeup_thread(mddev->thread);

	ret = mddev->pers->start(mddev);

	clear_bit(MD_RECOVERY_WAIT, &mddev->recovery);
	/* Let a pending resync/reshape proceed now. */
	md_wakeup_thread(mddev->sync_thread);

	return ret;
}
EXPORT_SYMBOL_GPL(md_start);

static int restart_array(struct mddev *mddev)
{
struct gendisk *disk = mddev->gendisk;
Expand Down Expand Up @@ -6997,7 +7011,7 @@ static int set_disk_faulty(struct mddev *mddev, dev_t dev)
return -ENODEV;

rcu_read_lock();
rdev = find_rdev_rcu(mddev, dev);
rdev = md_find_rdev_rcu(mddev, dev);
if (!rdev)
err = -ENODEV;
else {
Expand Down Expand Up @@ -8169,7 +8183,8 @@ void md_do_sync(struct md_thread *thread)
int ret;

/* just incase thread restarts... */
if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
if (test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
test_bit(MD_RECOVERY_WAIT, &mddev->recovery))
return;
if (mddev->ro) {/* never try to sync a read-only array */
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
Expand Down
9 changes: 9 additions & 0 deletions drivers/md/md.h
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,7 @@ enum recovery_flags {
MD_RECOVERY_RESHAPE, /* A reshape is happening */
MD_RECOVERY_FROZEN, /* User request to abort, and not restart, any action */
MD_RECOVERY_ERROR, /* sync-action interrupted because io-error */
MD_RECOVERY_WAIT, /* waiting for pers->start() to finish */
};

static inline int __must_check mddev_lock(struct mddev *mddev)
Expand Down Expand Up @@ -523,7 +524,13 @@ struct md_personality
struct list_head list;
struct module *owner;
bool (*make_request)(struct mddev *mddev, struct bio *bio);
/*
 * start-up work that does NOT require md_thread. Tasks that
 * require md_thread should go into start()
 */
int (*run)(struct mddev *mddev);
/* start up works that require md threads */
int (*start)(struct mddev *mddev);
void (*free)(struct mddev *mddev, void *priv);
void (*status)(struct seq_file *seq, struct mddev *mddev);
/* error_handler must set ->faulty and clear ->in_sync
Expand Down Expand Up @@ -687,6 +694,7 @@ extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale);

extern void mddev_init(struct mddev *mddev);
extern int md_run(struct mddev *mddev);
extern int md_start(struct mddev *mddev);
extern void md_stop(struct mddev *mddev);
extern void md_stop_writes(struct mddev *mddev);
extern int md_rdev_init(struct md_rdev *rdev);
Expand All @@ -702,6 +710,7 @@ extern void md_reload_sb(struct mddev *mddev, int raid_disk);
extern void md_update_sb(struct mddev *mddev, int force);
extern void md_kick_rdev_from_array(struct md_rdev * rdev);
struct md_rdev *md_find_rdev_nr_rcu(struct mddev *mddev, int nr);
struct md_rdev *md_find_rdev_rcu(struct mddev *mddev, dev_t dev);

static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
{
Expand Down
11 changes: 11 additions & 0 deletions drivers/md/raid1.c
Original file line number Diff line number Diff line change
Expand Up @@ -815,6 +815,17 @@ static void flush_pending_writes(struct r1conf *conf)
bio = bio_list_get(&conf->pending_bio_list);
conf->pending_count = 0;
spin_unlock_irq(&conf->device_lock);

/*
* As this is called in a wait_event() loop (see freeze_array),
* current->state might be TASK_UNINTERRUPTIBLE which will
* cause a warning when we prepare to wait again. As it is
* rare that this path is taken, it is perfectly safe to force
* us to go around the wait_event() loop again, so the warning
* is a false-positive. Silence the warning by resetting
* thread state
*/
__set_current_state(TASK_RUNNING);
blk_start_plug(&plug);
flush_bio_list(conf, bio);
blk_finish_plug(&plug);
Expand Down
12 changes: 12 additions & 0 deletions drivers/md/raid10.c
Original file line number Diff line number Diff line change
Expand Up @@ -900,6 +900,18 @@ static void flush_pending_writes(struct r10conf *conf)
bio = bio_list_get(&conf->pending_bio_list);
conf->pending_count = 0;
spin_unlock_irq(&conf->device_lock);

/*
* As this is called in a wait_event() loop (see freeze_array),
* current->state might be TASK_UNINTERRUPTIBLE which will
* cause a warning when we prepare to wait again. As it is
* rare that this path is taken, it is perfectly safe to force
* us to go around the wait_event() loop again, so the warning
* is a false-positive. Silence the warning by resetting
* thread state
*/
__set_current_state(TASK_RUNNING);

blk_start_plug(&plug);
/* flush any pending bitmap writes to disk
* before proceeding w/ I/O */
Expand Down
31 changes: 19 additions & 12 deletions drivers/md/raid5-cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -1111,9 +1111,6 @@ void r5l_write_stripe_run(struct r5l_log *log)

int r5l_handle_flush_request(struct r5l_log *log, struct bio *bio)
{
if (!log)
return -ENODEV;

if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH) {
/*
* in write through (journal only)
Expand Down Expand Up @@ -1592,8 +1589,6 @@ void r5l_wake_reclaim(struct r5l_log *log, sector_t space)
void r5l_quiesce(struct r5l_log *log, int quiesce)
{
struct mddev *mddev;
if (!log)
return;

if (quiesce) {
/* make sure r5l_write_super_and_discard_space exits */
Expand Down Expand Up @@ -2448,7 +2443,6 @@ static void r5c_recovery_flush_data_only_stripes(struct r5l_log *log,
raid5_release_stripe(sh);
}

md_wakeup_thread(conf->mddev->thread);
/* reuse conf->wait_for_quiescent in recovery */
wait_event(conf->wait_for_quiescent,
atomic_read(&conf->active_stripes) == 0);
Expand Down Expand Up @@ -2491,10 +2485,10 @@ static int r5l_recovery_log(struct r5l_log *log)
ctx->seq += 10000;

if ((ctx->data_only_stripes == 0) && (ctx->data_parity_stripes == 0))
pr_debug("md/raid:%s: starting from clean shutdown\n",
pr_info("md/raid:%s: starting from clean shutdown\n",
mdname(mddev));
else
pr_debug("md/raid:%s: recovering %d data-only stripes and %d data-parity stripes\n",
pr_info("md/raid:%s: recovering %d data-only stripes and %d data-parity stripes\n",
mdname(mddev), ctx->data_only_stripes,
ctx->data_parity_stripes);

Expand Down Expand Up @@ -3036,6 +3030,23 @@ static int r5l_load_log(struct r5l_log *log)
return ret;
}

/*
 * r5l_start - load (recover) the raid5 write-back journal at array start.
 *
 * Called via the md personality ->start() path once mddev->thread is
 * available.  A NULL log (array without a journal device) is not an
 * error.  If r5l_load_log() fails, the log is torn down again with
 * r5l_exit_log() before the error is propagated.
 *
 * Returns 0 on success or when there is no log; otherwise the error
 * from r5l_load_log().
 */
int r5l_start(struct r5l_log *log)
{
	struct mddev *mddev;
	struct r5conf *conf;
	int ret;

	if (!log)
		return 0;

	ret = r5l_load_log(log);
	if (!ret)
		return 0;

	/* Recovery failed: tear the log down before reporting the error. */
	mddev = log->rdev->mddev;
	conf = mddev->private;
	r5l_exit_log(conf);

	return ret;
}

void r5c_update_on_rdev_error(struct mddev *mddev, struct md_rdev *rdev)
{
struct r5conf *conf = mddev->private;
Expand Down Expand Up @@ -3138,13 +3149,9 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)

rcu_assign_pointer(conf->log, log);

if (r5l_load_log(log))
goto error;

set_bit(MD_HAS_JOURNAL, &conf->mddev->flags);
return 0;

error:
rcu_assign_pointer(conf->log, NULL);
md_unregister_thread(&log->reclaim_thread);
reclaim_thread:
Expand Down
30 changes: 30 additions & 0 deletions drivers/md/raid5-log.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ extern struct md_sysfs_entry r5c_journal_mode;
extern void r5c_update_on_rdev_error(struct mddev *mddev,
struct md_rdev *rdev);
extern bool r5c_big_stripe_cached(struct r5conf *conf, sector_t sect);
extern int r5l_start(struct r5l_log *log);

extern struct dma_async_tx_descriptor *
ops_run_partial_parity(struct stripe_head *sh, struct raid5_percpu *percpu,
Expand All @@ -42,6 +43,7 @@ extern int ppl_write_stripe(struct r5conf *conf, struct stripe_head *sh);
extern void ppl_write_stripe_run(struct r5conf *conf);
extern void ppl_stripe_write_finished(struct stripe_head *sh);
extern int ppl_modify_log(struct r5conf *conf, struct md_rdev *rdev, bool add);
extern void ppl_quiesce(struct r5conf *conf, int quiesce);

static inline bool raid5_has_ppl(struct r5conf *conf)
{
Expand Down Expand Up @@ -87,6 +89,34 @@ static inline void log_write_stripe_run(struct r5conf *conf)
ppl_write_stripe_run(conf);
}

/*
 * Dispatch a "flush stripes to the raid disks" request to whichever
 * log implementation is active: the write-back journal if present,
 * else the partial parity log (PPL), else a no-op.
 */
static inline void log_flush_stripe_to_raid(struct r5conf *conf)
{
	if (conf->log) {
		r5l_flush_stripe_to_raid(conf->log);
		return;
	}

	if (raid5_has_ppl(conf))
		ppl_write_stripe_run(conf);
}

/*
 * Dispatch a FLUSH bio to the active log implementation.
 *
 * Returns the journal's result when a write-back journal is present,
 * 0 for PPL (PPL needs no special flush handling here), and -ENODEV
 * when no log of any kind is configured so the caller falls back to
 * the plain raid5 path.
 */
static inline int log_handle_flush_request(struct r5conf *conf, struct bio *bio)
{
	if (conf->log)
		return r5l_handle_flush_request(conf->log, bio);

	if (raid5_has_ppl(conf))
		return 0;

	return -ENODEV;
}

/*
 * Forward a quiesce/unquiesce request (quiesce != 0 means enter
 * quiescent state) to the active log implementation: write-back
 * journal first, then PPL; a no-op when neither is configured.
 */
static inline void log_quiesce(struct r5conf *conf, int quiesce)
{
	if (conf->log) {
		r5l_quiesce(conf->log, quiesce);
		return;
	}

	if (raid5_has_ppl(conf))
		ppl_quiesce(conf, quiesce);
}

static inline void log_exit(struct r5conf *conf)
{
if (conf->log)
Expand Down
Loading

0 comments on commit 040639b

Please sign in to comment.