Skip to content

Commit

Permalink
Merge tag 'md/4.2-fixes' of git://neil.brown.name/md
Browse files Browse the repository at this point in the history
Pull md fixes from Neil Brown:
 "Some md fixes for 4.2

  Several are tagged for -stable.
  A few aren't because they are not very, serious or because they are in
  the 'experimental' cluster code"

* tag 'md/4.2-fixes' of git://neil.brown.name/md:
  md/raid5: clear R5_NeedReplace when no longer needed.
  Fix read-balancing during node failure
  md-cluster: fix bitmap sub-offset in bitmap_read_sb
  md: Return error if request_module fails and returns positive value
  md: Skip cluster setup in case of error while reading bitmap
  md/raid1: fix test for 'was read error from last working device'.
  md: Skip cluster setup for dm-raid
  md: flush ->event_work before stopping array.
  md/raid10: always set reshape_safe when initializing reshape_position.
  md/raid5: avoid races when changing cache size.
  • Loading branch information
torvalds committed Jul 25, 2015
2 parents 32fd3d4 + e6030cb commit aca105a
Show file tree
Hide file tree
Showing 8 changed files with 71 additions and 27 deletions.
28 changes: 17 additions & 11 deletions drivers/md/bitmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -494,7 +494,7 @@ static int bitmap_new_disk_sb(struct bitmap *bitmap)
bitmap_super_t *sb;
unsigned long chunksize, daemon_sleep, write_behind;

bitmap->storage.sb_page = alloc_page(GFP_KERNEL);
bitmap->storage.sb_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (bitmap->storage.sb_page == NULL)
return -ENOMEM;
bitmap->storage.sb_page->index = 0;
Expand Down Expand Up @@ -541,6 +541,7 @@ static int bitmap_new_disk_sb(struct bitmap *bitmap)
sb->state = cpu_to_le32(bitmap->flags);
bitmap->events_cleared = bitmap->mddev->events;
sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
bitmap->mddev->bitmap_info.nodes = 0;

kunmap_atomic(sb);

Expand All @@ -558,6 +559,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
unsigned long sectors_reserved = 0;
int err = -EINVAL;
struct page *sb_page;
loff_t offset = bitmap->mddev->bitmap_info.offset;

if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) {
chunksize = 128 * 1024 * 1024;
Expand All @@ -584,9 +586,9 @@ static int bitmap_read_sb(struct bitmap *bitmap)
bm_blocks = ((bm_blocks+7) >> 3) + sizeof(bitmap_super_t);
/* to 4k blocks */
bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096);
bitmap->mddev->bitmap_info.offset += bitmap->cluster_slot * (bm_blocks << 3);
offset = bitmap->mddev->bitmap_info.offset + (bitmap->cluster_slot * (bm_blocks << 3));
pr_info("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__,
bitmap->cluster_slot, (unsigned long long)bitmap->mddev->bitmap_info.offset);
bitmap->cluster_slot, offset);
}

if (bitmap->storage.file) {
Expand All @@ -597,7 +599,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
bitmap, bytes, sb_page);
} else {
err = read_sb_page(bitmap->mddev,
bitmap->mddev->bitmap_info.offset,
offset,
sb_page,
0, sizeof(bitmap_super_t));
}
Expand All @@ -611,8 +613,16 @@ static int bitmap_read_sb(struct bitmap *bitmap)
daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
write_behind = le32_to_cpu(sb->write_behind);
sectors_reserved = le32_to_cpu(sb->sectors_reserved);
nodes = le32_to_cpu(sb->nodes);
strlcpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64);
/* XXX: This is a hack to ensure that we don't use clustering
* in case:
* - dm-raid is in use and
* - the nodes written in bitmap_sb is erroneous.
*/
if (!bitmap->mddev->sync_super) {
nodes = le32_to_cpu(sb->nodes);
strlcpy(bitmap->mddev->bitmap_info.cluster_name,
sb->cluster_name, 64);
}

/* verify that the bitmap-specific fields are valid */
if (sb->magic != cpu_to_le32(BITMAP_MAGIC))
Expand Down Expand Up @@ -671,7 +681,7 @@ static int bitmap_read_sb(struct bitmap *bitmap)
kunmap_atomic(sb);
/* Assiging chunksize is required for "re_read" */
bitmap->mddev->bitmap_info.chunksize = chunksize;
if (nodes && (bitmap->cluster_slot < 0)) {
if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
err = md_setup_cluster(bitmap->mddev, nodes);
if (err) {
pr_err("%s: Could not setup cluster service (%d)\n",
Expand Down Expand Up @@ -1866,10 +1876,6 @@ int bitmap_copy_from_slot(struct mddev *mddev, int slot,
if (IS_ERR(bitmap))
return PTR_ERR(bitmap);

rv = bitmap_read_sb(bitmap);
if (rv)
goto err;

rv = bitmap_init_from_disk(bitmap, 0);
if (rv)
goto err;
Expand Down
12 changes: 11 additions & 1 deletion drivers/md/md-cluster.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ struct resync_info {

/* md_cluster_info flags */
#define MD_CLUSTER_WAITING_FOR_NEWDISK 1
#define MD_CLUSTER_SUSPEND_READ_BALANCING 2


struct md_cluster_info {
Expand Down Expand Up @@ -275,6 +276,9 @@ static void recover_bitmaps(struct md_thread *thread)

static void recover_prep(void *arg)
{
struct mddev *mddev = arg;
struct md_cluster_info *cinfo = mddev->cluster_info;
set_bit(MD_CLUSTER_SUSPEND_READ_BALANCING, &cinfo->state);
}

static void recover_slot(void *arg, struct dlm_slot *slot)
Expand Down Expand Up @@ -307,6 +311,7 @@ static void recover_done(void *arg, struct dlm_slot *slots,

cinfo->slot_number = our_slot;
complete(&cinfo->completion);
clear_bit(MD_CLUSTER_SUSPEND_READ_BALANCING, &cinfo->state);
}

static const struct dlm_lockspace_ops md_ls_ops = {
Expand Down Expand Up @@ -816,12 +821,17 @@ static void resync_finish(struct mddev *mddev)
resync_send(mddev, RESYNCING, 0, 0);
}

static int area_resyncing(struct mddev *mddev, sector_t lo, sector_t hi)
static int area_resyncing(struct mddev *mddev, int direction,
sector_t lo, sector_t hi)
{
struct md_cluster_info *cinfo = mddev->cluster_info;
int ret = 0;
struct suspend_info *s;

if ((direction == READ) &&
test_bit(MD_CLUSTER_SUSPEND_READ_BALANCING, &cinfo->state))
return 1;

spin_lock_irq(&cinfo->suspend_lock);
if (list_empty(&cinfo->suspend_list))
goto out;
Expand Down
2 changes: 1 addition & 1 deletion drivers/md/md-cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ struct md_cluster_operations {
int (*metadata_update_start)(struct mddev *mddev);
int (*metadata_update_finish)(struct mddev *mddev);
int (*metadata_update_cancel)(struct mddev *mddev);
int (*area_resyncing)(struct mddev *mddev, sector_t lo, sector_t hi);
int (*area_resyncing)(struct mddev *mddev, int direction, sector_t lo, sector_t hi);
int (*add_new_disk_start)(struct mddev *mddev, struct md_rdev *rdev);
int (*add_new_disk_finish)(struct mddev *mddev);
int (*new_disk_ack)(struct mddev *mddev, bool ack);
Expand Down
4 changes: 3 additions & 1 deletion drivers/md/md.c
Original file line number Diff line number Diff line change
Expand Up @@ -5382,6 +5382,8 @@ static void __md_stop(struct mddev *mddev)
{
struct md_personality *pers = mddev->pers;
mddev_detach(mddev);
/* Ensure ->event_work is done */
flush_workqueue(md_misc_wq);
spin_lock(&mddev->lock);
mddev->ready = 0;
mddev->pers = NULL;
Expand Down Expand Up @@ -7437,7 +7439,7 @@ int md_setup_cluster(struct mddev *mddev, int nodes)
err = request_module("md-cluster");
if (err) {
pr_err("md-cluster module not found.\n");
return err;
return -ENOENT;
}

spin_lock(&pers_lock);
Expand Down
9 changes: 5 additions & 4 deletions drivers/md/raid1.c
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ static void raid1_end_read_request(struct bio *bio, int error)
spin_lock_irqsave(&conf->device_lock, flags);
if (r1_bio->mddev->degraded == conf->raid_disks ||
(r1_bio->mddev->degraded == conf->raid_disks-1 &&
!test_bit(Faulty, &conf->mirrors[mirror].rdev->flags)))
test_bit(In_sync, &conf->mirrors[mirror].rdev->flags)))
uptodate = 1;
spin_unlock_irqrestore(&conf->device_lock, flags);
}
Expand Down Expand Up @@ -541,7 +541,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect

if ((conf->mddev->recovery_cp < this_sector + sectors) ||
(mddev_is_clustered(conf->mddev) &&
md_cluster_ops->area_resyncing(conf->mddev, this_sector,
md_cluster_ops->area_resyncing(conf->mddev, READ, this_sector,
this_sector + sectors)))
choose_first = 1;
else
Expand Down Expand Up @@ -1111,7 +1111,8 @@ static void make_request(struct mddev *mddev, struct bio * bio)
((bio_end_sector(bio) > mddev->suspend_lo &&
bio->bi_iter.bi_sector < mddev->suspend_hi) ||
(mddev_is_clustered(mddev) &&
md_cluster_ops->area_resyncing(mddev, bio->bi_iter.bi_sector, bio_end_sector(bio))))) {
md_cluster_ops->area_resyncing(mddev, WRITE,
bio->bi_iter.bi_sector, bio_end_sector(bio))))) {
/* As the suspend_* range is controlled by
* userspace, we want an interruptible
* wait.
Expand All @@ -1124,7 +1125,7 @@ static void make_request(struct mddev *mddev, struct bio * bio)
if (bio_end_sector(bio) <= mddev->suspend_lo ||
bio->bi_iter.bi_sector >= mddev->suspend_hi ||
(mddev_is_clustered(mddev) &&
!md_cluster_ops->area_resyncing(mddev,
!md_cluster_ops->area_resyncing(mddev, WRITE,
bio->bi_iter.bi_sector, bio_end_sector(bio))))
break;
schedule();
Expand Down
5 changes: 4 additions & 1 deletion drivers/md/raid10.c
Original file line number Diff line number Diff line change
Expand Up @@ -3556,6 +3556,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
/* far_copies must be 1 */
conf->prev.stride = conf->dev_sectors;
}
conf->reshape_safe = conf->reshape_progress;
spin_lock_init(&conf->device_lock);
INIT_LIST_HEAD(&conf->retry_list);

Expand Down Expand Up @@ -3760,7 +3761,6 @@ static int run(struct mddev *mddev)
}
conf->offset_diff = min_offset_diff;

conf->reshape_safe = conf->reshape_progress;
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
Expand Down Expand Up @@ -4103,6 +4103,7 @@ static int raid10_start_reshape(struct mddev *mddev)
conf->reshape_progress = size;
} else
conf->reshape_progress = 0;
conf->reshape_safe = conf->reshape_progress;
spin_unlock_irq(&conf->device_lock);

if (mddev->delta_disks && mddev->bitmap) {
Expand Down Expand Up @@ -4170,6 +4171,7 @@ static int raid10_start_reshape(struct mddev *mddev)
rdev->new_data_offset = rdev->data_offset;
smp_wmb();
conf->reshape_progress = MaxSector;
conf->reshape_safe = MaxSector;
mddev->reshape_position = MaxSector;
spin_unlock_irq(&conf->device_lock);
return ret;
Expand Down Expand Up @@ -4524,6 +4526,7 @@ static void end_reshape(struct r10conf *conf)
md_finish_reshape(conf->mddev);
smp_wmb();
conf->reshape_progress = MaxSector;
conf->reshape_safe = MaxSector;
spin_unlock_irq(&conf->device_lock);

/* read-ahead size must cover two whole stripes, which is
Expand Down
35 changes: 28 additions & 7 deletions drivers/md/raid5.c
Original file line number Diff line number Diff line change
Expand Up @@ -2162,6 +2162,9 @@ static int resize_stripes(struct r5conf *conf, int newsize)
if (!sc)
return -ENOMEM;

/* Need to ensure auto-resizing doesn't interfere */
mutex_lock(&conf->cache_size_mutex);

for (i = conf->max_nr_stripes; i; i--) {
nsh = alloc_stripe(sc, GFP_KERNEL);
if (!nsh)
Expand All @@ -2178,6 +2181,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
kmem_cache_free(sc, nsh);
}
kmem_cache_destroy(sc);
mutex_unlock(&conf->cache_size_mutex);
return -ENOMEM;
}
/* Step 2 - Must use GFP_NOIO now.
Expand Down Expand Up @@ -2224,6 +2228,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
} else
err = -ENOMEM;

mutex_unlock(&conf->cache_size_mutex);
/* Step 4, return new stripes to service */
while(!list_empty(&newstripes)) {
nsh = list_entry(newstripes.next, struct stripe_head, lru);
Expand Down Expand Up @@ -4061,8 +4066,10 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
&first_bad, &bad_sectors))
set_bit(R5_ReadRepl, &dev->flags);
else {
if (rdev)
if (rdev && !test_bit(Faulty, &rdev->flags))
set_bit(R5_NeedReplace, &dev->flags);
else
clear_bit(R5_NeedReplace, &dev->flags);
rdev = rcu_dereference(conf->disks[i].rdev);
clear_bit(R5_ReadRepl, &dev->flags);
}
Expand Down Expand Up @@ -5857,12 +5864,14 @@ static void raid5d(struct md_thread *thread)
pr_debug("%d stripes handled\n", handled);

spin_unlock_irq(&conf->device_lock);
if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state)) {
if (test_and_clear_bit(R5_ALLOC_MORE, &conf->cache_state) &&
mutex_trylock(&conf->cache_size_mutex)) {
grow_one_stripe(conf, __GFP_NOWARN);
/* Set flag even if allocation failed. This helps
* slow down allocation requests when mem is short
*/
set_bit(R5_DID_ALLOC, &conf->cache_state);
mutex_unlock(&conf->cache_size_mutex);
}

async_tx_issue_pending_all();
Expand Down Expand Up @@ -5894,18 +5903,22 @@ raid5_set_cache_size(struct mddev *mddev, int size)
return -EINVAL;

conf->min_nr_stripes = size;
mutex_lock(&conf->cache_size_mutex);
while (size < conf->max_nr_stripes &&
drop_one_stripe(conf))
;
mutex_unlock(&conf->cache_size_mutex);


err = md_allow_write(mddev);
if (err)
return err;

mutex_lock(&conf->cache_size_mutex);
while (size > conf->max_nr_stripes)
if (!grow_one_stripe(conf, GFP_KERNEL))
break;
mutex_unlock(&conf->cache_size_mutex);

return 0;
}
Expand Down Expand Up @@ -6371,11 +6384,18 @@ static unsigned long raid5_cache_scan(struct shrinker *shrink,
struct shrink_control *sc)
{
struct r5conf *conf = container_of(shrink, struct r5conf, shrinker);
int ret = 0;
while (ret < sc->nr_to_scan) {
if (drop_one_stripe(conf) == 0)
return SHRINK_STOP;
ret++;
unsigned long ret = SHRINK_STOP;

if (mutex_trylock(&conf->cache_size_mutex)) {
ret= 0;
while (ret < sc->nr_to_scan) {
if (drop_one_stripe(conf) == 0) {
ret = SHRINK_STOP;
break;
}
ret++;
}
mutex_unlock(&conf->cache_size_mutex);
}
return ret;
}
Expand Down Expand Up @@ -6444,6 +6464,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
goto abort;
spin_lock_init(&conf->device_lock);
seqcount_init(&conf->gen_lock);
mutex_init(&conf->cache_size_mutex);
init_waitqueue_head(&conf->wait_for_quiescent);
for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) {
init_waitqueue_head(&conf->wait_for_stripe[i]);
Expand Down
3 changes: 2 additions & 1 deletion drivers/md/raid5.h
Original file line number Diff line number Diff line change
Expand Up @@ -482,7 +482,8 @@ struct r5conf {
*/
int active_name;
char cache_name[2][32];
struct kmem_cache *slab_cache; /* for allocating stripes */
struct kmem_cache *slab_cache; /* for allocating stripes */
struct mutex cache_size_mutex; /* Protect changes to cache size */

int seq_flush, seq_write;
int quiesce;
Expand Down

0 comments on commit aca105a

Please sign in to comment.