Skip to content

Commit

Permalink
md: allow a reshape operation to be reversed.
Browse files Browse the repository at this point in the history
Currently a reshape operation always progresses from the start
of the array to the end unless the number of devices is being
reduced, in which case it progresses in the opposite direction.

To reverse a partial reshape which changes the number of devices
you can stop the array and re-assemble with the raid-disks numbers
reversed and it will undo.

However for a reshape that does not change the number of devices
it is not possible to reverse the reshape in the middle - you have to
wait until it completes.

So add a 'reshape_direction' attribute which is either 'forwards' or
'backwards' and can be explicitly set when delta_disks is zero.

This will become more important when we allow the data_offset to
change in a reshape.  Then the explicit statement of what direction is
being used will be more useful.

This can be enabled in raid5 trivially as it already supports
reverse reshape and just needs to use a different trigger to request it.

Signed-off-by: NeilBrown <[email protected]>
  • Loading branch information
neilbrown committed May 20, 2012
1 parent b5e1b8c commit 2c810cd
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 14 deletions.
67 changes: 65 additions & 2 deletions drivers/md/md.c
Original file line number Diff line number Diff line change
Expand Up @@ -607,6 +607,7 @@ void mddev_init(struct mddev *mddev)
init_waitqueue_head(&mddev->sb_wait);
init_waitqueue_head(&mddev->recovery_wait);
mddev->reshape_position = MaxSector;
mddev->reshape_backwards = 0;
mddev->resync_min = 0;
mddev->resync_max = MaxSector;
mddev->level = LEVEL_NONE;
Expand Down Expand Up @@ -1185,13 +1186,16 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
mddev->events = ev1;
mddev->bitmap_info.offset = 0;
mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
mddev->reshape_backwards = 0;

if (mddev->minor_version >= 91) {
mddev->reshape_position = sb->reshape_position;
mddev->delta_disks = sb->delta_disks;
mddev->new_level = sb->new_level;
mddev->new_layout = sb->new_layout;
mddev->new_chunk_sectors = sb->new_chunk >> 9;
if (mddev->delta_disks < 0)
mddev->reshape_backwards = 1;
} else {
mddev->reshape_position = MaxSector;
mddev->delta_disks = 0;
Expand Down Expand Up @@ -1645,7 +1649,8 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
mddev->events = ev1;
mddev->bitmap_info.offset = 0;
mddev->bitmap_info.default_offset = 1024 >> 9;

mddev->reshape_backwards = 0;

mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
memcpy(mddev->uuid, sb->set_uuid, 16);

Expand All @@ -1662,6 +1667,11 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
mddev->new_level = le32_to_cpu(sb->new_level);
mddev->new_layout = le32_to_cpu(sb->new_layout);
mddev->new_chunk_sectors = le32_to_cpu(sb->new_chunk);
if (mddev->delta_disks < 0 ||
(mddev->delta_disks == 0 &&
(le32_to_cpu(sb->feature_map)
& MD_FEATURE_RESHAPE_BACKWARDS)))
mddev->reshape_backwards = 1;
} else {
mddev->reshape_position = MaxSector;
mddev->delta_disks = 0;
Expand Down Expand Up @@ -1781,6 +1791,10 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
sb->delta_disks = cpu_to_le32(mddev->delta_disks);
sb->new_level = cpu_to_le32(mddev->new_level);
sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
if (mddev->delta_disks == 0 &&
mddev->reshape_backwards)
sb->feature_map
|= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS);
}

if (rdev->badblocks.count == 0)
Expand Down Expand Up @@ -3419,6 +3433,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
mddev->new_chunk_sectors = mddev->chunk_sectors;
mddev->raid_disks -= mddev->delta_disks;
mddev->delta_disks = 0;
mddev->reshape_backwards = 0;
module_put(pers->owner);
printk(KERN_WARNING "md: %s: %s would not accept array\n",
mdname(mddev), clevel);
Expand Down Expand Up @@ -3492,6 +3507,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
mddev->layout = mddev->new_layout;
mddev->chunk_sectors = mddev->new_chunk_sectors;
mddev->delta_disks = 0;
mddev->reshape_backwards = 0;
mddev->degraded = 0;
if (mddev->pers->sync_request == NULL) {
/* this is now an array without redundancy, so
Expand Down Expand Up @@ -3585,6 +3601,7 @@ raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
int olddisks = mddev->raid_disks - mddev->delta_disks;
mddev->delta_disks = n - olddisks;
mddev->raid_disks = n;
mddev->reshape_backwards = (mddev->delta_disks < 0);
} else
mddev->raid_disks = n;
return rv ? rv : len;
Expand Down Expand Up @@ -4436,6 +4453,7 @@ reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
return -EINVAL;
mddev->reshape_position = new;
mddev->delta_disks = 0;
mddev->reshape_backwards = 0;
mddev->new_level = mddev->level;
mddev->new_layout = mddev->layout;
mddev->new_chunk_sectors = mddev->chunk_sectors;
Expand All @@ -4446,6 +4464,42 @@ static struct md_sysfs_entry md_reshape_position =
__ATTR(reshape_position, S_IRUGO|S_IWUSR, reshape_position_show,
reshape_position_store);

/*
 * sysfs 'show' handler for the reshape_direction attribute.
 *
 * Writes "backwards" or "forwards", followed by a newline, into @page
 * depending on whether mddev->reshape_backwards is non-zero, and
 * returns whatever sprintf() reports as the number of characters written.
 */
static ssize_t
reshape_direction_show(struct mddev *mddev, char *page)
{
	const char *direction = "forwards";

	if (mddev->reshape_backwards)
		direction = "backwards";

	return sprintf(page, "%s\n", direction);
}

/*
 * sysfs 'store' handler for the reshape_direction attribute.
 *
 * Accepts "forwards" or "backwards" (as recognised by cmd_match()) and
 * records the requested direction in mddev->reshape_backwards.
 *
 * Returns @len on success (including when the requested direction is
 * already in effect), -EINVAL for an unrecognised keyword or for a
 * persistent array with major_version 0, and -EBUSY when delta_disks is
 * non-zero.
 */
static ssize_t
reshape_direction_store(struct mddev *mddev, const char *buf, size_t len)
{
	int want_backwards;

	if (cmd_match(buf, "forwards"))
		want_backwards = 0;
	else if (cmd_match(buf, "backwards"))
		want_backwards = 1;
	else
		return -EINVAL;

	/* Only validate when the request would actually change something. */
	if (mddev->reshape_backwards != want_backwards) {
		/* A pending change in device count rules out an explicit choice. */
		if (mddev->delta_disks)
			return -EBUSY;

		/*
		 * NOTE(review): presumably refused because version-0
		 * persistent metadata cannot record an explicit direction
		 * - confirm against the superblock handlers.
		 */
		if (mddev->persistent && mddev->major_version == 0)
			return -EINVAL;

		mddev->reshape_backwards = want_backwards;
	}

	return len;
}

/*
 * sysfs entry for the "reshape_direction" attribute: mode is
 * S_IRUGO|S_IWUSR, reads are served by reshape_direction_show() and
 * writes by reshape_direction_store().
 */
static struct md_sysfs_entry md_reshape_direction =
__ATTR(reshape_direction, S_IRUGO|S_IWUSR, reshape_direction_show,
reshape_direction_store);

static ssize_t
array_size_show(struct mddev *mddev, char *page)
{
Expand Down Expand Up @@ -4501,6 +4555,7 @@ static struct attribute *md_default_attrs[] = {
&md_safe_delay.attr,
&md_array_state.attr,
&md_reshape_position.attr,
&md_reshape_direction.attr,
&md_array_size.attr,
&max_corr_read_errors.attr,
NULL,
Expand Down Expand Up @@ -5064,6 +5119,7 @@ static void md_clean(struct mddev *mddev)
mddev->events = 0;
mddev->can_decrease_events = 0;
mddev->delta_disks = 0;
mddev->reshape_backwards = 0;
mddev->new_level = LEVEL_NONE;
mddev->new_layout = 0;
mddev->new_chunk_sectors = 0;
Expand Down Expand Up @@ -5888,6 +5944,7 @@ static int set_array_info(struct mddev * mddev, mdu_array_info_t *info)
mddev->new_chunk_sectors = mddev->chunk_sectors;
mddev->new_layout = mddev->layout;
mddev->delta_disks = 0;
mddev->reshape_backwards = 0;

return 0;
}
Expand Down Expand Up @@ -5953,10 +6010,16 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks)
if (mddev->sync_thread || mddev->reshape_position != MaxSector)
return -EBUSY;
mddev->delta_disks = raid_disks - mddev->raid_disks;
if (mddev->delta_disks < 0)
mddev->reshape_backwards = 1;
else if (mddev->delta_disks > 0)
mddev->reshape_backwards = 0;

rv = mddev->pers->check_reshape(mddev);
if (rv < 0)
if (rv < 0) {
mddev->delta_disks = 0;
mddev->reshape_backwards = 0;
}
return rv;
}

Expand Down
1 change: 1 addition & 0 deletions drivers/md/md.h
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,7 @@ struct mddev {
sector_t reshape_position;
int delta_disks, new_level, new_layout;
int new_chunk_sectors;
int reshape_backwards;

atomic_t plug_cnt; /* If device is expecting
* more bios soon.
Expand Down
23 changes: 12 additions & 11 deletions drivers/md/raid5.c
Original file line number Diff line number Diff line change
Expand Up @@ -3970,13 +3970,13 @@ static void make_request(struct mddev *mddev, struct bio * bi)
* to check again.
*/
spin_lock_irq(&conf->device_lock);
if (mddev->delta_disks < 0
if (mddev->reshape_backwards
? logical_sector < conf->reshape_progress
: logical_sector >= conf->reshape_progress) {
disks = conf->previous_raid_disks;
previous = 1;
} else {
if (mddev->delta_disks < 0
if (mddev->reshape_backwards
? logical_sector < conf->reshape_safe
: logical_sector >= conf->reshape_safe) {
spin_unlock_irq(&conf->device_lock);
Expand Down Expand Up @@ -4009,7 +4009,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
*/
int must_retry = 0;
spin_lock_irq(&conf->device_lock);
if (mddev->delta_disks < 0
if (mddev->reshape_backwards
? logical_sector >= conf->reshape_progress
: logical_sector < conf->reshape_progress)
/* mismatch, need to try again */
Expand Down Expand Up @@ -4108,11 +4108,11 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk

if (sector_nr == 0) {
/* If restarting in the middle, skip the initial sectors */
if (mddev->delta_disks < 0 &&
if (mddev->reshape_backwards &&
conf->reshape_progress < raid5_size(mddev, 0, 0)) {
sector_nr = raid5_size(mddev, 0, 0)
- conf->reshape_progress;
} else if (mddev->delta_disks >= 0 &&
} else if (!mddev->reshape_backwards &&
conf->reshape_progress > 0)
sector_nr = conf->reshape_progress;
sector_div(sector_nr, new_data_disks);
Expand Down Expand Up @@ -4147,7 +4147,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
sector_div(readpos, data_disks);
safepos = conf->reshape_safe;
sector_div(safepos, data_disks);
if (mddev->delta_disks < 0) {
if (mddev->reshape_backwards) {
writepos -= min_t(sector_t, reshape_sectors, writepos);
readpos += reshape_sectors;
safepos += reshape_sectors;
Expand All @@ -4174,7 +4174,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
* Maybe that number should be configurable, but I'm not sure it is
* worth it.... maybe it could be a multiple of safemode_delay???
*/
if ((mddev->delta_disks < 0
if ((mddev->reshape_backwards
? (safepos > writepos && readpos < writepos)
: (safepos < writepos && readpos > writepos)) ||
time_after(jiffies, conf->reshape_checkpoint + 10*HZ)) {
Expand All @@ -4195,7 +4195,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
}

if (mddev->delta_disks < 0) {
if (mddev->reshape_backwards) {
BUG_ON(conf->reshape_progress == 0);
stripe_addr = writepos;
BUG_ON((mddev->dev_sectors &
Expand Down Expand Up @@ -4239,7 +4239,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
list_add(&sh->lru, &stripes);
}
spin_lock_irq(&conf->device_lock);
if (mddev->delta_disks < 0)
if (mddev->reshape_backwards)
conf->reshape_progress -= reshape_sectors * new_data_disks;
else
conf->reshape_progress += reshape_sectors * new_data_disks;
Expand Down Expand Up @@ -5008,7 +5008,7 @@ static int run(struct mddev *mddev)
mdname(mddev));
return -EINVAL;
}
} else if (mddev->delta_disks < 0
} else if (mddev->reshape_backwards
? (here_new * mddev->new_chunk_sectors <=
here_old * mddev->chunk_sectors)
: (here_new * mddev->new_chunk_sectors >=
Expand Down Expand Up @@ -5535,7 +5535,7 @@ static int raid5_start_reshape(struct mddev *mddev)
conf->chunk_sectors = mddev->new_chunk_sectors;
conf->prev_algo = conf->algorithm;
conf->algorithm = mddev->new_layout;
if (mddev->delta_disks < 0)
if (mddev->reshape_backwards)
conf->reshape_progress = raid5_size(mddev, 0, 0);
else
conf->reshape_progress = 0;
Expand Down Expand Up @@ -5663,6 +5663,7 @@ static void raid5_finish_reshape(struct mddev *mddev)
mddev->chunk_sectors = conf->chunk_sectors;
mddev->reshape_position = MaxSector;
mddev->delta_disks = 0;
mddev->reshape_backwards = 0;
}
}

Expand Down
7 changes: 6 additions & 1 deletion include/linux/raid/md_p.h
Original file line number Diff line number Diff line change
Expand Up @@ -281,10 +281,15 @@ struct mdp_superblock_1 {
* active device with same 'role'.
* 'recovery_offset' is also set.
*/
#define MD_FEATURE_RESHAPE_BACKWARDS 32 /* A reshape that does not change
					 * the number of devices
					 * (delta_disks == 0) is going
					 * backwards anyway.  With a negative
					 * delta_disks the reshape is treated
					 * as backwards regardless of this bit.
					 */
#define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \
|MD_FEATURE_RECOVERY_OFFSET \
|MD_FEATURE_RESHAPE_ACTIVE \
|MD_FEATURE_BAD_BLOCKS \
|MD_FEATURE_REPLACEMENT)
|MD_FEATURE_REPLACEMENT \
|MD_FEATURE_RESHAPE_BACKWARDS)

#endif

0 comments on commit 2c810cd

Please sign in to comment.