Skip to content

Commit

Permalink
Merge branch 'for-jens' of git://git.drbd.org/linux-drbd into for-3.5…
Browse files Browse the repository at this point in the history
…/drivers

Philipp writes:

This are the updates we have in the drbd-8.3 tree. They are intended
for your "for-3.5/drivers" drivers branch.

These changes include one new feature:
 * Allow detach from frozen backing devices with the new --force option;
   configurable timeout for backing devices by the new disk-timeout option

And huge number of bug fixes:
 * Fixed a write ordering problem on SyncTarget nodes for a write
   to a block that gets resynced at the same time. The bug can
   only be triggered with a device that has a firmware that
   actually reorders writes to the same block
 * Fixed a race between disconnect and receive_state, that could cause
   a IO lockup
 * Fixed resend/resubmit for requests with disk or network timeout
 * Make sure that hard state changed do not disturb the connection
   establishing process (I.e. detach due to an IO error). When the
   bug was triggered it caused a retry in the connect process
 * Postpone soft state changes to no disturb the connection
   establishing process (I.e. becoming primary). When the bug
   was triggered it could cause both nodes going into SyncSource state
 * Fixed a refcount leak that could cause failures when trying to
   unload a protocol family modules, that was used by DRBD
 * Dedicated page pool for meta data IOs
 * Deny normal detach (as opposed to --forced) if the user tries
   to detach from the last UpToDate disk in the resource
 * Fixed a possible protocol error that could be caused by
   "unusual" BIOs.
 * Enforce the disk-timeout option also on meta-data IO operations
 * Implemented stable bitmap pages when we do a full write out of
   the bitmap
 * Fixed a rare compatibility issue with DRBD's older than 8.3.7
   when negotiating the bio_size
 * Fixed a rare race condition where an empty resync could stall with
   if pause/unpause events happen in parallel
 * Made the re-establishing of connections quicker, if it got a broken pipe
   once. Previously there was a bug in the code caused it to waste the first
   successful established connection after a broken pipe event.

PS: I am postponing the drbd-8.4 for mainline for one or two kernel
    development cycles more (the ~400 patchets set).
  • Loading branch information
axboe committed May 18, 2012
2 parents 13828de + 92b4ca2 commit 4fd1ffa
Show file tree
Hide file tree
Showing 13 changed files with 754 additions and 288 deletions.
104 changes: 78 additions & 26 deletions drivers/block/drbd/drbd_actlog.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,39 +65,80 @@ struct drbd_atodb_wait {

int w_al_write_transaction(struct drbd_conf *, struct drbd_work *, int);

void *drbd_md_get_buffer(struct drbd_conf *mdev)
{
int r;

wait_event(mdev->misc_wait,
(r = atomic_cmpxchg(&mdev->md_io_in_use, 0, 1)) == 0 ||
mdev->state.disk <= D_FAILED);

return r ? NULL : page_address(mdev->md_io_page);
}

void drbd_md_put_buffer(struct drbd_conf *mdev)
{
if (atomic_dec_and_test(&mdev->md_io_in_use))
wake_up(&mdev->misc_wait);
}

static bool md_io_allowed(struct drbd_conf *mdev)
{
enum drbd_disk_state ds = mdev->state.disk;
return ds >= D_NEGOTIATING || ds == D_ATTACHING;
}

void wait_until_done_or_disk_failure(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
unsigned int *done)
{
long dt = bdev->dc.disk_timeout * HZ / 10;
if (dt == 0)
dt = MAX_SCHEDULE_TIMEOUT;

dt = wait_event_timeout(mdev->misc_wait, *done || !md_io_allowed(mdev), dt);
if (dt == 0)
dev_err(DEV, "meta-data IO operation timed out\n");
}

static int _drbd_md_sync_page_io(struct drbd_conf *mdev,
struct drbd_backing_dev *bdev,
struct page *page, sector_t sector,
int rw, int size)
{
struct bio *bio;
struct drbd_md_io md_io;
int ok;

md_io.mdev = mdev;
init_completion(&md_io.event);
md_io.error = 0;
mdev->md_io.done = 0;
mdev->md_io.error = -ENODEV;

if ((rw & WRITE) && !test_bit(MD_NO_FUA, &mdev->flags))
rw |= REQ_FUA | REQ_FLUSH;
rw |= REQ_SYNC;

bio = bio_alloc(GFP_NOIO, 1);
bio = bio_alloc_drbd(GFP_NOIO);
bio->bi_bdev = bdev->md_bdev;
bio->bi_sector = sector;
ok = (bio_add_page(bio, page, size, 0) == size);
if (!ok)
goto out;
bio->bi_private = &md_io;
bio->bi_private = &mdev->md_io;
bio->bi_end_io = drbd_md_io_complete;
bio->bi_rw = rw;

if (!get_ldev_if_state(mdev, D_ATTACHING)) { /* Corresponding put_ldev in drbd_md_io_complete() */
dev_err(DEV, "ASSERT FAILED: get_ldev_if_state() == 1 in _drbd_md_sync_page_io()\n");
ok = 0;
goto out;
}

bio_get(bio); /* one bio_put() is in the completion handler */
atomic_inc(&mdev->md_io_in_use); /* drbd_md_put_buffer() is in the completion handler */
if (drbd_insert_fault(mdev, (rw & WRITE) ? DRBD_FAULT_MD_WR : DRBD_FAULT_MD_RD))
bio_endio(bio, -EIO);
else
submit_bio(rw, bio);
wait_for_completion(&md_io.event);
ok = bio_flagged(bio, BIO_UPTODATE) && md_io.error == 0;
wait_until_done_or_disk_failure(mdev, bdev, &mdev->md_io.done);
ok = bio_flagged(bio, BIO_UPTODATE) && mdev->md_io.error == 0;

out:
bio_put(bio);
Expand All @@ -111,7 +152,7 @@ int drbd_md_sync_page_io(struct drbd_conf *mdev, struct drbd_backing_dev *bdev,
int offset = 0;
struct page *iop = mdev->md_io_page;

D_ASSERT(mutex_is_locked(&mdev->md_io_mutex));
D_ASSERT(atomic_read(&mdev->md_io_in_use) == 1);

BUG_ON(!bdev->md_bdev);

Expand Down Expand Up @@ -328,8 +369,13 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
return 1;
}

mutex_lock(&mdev->md_io_mutex); /* protects md_io_buffer, al_tr_cycle, ... */
buffer = (struct al_transaction *)page_address(mdev->md_io_page);
buffer = drbd_md_get_buffer(mdev); /* protects md_io_buffer, al_tr_cycle, ... */
if (!buffer) {
dev_err(DEV, "disk failed while waiting for md_io buffer\n");
complete(&((struct update_al_work *)w)->event);
put_ldev(mdev);
return 1;
}

buffer->magic = __constant_cpu_to_be32(DRBD_MAGIC);
buffer->tr_number = cpu_to_be32(mdev->al_tr_number);
Expand Down Expand Up @@ -374,7 +420,7 @@ w_al_write_transaction(struct drbd_conf *mdev, struct drbd_work *w, int unused)
D_ASSERT(mdev->al_tr_pos < MD_AL_MAX_SIZE);
mdev->al_tr_number++;

mutex_unlock(&mdev->md_io_mutex);
drbd_md_put_buffer(mdev);

complete(&((struct update_al_work *)w)->event);
put_ldev(mdev);
Expand Down Expand Up @@ -443,16 +489,17 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
/* lock out all other meta data io for now,
* and make sure the page is mapped.
*/
mutex_lock(&mdev->md_io_mutex);
buffer = page_address(mdev->md_io_page);
buffer = drbd_md_get_buffer(mdev);
if (!buffer)
return 0;

/* Find the valid transaction in the log */
for (i = 0; i <= mx; i++) {
rv = drbd_al_read_tr(mdev, bdev, buffer, i);
if (rv == 0)
continue;
if (rv == -1) {
mutex_unlock(&mdev->md_io_mutex);
drbd_md_put_buffer(mdev);
return 0;
}
cnr = be32_to_cpu(buffer->tr_number);
Expand All @@ -478,7 +525,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)

if (!found_valid) {
dev_warn(DEV, "No usable activity log found.\n");
mutex_unlock(&mdev->md_io_mutex);
drbd_md_put_buffer(mdev);
return 1;
}

Expand All @@ -493,7 +540,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
rv = drbd_al_read_tr(mdev, bdev, buffer, i);
ERR_IF(rv == 0) goto cancel;
if (rv == -1) {
mutex_unlock(&mdev->md_io_mutex);
drbd_md_put_buffer(mdev);
return 0;
}

Expand Down Expand Up @@ -534,7 +581,7 @@ int drbd_al_read_log(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
mdev->al_tr_pos = 0;

/* ok, we are done with it */
mutex_unlock(&mdev->md_io_mutex);
drbd_md_put_buffer(mdev);

dev_info(DEV, "Found %d transactions (%d active extents) in activity log.\n",
transactions, active_extents);
Expand Down Expand Up @@ -671,16 +718,20 @@ static void drbd_try_clear_on_disk_bm(struct drbd_conf *mdev, sector_t sector,
else
ext->rs_failed += count;
if (ext->rs_left < ext->rs_failed) {
dev_err(DEV, "BAD! sector=%llus enr=%u rs_left=%d "
"rs_failed=%d count=%d\n",
dev_warn(DEV, "BAD! sector=%llus enr=%u rs_left=%d "
"rs_failed=%d count=%d cstate=%s\n",
(unsigned long long)sector,
ext->lce.lc_number, ext->rs_left,
ext->rs_failed, count);
dump_stack();

lc_put(mdev->resync, &ext->lce);
drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
return;
ext->rs_failed, count,
drbd_conn_str(mdev->state.conn));

/* We don't expect to be able to clear more bits
* than have been set when we originally counted
* the set bits to cache that value in ext->rs_left.
* Whatever the reason (disconnect during resync,
* delayed local completion of an application write),
* try to fix it up by recounting here. */
ext->rs_left = drbd_bm_e_weight(mdev, enr);
}
} else {
/* Normally this element should be in the cache,
Expand Down Expand Up @@ -1192,6 +1243,7 @@ int drbd_rs_del_all(struct drbd_conf *mdev)
put_ldev(mdev);
}
spin_unlock_irq(&mdev->al_lock);
wake_up(&mdev->al_wait);

return 0;
}
Expand Down
Loading

0 comments on commit 4fd1ffa

Please sign in to comment.