Skip to content

Commit

Permalink
block: fix disk->part[] dereferencing race
Browse files Browse the repository at this point in the history
disk->part[] is protected by its matching bdev's lock.  However,
non-critical accesses like collecting stats and printing out sysfs and
proc information used to be performed without any locking.  As
partitions can come and go dynamically, they can go away
underneath those non-critical accesses.  As some of those accesses are
writes, this can theoretically lead to silent corruption.

This patch fixes the race by using RCU to protect the partition array
and the device reference counter to hold partitions.

* Rename disk->part[] to disk->__part[] to make sure no one outside
  genhd layer proper accesses it directly.

* Use RCU for disk->__part[] dereferencing.

* Implement disk_{get|put}_part(), which get a partition from a gendisk
  and put it back, respectively.

* Implement iterators to help iterate through all partitions
  safely.

* Functions which require RCU readlock are marked with _rcu suffix.

* Use disk_put_part() in __blkdev_put() instead of directly putting
  the contained kobject.

Signed-off-by: Tejun Heo <[email protected]>
Signed-off-by: Jens Axboe <[email protected]>
  • Loading branch information
htejun authored and Jens Axboe committed Oct 9, 2008
1 parent f331c02 commit e71bf0d
Show file tree
Hide file tree
Showing 8 changed files with 323 additions and 94 deletions.
20 changes: 16 additions & 4 deletions block/blk-core.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,9 @@ static void drive_stat_acct(struct request *rq, int new_io)
if (!blk_fs_request(rq) || !rq->rq_disk)
return;

part = disk_map_sector(rq->rq_disk, rq->sector);
rcu_read_lock();

part = disk_map_sector_rcu(rq->rq_disk, rq->sector);
if (!new_io)
__all_stat_inc(rq->rq_disk, part, merges[rw], rq->sector);
else {
Expand All @@ -71,6 +73,8 @@ static void drive_stat_acct(struct request *rq, int new_io)
part->in_flight++;
}
}

rcu_read_unlock();
}

void blk_queue_congestion_threshold(struct request_queue *q)
Expand Down Expand Up @@ -1557,12 +1561,14 @@ static int __end_that_request_first(struct request *req, int error,
}

if (blk_fs_request(req) && req->rq_disk) {
struct hd_struct *part =
disk_map_sector(req->rq_disk, req->sector);
const int rw = rq_data_dir(req);
struct hd_struct *part;

rcu_read_lock();
part = disk_map_sector_rcu(req->rq_disk, req->sector);
all_stat_add(req->rq_disk, part, sectors[rw],
nr_bytes >> 9, req->sector);
rcu_read_unlock();
}

total_bytes = bio_nbytes = 0;
Expand Down Expand Up @@ -1746,7 +1752,11 @@ static void end_that_request_last(struct request *req, int error)
if (disk && blk_fs_request(req) && req != &req->q->bar_rq) {
unsigned long duration = jiffies - req->start_time;
const int rw = rq_data_dir(req);
struct hd_struct *part = disk_map_sector(disk, req->sector);
struct hd_struct *part;

rcu_read_lock();

part = disk_map_sector_rcu(disk, req->sector);

__all_stat_inc(disk, part, ios[rw], req->sector);
__all_stat_add(disk, part, ticks[rw], duration, req->sector);
Expand All @@ -1756,6 +1766,8 @@ static void end_that_request_last(struct request *req, int error)
part_round_stats(part);
part->in_flight--;
}

rcu_read_unlock();
}

if (req->end_io)
Expand Down
9 changes: 7 additions & 2 deletions block/blk-merge.c
Original file line number Diff line number Diff line change
Expand Up @@ -387,14 +387,19 @@ static int attempt_merge(struct request_queue *q, struct request *req,
elv_merge_requests(q, req, next);

if (req->rq_disk) {
struct hd_struct *part =
disk_map_sector(req->rq_disk, req->sector);
struct hd_struct *part;

rcu_read_lock();

part = disk_map_sector_rcu(req->rq_disk, req->sector);
disk_round_stats(req->rq_disk);
req->rq_disk->in_flight--;
if (part) {
part_round_stats(part);
part->in_flight--;
}

rcu_read_unlock();
}

req->ioprio = ioprio_best(req->ioprio, next->ioprio);
Expand Down
Loading

0 comments on commit e71bf0d

Please sign in to comment.