Skip to content

Commit

Permalink
Merge tag 'for-5.6/drivers-2020-01-27' of git://git.kernel.dk/linux-b…
Browse files Browse the repository at this point in the history
…lock

Pull block driver updates from Jens Axboe:
 "Like the core side, not a lot of changes here, just two main items:

   - Series of patches (via Coly) with fixes for bcache (Coly,
     Christoph)

   - MD pull request from Song"

* tag 'for-5.6/drivers-2020-01-27' of git://git.kernel.dk/linux-block: (31 commits)
  bcache: reap from tail of c->btree_cache in bch_mca_scan()
  bcache: reap c->btree_cache_freeable from the tail in bch_mca_scan()
  bcache: remove member accessed from struct btree
  bcache: print written and keys in trace_bcache_btree_write
  bcache: avoid unnecessary btree nodes flushing in btree_flush_write()
  bcache: add code comments for state->pool in __btree_sort()
  lib: crc64: include <linux/crc64.h> for 'crc64_be'
  bcache: use read_cache_page_gfp to read the superblock
  bcache: store a pointer to the on-disk sb in the cache and cached_dev structures
  bcache: return a pointer to the on-disk sb from read_super
  bcache: transfer the sb_page reference to register_{bdev,cache}
  bcache: fix use-after-free in register_bcache()
  bcache: properly initialize 'path' and 'err' in register_bcache()
  bcache: rework error unwinding in register_bcache
  bcache: use a separate data structure for the on-disk super block
  bcache: cached_dev_free needs to put the sb page
  md/raid1: introduce wait_for_serialization
  md/raid1: use bucket based mechanism for IO serialization
  md: introduce a new struct for IO serialization
  md: don't destroy serial_info_pool if serialize_policy is true
  ...
  • Loading branch information
torvalds committed Jan 27, 2020
2 parents 48b4b4f + e3de044 commit 22a8f39
Show file tree
Hide file tree
Showing 17 changed files with 571 additions and 262 deletions.
2 changes: 2 additions & 0 deletions drivers/md/bcache/bcache.h
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,7 @@ struct cached_dev {
struct block_device *bdev;

struct cache_sb sb;
struct cache_sb_disk *sb_disk;
struct bio sb_bio;
struct bio_vec sb_bv[1];
struct closure sb_write;
Expand Down Expand Up @@ -403,6 +404,7 @@ enum alloc_reserve {
struct cache {
struct cache_set *set;
struct cache_sb sb;
struct cache_sb_disk *sb_disk;
struct bio sb_bio;
struct bio_vec sb_bv[1];

Expand Down
5 changes: 5 additions & 0 deletions drivers/md/bcache/bset.c
Original file line number Diff line number Diff line change
Expand Up @@ -1257,6 +1257,11 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
* Our temporary buffer is the same size as the btree node's
* buffer, we can just swap buffers instead of doing a big
* memcpy()
*
 * Don't worry even if 'out' is allocated from the mempool; it
 * can still be swapped here, because state->pool is a page
 * mempool created by mempool_init_page_pool(), which
 * allocates pages via alloc_pages().
*/

out->magic = b->set->data->magic;
Expand Down
24 changes: 10 additions & 14 deletions drivers/md/bcache/btree.c
Original file line number Diff line number Diff line change
Expand Up @@ -734,34 +734,32 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,

i = 0;
btree_cache_used = c->btree_cache_used;
list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) {
list_for_each_entry_safe_reverse(b, t, &c->btree_cache_freeable, list) {
if (nr <= 0)
goto out;

if (++i > 3 &&
!mca_reap(b, 0, false)) {
if (!mca_reap(b, 0, false)) {
mca_data_free(b);
rw_unlock(true, b);
freed++;
}
nr--;
i++;
}

for (; (nr--) && i < btree_cache_used; i++) {
if (list_empty(&c->btree_cache))
list_for_each_entry_safe_reverse(b, t, &c->btree_cache, list) {
if (nr <= 0 || i >= btree_cache_used)
goto out;

b = list_first_entry(&c->btree_cache, struct btree, list);
list_rotate_left(&c->btree_cache);

if (!b->accessed &&
!mca_reap(b, 0, false)) {
if (!mca_reap(b, 0, false)) {
mca_bucket_free(b);
mca_data_free(b);
rw_unlock(true, b);
freed++;
} else
b->accessed = 0;
}

nr--;
i++;
}
out:
mutex_unlock(&c->bucket_lock);
Expand Down Expand Up @@ -1069,7 +1067,6 @@ struct btree *bch_btree_node_get(struct cache_set *c, struct btree_op *op,
BUG_ON(!b->written);

b->parent = parent;
b->accessed = 1;

for (; i <= b->keys.nsets && b->keys.set[i].size; i++) {
prefetch(b->keys.set[i].tree);
Expand Down Expand Up @@ -1160,7 +1157,6 @@ struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op,
goto retry;
}

b->accessed = 1;
b->parent = parent;
bch_bset_init_next(&b->keys, b->keys.set->data, bset_magic(&b->c->sb));

Expand Down
2 changes: 0 additions & 2 deletions drivers/md/bcache/btree.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,6 @@ struct btree {
/* Key/pointer for this btree node */
BKEY_PADDED(key);

/* Single bit - set when accessed, cleared by shrinker */
unsigned long accessed;
unsigned long seq;
struct rw_semaphore lock;
struct cache_set *c;
Expand Down
80 changes: 75 additions & 5 deletions drivers/md/bcache/journal.c
Original file line number Diff line number Diff line change
Expand Up @@ -417,10 +417,14 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)

/* Journalling */

/*
 * Number of fifo slots between pointer 'p' and the front pointer
 * 'front_p' of a circular fifo whose (power-of-two - 1) index mask
 * is 'mask'. Returns 0 when 'p' points at the front entry.
 */
#define nr_to_fifo_front(p, front_p, mask) (((p) - (front_p)) & (mask))

static void btree_flush_write(struct cache_set *c)
{
struct btree *b, *t, *btree_nodes[BTREE_FLUSH_NR];
unsigned int i, n;
unsigned int i, nr, ref_nr;
atomic_t *fifo_front_p, *now_fifo_front_p;
size_t mask;

if (c->journal.btree_flushing)
return;
Expand All @@ -433,12 +437,50 @@ static void btree_flush_write(struct cache_set *c)
c->journal.btree_flushing = true;
spin_unlock(&c->journal.flush_write_lock);

/* get the oldest journal entry and check its refcount */
spin_lock(&c->journal.lock);
fifo_front_p = &fifo_front(&c->journal.pin);
ref_nr = atomic_read(fifo_front_p);
if (ref_nr <= 0) {
/*
* do nothing if no btree node references
* the oldest journal entry
*/
spin_unlock(&c->journal.lock);
goto out;
}
spin_unlock(&c->journal.lock);

mask = c->journal.pin.mask;
nr = 0;
atomic_long_inc(&c->flush_write);
memset(btree_nodes, 0, sizeof(btree_nodes));
n = 0;

mutex_lock(&c->bucket_lock);
list_for_each_entry_safe_reverse(b, t, &c->btree_cache, list) {
/*
* It is safe to get now_fifo_front_p without holding
 * c->journal.lock here, because we don't need the exact
 * value; we only need to check whether the front pointer
 * of c->journal.pin has changed.
*/
now_fifo_front_p = &fifo_front(&c->journal.pin);
/*
* If the oldest journal entry is reclaimed and front
* pointer of c->journal.pin changes, it is unnecessary
* to scan c->btree_cache anymore, just quit the loop and
* flush out what we have already.
*/
if (now_fifo_front_p != fifo_front_p)
break;
/*
* quit this loop if all matching btree nodes are
* scanned and record in btree_nodes[] already.
*/
ref_nr = atomic_read(fifo_front_p);
if (nr >= ref_nr)
break;

if (btree_node_journal_flush(b))
pr_err("BUG: flush_write bit should not be set here!");

Expand All @@ -454,17 +496,44 @@ static void btree_flush_write(struct cache_set *c)
continue;
}

/*
* Only select the btree node which exactly references
* the oldest journal entry.
*
* If the journal entry pointed by fifo_front_p is
* reclaimed in parallel, don't worry:
* - the list_for_each_xxx loop will quit when checking
* next now_fifo_front_p.
* - If there are matched nodes recorded in btree_nodes[],
* they are clean now (this is why and how the oldest
* journal entry can be reclaimed). These selected nodes
 * will be ignored and skipped in the following for-loop.
*/
if (nr_to_fifo_front(btree_current_write(b)->journal,
fifo_front_p,
mask) != 0) {
mutex_unlock(&b->write_lock);
continue;
}

set_btree_node_journal_flush(b);

mutex_unlock(&b->write_lock);

btree_nodes[n++] = b;
if (n == BTREE_FLUSH_NR)
btree_nodes[nr++] = b;
/*
* To avoid holding c->bucket_lock too long time,
* only scan for BTREE_FLUSH_NR matched btree nodes
* at most. If there are more btree nodes reference
* the oldest journal entry, try to flush them next
* time when btree_flush_write() is called.
*/
if (nr == BTREE_FLUSH_NR)
break;
}
mutex_unlock(&c->bucket_lock);

for (i = 0; i < n; i++) {
for (i = 0; i < nr; i++) {
b = btree_nodes[i];
if (!b) {
pr_err("BUG: btree_nodes[%d] is NULL", i);
Expand Down Expand Up @@ -497,6 +566,7 @@ static void btree_flush_write(struct cache_set *c)
mutex_unlock(&b->write_lock);
}

out:
spin_lock(&c->journal.flush_write_lock);
c->journal.btree_flushing = false;
spin_unlock(&c->journal.flush_write_lock);
Expand Down
Loading

0 comments on commit 22a8f39

Please sign in to comment.