Skip to content

Commit

Permalink
Merge tag 'bcachefs-2024-03-19' of https://evilpiepirate.org/git/bcac…
Browse files Browse the repository at this point in the history
…hefs

Pull bcachefs fixes from Kent Overstreet:
 "Assorted bugfixes.

  Most are fixes for simple assertion pops; the most significant fix is
  for a deadlock in recovery when we have to rewrite large numbers of
  btree nodes to fix errors. This was incorrectly running out of the
  same workqueue as the core interior btree update path - we now give it
  its own single threaded workqueue.

  This was visible to users as "bch2_btree_update_start(): error:
  BCH_ERR_journal_reclaim_would_deadlock" - and then recovery hanging"

* tag 'bcachefs-2024-03-19' of https://evilpiepirate.org/git/bcachefs:
  bcachefs: Fix lost wakeup on journal shutdown
  bcachefs; Fix deadlock in bch2_btree_update_start()
  bcachefs: ratelimit errors from async_btree_node_rewrite
  bcachefs: Run check_topology() first
  bcachefs: Improve bch2_fatal_error()
  bcachefs: Fix lost transaction restart error
  bcachefs: Don't corrupt journal keys gap buffer when dropping alloc info
  bcachefs: fix for building in userspace
  bcachefs: bch2_snapshot_is_ancestor() now safe to call in early recovery
  bcachefs: Fix nested transaction restart handling in bch2_bucket_gens_init()
  bcachefs: Improve sysfs internal/btree_updates
  bcachefs: Split out btree_node_rewrite_worker
  bcachefs: Fix locking in bch2_alloc_write_key()
  bcachefs: Avoid extent entry type assertions in .invalid()
  bcachefs: Fix spurious -BCH_ERR_transaction_restart_nested
  bcachefs: Fix check_key_has_snapshot() call
  bcachefs: Change "accounting overran journal reservation" to a warning
  • Loading branch information
torvalds committed Mar 20, 2024
2 parents 78c3925 + 2e92d26 commit a4145ce
Show file tree
Hide file tree
Showing 26 changed files with 157 additions and 111 deletions.
15 changes: 8 additions & 7 deletions fs/bcachefs/alloc_background.c
Original file line number Diff line number Diff line change
Expand Up @@ -532,13 +532,13 @@ int bch2_bucket_gens_init(struct bch_fs *c)
u8 gen = bch2_alloc_to_v4(k, &a)->gen;
unsigned offset;
struct bpos pos = alloc_gens_pos(iter.pos, &offset);
int ret2 = 0;

if (have_bucket_gens_key && bkey_cmp(iter.pos, pos)) {
ret = commit_do(trans, NULL, NULL,
BCH_TRANS_COMMIT_no_enospc,
bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0));
if (ret)
break;
ret2 = bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0) ?:
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
if (ret2)
goto iter_err;
have_bucket_gens_key = false;
}

Expand All @@ -549,7 +549,8 @@ int bch2_bucket_gens_init(struct bch_fs *c)
}

g.v.gens[offset] = gen;
0;
iter_err:
ret2;
}));

if (have_bucket_gens_key && !ret)
Expand Down Expand Up @@ -852,7 +853,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
bucket_journal_seq);
if (ret) {
bch2_fs_fatal_error(c,
"error setting bucket_needs_journal_commit: %i", ret);
"setting bucket_needs_journal_commit: %s", bch2_err_str(ret));
return ret;
}
}
Expand Down
10 changes: 6 additions & 4 deletions fs/bcachefs/alloc_foreground.c
Original file line number Diff line number Diff line change
Expand Up @@ -1356,15 +1356,17 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,

/* Don't retry from all devices if we're out of open buckets: */
if (bch2_err_matches(ret, BCH_ERR_open_buckets_empty)) {
int ret = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
int ret2 = open_bucket_add_buckets(trans, &ptrs, wp, devs_have,
target, erasure_code,
nr_replicas, &nr_effective,
&have_cache, watermark,
flags, cl);
if (!ret ||
bch2_err_matches(ret, BCH_ERR_transaction_restart) ||
bch2_err_matches(ret, BCH_ERR_open_buckets_empty))
if (!ret2 ||
bch2_err_matches(ret2, BCH_ERR_transaction_restart) ||
bch2_err_matches(ret2, BCH_ERR_open_buckets_empty)) {
ret = ret2;
goto alloc_done;
}
}

/*
Expand Down
2 changes: 2 additions & 0 deletions fs/bcachefs/bcachefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -849,6 +849,8 @@ struct bch_fs {
struct workqueue_struct *btree_interior_update_worker;
struct work_struct btree_interior_update_work;

struct workqueue_struct *btree_node_rewrite_worker;

struct list_head pending_node_rewrites;
struct mutex pending_node_rewrites_lock;

Expand Down
2 changes: 1 addition & 1 deletion fs/bcachefs/btree_gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1392,11 +1392,11 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
*old,
b->data_type);
gc = *b;
percpu_up_read(&c->mark_lock);

if (gc.data_type != old_gc.data_type ||
gc.dirty_sectors != old_gc.dirty_sectors)
bch2_dev_usage_update_m(c, ca, &old_gc, &gc);
percpu_up_read(&c->mark_lock);

if (metadata_only &&
gc.data_type != BCH_DATA_sb &&
Expand Down
12 changes: 6 additions & 6 deletions fs/bcachefs/btree_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -1066,7 +1066,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,

ret = bset_encrypt(c, i, b->written << 9);
if (bch2_fs_fatal_err_on(ret, c,
"error decrypting btree node: %i", ret))
"decrypting btree node: %s", bch2_err_str(ret)))
goto fsck_err;

btree_err_on(btree_node_type_is_extents(btree_node_type(b)) &&
Expand Down Expand Up @@ -1107,7 +1107,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,

ret = bset_encrypt(c, i, b->written << 9);
if (bch2_fs_fatal_err_on(ret, c,
"error decrypting btree node: %i\n", ret))
"decrypting btree node: %s", bch2_err_str(ret)))
goto fsck_err;

sectors = vstruct_sectors(bne, c->block_bits);
Expand Down Expand Up @@ -1338,7 +1338,7 @@ static void btree_node_read_work(struct work_struct *work)
if (saw_error && !btree_node_read_error(b)) {
printbuf_reset(&buf);
bch2_bpos_to_text(&buf, b->key.k.p);
bch_info(c, "%s: rewriting btree node at btree=%s level=%u %s due to error",
bch_err_ratelimited(c, "%s: rewriting btree node at btree=%s level=%u %s due to error",
__func__, bch2_btree_id_str(b->c.btree_id), b->c.level, buf.buf);

bch2_btree_node_rewrite_async(c, b);
Expand Down Expand Up @@ -1874,8 +1874,8 @@ static void btree_node_write_work(struct work_struct *work)
return;
err:
set_btree_node_noevict(b);
if (!bch2_err_matches(ret, EROFS))
bch2_fs_fatal_error(c, "fatal error writing btree node: %s", bch2_err_str(ret));
bch2_fs_fatal_err_on(!bch2_err_matches(ret, EROFS), c,
"writing btree node: %s", bch2_err_str(ret));
goto out;
}

Expand Down Expand Up @@ -2131,7 +2131,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)

ret = bset_encrypt(c, i, b->written << 9);
if (bch2_fs_fatal_err_on(ret, c,
"error encrypting btree node: %i\n", ret))
"encrypting btree node: %s", bch2_err_str(ret)))
goto err;

nonce = btree_nonce(i, b->written << 9);
Expand Down
2 changes: 1 addition & 1 deletion fs/bcachefs/btree_key_cache.c
Original file line number Diff line number Diff line change
Expand Up @@ -676,7 +676,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
!bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
!bch2_err_matches(ret, BCH_ERR_journal_reclaim_would_deadlock) &&
!bch2_journal_error(j), c,
"error flushing key cache: %s", bch2_err_str(ret));
"flushing key cache: %s", bch2_err_str(ret));
if (ret)
goto out;

Expand Down
44 changes: 29 additions & 15 deletions fs/bcachefs/btree_update_interior.c
Original file line number Diff line number Diff line change
Expand Up @@ -646,7 +646,7 @@ static void btree_update_nodes_written(struct btree_update *as)
bch2_trans_unlock(trans);

bch2_fs_fatal_err_on(ret && !bch2_journal_error(&c->journal), c,
"%s(): error %s", __func__, bch2_err_str(ret));
"%s", bch2_err_str(ret));
err:
if (as->b) {

Expand Down Expand Up @@ -1067,13 +1067,18 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
flags &= ~BCH_WATERMARK_MASK;
flags |= watermark;

if (!(flags & BCH_TRANS_COMMIT_journal_reclaim) &&
watermark < c->journal.watermark) {
if (watermark < c->journal.watermark) {
struct journal_res res = { 0 };
unsigned journal_flags = watermark|JOURNAL_RES_GET_CHECK;

if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
watermark != BCH_WATERMARK_reclaim)
journal_flags |= JOURNAL_RES_GET_NONBLOCK;

ret = drop_locks_do(trans,
bch2_journal_res_get(&c->journal, &res, 1,
watermark|JOURNAL_RES_GET_CHECK));
bch2_journal_res_get(&c->journal, &res, 1, journal_flags));
if (bch2_err_matches(ret, BCH_ERR_operation_blocked))
ret = -BCH_ERR_journal_reclaim_would_deadlock;
if (ret)
return ERR_PTR(ret);
}
Expand Down Expand Up @@ -1117,6 +1122,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
closure_init(&as->cl, NULL);
as->c = c;
as->start_time = start_time;
as->ip_started = _RET_IP_;
as->mode = BTREE_INTERIOR_NO_UPDATE;
as->took_gc_lock = true;
as->btree_id = path->btree_id;
Expand Down Expand Up @@ -1192,7 +1198,8 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
err:
bch2_btree_update_free(as, trans);
if (!bch2_err_matches(ret, ENOSPC) &&
!bch2_err_matches(ret, EROFS))
!bch2_err_matches(ret, EROFS) &&
ret != -BCH_ERR_journal_reclaim_would_deadlock)
bch_err_fn_ratelimited(c, ret);
return ERR_PTR(ret);
}
Expand Down Expand Up @@ -2114,7 +2121,7 @@ static void async_btree_node_rewrite_work(struct work_struct *work)

ret = bch2_trans_do(c, NULL, NULL, 0,
async_btree_node_rewrite_trans(trans, a));
bch_err_fn(c, ret);
bch_err_fn_ratelimited(c, ret);
bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite);
kfree(a);
}
Expand Down Expand Up @@ -2161,7 +2168,7 @@ void bch2_btree_node_rewrite_async(struct bch_fs *c, struct btree *b)
bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite);
}

queue_work(c->btree_interior_update_worker, &a->work);
queue_work(c->btree_node_rewrite_worker, &a->work);
}

void bch2_do_pending_node_rewrites(struct bch_fs *c)
Expand All @@ -2173,7 +2180,7 @@ void bch2_do_pending_node_rewrites(struct bch_fs *c)
list_del(&a->list);

bch2_write_ref_get(c, BCH_WRITE_REF_node_rewrite);
queue_work(c->btree_interior_update_worker, &a->work);
queue_work(c->btree_node_rewrite_worker, &a->work);
}
mutex_unlock(&c->pending_node_rewrites_lock);
}
Expand Down Expand Up @@ -2441,12 +2448,12 @@ void bch2_btree_updates_to_text(struct printbuf *out, struct bch_fs *c)

mutex_lock(&c->btree_interior_update_lock);
list_for_each_entry(as, &c->btree_interior_update_list, list)
prt_printf(out, "%p m %u w %u r %u j %llu\n",
as,
as->mode,
as->nodes_written,
closure_nr_remaining(&as->cl),
as->journal.seq);
prt_printf(out, "%ps: mode=%u nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
(void *) as->ip_started,
as->mode,
as->nodes_written,
closure_nr_remaining(&as->cl),
as->journal.seq);
mutex_unlock(&c->btree_interior_update_lock);
}

Expand Down Expand Up @@ -2510,6 +2517,8 @@ bch2_btree_roots_to_journal_entries(struct bch_fs *c,

void bch2_fs_btree_interior_update_exit(struct bch_fs *c)
{
if (c->btree_node_rewrite_worker)
destroy_workqueue(c->btree_node_rewrite_worker);
if (c->btree_interior_update_worker)
destroy_workqueue(c->btree_interior_update_worker);
mempool_exit(&c->btree_interior_update_pool);
Expand All @@ -2534,6 +2543,11 @@ int bch2_fs_btree_interior_update_init(struct bch_fs *c)
if (!c->btree_interior_update_worker)
return -BCH_ERR_ENOMEM_btree_interior_update_worker_init;

c->btree_node_rewrite_worker =
alloc_ordered_workqueue("btree_node_rewrite", WQ_UNBOUND);
if (!c->btree_node_rewrite_worker)
return -BCH_ERR_ENOMEM_btree_interior_update_worker_init;

if (mempool_init_kmalloc_pool(&c->btree_interior_update_pool, 1,
sizeof(struct btree_update)))
return -BCH_ERR_ENOMEM_btree_interior_update_pool_init;
Expand Down
1 change: 1 addition & 0 deletions fs/bcachefs/btree_update_interior.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ struct btree_update {
struct closure cl;
struct bch_fs *c;
u64 start_time;
unsigned long ip_started;

struct list_head list;
struct list_head unwritten_list;
Expand Down
2 changes: 1 addition & 1 deletion fs/bcachefs/btree_write_buffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
}
}
err:
bch2_fs_fatal_err_on(ret, c, "%s: insert error %s", __func__, bch2_err_str(ret));
bch2_fs_fatal_err_on(ret, c, "%s", bch2_err_str(ret));
trace_write_buffer_flush(trans, wb->flushing.keys.nr, skipped, fast, 0);
bch2_journal_pin_drop(j, &wb->flushing.pin);
wb->flushing.keys.nr = 0;
Expand Down
6 changes: 3 additions & 3 deletions fs/bcachefs/buckets.c
Original file line number Diff line number Diff line change
Expand Up @@ -990,8 +990,8 @@ static int __trigger_extent(struct btree_trans *trans,
ret = !gc
? bch2_update_cached_sectors_list(trans, p.ptr.dev, disk_sectors)
: update_cached_sectors(c, k, p.ptr.dev, disk_sectors, 0, true);
bch2_fs_fatal_err_on(ret && gc, c, "%s(): no replicas entry while updating cached sectors",
__func__);
bch2_fs_fatal_err_on(ret && gc, c, "%s: no replicas entry while updating cached sectors",
bch2_err_str(ret));
if (ret)
return ret;
}
Expand Down Expand Up @@ -1020,7 +1020,7 @@ static int __trigger_extent(struct btree_trans *trans,
struct printbuf buf = PRINTBUF;

bch2_bkey_val_to_text(&buf, c, k);
bch2_fs_fatal_error(c, "%s(): no replicas entry for %s", __func__, buf.buf);
bch2_fs_fatal_error(c, ": no replicas entry for %s", buf.buf);
printbuf_exit(&buf);
}
if (ret)
Expand Down
2 changes: 1 addition & 1 deletion fs/bcachefs/debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b)
struct printbuf buf = PRINTBUF;

bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&b->key));
bch2_fs_fatal_error(c, "btree node verify failed for : %s\n", buf.buf);
bch2_fs_fatal_error(c, ": btree node verify failed for: %s\n", buf.buf);
printbuf_exit(&buf);
}
out:
Expand Down
6 changes: 3 additions & 3 deletions fs/bcachefs/ec.c
Original file line number Diff line number Diff line change
Expand Up @@ -448,7 +448,7 @@ int bch2_trigger_stripe(struct btree_trans *trans,
struct printbuf buf = PRINTBUF;

bch2_bkey_val_to_text(&buf, c, new);
bch2_fs_fatal_error(c, "no replicas entry for %s", buf.buf);
bch2_fs_fatal_error(c, ": no replicas entry for %s", buf.buf);
printbuf_exit(&buf);
return ret;
}
Expand Down Expand Up @@ -1868,10 +1868,10 @@ static int __bch2_ec_stripe_head_reuse(struct btree_trans *trans, struct ec_stri
return -BCH_ERR_stripe_alloc_blocked;

ret = get_stripe_key_trans(trans, idx, &h->s->existing_stripe);
bch2_fs_fatal_err_on(ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart), c,
"reading stripe key: %s", bch2_err_str(ret));
if (ret) {
bch2_stripe_close(c, h->s);
if (!bch2_err_matches(ret, BCH_ERR_transaction_restart))
bch2_fs_fatal_error(c, "error reading stripe key: %s", bch2_err_str(ret));
return ret;
}

Expand Down
4 changes: 2 additions & 2 deletions fs/bcachefs/error.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,9 +191,9 @@ do { \

void bch2_fatal_error(struct bch_fs *);

#define bch2_fs_fatal_error(c, ...) \
#define bch2_fs_fatal_error(c, _msg, ...) \
do { \
bch_err(c, __VA_ARGS__); \
bch_err(c, "%s(): fatal error " _msg, __func__, ##__VA_ARGS__); \
bch2_fatal_error(c); \
} while (0)

Expand Down
6 changes: 3 additions & 3 deletions fs/bcachefs/extents.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,17 +108,17 @@ static inline void extent_entry_drop(struct bkey_s k, union bch_extent_entry *en

static inline bool extent_entry_is_ptr(const union bch_extent_entry *e)
{
return extent_entry_type(e) == BCH_EXTENT_ENTRY_ptr;
return __extent_entry_type(e) == BCH_EXTENT_ENTRY_ptr;
}

static inline bool extent_entry_is_stripe_ptr(const union bch_extent_entry *e)
{
return extent_entry_type(e) == BCH_EXTENT_ENTRY_stripe_ptr;
return __extent_entry_type(e) == BCH_EXTENT_ENTRY_stripe_ptr;
}

static inline bool extent_entry_is_crc(const union bch_extent_entry *e)
{
switch (extent_entry_type(e)) {
switch (__extent_entry_type(e)) {
case BCH_EXTENT_ENTRY_crc32:
case BCH_EXTENT_ENTRY_crc64:
case BCH_EXTENT_ENTRY_crc128:
Expand Down
3 changes: 2 additions & 1 deletion fs/bcachefs/fs.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,8 @@ int __must_check bch2_write_inode(struct bch_fs *c,
goto retry;

bch2_fs_fatal_err_on(bch2_err_matches(ret, ENOENT), c,
"inode %u:%llu not found when updating",
"%s: inode %u:%llu not found when updating",
bch2_err_str(ret),
inode_inum(inode).subvol,
inode_inum(inode).inum);

Expand Down
Loading

0 comments on commit a4145ce

Please sign in to comment.