Skip to content

Commit

Permalink
Merge tag 'bcachefs-2024-10-22' of https://github.com/koverstreet/bcachefs
Browse files Browse the repository at this point in the history

Pull bcachefs fixes from Kent Overstreet:
 "Lots of hotfixes:

   - transaction restart injection has been shaking out a few things

   - fix a data corruption in the buffered write path on -ENOSPC, found
     by xfstests generic/299

   - Some small show_options fixes

   - Repair mismatches in inode hash type, seed: different snapshot
     versions of an inode must have the same hash/type seed, used for
     directory entries and xattrs. We were checking the hash seed, but
     not the type, and a user contributed a filesystem where the hash
     type on one inode had somehow been flipped; these fixes allow his
     filesystem to repair.

     Additionally, the hash type flip made some directory entries
     invisible, which were then recreated by userspace; so the hash
     check code now checks for duplicate non dangling dirents, and
     renames one of them if necessary.

   - Don't use wait_event_interruptible() in recovery: this fixes some
     filesystems failing to mount with -ERESTARTSYS

   - Workaround for kvmalloc not supporting > INT_MAX allocations,
     causing an -ENOMEM when allocating the sorted array of journal
     keys: this allows a 75 TB filesystem to mount

   - Make sure bch_inode_unpacked.bi_snapshot is set in the old inode
     compat path: this allows Marcin's filesystem (in use since before
     6.7) to repair and mount"

* tag 'bcachefs-2024-10-22' of https://github.com/koverstreet/bcachefs: (26 commits)
  bcachefs: Set bch_inode_unpacked.bi_snapshot in old inode path
  bcachefs: Mark more errors as AUTOFIX
  bcachefs: Workaround for kvmalloc() not supporting > INT_MAX allocations
  bcachefs: Don't use wait_event_interruptible() in recovery
  bcachefs: Fix __bch2_fsck_err() warning
  bcachefs: fsck: Improve hash_check_key()
  bcachefs: bch2_hash_set_or_get_in_snapshot()
  bcachefs: Repair mismatches in inode hash seed, type
  bcachefs: Add hash seed, type to inode_to_text()
  bcachefs: INODE_STR_HASH() for bch_inode_unpacked
  bcachefs: Run in-kernel offline fsck without ratelimit errors
  bcachefs: skip mount option handle for empty string.
  bcachefs: fix incorrect show_options results
  bcachefs: Fix data corruption on -ENOSPC in buffered write path
  bcachefs: bch2_folio_reservation_get_partial() is now better behaved
  bcachefs: fix disk reservation accounting in bch2_folio_reservation_get()
  bcachefS: ec: fix data type on stripe deletion
  bcachefs: Don't use commit_do() unnecessarily
  bcachefs: handle restarts in bch2_bucket_io_time_reset()
  bcachefs: fix restart handling in __bch2_resume_logged_op_finsert()
  ...
  • Loading branch information
torvalds committed Oct 24, 2024
2 parents f009e94 + a069f01 commit c1e8227
Show file tree
Hide file tree
Showing 41 changed files with 475 additions and 205 deletions.
37 changes: 21 additions & 16 deletions fs/bcachefs/alloc_background.c
Original file line number Diff line number Diff line change
Expand Up @@ -1977,7 +1977,7 @@ static void bch2_do_discards_fast_work(struct work_struct *work)
ca->mi.bucket_size,
GFP_KERNEL);

int ret = bch2_trans_do(c, NULL, NULL,
int ret = bch2_trans_commit_do(c, NULL, NULL,
BCH_WATERMARK_btree|
BCH_TRANS_COMMIT_no_enospc,
bch2_clear_bucket_needs_discard(trans, POS(ca->dev_idx, bucket)));
Expand Down Expand Up @@ -2137,14 +2137,15 @@ static void bch2_do_invalidates_work(struct work_struct *work)

struct bkey_s_c k = next_lru_key(trans, &iter, ca, &wrapped);
ret = bkey_err(k);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret)
break;
goto restart_err;
if (!k.k)
break;

ret = invalidate_one_bucket(trans, &iter, k, &nr_to_invalidate);
restart_err:
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
continue;
if (ret)
break;

Expand Down Expand Up @@ -2350,24 +2351,19 @@ int bch2_dev_remove_alloc(struct bch_fs *c, struct bch_dev *ca)

/* Bucket IO clocks: */

int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
size_t bucket_nr, int rw)
static int __bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
size_t bucket_nr, int rw)
{
struct bch_fs *c = trans->c;
struct btree_iter iter;
struct bkey_i_alloc_v4 *a;
u64 now;
int ret = 0;

if (bch2_trans_relock(trans))
bch2_trans_begin(trans);

a = bch2_trans_start_alloc_update_noupdate(trans, &iter, POS(dev, bucket_nr));
ret = PTR_ERR_OR_ZERO(a);
struct btree_iter iter;
struct bkey_i_alloc_v4 *a =
bch2_trans_start_alloc_update_noupdate(trans, &iter, POS(dev, bucket_nr));
int ret = PTR_ERR_OR_ZERO(a);
if (ret)
return ret;

now = bch2_current_io_time(c, rw);
u64 now = bch2_current_io_time(c, rw);
if (a->v.io_time[rw] == now)
goto out;

Expand All @@ -2380,6 +2376,15 @@ int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
return ret;
}

int bch2_bucket_io_time_reset(struct btree_trans *trans, unsigned dev,
size_t bucket_nr, int rw)
{
if (bch2_trans_relock(trans))
bch2_trans_begin(trans);

return nested_lockrestart_do(trans, __bch2_bucket_io_time_reset(trans, dev, bucket_nr, rw));
}

/* Startup/shutdown (ro/rw): */

void bch2_recalc_capacity(struct bch_fs *c)
Expand Down
2 changes: 1 addition & 1 deletion fs/bcachefs/alloc_foreground.c
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,7 @@ struct open_bucket *bch2_bucket_alloc(struct bch_fs *c, struct bch_dev *ca,
struct bch_dev_usage usage;
struct open_bucket *ob;

bch2_trans_do(c, NULL, NULL, 0,
bch2_trans_do(c,
PTR_ERR_OR_ZERO(ob = bch2_bucket_alloc_trans(trans, ca, watermark,
data_type, cl, false, &usage)));
return ob;
Expand Down
12 changes: 11 additions & 1 deletion fs/bcachefs/btree_gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -820,12 +820,22 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
* fix that here:
*/
alloc_data_type_set(&gc, gc.data_type);

if (gc.data_type != old_gc.data_type ||
gc.dirty_sectors != old_gc.dirty_sectors) {
ret = bch2_alloc_key_to_dev_counters(trans, ca, &old_gc, &gc, BTREE_TRIGGER_gc);
if (ret)
return ret;

/*
* Ugly: alloc_key_to_dev_counters(..., BTREE_TRIGGER_gc) is not
* safe w.r.t. transaction restarts, so fixup the gc_bucket so
* we don't run it twice:
*/
percpu_down_read(&c->mark_lock);
struct bucket *gc_m = gc_bucket(ca, iter->pos.offset);
gc_m->data_type = gc.data_type;
gc_m->dirty_sectors = gc.dirty_sectors;
percpu_up_read(&c->mark_lock);
}

if (fsck_err_on(new.data_type != gc.data_type,
Expand Down
2 changes: 1 addition & 1 deletion fs/bcachefs/btree_io.c
Original file line number Diff line number Diff line change
Expand Up @@ -1871,7 +1871,7 @@ static void btree_node_write_work(struct work_struct *work)

}
} else {
ret = bch2_trans_do(c, NULL, NULL, 0,
ret = bch2_trans_do(c,
bch2_btree_node_update_key_get_iter(trans, b, &wbio->key,
BCH_WATERMARK_interior_updates|
BCH_TRANS_COMMIT_journal_reclaim|
Expand Down
2 changes: 2 additions & 0 deletions fs/bcachefs/btree_iter.h
Original file line number Diff line number Diff line change
Expand Up @@ -912,6 +912,8 @@ struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *);
_ret; \
})

#define bch2_trans_do(_c, _do) bch2_trans_run(_c, lockrestart_do(trans, _do))

struct btree_trans *__bch2_trans_get(struct bch_fs *, unsigned);
void bch2_trans_put(struct btree_trans *);

Expand Down
4 changes: 2 additions & 2 deletions fs/bcachefs/btree_update.c
Original file line number Diff line number Diff line change
Expand Up @@ -668,7 +668,7 @@ int bch2_btree_insert(struct bch_fs *c, enum btree_id id, struct bkey_i *k,
struct disk_reservation *disk_res, int flags,
enum btree_iter_update_trigger_flags iter_flags)
{
return bch2_trans_do(c, disk_res, NULL, flags,
return bch2_trans_commit_do(c, disk_res, NULL, flags,
bch2_btree_insert_trans(trans, id, k, iter_flags));
}

Expand Down Expand Up @@ -865,7 +865,7 @@ __bch2_fs_log_msg(struct bch_fs *c, unsigned commit_flags, const char *fmt,
memcpy(l->d, buf.buf, buf.pos);
c->journal.early_journal_entries.nr += jset_u64s(u64s);
} else {
ret = bch2_trans_do(c, NULL, NULL,
ret = bch2_trans_commit_do(c, NULL, NULL,
BCH_TRANS_COMMIT_lazy_rw|commit_flags,
__bch2_trans_log_msg(trans, &buf, u64s));
}
Expand Down
2 changes: 1 addition & 1 deletion fs/bcachefs/btree_update.h
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ static inline int bch2_trans_commit(struct btree_trans *trans,
nested_lockrestart_do(_trans, _do ?: bch2_trans_commit(_trans, (_disk_res),\
(_journal_seq), (_flags)))

#define bch2_trans_do(_c, _disk_res, _journal_seq, _flags, _do) \
#define bch2_trans_commit_do(_c, _disk_res, _journal_seq, _flags, _do) \
bch2_trans_run(_c, commit_do(trans, _disk_res, _journal_seq, _flags, _do))

#define trans_for_each_update(_trans, _i) \
Expand Down
4 changes: 1 addition & 3 deletions fs/bcachefs/btree_update_interior.c
Original file line number Diff line number Diff line change
Expand Up @@ -2239,10 +2239,8 @@ static void async_btree_node_rewrite_work(struct work_struct *work)
struct async_btree_rewrite *a =
container_of(work, struct async_btree_rewrite, work);
struct bch_fs *c = a->c;
int ret;

ret = bch2_trans_do(c, NULL, NULL, 0,
async_btree_node_rewrite_trans(trans, a));
int ret = bch2_trans_do(c, async_btree_node_rewrite_trans(trans, a));
bch_err_fn_ratelimited(c, ret);
bch2_write_ref_put(c, BCH_WRITE_REF_node_rewrite);
kfree(a);
Expand Down
7 changes: 5 additions & 2 deletions fs/bcachefs/buckets.c
Original file line number Diff line number Diff line change
Expand Up @@ -1160,11 +1160,11 @@ int bch2_trans_mark_dev_sbs(struct bch_fs *c)
#define SECTORS_CACHE 1024

int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
u64 sectors, int flags)
u64 sectors, enum bch_reservation_flags flags)
{
struct bch_fs_pcpu *pcpu;
u64 old, get;
s64 sectors_available;
u64 sectors_available;
int ret;

percpu_down_read(&c->mark_lock);
Expand Down Expand Up @@ -1202,6 +1202,9 @@ int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
percpu_u64_set(&c->pcpu->sectors_available, 0);
sectors_available = avail_factor(__bch2_fs_usage_read_short(c).free);

if (sectors_available && (flags & BCH_DISK_RESERVATION_PARTIAL))
sectors = min(sectors, sectors_available);

if (sectors <= sectors_available ||
(flags & BCH_DISK_RESERVATION_NOFAIL)) {
atomic64_set(&c->sectors_available,
Expand Down
12 changes: 7 additions & 5 deletions fs/bcachefs/buckets.h
Original file line number Diff line number Diff line change
Expand Up @@ -344,14 +344,16 @@ static inline void bch2_disk_reservation_put(struct bch_fs *c,
}
}

#define BCH_DISK_RESERVATION_NOFAIL (1 << 0)
enum bch_reservation_flags {
BCH_DISK_RESERVATION_NOFAIL = 1 << 0,
BCH_DISK_RESERVATION_PARTIAL = 1 << 1,
};

int __bch2_disk_reservation_add(struct bch_fs *,
struct disk_reservation *,
u64, int);
int __bch2_disk_reservation_add(struct bch_fs *, struct disk_reservation *,
u64, enum bch_reservation_flags);

static inline int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
u64 sectors, int flags)
u64 sectors, enum bch_reservation_flags flags)
{
#ifdef __KERNEL__
u64 old, new;
Expand Down
1 change: 1 addition & 0 deletions fs/bcachefs/chardev.c
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a

opt_set(thr->opts, stdio, (u64)(unsigned long)&thr->thr.stdio);
opt_set(thr->opts, read_only, 1);
opt_set(thr->opts, ratelimit_errors, 0);

/* We need request_key() to be called before we punt to kthread: */
opt_set(thr->opts, nostart, true);
Expand Down
15 changes: 14 additions & 1 deletion fs/bcachefs/darray.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,27 @@

#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include "darray.h"

int __bch2_darray_resize_noprof(darray_char *d, size_t element_size, size_t new_size, gfp_t gfp)
{
if (new_size > d->size) {
new_size = roundup_pow_of_two(new_size);

void *data = kvmalloc_array_noprof(new_size, element_size, gfp);
/*
* This is a workaround: kvmalloc() doesn't support > INT_MAX
* allocations, but vmalloc() does.
* The limit needs to be lifted from kvmalloc, and when it does
* we'll go back to just using that.
*/
size_t bytes;
if (unlikely(check_mul_overflow(new_size, element_size, &bytes)))
return -ENOMEM;

void *data = likely(bytes < INT_MAX)
? kvmalloc_noprof(bytes, gfp)
: vmalloc_noprof(bytes);
if (!data)
return -ENOMEM;

Expand Down
7 changes: 0 additions & 7 deletions fs/bcachefs/dirent.c
Original file line number Diff line number Diff line change
Expand Up @@ -250,13 +250,6 @@ int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir,
return ret;
}

static void dirent_copy_target(struct bkey_i_dirent *dst,
struct bkey_s_c_dirent src)
{
dst->v.d_inum = src.v->d_inum;
dst->v.d_type = src.v->d_type;
}

int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir,
struct bkey_s_c_dirent d, subvol_inum *target)
{
Expand Down
7 changes: 7 additions & 0 deletions fs/bcachefs/dirent.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,13 @@ static inline unsigned dirent_val_u64s(unsigned len)
int bch2_dirent_read_target(struct btree_trans *, subvol_inum,
struct bkey_s_c_dirent, subvol_inum *);

static inline void dirent_copy_target(struct bkey_i_dirent *dst,
struct bkey_s_c_dirent src)
{
dst->v.d_inum = src.v->d_inum;
dst->v.d_type = src.v->d_type;
}

int bch2_dirent_create_snapshot(struct btree_trans *, u32, u64, u32,
const struct bch_hash_info *, u8,
const struct qstr *, u64, u64 *,
Expand Down
6 changes: 4 additions & 2 deletions fs/bcachefs/disk_accounting.c
Original file line number Diff line number Diff line change
Expand Up @@ -856,8 +856,10 @@ int bch2_dev_usage_init(struct bch_dev *ca, bool gc)
};
u64 v[3] = { ca->mi.nbuckets - ca->mi.first_bucket, 0, 0 };

int ret = bch2_trans_do(c, NULL, NULL, 0,
bch2_disk_accounting_mod(trans, &acc, v, ARRAY_SIZE(v), gc));
int ret = bch2_trans_do(c, ({
bch2_disk_accounting_mod(trans, &acc, v, ARRAY_SIZE(v), gc) ?:
(!gc ? bch2_trans_commit(trans, NULL, NULL, 0) : 0);
}));
bch_err_fn(c, ret);
return ret;
}
Expand Down
22 changes: 11 additions & 11 deletions fs/bcachefs/ec.c
Original file line number Diff line number Diff line change
Expand Up @@ -266,12 +266,12 @@ static int __mark_stripe_bucket(struct btree_trans *trans,
if (!deleting) {
a->stripe = s.k->p.offset;
a->stripe_redundancy = s.v->nr_redundant;
alloc_data_type_set(a, data_type);
} else {
a->stripe = 0;
a->stripe_redundancy = 0;
alloc_data_type_set(a, BCH_DATA_user);
}

alloc_data_type_set(a, data_type);
err:
printbuf_exit(&buf);
return ret;
Expand Down Expand Up @@ -1186,7 +1186,7 @@ static void ec_stripe_delete_work(struct work_struct *work)
if (!idx)
break;

int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
ec_stripe_delete(trans, idx));
bch_err_fn(c, ret);
if (ret)
Expand Down Expand Up @@ -1519,14 +1519,14 @@ static void ec_stripe_create(struct ec_stripe_new *s)
goto err;
}

ret = bch2_trans_do(c, &s->res, NULL,
BCH_TRANS_COMMIT_no_check_rw|
BCH_TRANS_COMMIT_no_enospc,
ec_stripe_key_update(trans,
s->have_existing_stripe
? bkey_i_to_stripe(&s->existing_stripe.key)
: NULL,
bkey_i_to_stripe(&s->new_stripe.key)));
ret = bch2_trans_commit_do(c, &s->res, NULL,
BCH_TRANS_COMMIT_no_check_rw|
BCH_TRANS_COMMIT_no_enospc,
ec_stripe_key_update(trans,
s->have_existing_stripe
? bkey_i_to_stripe(&s->existing_stripe.key)
: NULL,
bkey_i_to_stripe(&s->new_stripe.key)));
bch_err_msg(c, ret, "creating stripe key");
if (ret) {
goto err;
Expand Down
5 changes: 4 additions & 1 deletion fs/bcachefs/error.c
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,10 @@ int __bch2_fsck_err(struct bch_fs *c,
* delete the key)
* - and we don't need to warn if we're not prompting
*/
WARN_ON(!(flags & FSCK_AUTOFIX) && !trans && bch2_current_has_btree_trans(c));
WARN_ON((flags & FSCK_CAN_FIX) &&
!(flags & FSCK_AUTOFIX) &&
!trans &&
bch2_current_has_btree_trans(c));

if ((flags & FSCK_CAN_FIX) &&
test_bit(err, c->sb.errors_silent))
Expand Down
6 changes: 6 additions & 0 deletions fs/bcachefs/fs-io-buffered.c
Original file line number Diff line number Diff line change
Expand Up @@ -856,6 +856,12 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
folios_trunc(&fs, fi);
end = min(end, folio_end_pos(darray_last(fs)));
} else {
if (!folio_test_uptodate(f)) {
ret = bch2_read_single_folio(f, mapping);
if (ret)
goto out;
}

folios_trunc(&fs, fi + 1);
end = f_pos + f_reserved;
}
Expand Down
Loading

0 comments on commit c1e8227

Please sign in to comment.