Skip to content

Commit

Permalink
Merge tag 'for-5.15-tag' of git://git.kernel.org/pub/scm/linux/kernel…
Browse files Browse the repository at this point in the history
…/git/kdave/linux

Pull btrfs updates from David Sterba:
 "The highlights of this round are integrations with fs-verity and
  idmapped mounts, the rest is usual mix of minor improvements, speedups
  and cleanups.

  There are some patches outside of btrfs, namely updating some VFS
  interfaces, all straightforward and acked.

  Features:

   - fs-verity support, using standard ioctls, backward compatible with
     read-only limitation on inodes with previously enabled fs-verity

   - idmapped mount support

   - make mount with rescue=ibadroots more tolerant to partially damaged
     trees

   - allow raid0 on a single device and raid10 on two devices,
     degenerate cases but might be useful as an intermediate step during
     conversion to other profiles

   - zoned mode block group auto reclaim can be disabled via sysfs knob

  Performance improvements:

   - continue readahead of node siblings even if target node is in
     memory, could speed up full send (on sample test +11%)

   - batching of delayed items can speed up creating many files

   - fsync/tree-log speedups
       - avoid unnecessary work (gains +2% throughput, -2% run time on
         sample load)
       - reduced lock contention on renames (on dbench +4% throughput,
         up to -30% latency)

  Fixes:

   - various zoned mode fixes

   - preemptive flushing threshold tuning, avoid excessive work on
     almost full filesystems

  Core:

   - continued subpage support, preparation for implementing remaining
     features like compression and defragmentation; with some
     limitations, write is now enabled on 64K page systems with 4K
     sectors, still considered experimental
       - no readahead on compressed reads
       - inline extents disabled
       - disabled raid56 profile conversion and mount

   - improved flushing logic, fixing early ENOSPC on some workloads

   - inode flags have been internally split to read-only and read-write
     incompat bit parts, used by fs-verity

   - new tree items for fs-verity
       - descriptor item
       - Merkle tree item

   - inode operations extended to be namespace-aware

   - cleanups and refactoring

  Generic code changes:

   - fs: new export filemap_fdatawrite_wbc

   - fs: removed sync_inode

   - block: bio_trim argument type fixups

   - vfs: add namespace-aware lookup"

* tag 'for-5.15-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (114 commits)
  btrfs: reset replace target device to allocation state on close
  btrfs: zoned: fix ordered extent boundary calculation
  btrfs: do not do preemptive flushing if the majority is global rsv
  btrfs: reduce the preemptive flushing threshold to 90%
  btrfs: tree-log: check btrfs_lookup_data_extent return value
  btrfs: avoid unnecessarily logging directories that had no changes
  btrfs: allow idmapped mount
  btrfs: handle ACLs on idmapped mounts
  btrfs: allow idmapped INO_LOOKUP_USER ioctl
  btrfs: allow idmapped SUBVOL_SETFLAGS ioctl
  btrfs: allow idmapped SET_RECEIVED_SUBVOL ioctls
  btrfs: relax restrictions for SNAP_DESTROY_V2 with subvolids
  btrfs: allow idmapped SNAP_DESTROY ioctls
  btrfs: allow idmapped SNAP_CREATE/SUBVOL_CREATE ioctls
  btrfs: check whether fsgid/fsuid are mapped during subvolume creation
  btrfs: allow idmapped permission inode op
  btrfs: allow idmapped setattr inode op
  btrfs: allow idmapped tmpfile inode op
  btrfs: allow idmapped symlink inode op
  btrfs: allow idmapped mkdir inode op
  ...
  • Loading branch information
torvalds committed Aug 31, 2021
2 parents 9c849ce + 0d977e0 commit 87045e6
Show file tree
Hide file tree
Showing 58 changed files with 2,769 additions and 1,425 deletions.
12 changes: 7 additions & 5 deletions block/bio.c
Original file line number Diff line number Diff line change
Expand Up @@ -1544,12 +1544,15 @@ EXPORT_SYMBOL(bio_split);
* @bio: bio to trim
* @offset: number of sectors to trim from the front of @bio
* @size: size we want to trim @bio to, in sectors
*
* This function is typically used for bios that are cloned and submitted
* to the underlying device in parts.
*/
void bio_trim(struct bio *bio, int offset, int size)
void bio_trim(struct bio *bio, sector_t offset, sector_t size)
{
/* 'bio' is a cloned bio which we need to trim to match
* the given offset and size.
*/
if (WARN_ON_ONCE(offset > BIO_MAX_SECTORS || size > BIO_MAX_SECTORS ||
offset + size > bio->bi_iter.bi_size))
return;

size <<= 9;
if (offset == 0 && size == bio->bi_iter.bi_size)
Expand All @@ -1560,7 +1563,6 @@ void bio_trim(struct bio *bio, int offset, int size)

if (bio_integrity(bio))
bio_integrity_trim(bio);

}
EXPORT_SYMBOL_GPL(bio_trim);

Expand Down
7 changes: 1 addition & 6 deletions fs/9p/vfs_file.c
Original file line number Diff line number Diff line change
Expand Up @@ -612,12 +612,7 @@ static void v9fs_mmap_vm_close(struct vm_area_struct *vma)
p9_debug(P9_DEBUG_VFS, "9p VMA close, %p, flushing", vma);

inode = file_inode(vma->vm_file);

if (!mapping_can_writeback(inode->i_mapping))
wbc.nr_to_write = 0;

might_sleep();
sync_inode(inode, &wbc);
filemap_fdatawrite_wbc(inode->i_mapping, &wbc);
}


Expand Down
1 change: 1 addition & 0 deletions fs/btrfs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o
btrfs-$(CONFIG_BLK_DEV_ZONED) += zoned.o
btrfs-$(CONFIG_FS_VERITY) += verity.o

btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
tests/extent-buffer-tests.o tests/btrfs-tests.o \
Expand Down
11 changes: 6 additions & 5 deletions fs/btrfs/acl.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
}

static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
struct inode *inode, struct posix_acl *acl, int type)
struct user_namespace *mnt_userns,
struct inode *inode, struct posix_acl *acl, int type)
{
int ret, size = 0;
const char *name;
Expand Down Expand Up @@ -114,12 +115,12 @@ int btrfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
umode_t old_mode = inode->i_mode;

if (type == ACL_TYPE_ACCESS && acl) {
ret = posix_acl_update_mode(&init_user_ns, inode,
ret = posix_acl_update_mode(mnt_userns, inode,
&inode->i_mode, &acl);
if (ret)
return ret;
}
ret = __btrfs_set_acl(NULL, inode, acl, type);
ret = __btrfs_set_acl(NULL, mnt_userns, inode, acl, type);
if (ret)
inode->i_mode = old_mode;
return ret;
Expand All @@ -140,14 +141,14 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans,
return ret;

if (default_acl) {
ret = __btrfs_set_acl(trans, inode, default_acl,
ret = __btrfs_set_acl(trans, &init_user_ns, inode, default_acl,
ACL_TYPE_DEFAULT);
posix_acl_release(default_acl);
}

if (acl) {
if (!ret)
ret = __btrfs_set_acl(trans, inode, acl,
ret = __btrfs_set_acl(trans, &init_user_ns, inode, acl,
ACL_TYPE_ACCESS);
posix_acl_release(acl);
}
Expand Down
6 changes: 3 additions & 3 deletions fs/btrfs/backref.c
Original file line number Diff line number Diff line change
Expand Up @@ -1211,7 +1211,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
again:
head = NULL;

ret = btrfs_search_slot(trans, fs_info->extent_root, &key, path, 0, 0);
ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
if (ret < 0)
goto out;
BUG_ON(ret == 0);
Expand Down Expand Up @@ -1488,14 +1488,14 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans,
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist **roots,
bool ignore_offset, bool skip_commit_root_sem)
bool skip_commit_root_sem)
{
int ret;

if (!trans && !skip_commit_root_sem)
down_read(&fs_info->commit_root_sem);
ret = btrfs_find_all_roots_safe(trans, fs_info, bytenr,
time_seq, roots, ignore_offset);
time_seq, roots, false);
if (!trans && !skip_commit_root_sem)
up_read(&fs_info->commit_root_sem);
return ret;
Expand Down
2 changes: 1 addition & 1 deletion fs/btrfs/backref.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
const u64 *extent_item_pos, bool ignore_offset);
int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 bytenr,
u64 time_seq, struct ulist **roots, bool ignore_offset,
u64 time_seq, struct ulist **roots,
bool skip_commit_root_sem);
char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path,
u32 name_len, unsigned long name_off,
Expand Down
114 changes: 111 additions & 3 deletions fs/btrfs/block-group.c
Original file line number Diff line number Diff line change
Expand Up @@ -1561,7 +1561,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
div64_u64(zone_unusable * 100, bg->length));
trace_btrfs_reclaim_block_group(bg);
ret = btrfs_relocate_chunk(fs_info, bg->start);
if (ret)
if (ret && ret != -EAGAIN)
btrfs_err(fs_info, "error relocating chunk %llu",
bg->start);

Expand Down Expand Up @@ -2105,11 +2105,22 @@ static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
bg->used = em->len;
bg->flags = map->type;
ret = btrfs_add_block_group_cache(fs_info, bg);
/*
* We may have some valid block group cache added already, in
* that case we skip to the next one.
*/
if (ret == -EEXIST) {
ret = 0;
btrfs_put_block_group(bg);
continue;
}

if (ret) {
btrfs_remove_free_space_cache(bg);
btrfs_put_block_group(bg);
break;
}

btrfs_update_space_info(fs_info, bg->flags, em->len, em->len,
0, 0, &space_info);
bg->space_info = space_info;
Expand Down Expand Up @@ -2212,6 +2223,14 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
ret = check_chunk_block_group_mappings(info);
error:
btrfs_free_path(path);
/*
* We've hit some error while reading the extent tree, and have
* rescue=ibadroots mount option.
* Try to fill the tree using dummy block groups so that the user can
* continue to mount and grab their data.
*/
if (ret && btrfs_test_opt(info, IGNOREBADROOTS))
ret = fill_dummy_bgs(info);
return ret;
}

Expand Down Expand Up @@ -2244,6 +2263,95 @@ static int insert_block_group_item(struct btrfs_trans_handle *trans,
return btrfs_insert_item(trans, root, &key, &bgi, sizeof(bgi));
}

static int insert_dev_extent(struct btrfs_trans_handle *trans,
struct btrfs_device *device, u64 chunk_offset,
u64 start, u64 num_bytes)
{
struct btrfs_fs_info *fs_info = device->fs_info;
struct btrfs_root *root = fs_info->dev_root;
struct btrfs_path *path;
struct btrfs_dev_extent *extent;
struct extent_buffer *leaf;
struct btrfs_key key;
int ret;

WARN_ON(!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state));
WARN_ON(test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state));
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;

key.objectid = device->devid;
key.type = BTRFS_DEV_EXTENT_KEY;
key.offset = start;
ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*extent));
if (ret)
goto out;

leaf = path->nodes[0];
extent = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_extent);
btrfs_set_dev_extent_chunk_tree(leaf, extent, BTRFS_CHUNK_TREE_OBJECTID);
btrfs_set_dev_extent_chunk_objectid(leaf, extent,
BTRFS_FIRST_CHUNK_TREE_OBJECTID);
btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);

btrfs_set_dev_extent_length(leaf, extent, num_bytes);
btrfs_mark_buffer_dirty(leaf);
out:
btrfs_free_path(path);
return ret;
}

/*
* This function belongs to phase 2.
*
* See the comment at btrfs_chunk_alloc() for details about the chunk allocation
* phases.
*/
static int insert_dev_extents(struct btrfs_trans_handle *trans,
u64 chunk_offset, u64 chunk_size)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_device *device;
struct extent_map *em;
struct map_lookup *map;
u64 dev_offset;
u64 stripe_size;
int i;
int ret = 0;

em = btrfs_get_chunk_map(fs_info, chunk_offset, chunk_size);
if (IS_ERR(em))
return PTR_ERR(em);

map = em->map_lookup;
stripe_size = em->orig_block_len;

/*
* Take the device list mutex to prevent races with the final phase of
* a device replace operation that replaces the device object associated
* with the map's stripes, because the device object's id can change
* at any time during that final phase of the device replace operation
* (dev-replace.c:btrfs_dev_replace_finishing()), so we could grab the
* replaced device and then see it with an ID of BTRFS_DEV_REPLACE_DEVID,
* resulting in persisting a device extent item with such ID.
*/
mutex_lock(&fs_info->fs_devices->device_list_mutex);
for (i = 0; i < map->num_stripes; i++) {
device = map->stripes[i].dev;
dev_offset = map->stripes[i].physical;

ret = insert_dev_extent(trans, device, chunk_offset, dev_offset,
stripe_size);
if (ret)
break;
}
mutex_unlock(&fs_info->fs_devices->device_list_mutex);

free_extent_map(em);
return ret;
}

/*
* This function, btrfs_create_pending_block_groups(), belongs to the phase 2 of
* chunk allocation.
Expand Down Expand Up @@ -2278,8 +2386,8 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
if (ret)
btrfs_abort_transaction(trans, ret);
}
ret = btrfs_finish_chunk_alloc(trans, block_group->start,
block_group->length);
ret = insert_dev_extents(trans, block_group->start,
block_group->length);
if (ret)
btrfs_abort_transaction(trans, ret);
add_block_group_free_space(trans, block_group);
Expand Down
27 changes: 26 additions & 1 deletion fs/btrfs/btrfs_inode.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,13 @@ enum {
* the file range, inode's io_tree).
*/
BTRFS_INODE_NO_DELALLOC_FLUSH,
/*
* Set when we are working on enabling verity for a file. Computing and
* writing the whole Merkle tree can take a while so we want to prevent
* races where two separate tasks attempt to simultaneously start verity
* on the same file.
*/
BTRFS_INODE_VERITY_IN_PROGRESS,
};

/* in memory btrfs inode */
Expand Down Expand Up @@ -189,8 +196,10 @@ struct btrfs_inode {
*/
u64 csum_bytes;

/* flags field from the on disk inode */
/* Backwards incompatible flags, lower half of inode_item::flags */
u32 flags;
/* Read-only compatibility flags, upper half of inode_item::flags */
u32 ro_flags;

/*
* Counters to keep track of the number of extent item's we may use due
Expand Down Expand Up @@ -348,6 +357,22 @@ struct btrfs_dio_private {
u8 csums[];
};

/*
* btrfs_inode_item stores flags in a u64, btrfs_inode stores them in two
* separate u32s. These two functions convert between the two representations.
*/
static inline u64 btrfs_inode_combine_flags(u32 flags, u32 ro_flags)
{
return (flags | ((u64)ro_flags << 32));
}

static inline void btrfs_inode_split_flags(u64 inode_item_flags,
u32 *flags, u32 *ro_flags)
{
*flags = (u32)inode_item_flags;
*ro_flags = (u32)(inode_item_flags >> 32);
}

/* Array of bytes with variable length, hexadecimal format 0x1234 */
#define CSUM_FMT "0x%*phN"
#define CSUM_FMT_VALUE(size, bytes) size, bytes
Expand Down
Loading

0 comments on commit 87045e6

Please sign in to comment.