Skip to content

Commit

Permalink
Merge tag 'for-6.4-tag' of git://git.kernel.org/pub/scm/linux/kernel/…
Browse files Browse the repository at this point in the history
…git/kdave/linux

Pull btrfs updates from David Sterba:
 "Mostly core changes and cleanups, some notable fixes and two
  performance improvements in directory logging.

  The IO path cleanups are removing or refactoring old code, scrub main
  loop has been completely rewritten also refactoring old code.

  There are some changes to non-btrfs code, mostly trivial, the cgroup
  punt bio logic is only moved from generic code.

  Performance improvements:

   - improve logging changes in a directory during one transaction,
     avoid iterating over items and reduce lock contention (fsync time
     4x lower)

   - when logging directory entries during one transaction, reduce
     locking of subvolume trees by checking tree-log instead
     (improvement in throughput and latency for concurrent access to a
     subvolume)

  Notable fixes:

   - dev-replace:
      - properly honor read mode when requested to avoid reading from
        source device
      - target device won't be used for eventual read repair, this is
        unreliable for NODATASUM files
      - when there are unpaired (and unrepairable) metadata during
        replace, exit early with error and don't try to finish whole
        operation

   - scrub ioctl properly rejects unknown flags

   - fix global block reserve calculations

   - fix partial direct io write when there's a page fault in the
     middle, iomap will try to continue with partial request but the
     btrfs part did not match that, this can lead to zeros written
     instead of data

  Core changes:

   - io path:
      - continued cleanups and refactoring around bio handling
      - extent io submit path simplifications and cleanups
      - flush write path simplifications and cleanups
      - rework logic of passing sync mode of bio, with further cleanups

   - rewrite scrub code flow, restructure how the stripes are enumerated
     and verified in a more unified way

   - allow to set lower threshold for block group reclaim in debug mode
     to aid zoned mode testing

   - remove obsolete time-based delayed ref throttling logic when
     truncating items

   - DREW locks are not using percpu variables anymore

   - more warning fixes (-Wmaybe-uninitialized)

   - u64 division simplifications

   - error handling improvements

  Non-btrfs code changes:

   - push cgroup punt bio logic to btrfs code (there was no other user
     of that), the functionality can be now selected separately by
     BLK_CGROUP_PUNT_BIO

   - crc32c_impl removed after removing last uses in btrfs code

   - add btrfs_assertfail() to objtool table"

* tag 'for-6.4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (147 commits)
  btrfs: mark btrfs_assertfail() __noreturn
  btrfs: fix uninitialized variable warnings
  btrfs: use log root when iterating over index keys when logging directory
  btrfs: avoid iterating over all indexes when logging directory
  btrfs: dev-replace: error out if we have unrepaired metadata error during
  btrfs: remove pointless loop at btrfs_get_next_valid_item()
  btrfs: scrub: reject unsupported scrub flags
  btrfs: reinterpret async discard iops_limit=0 as no delay
  btrfs: set default discard iops_limit to 1000
  btrfs: remove unused raid56 functions which were dedicated for scrub
  btrfs: scrub: remove scrub_bio structure
  btrfs: scrub: remove scrub_block and scrub_sector structures
  btrfs: scrub: remove the old scrub recheck code
  btrfs: scrub: remove the old writeback infrastructure
  btrfs: scrub: remove scrub_parity structure
  btrfs: scrub: use scrub_stripe to implement RAID56 P/Q scrub
  btrfs: scrub: switch scrub_simple_mirror() to scrub_stripe infrastructure
  btrfs: scrub: introduce helper to queue a stripe for scrub
  btrfs: scrub: introduce error reporting functionality for scrub_stripe
  btrfs: scrub: introduce a writeback helper for scrub_stripe
  ...
  • Loading branch information
torvalds committed Apr 26, 2023
2 parents 94fc079 + f372463 commit 85d7ab2
Show file tree
Hide file tree
Showing 62 changed files with 2,867 additions and 4,844 deletions.
3 changes: 3 additions & 0 deletions block/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ config BLK_RQ_ALLOC_TIME
config BLK_CGROUP_RWSTAT
bool

config BLK_CGROUP_PUNT_BIO
bool

config BLK_DEV_BSG_COMMON
tristate

Expand Down
78 changes: 44 additions & 34 deletions block/blk-cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */

bool blkcg_debug_stats = false;
static struct workqueue_struct *blkcg_punt_bio_wq;

#define BLKG_DESTROY_BATCH_SIZE 64

Expand Down Expand Up @@ -166,7 +165,9 @@ static void __blkg_release(struct rcu_head *rcu)
{
struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);

#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
WARN_ON(!bio_list_empty(&blkg->async_bios));
#endif

/* release the blkcg and parent blkg refs this blkg has been holding */
css_put(&blkg->blkcg->css);
Expand All @@ -188,6 +189,9 @@ static void blkg_release(struct percpu_ref *ref)
call_rcu(&blkg->rcu_head, __blkg_release);
}

#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
static struct workqueue_struct *blkcg_punt_bio_wq;

static void blkg_async_bio_workfn(struct work_struct *work)
{
struct blkcg_gq *blkg = container_of(work, struct blkcg_gq,
Expand All @@ -198,10 +202,10 @@ static void blkg_async_bio_workfn(struct work_struct *work)
bool need_plug = false;

/* as long as there are pending bios, @blkg can't go away */
spin_lock_bh(&blkg->async_bio_lock);
spin_lock(&blkg->async_bio_lock);
bio_list_merge(&bios, &blkg->async_bios);
bio_list_init(&blkg->async_bios);
spin_unlock_bh(&blkg->async_bio_lock);
spin_unlock(&blkg->async_bio_lock);

/* start plug only when bio_list contains at least 2 bios */
if (bios.head && bios.head->bi_next) {
Expand All @@ -214,6 +218,40 @@ static void blkg_async_bio_workfn(struct work_struct *work)
blk_finish_plug(&plug);
}

/*
* When a shared kthread issues a bio for a cgroup, doing so synchronously can
* lead to priority inversions as the kthread can be trapped waiting for that
* cgroup. Use this helper instead of submit_bio to punt the actual issuing to
* a dedicated per-blkcg work item to avoid such priority inversions.
*/
void blkcg_punt_bio_submit(struct bio *bio)
{
struct blkcg_gq *blkg = bio->bi_blkg;

if (blkg->parent) {
spin_lock(&blkg->async_bio_lock);
bio_list_add(&blkg->async_bios, bio);
spin_unlock(&blkg->async_bio_lock);
queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work);
} else {
/* never bounce for the root cgroup */
submit_bio(bio);
}
}
EXPORT_SYMBOL_GPL(blkcg_punt_bio_submit);

static int __init blkcg_punt_bio_init(void)
{
blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio",
WQ_MEM_RECLAIM | WQ_FREEZABLE |
WQ_UNBOUND | WQ_SYSFS, 0);
if (!blkcg_punt_bio_wq)
return -ENOMEM;
return 0;
}
subsys_initcall(blkcg_punt_bio_init);
#endif /* CONFIG_BLK_CGROUP_PUNT_BIO */

/**
* bio_blkcg_css - return the blkcg CSS associated with a bio
* @bio: target bio
Expand Down Expand Up @@ -269,10 +307,12 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct gendisk *disk,

blkg->q = disk->queue;
INIT_LIST_HEAD(&blkg->q_node);
blkg->blkcg = blkcg;
#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
spin_lock_init(&blkg->async_bio_lock);
bio_list_init(&blkg->async_bios);
INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn);
blkg->blkcg = blkcg;
#endif

u64_stats_init(&blkg->iostat.sync);
for_each_possible_cpu(cpu) {
Expand Down Expand Up @@ -1688,25 +1728,6 @@ void blkcg_policy_unregister(struct blkcg_policy *pol)
}
EXPORT_SYMBOL_GPL(blkcg_policy_unregister);

bool __blkcg_punt_bio_submit(struct bio *bio)
{
struct blkcg_gq *blkg = bio->bi_blkg;

/* consume the flag first */
bio->bi_opf &= ~REQ_CGROUP_PUNT;

/* never bounce for the root cgroup */
if (!blkg->parent)
return false;

spin_lock_bh(&blkg->async_bio_lock);
bio_list_add(&blkg->async_bios, bio);
spin_unlock_bh(&blkg->async_bio_lock);

queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work);
return true;
}

/*
* Scale the accumulated delay based on how long it has been since we updated
* the delay. We only call this when we are adding delay, in case it's been a
Expand Down Expand Up @@ -2085,16 +2106,5 @@ bool blk_cgroup_congested(void)
return ret;
}

static int __init blkcg_init(void)
{
blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio",
WQ_MEM_RECLAIM | WQ_FREEZABLE |
WQ_UNBOUND | WQ_SYSFS, 0);
if (!blkcg_punt_bio_wq)
return -ENOMEM;
return 0;
}
subsys_initcall(blkcg_init);

module_param(blkcg_debug_stats, bool, 0644);
MODULE_PARM_DESC(blkcg_debug_stats, "True if you want debug stats, false if not");
15 changes: 2 additions & 13 deletions block/blk-cgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,10 @@ struct blkcg_gq {
struct blkg_iostat_set iostat;

struct blkg_policy_data *pd[BLKCG_MAX_POLS];

#ifdef CONFIG_BLK_CGROUP_PUNT_BIO
spinlock_t async_bio_lock;
struct bio_list async_bios;
#endif
union {
struct work_struct async_bio_work;
struct work_struct free_work;
Expand Down Expand Up @@ -375,16 +376,6 @@ static inline void blkg_put(struct blkcg_gq *blkg)
if (((d_blkg) = blkg_lookup(css_to_blkcg(pos_css), \
(p_blkg)->q)))

bool __blkcg_punt_bio_submit(struct bio *bio);

static inline bool blkcg_punt_bio_submit(struct bio *bio)
{
if (bio->bi_opf & REQ_CGROUP_PUNT)
return __blkcg_punt_bio_submit(bio);
else
return false;
}

static inline void blkcg_bio_issue_init(struct bio *bio)
{
bio_issue_init(&bio->bi_issue, bio_sectors(bio));
Expand Down Expand Up @@ -506,8 +497,6 @@ static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return
static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
static inline void blkg_get(struct blkcg_gq *blkg) { }
static inline void blkg_put(struct blkcg_gq *blkg) { }

static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
static inline void blkcg_bio_issue_init(struct bio *bio) { }
static inline void blk_cgroup_bio_start(struct bio *bio) { }
static inline bool blk_cgroup_mergeable(struct request *rq, struct bio *bio) { return true; }
Expand Down
3 changes: 0 additions & 3 deletions block/blk-core.c
Original file line number Diff line number Diff line change
Expand Up @@ -830,9 +830,6 @@ EXPORT_SYMBOL(submit_bio_noacct);
*/
void submit_bio(struct bio *bio)
{
if (blkcg_punt_bio_submit(bio))
return;

if (bio_op(bio) == REQ_OP_READ) {
task_io_account_read(bio->bi_iter.bi_size);
count_vm_events(PGPGIN, bio_sectors(bio));
Expand Down
1 change: 1 addition & 0 deletions fs/btrfs/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

config BTRFS_FS
tristate "Btrfs filesystem support"
select BLK_CGROUP_PUNT_BIO
select CRYPTO
select CRYPTO_CRC32C
select LIBCRC32C
Expand Down
Loading

0 comments on commit 85d7ab2

Please sign in to comment.