Merge tag 'for-6.11/block-20240722' of git://git.kernel.dk/linux
Pull more block updates from Jens Axboe:

 - MD fixes via Song:
     - md-cluster fixes (Heming Zhao)
     - raid1 fix (Mateusz Jończyk)

 - s390/dasd module description (Jeff)

 - Series cleaning up and hardening the blk-mq debugfs flag handling
   (John, Christoph)

 - blk-cgroup cleanup (Xiu)

 - Error polled IO attempts if backend doesn't support it (hexue)

 - Fix for an sbitmap hang (Yang)

* tag 'for-6.11/block-20240722' of git://git.kernel.dk/linux: (23 commits)
  blk-cgroup: move congestion_count to struct blkcg
  sbitmap: fix io hung due to race on sbitmap_word::cleared
  block: avoid polling configuration errors
  block: Catch possible entries missing from rqf_name[]
  block: Simplify definition of RQF_NAME()
  block: Use enum to define RQF_x bit indexes
  block: Catch possible entries missing from cmd_flag_name[]
  block: Catch possible entries missing from alloc_policy_name[]
  block: Catch possible entries missing from hctx_flag_name[]
  block: Catch possible entries missing from hctx_state_name[]
  block: Catch possible entries missing from blk_queue_flag_name[]
  block: Make QUEUE_FLAG_x as an enum
  block: Relocate BLK_MQ_MAX_DEPTH
  block: Relocate BLK_MQ_CPU_WORK_BATCH
  block: remove QUEUE_FLAG_STOPPED
  block: Add missing entry to hctx_flag_name[]
  block: Add zone write plugging entry to rqf_name[]
  block: Add missing entries from cmd_flag_name[]
  s390/dasd: fix error checks in dasd_copy_pair_store()
  s390/dasd: add missing MODULE_DESCRIPTION() macros
  ...
torvalds committed Jul 22, 2024
2 parents 0256994 + 89ed6c9 commit 7d080fa
Showing 19 changed files with 233 additions and 102 deletions.
7 changes: 4 additions & 3 deletions block/blk-cgroup.c
@@ -2182,12 +2182,13 @@ void blk_cgroup_bio_start(struct bio *bio)

bool blk_cgroup_congested(void)
{
-	struct cgroup_subsys_state *css;
+	struct blkcg *blkcg;
	bool ret = false;

	rcu_read_lock();
-	for (css = blkcg_css(); css; css = css->parent) {
-		if (atomic_read(&css->cgroup->congestion_count)) {
+	for (blkcg = css_to_blkcg(blkcg_css()); blkcg;
+	     blkcg = blkcg_parent(blkcg)) {
+		if (atomic_read(&blkcg->congestion_count)) {
			ret = true;
			break;
		}
10 changes: 6 additions & 4 deletions block/blk-cgroup.h
@@ -95,6 +95,8 @@ struct blkcg {
	struct cgroup_subsys_state css;
	spinlock_t lock;
	refcount_t online_pin;
+	/* If there is block congestion on this cgroup. */
+	atomic_t congestion_count;

	struct radix_tree_root blkg_tree;
	struct blkcg_gq __rcu *blkg_hint;
@@ -374,7 +376,7 @@ static inline void blkcg_use_delay(struct blkcg_gq *blkg)
	if (WARN_ON_ONCE(atomic_read(&blkg->use_delay) < 0))
		return;
	if (atomic_add_return(1, &blkg->use_delay) == 1)
-		atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
+		atomic_inc(&blkg->blkcg->congestion_count);
}

static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
@@ -399,7 +401,7 @@ static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
	if (old == 0)
		return 0;
	if (old == 1)
-		atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
+		atomic_dec(&blkg->blkcg->congestion_count);
	return 1;
}

@@ -418,7 +420,7 @@ static inline void blkcg_set_delay(struct blkcg_gq *blkg, u64 delay)

	/* We only want 1 person setting the congestion count for this blkg. */
	if (!old && atomic_try_cmpxchg(&blkg->use_delay, &old, -1))
-		atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
+		atomic_inc(&blkg->blkcg->congestion_count);

	atomic64_set(&blkg->delay_nsec, delay);
}
@@ -435,7 +437,7 @@ static inline void blkcg_clear_delay(struct blkcg_gq *blkg)

	/* We only want 1 person clearing the congestion count for this blkg. */
	if (old && atomic_try_cmpxchg(&blkg->use_delay, &old, 0))
-		atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
+		atomic_dec(&blkg->blkcg->congestion_count);
}

/**
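A note on the pattern above: the per-blkg use_delay counter only touches the shared congestion count on its 0 <-> 1 transitions, so nested users do not inflate it. Below is a rough userspace sketch of that idea with C11 atomics; struct group, group_use_delay() and group_clear_delay() are invented stand-ins for the blkcg helpers, not kernel APIs.

#include <stdatomic.h>
#include <stdio.h>

/* Invented stand-in for a blkcg_gq-like group; not a kernel type. */
struct group {
	atomic_int use_delay;          /* how many delay users this group has */
	atomic_int *congestion_count;  /* shared counter owned by the parent */
};

/* Only the 0 -> 1 transition of use_delay bumps the shared counter,
 * mirroring the atomic_add_return() == 1 test in blkcg_use_delay(). */
static void group_use_delay(struct group *g)
{
	if (atomic_fetch_add(&g->use_delay, 1) == 0)
		atomic_fetch_add(g->congestion_count, 1);
}

/* Only the caller that wins the compare-exchange drops the counter,
 * mirroring the atomic_try_cmpxchg() test in blkcg_clear_delay(). */
static void group_clear_delay(struct group *g)
{
	int old = atomic_load(&g->use_delay);

	if (old > 0 && atomic_compare_exchange_strong(&g->use_delay, &old, 0))
		atomic_fetch_sub(g->congestion_count, 1);
}

int main(void)
{
	atomic_int congestion = 0;
	struct group g = { .congestion_count = &congestion };

	group_use_delay(&g);
	group_use_delay(&g);   /* second user: no extra congestion bump */
	group_clear_delay(&g);
	printf("congestion_count = %d\n", atomic_load(&congestion));  /* prints 0 */
	return 0;
}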
5 changes: 4 additions & 1 deletion block/blk-core.c
@@ -791,8 +791,11 @@ void submit_bio_noacct(struct bio *bio)
		}
	}

-	if (!(q->limits.features & BLK_FEAT_POLL))
+	if (!(q->limits.features & BLK_FEAT_POLL) &&
+	    (bio->bi_opf & REQ_POLLED)) {
		bio_clear_polled(bio);
+		goto not_supported;
+	}

	switch (bio_op(bio)) {
	case REQ_OP_READ:
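The blk-core change above stops submit_bio_noacct() from silently accepting a polled bio on a queue without poll support: the polled flag is cleared and the submission is failed. A self-contained sketch of that check follows; the flag names and types here are illustrative, not the kernel's definitions.

#include <stdbool.h>
#include <stdio.h>

/* Illustrative feature/flag bits; not the kernel's values. */
#define FEAT_POLL   (1u << 0)   /* queue supports polled completions */
#define REQ_POLLED  (1u << 1)   /* submitter asked for a polled bio  */

struct queue { unsigned int features; };
struct bio   { unsigned int opf; };

/* Returns false (and clears the polled bit) when polling was requested
 * but the queue cannot honour it, so the caller can fail the bio early. */
static bool check_polled(const struct queue *q, struct bio *bio)
{
	if (!(q->features & FEAT_POLL) && (bio->opf & REQ_POLLED)) {
		bio->opf &= ~REQ_POLLED;  /* mirrors bio_clear_polled() */
		return false;             /* caller takes the "not supported" path */
	}
	return true;
}

int main(void)
{
	struct queue q = { .features = 0 };       /* no poll support */
	struct bio bio = { .opf = REQ_POLLED };   /* polled submission */

	printf("polled bio accepted: %s\n", check_polled(&q, &bio) ? "yes" : "no");
	return 0;
}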
26 changes: 21 additions & 5 deletions block/blk-mq-debugfs.c
@@ -5,6 +5,7 @@

#include <linux/kernel.h>
#include <linux/blkdev.h>
+#include <linux/build_bug.h>
#include <linux/debugfs.h>

#include "blk.h"
@@ -79,7 +80,6 @@ static int queue_pm_only_show(void *data, struct seq_file *m)

#define QUEUE_FLAG_NAME(name) [QUEUE_FLAG_##name] = #name
static const char *const blk_queue_flag_name[] = {
-	QUEUE_FLAG_NAME(STOPPED),
	QUEUE_FLAG_NAME(DYING),
	QUEUE_FLAG_NAME(NOMERGES),
	QUEUE_FLAG_NAME(SAME_COMP),
@@ -100,6 +100,7 @@ static int queue_state_show(void *data, struct seq_file *m)
{
	struct request_queue *q = data;

+	BUILD_BUG_ON(ARRAY_SIZE(blk_queue_flag_name) != QUEUE_FLAG_MAX);
	blk_flags_show(m, q->queue_flags, blk_queue_flag_name,
		       ARRAY_SIZE(blk_queue_flag_name));
	seq_puts(m, "\n");
@@ -164,6 +165,7 @@ static int hctx_state_show(void *data, struct seq_file *m)
{
	struct blk_mq_hw_ctx *hctx = data;

+	BUILD_BUG_ON(ARRAY_SIZE(hctx_state_name) != BLK_MQ_S_MAX);
	blk_flags_show(m, hctx->state, hctx_state_name,
		       ARRAY_SIZE(hctx_state_name));
	seq_puts(m, "\n");
@@ -181,10 +183,11 @@ static const char *const alloc_policy_name[] = {
static const char *const hctx_flag_name[] = {
	HCTX_FLAG_NAME(SHOULD_MERGE),
	HCTX_FLAG_NAME(TAG_QUEUE_SHARED),
-	HCTX_FLAG_NAME(BLOCKING),
-	HCTX_FLAG_NAME(NO_SCHED),
	HCTX_FLAG_NAME(STACKING),
	HCTX_FLAG_NAME(TAG_HCTX_SHARED),
+	HCTX_FLAG_NAME(BLOCKING),
+	HCTX_FLAG_NAME(NO_SCHED),
+	HCTX_FLAG_NAME(NO_SCHED_BY_DEFAULT),
};
#undef HCTX_FLAG_NAME

@@ -193,6 +196,10 @@ static int hctx_flags_show(void *data, struct seq_file *m)
	struct blk_mq_hw_ctx *hctx = data;
	const int alloc_policy = BLK_MQ_FLAG_TO_ALLOC_POLICY(hctx->flags);

+	BUILD_BUG_ON(ARRAY_SIZE(hctx_flag_name) !=
+			BLK_MQ_F_ALLOC_POLICY_START_BIT);
+	BUILD_BUG_ON(ARRAY_SIZE(alloc_policy_name) != BLK_TAG_ALLOC_MAX);
+
	seq_puts(m, "alloc_policy=");
	if (alloc_policy < ARRAY_SIZE(alloc_policy_name) &&
	    alloc_policy_name[alloc_policy])
@@ -223,12 +230,17 @@ static const char *const cmd_flag_name[] = {
	CMD_FLAG_NAME(RAHEAD),
	CMD_FLAG_NAME(BACKGROUND),
	CMD_FLAG_NAME(NOWAIT),
-	CMD_FLAG_NAME(NOUNMAP),
	CMD_FLAG_NAME(POLLED),
+	CMD_FLAG_NAME(ALLOC_CACHE),
+	CMD_FLAG_NAME(SWAP),
+	CMD_FLAG_NAME(DRV),
+	CMD_FLAG_NAME(FS_PRIVATE),
+	CMD_FLAG_NAME(ATOMIC),
+	CMD_FLAG_NAME(NOUNMAP),
};
#undef CMD_FLAG_NAME

-#define RQF_NAME(name) [ilog2((__force u32)RQF_##name)] = #name
+#define RQF_NAME(name) [__RQF_##name] = #name
static const char *const rqf_name[] = {
	RQF_NAME(STARTED),
	RQF_NAME(FLUSH_SEQ),
@@ -243,6 +255,7 @@ static const char *const rqf_name[] = {
	RQF_NAME(HASHED),
	RQF_NAME(STATS),
	RQF_NAME(SPECIAL_PAYLOAD),
+	RQF_NAME(ZONE_WRITE_PLUGGING),
	RQF_NAME(TIMED_OUT),
	RQF_NAME(RESV),
};
@@ -268,6 +281,9 @@ int __blk_mq_debugfs_rq_show(struct seq_file *m, struct request *rq)
	const enum req_op op = req_op(rq);
	const char *op_str = blk_op_str(op);

+	BUILD_BUG_ON(ARRAY_SIZE(cmd_flag_name) != __REQ_NR_BITS);
+	BUILD_BUG_ON(ARRAY_SIZE(rqf_name) != __RQF_BITS);
+
	seq_printf(m, "%p {.op=", rq);
	if (strcmp(op_str, "UNKNOWN") == 0)
		seq_printf(m, "%u", op);
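The debugfs series keeps each *_name[] array in lockstep with its flag definitions by turning the flag bits into enums and adding BUILD_BUG_ON() size checks. A userspace sketch of the same hardening pattern, using C11 static_assert and made-up flag names:

#include <assert.h>   /* static_assert (C11) */
#include <stdio.h>

/* Bit indexes first, as an enum, so the compiler knows how many there are. */
enum demo_flag_bits {
	DEMO_FLAG_STOPPED,
	DEMO_FLAG_DYING,
	DEMO_FLAG_NOMERGES,
	DEMO_FLAG_MAX,          /* must stay last */
};

#define DEMO_FLAG_NAME(name) [DEMO_FLAG_##name] = #name
static const char *const demo_flag_name[] = {
	DEMO_FLAG_NAME(STOPPED),
	DEMO_FLAG_NAME(DYING),
	DEMO_FLAG_NAME(NOMERGES),
};
#undef DEMO_FLAG_NAME

/* Fails the build if someone adds a bit but forgets the name entry. */
static_assert(sizeof(demo_flag_name) / sizeof(demo_flag_name[0]) == DEMO_FLAG_MAX,
	      "demo_flag_name[] is missing an entry");

static void show_flags(unsigned long flags)
{
	for (int bit = 0; bit < DEMO_FLAG_MAX; bit++)
		if (flags & (1UL << bit))
			printf("%s ", demo_flag_name[bit]);
	printf("\n");
}

int main(void)
{
	show_flags((1UL << DEMO_FLAG_DYING) | (1UL << DEMO_FLAG_NOMERGES));
	return 0;
}

With this arrangement, adding a new bit without a matching string breaks the build instead of producing a silent gap in the debugfs output.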
2 changes: 2 additions & 0 deletions block/blk-mq.h
@@ -36,6 +36,8 @@ enum {
	BLK_MQ_TAG_MAX = BLK_MQ_NO_TAG - 1,
};

+#define BLK_MQ_CPU_WORK_BATCH (8)
+
typedef unsigned int __bitwise blk_insert_t;
#define BLK_MQ_INSERT_AT_HEAD ((__force blk_insert_t)0x01)

49 changes: 39 additions & 10 deletions drivers/md/md-cluster.c
@@ -15,6 +15,7 @@

#define LVB_SIZE 64
#define NEW_DEV_TIMEOUT 5000
+#define WAIT_DLM_LOCK_TIMEOUT (30 * HZ)

struct dlm_lock_resource {
	dlm_lockspace_t *ls;
@@ -56,6 +57,7 @@ struct resync_info {
#define MD_CLUSTER_ALREADY_IN_CLUSTER 6
#define MD_CLUSTER_PENDING_RECV_EVENT 7
#define MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD 8
+#define MD_CLUSTER_WAITING_FOR_SYNC 9

struct md_cluster_info {
	struct mddev *mddev; /* the md device which md_cluster_info belongs to */
@@ -91,6 +93,7 @@ struct md_cluster_info {
	sector_t sync_hi;
};

+/* For compatibility, add the new msg_type at the end. */
enum msg_type {
	METADATA_UPDATED = 0,
	RESYNCING,
@@ -100,6 +103,7 @@ enum msg_type {
	BITMAP_NEEDS_SYNC,
	CHANGE_CAPACITY,
	BITMAP_RESIZE,
+	RESYNCING_START,
};

struct cluster_msg {
@@ -130,8 +134,13 @@ static int dlm_lock_sync(struct dlm_lock_resource *res, int mode)
			0, sync_ast, res, res->bast);
	if (ret)
		return ret;
-	wait_event(res->sync_locking, res->sync_locking_done);
+	ret = wait_event_timeout(res->sync_locking, res->sync_locking_done,
+				WAIT_DLM_LOCK_TIMEOUT);
	res->sync_locking_done = false;
+	if (!ret) {
+		pr_err("locking DLM '%s' timeout!\n", res->name);
+		return -EBUSY;
+	}
	if (res->lksb.sb_status == 0)
		res->mode = mode;
	return res->lksb.sb_status;
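The dlm_lock_sync() change above swaps an unbounded wait_event() for wait_event_timeout(), so a lost DLM completion can no longer hang the caller; it returns -EBUSY instead. A rough userspace analogue of that pattern using a pthread condition variable — struct lock_res and wait_done_timeout() are invented for illustration:

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

/* Illustrative stand-in for the DLM lock resource; not the kernel type. */
struct lock_res {
	pthread_mutex_t lock;
	pthread_cond_t  done_cv;
	bool            done;
};

/* Wait for res->done, but give up after timeout_sec seconds, mirroring the
 * switch from wait_event() to wait_event_timeout() in dlm_lock_sync(). */
static int wait_done_timeout(struct lock_res *res, int timeout_sec)
{
	struct timespec deadline;
	int err = 0;

	clock_gettime(CLOCK_REALTIME, &deadline);
	deadline.tv_sec += timeout_sec;

	pthread_mutex_lock(&res->lock);
	while (!res->done && err != ETIMEDOUT)
		err = pthread_cond_timedwait(&res->done_cv, &res->lock, &deadline);
	res->done = false;                     /* consume the completion */
	pthread_mutex_unlock(&res->lock);

	return err == ETIMEDOUT ? -EBUSY : 0;  /* caller can bail out instead of hanging */
}

int main(void)
{
	struct lock_res res = {
		.lock    = PTHREAD_MUTEX_INITIALIZER,
		.done_cv = PTHREAD_COND_INITIALIZER,
		.done    = false,
	};

	/* Nothing ever signals done_cv here, so this returns -EBUSY after 1s. */
	printf("wait_done_timeout() = %d\n", wait_done_timeout(&res, 1));
	return 0;
}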
@@ -455,6 +464,7 @@ static void process_suspend_info(struct mddev *mddev,
		clear_bit(MD_RESYNCING_REMOTE, &mddev->recovery);
		remove_suspend_info(mddev, slot);
		set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+		clear_bit(MD_CLUSTER_WAITING_FOR_SYNC, &cinfo->state);
		md_wakeup_thread(mddev->thread);
		return;
	}
@@ -525,6 +535,7 @@ static int process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg)
		res = -1;
	}
	clear_bit(MD_CLUSTER_WAITING_FOR_NEWDISK, &cinfo->state);
+	set_bit(MD_CLUSTER_WAITING_FOR_SYNC, &cinfo->state);
	return res;
}

@@ -593,6 +604,9 @@ static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
	case CHANGE_CAPACITY:
		set_capacity_and_notify(mddev->gendisk, mddev->array_sectors);
		break;
+	case RESYNCING_START:
+		clear_bit(MD_CLUSTER_WAITING_FOR_SYNC, &mddev->cluster_info->state);
+		break;
	case RESYNCING:
		set_bit(MD_RESYNCING_REMOTE, &mddev->recovery);
		process_suspend_info(mddev, le32_to_cpu(msg->slot),
@@ -743,15 +757,15 @@ static void unlock_comm(struct md_cluster_info *cinfo)
 */
static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
{
-	int error;
+	int error, unlock_error;
	int slot = cinfo->slot_number - 1;

	cmsg->slot = cpu_to_le32(slot);
	/*get EX on Message*/
	error = dlm_lock_sync(cinfo->message_lockres, DLM_LOCK_EX);
	if (error) {
		pr_err("md-cluster: failed to get EX on MESSAGE (%d)\n", error);
-		goto failed_message;
+		return error;
	}

	memcpy(cinfo->message_lockres->lksb.sb_lvbptr, (void *)cmsg,
@@ -781,14 +795,10 @@ static int __sendmsg(struct md_cluster_info *cinfo, struct cluster_msg *cmsg)
	}

failed_ack:
-	error = dlm_unlock_sync(cinfo->message_lockres);
-	if (unlikely(error != 0)) {
+	while ((unlock_error = dlm_unlock_sync(cinfo->message_lockres)))
		pr_err("md-cluster: failed convert to NL on MESSAGE(%d)\n",
-			error);
-		/* in case the message can't be released due to some reason */
-		goto failed_ack;
-	}
-failed_message:
+			unlock_error);

	return error;
}

@@ -1343,6 +1353,23 @@ static void resync_info_get(struct mddev *mddev, sector_t *lo, sector_t *hi)
	spin_unlock_irq(&cinfo->suspend_lock);
}

+static int resync_status_get(struct mddev *mddev)
+{
+	struct md_cluster_info *cinfo = mddev->cluster_info;
+
+	return test_bit(MD_CLUSTER_WAITING_FOR_SYNC, &cinfo->state);
+}
+
+static int resync_start_notify(struct mddev *mddev)
+{
+	struct md_cluster_info *cinfo = mddev->cluster_info;
+	struct cluster_msg cmsg = {0};
+
+	cmsg.type = cpu_to_le32(RESYNCING_START);
+
+	return sendmsg(cinfo, &cmsg, 0);
+}
+
static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
{
	struct md_cluster_info *cinfo = mddev->cluster_info;
@@ -1577,6 +1604,8 @@ static const struct md_cluster_operations cluster_ops = {
	.resync_start = resync_start,
	.resync_finish = resync_finish,
	.resync_info_update = resync_info_update,
+	.resync_start_notify = resync_start_notify,
+	.resync_status_get = resync_status_get,
	.resync_info_get = resync_info_get,
	.metadata_update_start = metadata_update_start,
	.metadata_update_finish = metadata_update_finish,
2 changes: 2 additions & 0 deletions drivers/md/md-cluster.h
@@ -14,6 +14,8 @@ struct md_cluster_operations {
	int (*leave)(struct mddev *mddev);
	int (*slot_number)(struct mddev *mddev);
	int (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi);
+	int (*resync_start_notify)(struct mddev *mddev);
+	int (*resync_status_get)(struct mddev *mddev);
	void (*resync_info_get)(struct mddev *mddev, sector_t *lo, sector_t *hi);
	int (*metadata_update_start)(struct mddev *mddev);
	int (*metadata_update_finish)(struct mddev *mddev);
17 changes: 14 additions & 3 deletions drivers/md/md.c
@@ -8978,7 +8978,8 @@ void md_do_sync(struct md_thread *thread)
	 * This will mean we have to start checking from the beginning again.
	 *
	 */
-
+	if (mddev_is_clustered(mddev))
+		md_cluster_ops->resync_start_notify(mddev);
	do {
		int mddev2_minor = -1;
		mddev->curr_resync = MD_RESYNC_DELAYED;
@@ -9992,8 +9993,18 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
			 */
			if (rdev2->raid_disk == -1 && role != MD_DISK_ROLE_SPARE &&
			    !(le32_to_cpu(sb->feature_map) &
-			      MD_FEATURE_RESHAPE_ACTIVE)) {
-				rdev2->saved_raid_disk = role;
+			      MD_FEATURE_RESHAPE_ACTIVE) &&
+			    !md_cluster_ops->resync_status_get(mddev)) {
+				/*
+				 * -1 to make raid1_add_disk() set conf->fullsync
+				 * to 1. This could avoid skipping sync when the
+				 * remote node is down during resyncing.
+				 */
+				if ((le32_to_cpu(sb->feature_map)
+				    & MD_FEATURE_RECOVERY_OFFSET))
+					rdev2->saved_raid_disk = -1;
+				else
+					rdev2->saved_raid_disk = role;
				ret = remove_and_add_spares(mddev, rdev2);
				pr_info("Activated spare: %pg\n",
					rdev2->bdev);
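The check_sb_changes() hunk above adds two conditions before activating a spare on a cluster node: skip activation while the array is still waiting for the remote resync to start, and use saved_raid_disk = -1 when the superblock carries a recovery offset so the disk gets a full resync rather than a skipped one. A compact sketch of that decision — the function and parameter names are invented for illustration:

#include <stdbool.h>
#include <stdio.h>

/* Decide whether and how to activate a re-added device on a cluster node.
 * Returns false when activation must wait (remote resync not started yet);
 * otherwise *saved_slot is the slot to resume from, or -1 to force a full
 * resync, echoing the comment added in check_sb_changes(). */
static bool plan_spare_activation(bool waiting_for_sync,
				  bool has_recovery_offset,
				  int role, int *saved_slot)
{
	if (waiting_for_sync)
		return false;

	*saved_slot = has_recovery_offset ? -1 : role;
	return true;
}

int main(void)
{
	int slot;

	if (plan_spare_activation(false, true, 2, &slot))
		printf("activate spare, saved slot %d (full resync)\n", slot);
	return 0;
}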
1 change: 1 addition & 0 deletions drivers/md/raid1.c
@@ -680,6 +680,7 @@ static int choose_slow_rdev(struct r1conf *conf, struct r1bio *r1_bio,
		len = r1_bio->sectors;
		read_len = raid1_check_read_range(rdev, this_sector, &len);
		if (read_len == r1_bio->sectors) {
+			*max_sectors = read_len;
			update_read_sectors(conf, disk, this_sector, read_len);
			return disk;
		}
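The raid1 fix above is a classic out-parameter bug: one early-return path handed back a chosen disk without also setting *max_sectors, leaving the caller with a stale value. A minimal illustration of the rule that every successful return path must fill the out-parameter (names and logic are simplified stand-ins, not the raid1 code):

#include <stdio.h>

/* Pick a disk for a read of `want` sectors; on success the function must
 * also report how many sectors the chosen disk can serve via *max_sectors.
 * Forgetting that assignment on one return path is the shape of the bug
 * fixed above. */
static int choose_disk(int want, int readable, int *max_sectors)
{
	if (readable >= want) {
		*max_sectors = want;       /* full range served; the line the fix adds */
		return 0;                  /* disk 0 chosen */
	}
	if (readable > 0) {
		*max_sectors = readable;   /* partial read, caller will split */
		return 0;
	}
	return -1;                         /* no usable disk */
}

int main(void)
{
	int max_sectors = 0;

	if (choose_disk(8, 16, &max_sectors) == 0)
		printf("reading %d sectors\n", max_sectors);
	return 0;
}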
10 changes: 8 additions & 2 deletions drivers/s390/block/dasd_devmap.c
@@ -2248,13 +2248,19 @@ static ssize_t dasd_copy_pair_store(struct device *dev,

	/* allocate primary devmap if needed */
	prim_devmap = dasd_find_busid(prim_busid);
-	if (IS_ERR(prim_devmap))
+	if (IS_ERR(prim_devmap)) {
		prim_devmap = dasd_add_busid(prim_busid, DASD_FEATURE_DEFAULT);
+		if (IS_ERR(prim_devmap))
+			return PTR_ERR(prim_devmap);
+	}

	/* allocate secondary devmap if needed */
	sec_devmap = dasd_find_busid(sec_busid);
-	if (IS_ERR(sec_devmap))
+	if (IS_ERR(sec_devmap)) {
		sec_devmap = dasd_add_busid(sec_busid, DASD_FEATURE_DEFAULT);
+		if (IS_ERR(sec_devmap))
+			return PTR_ERR(sec_devmap);
+	}

	/* setting copy relation is only allowed for offline secondary */
	if (sec_devmap->device)
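The dasd_copy_pair_store() fix adds the error checks missing after dasd_add_busid(): a failed lookup falls back to allocation, and that allocation can itself fail. The sketch below mirrors that find-or-add flow with simplified ERR_PTR()-style helpers; the helpers and types here are stand-ins for illustration, not the kernel's implementations.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

/* Minimal ERR_PTR-style helpers so the control flow mirrors the kernel code. */
static inline void *ERR_PTR(long err) { return (void *)err; }
static inline long PTR_ERR(const void *p) { return (long)p; }
static inline int IS_ERR(const void *p) { return (unsigned long)p >= (unsigned long)-4095; }

struct devmap { char busid[16]; };

static struct devmap *find_busid(const char *busid)
{
	(void)busid;
	return ERR_PTR(-ENODEV);              /* pretend it does not exist yet */
}

static struct devmap *add_busid(const char *busid)
{
	struct devmap *d = calloc(1, sizeof(*d));

	if (!d)
		return ERR_PTR(-ENOMEM);
	snprintf(d->busid, sizeof(d->busid), "%s", busid);
	return d;
}

/* The pattern the dasd fix enforces: if the lookup fails, try to allocate,
 * and check *that* result too before dereferencing it. */
static int get_or_create(const char *busid, struct devmap **out)
{
	struct devmap *map = find_busid(busid);

	if (IS_ERR(map)) {
		map = add_busid(busid);
		if (IS_ERR(map))
			return PTR_ERR(map);
	}
	*out = map;
	return 0;
}

int main(void)
{
	struct devmap *map;

	if (get_or_create("0.0.1234", &map) == 0)
		printf("devmap for %s ready\n", map->busid);
	return 0;
}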
1 change: 1 addition & 0 deletions drivers/s390/block/dasd_diag.c
@@ -29,6 +29,7 @@
#include "dasd_int.h"
#include "dasd_diag.h"

+MODULE_DESCRIPTION("S/390 Support for DIAG access to DASD Disks");
MODULE_LICENSE("GPL");

/* The maximum number of blocks per request (max_blocks) is dependent on the
(remaining changed files not shown)
