From 094bb5d766cfcdae47e332c6d6713c7029241be1 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 21 Nov 2017 01:12:43 +0100 Subject: [PATCH 01/36] target-core: don't use "const char*" for a buffer that is written to iscsi_parse_pr_out_transport_id launders the const away via a call to strstr(), and then modifies the buffer (writing a nul byte) through the return value. It's cleaner to be honest and simply declare the parameter as "char*", fixing up the call chain, and allowing us to drop the cast in the return statement. Amusingly, the two current callers found it necessary to cast a non-const pointer to a const. Signed-off-by: Rasmus Villemoes Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_fabric_lib.c | 6 +++--- drivers/target/target_core_internal.h | 2 +- drivers/target/target_core_pr.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/target/target_core_fabric_lib.c b/drivers/target/target_core_fabric_lib.c index 508da345b73fdb..71a80257a05260 100644 --- a/drivers/target/target_core_fabric_lib.c +++ b/drivers/target/target_core_fabric_lib.c @@ -273,7 +273,7 @@ static int iscsi_get_pr_transport_id_len( static char *iscsi_parse_pr_out_transport_id( struct se_portal_group *se_tpg, - const char *buf, + char *buf, u32 *out_tid_len, char **port_nexus_ptr) { @@ -356,7 +356,7 @@ static char *iscsi_parse_pr_out_transport_id( } } - return (char *)&buf[4]; + return &buf[4]; } int target_get_pr_transport_id_len(struct se_node_acl *nacl, @@ -405,7 +405,7 @@ int target_get_pr_transport_id(struct se_node_acl *nacl, } const char *target_parse_pr_out_transport_id(struct se_portal_group *tpg, - const char *buf, u32 *out_tid_len, char **port_nexus_ptr) + char *buf, u32 *out_tid_len, char **port_nexus_ptr) { u32 offset; diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index 9384d19a7326c8..6d53d9fcb883b7 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -102,7 +102,7 @@ int target_get_pr_transport_id(struct se_node_acl *nacl, struct t10_pr_registration *pr_reg, int *format_code, unsigned char *buf); const char *target_parse_pr_out_transport_id(struct se_portal_group *tpg, - const char *buf, u32 *out_tid_len, char **port_nexus_ptr); + char *buf, u32 *out_tid_len, char **port_nexus_ptr); /* target_core_hba.c */ struct se_hba *core_alloc_hba(const char *, u32, u32); diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c index b024613f921718..01ac306131c1f1 100644 --- a/drivers/target/target_core_pr.c +++ b/drivers/target/target_core_pr.c @@ -1601,7 +1601,7 @@ core_scsi3_decode_spec_i_port( dest_rtpi = tmp_lun->lun_rtpi; i_str = target_parse_pr_out_transport_id(tmp_tpg, - (const char *)ptr, &tid_len, &iport_ptr); + ptr, &tid_len, &iport_ptr); if (!i_str) continue; @@ -3287,7 +3287,7 @@ core_scsi3_emulate_pro_register_and_move(struct se_cmd *cmd, u64 res_key, goto out; } initiator_str = target_parse_pr_out_transport_id(dest_se_tpg, - (const char *)&buf[24], &tmp_tid_len, &iport_ptr); + &buf[24], &tmp_tid_len, &iport_ptr); if (!initiator_str) { pr_err("SPC-3 PR REGISTER_AND_MOVE: Unable to locate" " initiator_str from Transport ID\n"); From 26d2b3106f6015b1d19ae5f8b0cc1fc7fe8e669e Mon Sep 17 00:00:00 2001 From: tangwenji Date: Tue, 28 Nov 2017 12:40:27 -0600 Subject: [PATCH 02/36] tcmu: fix page addr in tcmu_flush_dcache_range The page addr should be update. Signed-off-by: tangwenji Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index a415d87f22d242..6bcaa8b5684c0f 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -455,12 +455,13 @@ static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd) static inline void tcmu_flush_dcache_range(void *vaddr, size_t size) { unsigned long offset = offset_in_page(vaddr); + void *start = vaddr - offset; size = round_up(size+offset, PAGE_SIZE); - vaddr -= offset; while (size) { - flush_dcache_page(virt_to_page(vaddr)); + flush_dcache_page(virt_to_page(start)); + start += PAGE_SIZE; size -= PAGE_SIZE; } } From bf99ec13327bb5b0f6475aea8735c0ca34cc2a26 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 28 Nov 2017 12:40:28 -0600 Subject: [PATCH 03/36] tcmu: merge common block release code Have unmap_thread_fn use tcmu_blocks_release. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 6bcaa8b5684c0f..d9fd91ee8282d9 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1302,21 +1302,19 @@ static int tcmu_check_and_free_pending_cmd(struct tcmu_cmd *cmd) return -EINVAL; } -static void tcmu_blocks_release(struct tcmu_dev *udev) +static void tcmu_blocks_release(struct radix_tree_root *blocks, + int start, int end) { int i; struct page *page; - /* Try to release all block pages */ - mutex_lock(&udev->cmdr_lock); - for (i = 0; i <= udev->dbi_max; i++) { - page = radix_tree_delete(&udev->data_blocks, i); + for (i = start; i < end; i++) { + page = radix_tree_delete(blocks, i); if (page) { __free_page(page); atomic_dec(&global_db_count); } } - mutex_unlock(&udev->cmdr_lock); } static void tcmu_dev_kref_release(struct kref *kref) @@ -1340,7 +1338,9 @@ static void tcmu_dev_kref_release(struct kref *kref) spin_unlock_irq(&udev->commands_lock); WARN_ON(!all_expired); - tcmu_blocks_release(udev); + mutex_lock(&udev->cmdr_lock); + tcmu_blocks_release(&udev->data_blocks, 0, udev->dbi_max + 1); + mutex_unlock(&udev->cmdr_lock); call_rcu(&dev->rcu_head, tcmu_dev_call_rcu); } @@ -1978,8 +1978,6 @@ static int unmap_thread_fn(void *data) struct tcmu_dev *udev; loff_t off; uint32_t start, end, block; - struct page *page; - int i; while (!kthread_should_stop()) { DEFINE_WAIT(__wait); @@ -2027,13 +2025,7 @@ static int unmap_thread_fn(void *data) unmap_mapping_range(udev->inode->i_mapping, off, 0, 1); /* Release the block pages */ - for (i = start; i < end; i++) { - page = radix_tree_delete(&udev->data_blocks, i); - if (page) { - __free_page(page); - atomic_dec(&global_db_count); - } - } + tcmu_blocks_release(&udev->data_blocks, start, end); mutex_unlock(&udev->cmdr_lock); } From 89ec9cfd3b644fbc36047e36776509130d2fc1ec Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 28 Nov 2017 12:40:29 -0600 Subject: [PATCH 04/36] tcmu: split unmap_thread_fn Separate unmap_thread_fn to make it easier to read. Note: this patch does not fix the bug where we might miss a wake up call. The next patch will fix that. This patch only separates the code into functions. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 120 +++++++++++++++++------------- 1 file changed, 70 insertions(+), 50 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index d9fd91ee8282d9..cab6c72eb012a2 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1973,71 +1973,91 @@ static struct target_backend_ops tcmu_ops = { .tb_dev_attrib_attrs = NULL, }; -static int unmap_thread_fn(void *data) + +static void find_free_blocks(void) { struct tcmu_dev *udev; loff_t off; uint32_t start, end, block; - while (!kthread_should_stop()) { - DEFINE_WAIT(__wait); - - prepare_to_wait(&unmap_wait, &__wait, TASK_INTERRUPTIBLE); - schedule(); - finish_wait(&unmap_wait, &__wait); + mutex_lock(&root_udev_mutex); + list_for_each_entry(udev, &root_udev, node) { + mutex_lock(&udev->cmdr_lock); - if (kthread_should_stop()) - break; + /* Try to complete the finished commands first */ + tcmu_handle_completions(udev); - mutex_lock(&root_udev_mutex); - list_for_each_entry(udev, &root_udev, node) { - mutex_lock(&udev->cmdr_lock); + /* Skip the udevs waiting the global pool or in idle */ + if (udev->waiting_global || !udev->dbi_thresh) { + mutex_unlock(&udev->cmdr_lock); + continue; + } - /* Try to complete the finished commands first */ - tcmu_handle_completions(udev); + end = udev->dbi_max + 1; + block = find_last_bit(udev->data_bitmap, end); + if (block == udev->dbi_max) { + /* + * The last bit is dbi_max, so there is + * no need to shrink any blocks. + */ + mutex_unlock(&udev->cmdr_lock); + continue; + } else if (block == end) { + /* The current udev will goto idle state */ + udev->dbi_thresh = start = 0; + udev->dbi_max = 0; + } else { + udev->dbi_thresh = start = block + 1; + udev->dbi_max = block; + } - /* Skip the udevs waiting the global pool or in idle */ - if (udev->waiting_global || !udev->dbi_thresh) { - mutex_unlock(&udev->cmdr_lock); - continue; - } + /* Here will truncate the data area from off */ + off = udev->data_off + start * DATA_BLOCK_SIZE; + unmap_mapping_range(udev->inode->i_mapping, off, 0, 1); - end = udev->dbi_max + 1; - block = find_last_bit(udev->data_bitmap, end); - if (block == udev->dbi_max) { - /* - * The last bit is dbi_max, so there is - * no need to shrink any blocks. - */ - mutex_unlock(&udev->cmdr_lock); - continue; - } else if (block == end) { - /* The current udev will goto idle state */ - udev->dbi_thresh = start = 0; - udev->dbi_max = 0; - } else { - udev->dbi_thresh = start = block + 1; - udev->dbi_max = block; - } + /* Release the block pages */ + tcmu_blocks_release(&udev->data_blocks, start, end); + mutex_unlock(&udev->cmdr_lock); + } + mutex_unlock(&root_udev_mutex); +} - /* Here will truncate the data area from off */ - off = udev->data_off + start * DATA_BLOCK_SIZE; - unmap_mapping_range(udev->inode->i_mapping, off, 0, 1); +static void run_cmdr_queues(void) +{ + struct tcmu_dev *udev; - /* Release the block pages */ - tcmu_blocks_release(&udev->data_blocks, start, end); + /* + * Try to wake up the udevs who are waiting + * for the global data block pool. + */ + mutex_lock(&root_udev_mutex); + list_for_each_entry(udev, &root_udev, node) { + mutex_lock(&udev->cmdr_lock); + if (!udev->waiting_global) { mutex_unlock(&udev->cmdr_lock); + break; } + mutex_unlock(&udev->cmdr_lock); - /* - * Try to wake up the udevs who are waiting - * for the global data pool. - */ - list_for_each_entry(udev, &root_udev, node) { - if (udev->waiting_global) - wake_up(&udev->wait_cmdr); - } - mutex_unlock(&root_udev_mutex); + wake_up(&udev->wait_cmdr); + } + mutex_unlock(&root_udev_mutex); +} + +static int unmap_thread_fn(void *data) +{ + while (!kthread_should_stop()) { + DEFINE_WAIT(__wait); + + prepare_to_wait(&unmap_wait, &__wait, TASK_INTERRUPTIBLE); + schedule(); + finish_wait(&unmap_wait, &__wait); + + if (kthread_should_stop()) + break; + + find_free_blocks(); + run_cmdr_queues(); } return 0; From 9972cebb59a653cca735178a70c8ab09a5f4de1a Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 28 Nov 2017 12:40:30 -0600 Subject: [PATCH 05/36] tcmu: fix unmap thread race If the unmap thread has already run find_free_blocks but not yet run prepare_to_wait when a wake_up(&unmap_wait) call is done, the unmap thread is going to miss the wake call. Instead of adding checks for if new waiters were added this just has us use a work queue which will run us again in this type of case. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 43 +++++++------------------------ 1 file changed, 10 insertions(+), 33 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index cab6c72eb012a2..a9f5c52e8b1d9b 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include @@ -176,12 +176,11 @@ struct tcmu_cmd { unsigned long flags; }; -static struct task_struct *unmap_thread; -static wait_queue_head_t unmap_wait; static DEFINE_MUTEX(root_udev_mutex); static LIST_HEAD(root_udev); static atomic_t global_db_count = ATOMIC_INIT(0); +static struct work_struct tcmu_unmap_work; static struct kmem_cache *tcmu_cmd_cache; @@ -389,8 +388,7 @@ static bool tcmu_get_empty_blocks(struct tcmu_dev *udev, err: udev->waiting_global = true; - /* Try to wake up the unmap thread */ - wake_up(&unmap_wait); + schedule_work(&tcmu_unmap_work); return false; } @@ -1065,8 +1063,7 @@ static void tcmu_device_timedout(struct timer_list *t) idr_for_each(&udev->commands, tcmu_check_expired_cmd, NULL); spin_unlock_irqrestore(&udev->commands_lock, flags); - /* Try to wake up the ummap thread */ - wake_up(&unmap_wait); + schedule_work(&tcmu_unmap_work); /* * We don't need to wakeup threads on wait_cmdr since they have their @@ -2044,23 +2041,10 @@ static void run_cmdr_queues(void) mutex_unlock(&root_udev_mutex); } -static int unmap_thread_fn(void *data) +static void tcmu_unmap_work_fn(struct work_struct *work) { - while (!kthread_should_stop()) { - DEFINE_WAIT(__wait); - - prepare_to_wait(&unmap_wait, &__wait, TASK_INTERRUPTIBLE); - schedule(); - finish_wait(&unmap_wait, &__wait); - - if (kthread_should_stop()) - break; - - find_free_blocks(); - run_cmdr_queues(); - } - - return 0; + find_free_blocks(); + run_cmdr_queues(); } static int __init tcmu_module_init(void) @@ -2069,6 +2053,8 @@ static int __init tcmu_module_init(void) BUILD_BUG_ON((sizeof(struct tcmu_cmd_entry) % TCMU_OP_ALIGN_SIZE) != 0); + INIT_WORK(&tcmu_unmap_work, tcmu_unmap_work_fn); + tcmu_cmd_cache = kmem_cache_create("tcmu_cmd_cache", sizeof(struct tcmu_cmd), __alignof__(struct tcmu_cmd), @@ -2114,17 +2100,8 @@ static int __init tcmu_module_init(void) if (ret) goto out_attrs; - init_waitqueue_head(&unmap_wait); - unmap_thread = kthread_run(unmap_thread_fn, NULL, "tcmu_unmap"); - if (IS_ERR(unmap_thread)) { - ret = PTR_ERR(unmap_thread); - goto out_unreg_transport; - } - return 0; -out_unreg_transport: - target_backend_unregister(&tcmu_ops); out_attrs: kfree(tcmu_attrs); out_unreg_genl: @@ -2139,7 +2116,7 @@ static int __init tcmu_module_init(void) static void __exit tcmu_module_exit(void) { - kthread_stop(unmap_thread); + cancel_work_sync(&tcmu_unmap_work); target_backend_unregister(&tcmu_ops); kfree(tcmu_attrs); genl_unregister_family(&tcmu_genl_family); From 488ebe4c355fdead39dbb3f6a51329c16cbfcc60 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 28 Nov 2017 12:40:31 -0600 Subject: [PATCH 06/36] tcmu: move expired command completion to unmap thread This moves the expired command completion handling to the unmap wq, so the next patch can use a mutex in tcmu_check_expired_cmd. Note: tcmu_device_timedout's use of spin_lock_irq was not needed. The commands_lock is used between thread context (tcmu_queue_cmd_ring and tcmu_irqcontrol (even though this is named irqcontrol it is not run in irq context)) and timer/bh context. In the timer/bh context bhs are disabled, so you need to use the _bh lock calls from the thread context callers. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 48 +++++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 9 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index a9f5c52e8b1d9b..2ccc8e61449bf2 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -143,6 +143,7 @@ struct tcmu_dev { struct timer_list timeout; unsigned int cmd_time_out; + struct list_head timedout_entry; spinlock_t nl_cmd_lock; struct tcmu_nl_cmd curr_nl_cmd; @@ -179,6 +180,9 @@ struct tcmu_cmd { static DEFINE_MUTEX(root_udev_mutex); static LIST_HEAD(root_udev); +static DEFINE_SPINLOCK(timed_out_udevs_lock); +static LIST_HEAD(timed_out_udevs); + static atomic_t global_db_count = ATOMIC_INIT(0); static struct work_struct tcmu_unmap_work; @@ -1057,18 +1061,15 @@ static int tcmu_check_expired_cmd(int id, void *p, void *data) static void tcmu_device_timedout(struct timer_list *t) { struct tcmu_dev *udev = from_timer(udev, t, timeout); - unsigned long flags; - spin_lock_irqsave(&udev->commands_lock, flags); - idr_for_each(&udev->commands, tcmu_check_expired_cmd, NULL); - spin_unlock_irqrestore(&udev->commands_lock, flags); + pr_debug("%s cmd timeout has expired\n", udev->name); - schedule_work(&tcmu_unmap_work); + spin_lock(&timed_out_udevs_lock); + if (list_empty(&udev->timedout_entry)) + list_add_tail(&udev->timedout_entry, &timed_out_udevs); + spin_unlock(&timed_out_udevs_lock); - /* - * We don't need to wakeup threads on wait_cmdr since they have their - * own timeout. - */ + schedule_work(&tcmu_unmap_work); } static int tcmu_attach_hba(struct se_hba *hba, u32 host_id) @@ -1112,6 +1113,7 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name) init_waitqueue_head(&udev->wait_cmdr); mutex_init(&udev->cmdr_lock); + INIT_LIST_HEAD(&udev->timedout_entry); idr_init(&udev->commands); spin_lock_init(&udev->commands_lock); @@ -1325,6 +1327,11 @@ static void tcmu_dev_kref_release(struct kref *kref) vfree(udev->mb_addr); udev->mb_addr = NULL; + spin_lock_bh(&timed_out_udevs_lock); + if (!list_empty(&udev->timedout_entry)) + list_del(&udev->timedout_entry); + spin_unlock_bh(&timed_out_udevs_lock); + /* Upper layer should drain all requests before calling this */ spin_lock_irq(&udev->commands_lock); idr_for_each_entry(&udev->commands, cmd, i) { @@ -2041,8 +2048,31 @@ static void run_cmdr_queues(void) mutex_unlock(&root_udev_mutex); } +static void check_timedout_devices(void) +{ + struct tcmu_dev *udev, *tmp_dev; + LIST_HEAD(devs); + + spin_lock_bh(&timed_out_udevs_lock); + list_splice_init(&timed_out_udevs, &devs); + + list_for_each_entry_safe(udev, tmp_dev, &devs, timedout_entry) { + list_del_init(&udev->timedout_entry); + spin_unlock_bh(&timed_out_udevs_lock); + + spin_lock(&udev->commands_lock); + idr_for_each(&udev->commands, tcmu_check_expired_cmd, NULL); + spin_unlock(&udev->commands_lock); + + spin_lock_bh(&timed_out_udevs_lock); + } + + spin_unlock_bh(&timed_out_udevs_lock); +} + static void tcmu_unmap_work_fn(struct work_struct *work) { + check_timedout_devices(); find_free_blocks(); run_cmdr_queues(); } From 6fddcb775477bb2213bd76ab62145645eb570f33 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 28 Nov 2017 12:40:32 -0600 Subject: [PATCH 07/36] tcmu: remove commands_lock No need for the commands_lock. The cmdr_lock is already held during idr addition and deletion, so just grab it during traversal. Note: This also fixes a issue where we should have been using at least _bh locking in tcmu_handle_completions when taking the commands lock to prevent the case where tcmu_handle_completions could be interrupted by a timer softirq while the commands_lock is held. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 2ccc8e61449bf2..43583a792439ed 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -139,7 +139,6 @@ struct tcmu_dev { struct radix_tree_root data_blocks; struct idr commands; - spinlock_t commands_lock; struct timer_list timeout; unsigned int cmd_time_out; @@ -1014,10 +1013,7 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev) } WARN_ON(tcmu_hdr_get_op(entry->hdr.len_op) != TCMU_OP_CMD); - spin_lock(&udev->commands_lock); cmd = idr_remove(&udev->commands, entry->hdr.cmd_id); - spin_unlock(&udev->commands_lock); - if (!cmd) { pr_err("cmd_id not found, ring is broken\n"); set_bit(TCMU_DEV_BIT_BROKEN, &udev->flags); @@ -1115,7 +1111,6 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name) INIT_LIST_HEAD(&udev->timedout_entry); idr_init(&udev->commands); - spin_lock_init(&udev->commands_lock); timer_setup(&udev->timeout, tcmu_device_timedout, 0); @@ -1333,16 +1328,14 @@ static void tcmu_dev_kref_release(struct kref *kref) spin_unlock_bh(&timed_out_udevs_lock); /* Upper layer should drain all requests before calling this */ - spin_lock_irq(&udev->commands_lock); + mutex_lock(&udev->cmdr_lock); idr_for_each_entry(&udev->commands, cmd, i) { if (tcmu_check_and_free_pending_cmd(cmd) != 0) all_expired = false; } idr_destroy(&udev->commands); - spin_unlock_irq(&udev->commands_lock); WARN_ON(!all_expired); - mutex_lock(&udev->cmdr_lock); tcmu_blocks_release(&udev->data_blocks, 0, udev->dbi_max + 1); mutex_unlock(&udev->cmdr_lock); @@ -2060,9 +2053,9 @@ static void check_timedout_devices(void) list_del_init(&udev->timedout_entry); spin_unlock_bh(&timed_out_udevs_lock); - spin_lock(&udev->commands_lock); + mutex_lock(&udev->cmdr_lock); idr_for_each(&udev->commands, tcmu_check_expired_cmd, NULL); - spin_unlock(&udev->commands_lock); + mutex_unlock(&udev->cmdr_lock); spin_lock_bh(&timed_out_udevs_lock); } From 810b8153c4243d2012a6ec002ddd3bbc9a9ae8c2 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 28 Nov 2017 12:40:33 -0600 Subject: [PATCH 08/36] tcmu: release blocks for partially setup cmds If we cannot setup a cmd because we run out of ring space or global pages release the blocks before sleeping. This prevents a deadlock where dev0 has waiting_blocks set and needs N blocks, but dev1 to devX have each allocated N / X blocks and also hit the global block limit so they went to sleep. find_free_blocks is not able to take the sleeping dev's blocks becaause their waiting_blocks is set and even if it was not the block returned by find_last_bit could equal dbi_max. The latter will probably never happen because DATA_BLOCK_BITS is so high but in the next patches DATA_BLOCK_BITS and TCMU_GLOBAL_MAX_BLOCKS will be settable so it might be lower and could happen. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 43583a792439ed..c7541f090453d7 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -807,6 +807,13 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) int ret; DEFINE_WAIT(__wait); + /* + * Don't leave commands partially setup because the unmap + * thread might need the blocks to make forward progress. + */ + tcmu_cmd_free_data(tcmu_cmd, tcmu_cmd->dbi_cur); + tcmu_cmd_reset_dbi_cur(tcmu_cmd); + prepare_to_wait(&udev->wait_cmdr, &__wait, TASK_INTERRUPTIBLE); pr_debug("sleeping for ring space\n"); From 1a1fc0b88e9019cb3b2f291bdcb2d03d38614690 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 28 Nov 2017 12:40:34 -0600 Subject: [PATCH 09/36] tcmu: simplify scatter_data_area error handling scatter_data_area always returns 0, so stop checking for errors. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 31 +++++++------------------------ 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index c7541f090453d7..965f462eaa22da 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -520,7 +520,7 @@ static inline size_t iov_tail(struct iovec *iov) return (size_t)iov->iov_base + iov->iov_len; } -static int scatter_data_area(struct tcmu_dev *udev, +static void scatter_data_area(struct tcmu_dev *udev, struct tcmu_cmd *tcmu_cmd, struct scatterlist *data_sg, unsigned int data_nents, struct iovec **iov, int *iov_cnt, bool copy_data) @@ -573,8 +573,6 @@ static int scatter_data_area(struct tcmu_dev *udev, } if (to) kunmap_atomic(to); - - return 0; } static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd, @@ -864,33 +862,18 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) iov_cnt = 0; copy_to_data_area = (se_cmd->data_direction == DMA_TO_DEVICE || se_cmd->se_cmd_flags & SCF_BIDI); - ret = scatter_data_area(udev, tcmu_cmd, se_cmd->t_data_sg, - se_cmd->t_data_nents, &iov, &iov_cnt, - copy_to_data_area); - if (ret) { - tcmu_cmd_free_data(tcmu_cmd, tcmu_cmd->dbi_cnt); - mutex_unlock(&udev->cmdr_lock); - - pr_err("tcmu: alloc and scatter data failed\n"); - return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - } + scatter_data_area(udev, tcmu_cmd, se_cmd->t_data_sg, + se_cmd->t_data_nents, &iov, &iov_cnt, + copy_to_data_area); entry->req.iov_cnt = iov_cnt; /* Handle BIDI commands */ iov_cnt = 0; if (se_cmd->se_cmd_flags & SCF_BIDI) { iov++; - ret = scatter_data_area(udev, tcmu_cmd, - se_cmd->t_bidi_data_sg, - se_cmd->t_bidi_data_nents, - &iov, &iov_cnt, false); - if (ret) { - tcmu_cmd_free_data(tcmu_cmd, tcmu_cmd->dbi_cnt); - mutex_unlock(&udev->cmdr_lock); - - pr_err("tcmu: alloc and scatter bidi data failed\n"); - return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - } + scatter_data_area(udev, tcmu_cmd, se_cmd->t_bidi_data_sg, + se_cmd->t_bidi_data_nents, &iov, &iov_cnt, + false); } entry->req.iov_bidi_cnt = iov_cnt; From 3c0f26ff9d040c6193b33689bbc03103854dba4d Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 28 Nov 2017 12:40:35 -0600 Subject: [PATCH 10/36] tcmu: fix free block calculation The blocks_left calculation does not account for free blocks between 0 and thresh, so we could be queueing/waiting when there are enough blocks free. This has us add in the blocks between 0 and thresh as well as at the end from thresh to DATA_BLOCK_BITS. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 965f462eaa22da..5d1daea5107905 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -637,7 +637,7 @@ static void gather_data_area(struct tcmu_dev *udev, struct tcmu_cmd *cmd, static inline size_t spc_bitmap_free(unsigned long *bitmap, uint32_t thresh) { - return DATA_BLOCK_SIZE * (thresh - bitmap_weight(bitmap, thresh)); + return thresh - bitmap_weight(bitmap, thresh); } /* @@ -677,8 +677,9 @@ static bool is_ring_space_avail(struct tcmu_dev *udev, struct tcmu_cmd *cmd, /* try to check and get the data blocks as needed */ space = spc_bitmap_free(udev->data_bitmap, udev->dbi_thresh); - if (space < data_needed) { - unsigned long blocks_left = DATA_BLOCK_BITS - udev->dbi_thresh; + if ((space * DATA_BLOCK_SIZE) < data_needed) { + unsigned long blocks_left = DATA_BLOCK_BITS - udev->dbi_thresh + + space; unsigned long grow; if (blocks_left < blocks_needed) { From 3e60913579b2fefa74eeb3269426e864f4afa7e7 Mon Sep 17 00:00:00 2001 From: Xiubo Li Date: Tue, 28 Nov 2017 12:40:36 -0600 Subject: [PATCH 11/36] tcmu: clean up the scatter helper Add some comments to make the scatter code to be more readable, and drop unused arg to new_iov. Signed-off-by: Xiubo Li Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 5d1daea5107905..8d0dc471fce802 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -492,8 +492,7 @@ static inline size_t head_to_end(size_t head, size_t size) return size - head; } -static inline void new_iov(struct iovec **iov, int *iov_cnt, - struct tcmu_dev *udev) +static inline void new_iov(struct iovec **iov, int *iov_cnt) { struct iovec *iovec; @@ -546,19 +545,38 @@ static void scatter_data_area(struct tcmu_dev *udev, to = kmap_atomic(page); } - copy_bytes = min_t(size_t, sg_remaining, - block_remaining); + /* + * Covert to virtual offset of the ring data area. + */ to_offset = get_block_offset_user(udev, dbi, block_remaining); + /* + * The following code will gather and map the blocks + * to the same iovec when the blocks are all next to + * each other. + */ + copy_bytes = min_t(size_t, sg_remaining, + block_remaining); if (*iov_cnt != 0 && to_offset == iov_tail(*iov)) { + /* + * Will append to the current iovec, because + * the current block page is next to the + * previous one. + */ (*iov)->iov_len += copy_bytes; } else { - new_iov(iov, iov_cnt, udev); + /* + * Will allocate a new iovec because we are + * first time here or the current block page + * is not next to the previous one. + */ + new_iov(iov, iov_cnt); (*iov)->iov_base = (void __user *)to_offset; (*iov)->iov_len = copy_bytes; } + if (copy_data) { offset = DATA_BLOCK_SIZE - block_remaining; memcpy(to + offset, @@ -566,11 +584,13 @@ static void scatter_data_area(struct tcmu_dev *udev, copy_bytes); tcmu_flush_dcache_range(to, copy_bytes); } + sg_remaining -= copy_bytes; block_remaining -= copy_bytes; } kunmap_atomic(from - sg->offset); } + if (to) kunmap_atomic(to); } From 6fd0ce79724dabe2cd0bd8aed111cbe94755bf88 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 28 Nov 2017 12:40:37 -0600 Subject: [PATCH 12/36] tcmu: prep queue_cmd_ring to be used by unmap wq In the next patches we will call queue_cmd_ring from the submitting context and also the completion path. This changes the queue_cmd_ring return code so in the next patches we can return a sense_reason_t and also signal if a command was requeued. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 42 ++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 8d0dc471fce802..68d1d7214eeb73 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -776,8 +776,16 @@ static int tcmu_setup_cmd_timer(struct tcmu_cmd *tcmu_cmd) return 0; } -static sense_reason_t -tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) +/** + * queue_cmd_ring - queue cmd to ring or internally + * @tcmu_cmd: cmd to queue + * @scsi_err: TCM error code if failure (-1) returned. + * + * Returns: + * -1 we cannot queue internally or to the ring. + * 0 success + */ +static sense_reason_t queue_cmd_ring(struct tcmu_cmd *tcmu_cmd, int *scsi_err) { struct tcmu_dev *udev = tcmu_cmd->tcmu_dev; struct se_cmd *se_cmd = tcmu_cmd->se_cmd; @@ -791,8 +799,12 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) bool copy_to_data_area; size_t data_length = tcmu_cmd_get_data_length(tcmu_cmd); - if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags)) - return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + *scsi_err = TCM_NO_SENSE; + + if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags)) { + *scsi_err = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + return -1; + } /* * Must be a certain minimum size for response sense info, but @@ -819,7 +831,8 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) "cmd ring/data area\n", command_size, data_length, udev->cmdr_size, udev->data_size); mutex_unlock(&udev->cmdr_lock); - return TCM_INVALID_CDB_FIELD; + *scsi_err = TCM_INVALID_CDB_FIELD; + return -1; } while (!is_ring_space_avail(udev, tcmu_cmd, command_size, data_length)) { @@ -845,7 +858,8 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) finish_wait(&udev->wait_cmdr, &__wait); if (!ret) { pr_warn("tcmu: command timed out\n"); - return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + *scsi_err = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + return -1; } mutex_lock(&udev->cmdr_lock); @@ -902,7 +916,9 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) if (ret) { tcmu_cmd_free_data(tcmu_cmd, tcmu_cmd->dbi_cnt); mutex_unlock(&udev->cmdr_lock); - return TCM_OUT_OF_RESOURCES; + + *scsi_err = TCM_OUT_OF_RESOURCES; + return -1; } entry->hdr.cmd_id = tcmu_cmd->cmd_id; @@ -933,27 +949,23 @@ tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd) mod_timer(&udev->timeout, round_jiffies_up(jiffies + msecs_to_jiffies(udev->cmd_time_out))); - return TCM_NO_SENSE; + return 0; } static sense_reason_t tcmu_queue_cmd(struct se_cmd *se_cmd) { struct tcmu_cmd *tcmu_cmd; - sense_reason_t ret; + sense_reason_t scsi_ret; tcmu_cmd = tcmu_alloc_cmd(se_cmd); if (!tcmu_cmd) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - ret = tcmu_queue_cmd_ring(tcmu_cmd); - if (ret != TCM_NO_SENSE) { - pr_err("TCMU: Could not queue command\n"); - + if (queue_cmd_ring(tcmu_cmd, &scsi_ret) < 0) tcmu_free_cmd(tcmu_cmd); - } - return ret; + return scsi_ret; } static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry *entry) From f890f5799a6628fe006ae524e625900186074cdb Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 28 Nov 2017 12:40:38 -0600 Subject: [PATCH 13/36] tcmu: simplify dbi thresh handling We do not really save a lot by trying to increase thresh a multiple of the existing value. This just simplifies the code by increasing it to whatever is needed for the command being executed. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 68d1d7214eeb73..2679e4dcd0f18d 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -79,7 +79,6 @@ #define DATA_BLOCK_SIZE PAGE_SIZE #define DATA_BLOCK_BITS (256 * 1024) #define DATA_SIZE (DATA_BLOCK_BITS * DATA_BLOCK_SIZE) -#define DATA_BLOCK_INIT_BITS 128 /* The total size of the ring is 8M + 256K * PAGE_SIZE */ #define TCMU_RING_SIZE (CMDR_SIZE + DATA_SIZE) @@ -700,7 +699,6 @@ static bool is_ring_space_avail(struct tcmu_dev *udev, struct tcmu_cmd *cmd, if ((space * DATA_BLOCK_SIZE) < data_needed) { unsigned long blocks_left = DATA_BLOCK_BITS - udev->dbi_thresh + space; - unsigned long grow; if (blocks_left < blocks_needed) { pr_debug("no data space: only %lu available, but ask for %zu\n", @@ -709,23 +707,9 @@ static bool is_ring_space_avail(struct tcmu_dev *udev, struct tcmu_cmd *cmd, return false; } - /* Try to expand the thresh */ - if (!udev->dbi_thresh) { - /* From idle state */ - uint32_t init_thresh = DATA_BLOCK_INIT_BITS; - - udev->dbi_thresh = max(blocks_needed, init_thresh); - } else { - /* - * Grow the data area by max(blocks needed, - * dbi_thresh / 2), but limited to the max - * DATA_BLOCK_BITS size. - */ - grow = max(blocks_needed, udev->dbi_thresh / 2); - udev->dbi_thresh += grow; - if (udev->dbi_thresh > DATA_BLOCK_BITS) - udev->dbi_thresh = DATA_BLOCK_BITS; - } + udev->dbi_thresh += blocks_needed; + if (udev->dbi_thresh > DATA_BLOCK_BITS) + udev->dbi_thresh = DATA_BLOCK_BITS; } return tcmu_get_empty_blocks(udev, cmd); From af1dd7ff46824a94da1d90443bd07db2796bd545 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 28 Nov 2017 12:40:39 -0600 Subject: [PATCH 14/36] tcmu: don't block submitting context for block waits This patch has tcmu internally queue cmds if its ring buffer is full. It also makes the TCMU_GLOBAL_MAX_BLOCKS limit a hint instead of a hard limit, so we do not have to add any new locks/atomics in the main IO path except when IO is not running. This fixes the following bugs: 1. We cannot sleep from the submitting context because it might be called from a target recv context. This results in transport level commands timing out. For example if the ring is full, we would sleep, and a iscsi initiator would send a iscsi ping/nop which times out because the target's recv thread is sleeping here. 2. Devices were not fairly scheduled to run when they hit the global limit so they could time out waiting for ring space while others got run. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 264 +++++++++++++++++++----------- 1 file changed, 169 insertions(+), 95 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 2679e4dcd0f18d..52fc1d440d2371 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -83,7 +83,10 @@ /* The total size of the ring is 8M + 256K * PAGE_SIZE */ #define TCMU_RING_SIZE (CMDR_SIZE + DATA_SIZE) -/* Default maximum of the global data blocks(512K * PAGE_SIZE) */ +/* + * Default number of global data blocks(512K * PAGE_SIZE) + * when the unmap thread will be started. + */ #define TCMU_GLOBAL_MAX_BLOCKS (512 * 1024) static u8 tcmu_kern_cmd_reply_supported; @@ -106,6 +109,7 @@ struct tcmu_nl_cmd { struct tcmu_dev { struct list_head node; struct kref kref; + struct se_device se_dev; char *name; @@ -128,10 +132,9 @@ struct tcmu_dev { size_t data_off; size_t data_size; - wait_queue_head_t wait_cmdr; struct mutex cmdr_lock; + struct list_head cmdr_queue; - bool waiting_global; uint32_t dbi_max; uint32_t dbi_thresh; DECLARE_BITMAP(data_bitmap, DATA_BLOCK_BITS); @@ -160,6 +163,7 @@ struct tcmu_dev { struct tcmu_cmd { struct se_cmd *se_cmd; struct tcmu_dev *tcmu_dev; + struct list_head cmdr_queue_entry; uint16_t cmd_id; @@ -174,7 +178,16 @@ struct tcmu_cmd { #define TCMU_CMD_BIT_EXPIRED 0 unsigned long flags; }; - +/* + * To avoid dead lock the mutex lock order should always be: + * + * mutex_lock(&root_udev_mutex); + * ... + * mutex_lock(&tcmu_dev->cmdr_lock); + * mutex_unlock(&tcmu_dev->cmdr_lock); + * ... + * mutex_unlock(&root_udev_mutex); + */ static DEFINE_MUTEX(root_udev_mutex); static LIST_HEAD(root_udev); @@ -182,7 +195,7 @@ static DEFINE_SPINLOCK(timed_out_udevs_lock); static LIST_HEAD(timed_out_udevs); static atomic_t global_db_count = ATOMIC_INIT(0); -static struct work_struct tcmu_unmap_work; +static struct delayed_work tcmu_unmap_work; static struct kmem_cache *tcmu_cmd_cache; @@ -346,10 +359,8 @@ static inline bool tcmu_get_empty_block(struct tcmu_dev *udev, page = radix_tree_lookup(&udev->data_blocks, dbi); if (!page) { if (atomic_add_return(1, &global_db_count) > - TCMU_GLOBAL_MAX_BLOCKS) { - atomic_dec(&global_db_count); - return false; - } + TCMU_GLOBAL_MAX_BLOCKS) + schedule_delayed_work(&tcmu_unmap_work, 0); /* try to get new page from the mm */ page = alloc_page(GFP_KERNEL); @@ -380,18 +391,11 @@ static bool tcmu_get_empty_blocks(struct tcmu_dev *udev, { int i; - udev->waiting_global = false; - for (i = tcmu_cmd->dbi_cur; i < tcmu_cmd->dbi_cnt; i++) { if (!tcmu_get_empty_block(udev, tcmu_cmd)) - goto err; + return false; } return true; - -err: - udev->waiting_global = true; - schedule_work(&tcmu_unmap_work); - return false; } static inline struct page * @@ -437,6 +441,7 @@ static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd) if (!tcmu_cmd) return NULL; + INIT_LIST_HEAD(&tcmu_cmd->cmdr_queue_entry); tcmu_cmd->se_cmd = se_cmd; tcmu_cmd->tcmu_dev = udev; @@ -742,6 +747,10 @@ static int tcmu_setup_cmd_timer(struct tcmu_cmd *tcmu_cmd) unsigned long tmo = udev->cmd_time_out; int cmd_id; + /* + * If it was on the cmdr queue waiting we do not reset the timer + * for requeues and when it is finally sent to userspace. + */ if (tcmu_cmd->cmd_id) return 0; @@ -753,13 +762,31 @@ static int tcmu_setup_cmd_timer(struct tcmu_cmd *tcmu_cmd) tcmu_cmd->cmd_id = cmd_id; if (!tmo) - return 0; + tmo = TCMU_TIME_OUT; + + pr_debug("allocated cmd %u for dev %s tmo %lu\n", tcmu_cmd->cmd_id, + udev->name, tmo / MSEC_PER_SEC); tcmu_cmd->deadline = round_jiffies_up(jiffies + msecs_to_jiffies(tmo)); mod_timer(&udev->timeout, tcmu_cmd->deadline); return 0; } +static int add_to_cmdr_queue(struct tcmu_cmd *tcmu_cmd) +{ + struct tcmu_dev *udev = tcmu_cmd->tcmu_dev; + int ret; + + ret = tcmu_setup_cmd_timer(tcmu_cmd); + if (ret) + return ret; + + list_add_tail(&tcmu_cmd->cmdr_queue_entry, &udev->cmdr_queue); + pr_debug("adding cmd %u on dev %s to ring space wait queue\n", + tcmu_cmd->cmd_id, udev->name); + return 0; +} + /** * queue_cmd_ring - queue cmd to ring or internally * @tcmu_cmd: cmd to queue @@ -768,6 +795,7 @@ static int tcmu_setup_cmd_timer(struct tcmu_cmd *tcmu_cmd) * Returns: * -1 we cannot queue internally or to the ring. * 0 success + * 1 internally queued to wait for ring memory to free. */ static sense_reason_t queue_cmd_ring(struct tcmu_cmd *tcmu_cmd, int *scsi_err) { @@ -805,7 +833,8 @@ static sense_reason_t queue_cmd_ring(struct tcmu_cmd *tcmu_cmd, int *scsi_err) base_command_size = tcmu_cmd_get_base_cmd_size(tcmu_cmd->dbi_cnt); command_size = tcmu_cmd_get_cmd_size(tcmu_cmd, base_command_size); - mutex_lock(&udev->cmdr_lock); + if (!list_empty(&udev->cmdr_queue)) + goto queue; mb = udev->mb_addr; cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */ @@ -814,42 +843,18 @@ static sense_reason_t queue_cmd_ring(struct tcmu_cmd *tcmu_cmd, int *scsi_err) pr_warn("TCMU: Request of size %zu/%zu is too big for %u/%zu " "cmd ring/data area\n", command_size, data_length, udev->cmdr_size, udev->data_size); - mutex_unlock(&udev->cmdr_lock); *scsi_err = TCM_INVALID_CDB_FIELD; return -1; } - while (!is_ring_space_avail(udev, tcmu_cmd, command_size, data_length)) { - int ret; - DEFINE_WAIT(__wait); - + if (!is_ring_space_avail(udev, tcmu_cmd, command_size, data_length)) { /* * Don't leave commands partially setup because the unmap * thread might need the blocks to make forward progress. */ tcmu_cmd_free_data(tcmu_cmd, tcmu_cmd->dbi_cur); tcmu_cmd_reset_dbi_cur(tcmu_cmd); - - prepare_to_wait(&udev->wait_cmdr, &__wait, TASK_INTERRUPTIBLE); - - pr_debug("sleeping for ring space\n"); - mutex_unlock(&udev->cmdr_lock); - if (udev->cmd_time_out) - ret = schedule_timeout( - msecs_to_jiffies(udev->cmd_time_out)); - else - ret = schedule_timeout(msecs_to_jiffies(TCMU_TIME_OUT)); - finish_wait(&udev->wait_cmdr, &__wait); - if (!ret) { - pr_warn("tcmu: command timed out\n"); - *scsi_err = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - return -1; - } - - mutex_lock(&udev->cmdr_lock); - - /* We dropped cmdr_lock, cmd_head is stale */ - cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */ + goto queue; } /* Insert a PAD if end-of-ring space is too small */ @@ -924,31 +929,39 @@ static sense_reason_t queue_cmd_ring(struct tcmu_cmd *tcmu_cmd, int *scsi_err) UPDATE_HEAD(mb->cmd_head, command_size, udev->cmdr_size); tcmu_flush_dcache_range(mb, sizeof(*mb)); - mutex_unlock(&udev->cmdr_lock); /* TODO: only if FLUSH and FUA? */ uio_event_notify(&udev->uio_info); - if (udev->cmd_time_out) - mod_timer(&udev->timeout, round_jiffies_up(jiffies + - msecs_to_jiffies(udev->cmd_time_out))); - return 0; + +queue: + if (add_to_cmdr_queue(tcmu_cmd)) { + *scsi_err = TCM_OUT_OF_RESOURCES; + return -1; + } + + return 1; } static sense_reason_t tcmu_queue_cmd(struct se_cmd *se_cmd) { + struct se_device *se_dev = se_cmd->se_dev; + struct tcmu_dev *udev = TCMU_DEV(se_dev); struct tcmu_cmd *tcmu_cmd; sense_reason_t scsi_ret; + int ret; tcmu_cmd = tcmu_alloc_cmd(se_cmd); if (!tcmu_cmd) return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; - if (queue_cmd_ring(tcmu_cmd, &scsi_ret) < 0) + mutex_lock(&udev->cmdr_lock); + ret = queue_cmd_ring(tcmu_cmd, &scsi_ret); + mutex_unlock(&udev->cmdr_lock); + if (ret < 0) tcmu_free_cmd(tcmu_cmd); - return scsi_ret; } @@ -1036,10 +1049,15 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev) handled++; } - if (mb->cmd_tail == mb->cmd_head) - del_timer(&udev->timeout); /* no more pending cmds */ - - wake_up(&udev->wait_cmdr); + if (mb->cmd_tail == mb->cmd_head && list_empty(&udev->cmdr_queue)) { + del_timer(&udev->timeout); + /* + * not more pending or waiting commands so try to reclaim + * blocks if needed. + */ + if (atomic_read(&global_db_count) > TCMU_GLOBAL_MAX_BLOCKS) + schedule_delayed_work(&tcmu_unmap_work, 0); + } return handled; } @@ -1047,6 +1065,10 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev) static int tcmu_check_expired_cmd(int id, void *p, void *data) { struct tcmu_cmd *cmd = p; + struct tcmu_dev *udev = cmd->tcmu_dev; + u8 scsi_status; + struct se_cmd *se_cmd; + bool is_running; if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) return 0; @@ -1054,10 +1076,27 @@ static int tcmu_check_expired_cmd(int id, void *p, void *data) if (!time_after(jiffies, cmd->deadline)) return 0; - set_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags); - target_complete_cmd(cmd->se_cmd, SAM_STAT_CHECK_CONDITION); + is_running = list_empty(&cmd->cmdr_queue_entry); + pr_debug("Timing out cmd %u on dev %s that is %s.\n", + id, udev->name, is_running ? "inflight" : "queued"); + + se_cmd = cmd->se_cmd; cmd->se_cmd = NULL; + if (is_running) { + set_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags); + /* + * target_complete_cmd will translate this to LUN COMM FAILURE + */ + scsi_status = SAM_STAT_CHECK_CONDITION; + } else { + list_del_init(&cmd->cmdr_queue_entry); + + idr_remove(&udev->commands, id); + tcmu_free_cmd(cmd); + scsi_status = SAM_STAT_TASK_SET_FULL; + } + target_complete_cmd(se_cmd, scsi_status); return 0; } @@ -1072,7 +1111,7 @@ static void tcmu_device_timedout(struct timer_list *t) list_add_tail(&udev->timedout_entry, &timed_out_udevs); spin_unlock(&timed_out_udevs_lock); - schedule_work(&tcmu_unmap_work); + schedule_delayed_work(&tcmu_unmap_work, 0); } static int tcmu_attach_hba(struct se_hba *hba, u32 host_id) @@ -1113,10 +1152,10 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name) udev->hba = hba; udev->cmd_time_out = TCMU_TIME_OUT; - init_waitqueue_head(&udev->wait_cmdr); mutex_init(&udev->cmdr_lock); INIT_LIST_HEAD(&udev->timedout_entry); + INIT_LIST_HEAD(&udev->cmdr_queue); idr_init(&udev->commands); timer_setup(&udev->timeout, tcmu_device_timedout, 0); @@ -1129,13 +1168,63 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name) return &udev->se_dev; } +static bool run_cmdr_queue(struct tcmu_dev *udev) +{ + struct tcmu_cmd *tcmu_cmd, *tmp_cmd; + LIST_HEAD(cmds); + bool drained = true; + sense_reason_t scsi_ret; + int ret; + + if (list_empty(&udev->cmdr_queue)) + return true; + + pr_debug("running %s's cmdr queue\n", udev->name); + + list_splice_init(&udev->cmdr_queue, &cmds); + + list_for_each_entry_safe(tcmu_cmd, tmp_cmd, &cmds, cmdr_queue_entry) { + list_del_init(&tcmu_cmd->cmdr_queue_entry); + + pr_debug("removing cmd %u on dev %s from queue\n", + tcmu_cmd->cmd_id, udev->name); + + ret = queue_cmd_ring(tcmu_cmd, &scsi_ret); + if (ret < 0) { + pr_debug("cmd %u on dev %s failed with %u\n", + tcmu_cmd->cmd_id, udev->name, scsi_ret); + + idr_remove(&udev->commands, tcmu_cmd->cmd_id); + /* + * Ignore scsi_ret for now. target_complete_cmd + * drops it. + */ + target_complete_cmd(tcmu_cmd->se_cmd, + SAM_STAT_CHECK_CONDITION); + tcmu_free_cmd(tcmu_cmd); + } else if (ret > 0) { + pr_debug("ran out of space during cmdr queue run\n"); + /* + * cmd was requeued, so just put all cmds back in + * the queue + */ + list_splice_tail(&cmds, &udev->cmdr_queue); + drained = false; + goto done; + } + } +done: + return drained; +} + static int tcmu_irqcontrol(struct uio_info *info, s32 irq_on) { - struct tcmu_dev *tcmu_dev = container_of(info, struct tcmu_dev, uio_info); + struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info); - mutex_lock(&tcmu_dev->cmdr_lock); - tcmu_handle_completions(tcmu_dev); - mutex_unlock(&tcmu_dev->cmdr_lock); + mutex_lock(&udev->cmdr_lock); + tcmu_handle_completions(udev); + run_cmdr_queue(udev); + mutex_unlock(&udev->cmdr_lock); return 0; } @@ -1531,7 +1620,6 @@ static int tcmu_configure_device(struct se_device *dev) udev->data_off = CMDR_SIZE; udev->data_size = DATA_SIZE; udev->dbi_thresh = 0; /* Default in Idle state */ - udev->waiting_global = false; /* Initialise the mailbox of the ring buffer */ mb = udev->mb_addr; @@ -1977,12 +2065,14 @@ static struct target_backend_ops tcmu_ops = { .tb_dev_attrib_attrs = NULL, }; - static void find_free_blocks(void) { struct tcmu_dev *udev; loff_t off; - uint32_t start, end, block; + u32 start, end, block, total_freed = 0; + + if (atomic_read(&global_db_count) <= TCMU_GLOBAL_MAX_BLOCKS) + return; mutex_lock(&root_udev_mutex); list_for_each_entry(udev, &root_udev, node) { @@ -1991,8 +2081,8 @@ static void find_free_blocks(void) /* Try to complete the finished commands first */ tcmu_handle_completions(udev); - /* Skip the udevs waiting the global pool or in idle */ - if (udev->waiting_global || !udev->dbi_thresh) { + /* Skip the udevs in idle */ + if (!udev->dbi_thresh) { mutex_unlock(&udev->cmdr_lock); continue; } @@ -2001,8 +2091,8 @@ static void find_free_blocks(void) block = find_last_bit(udev->data_bitmap, end); if (block == udev->dbi_max) { /* - * The last bit is dbi_max, so there is - * no need to shrink any blocks. + * The last bit is dbi_max, so it is not possible + * reclaim any blocks. */ mutex_unlock(&udev->cmdr_lock); continue; @@ -2022,30 +2112,15 @@ static void find_free_blocks(void) /* Release the block pages */ tcmu_blocks_release(&udev->data_blocks, start, end); mutex_unlock(&udev->cmdr_lock); - } - mutex_unlock(&root_udev_mutex); -} -static void run_cmdr_queues(void) -{ - struct tcmu_dev *udev; - - /* - * Try to wake up the udevs who are waiting - * for the global data block pool. - */ - mutex_lock(&root_udev_mutex); - list_for_each_entry(udev, &root_udev, node) { - mutex_lock(&udev->cmdr_lock); - if (!udev->waiting_global) { - mutex_unlock(&udev->cmdr_lock); - break; - } - mutex_unlock(&udev->cmdr_lock); - - wake_up(&udev->wait_cmdr); + total_freed += end - start; + pr_debug("Freed %u blocks (total %u) from %s.\n", end - start, + total_freed, udev->name); } mutex_unlock(&root_udev_mutex); + + if (atomic_read(&global_db_count) > TCMU_GLOBAL_MAX_BLOCKS) + schedule_delayed_work(&tcmu_unmap_work, msecs_to_jiffies(5000)); } static void check_timedout_devices(void) @@ -2074,7 +2149,6 @@ static void tcmu_unmap_work_fn(struct work_struct *work) { check_timedout_devices(); find_free_blocks(); - run_cmdr_queues(); } static int __init tcmu_module_init(void) @@ -2083,7 +2157,7 @@ static int __init tcmu_module_init(void) BUILD_BUG_ON((sizeof(struct tcmu_cmd_entry) % TCMU_OP_ALIGN_SIZE) != 0); - INIT_WORK(&tcmu_unmap_work, tcmu_unmap_work_fn); + INIT_DELAYED_WORK(&tcmu_unmap_work, tcmu_unmap_work_fn); tcmu_cmd_cache = kmem_cache_create("tcmu_cmd_cache", sizeof(struct tcmu_cmd), @@ -2146,7 +2220,7 @@ static int __init tcmu_module_init(void) static void __exit tcmu_module_exit(void) { - cancel_work_sync(&tcmu_unmap_work); + cancel_delayed_work_sync(&tcmu_unmap_work); target_backend_unregister(&tcmu_ops); kfree(tcmu_attrs); genl_unregister_family(&tcmu_genl_family); From 9103575ae34e9d60d40940bebf47fc9e9652067a Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 28 Nov 2017 12:40:40 -0600 Subject: [PATCH 15/36] tcmu: make ring buffer timer configurable This adds a timer, qfull_time_out, that controls how long a device will wait for ring buffer space to open before failing the commands in the queue. It is useful to separate this timer from the cmd_time_out and default 30 sec one, because for HA setups cmd_time_out may be disbled and 30 seconds is too long to wait when some OSs like ESX will timeout commands after as little as 8 - 15 seconds. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 149 +++++++++++++++++++++++------- 1 file changed, 115 insertions(+), 34 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 52fc1d440d2371..c6a0c3198ccc9e 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -142,8 +142,12 @@ struct tcmu_dev { struct idr commands; - struct timer_list timeout; + struct timer_list cmd_timer; unsigned int cmd_time_out; + + struct timer_list qfull_timer; + int qfull_time_out; + struct list_head timedout_entry; spinlock_t nl_cmd_lock; @@ -741,18 +745,14 @@ static inline size_t tcmu_cmd_get_cmd_size(struct tcmu_cmd *tcmu_cmd, return command_size; } -static int tcmu_setup_cmd_timer(struct tcmu_cmd *tcmu_cmd) +static int tcmu_setup_cmd_timer(struct tcmu_cmd *tcmu_cmd, unsigned int tmo, + struct timer_list *timer) { struct tcmu_dev *udev = tcmu_cmd->tcmu_dev; - unsigned long tmo = udev->cmd_time_out; int cmd_id; - /* - * If it was on the cmdr queue waiting we do not reset the timer - * for requeues and when it is finally sent to userspace. - */ if (tcmu_cmd->cmd_id) - return 0; + goto setup_timer; cmd_id = idr_alloc(&udev->commands, tcmu_cmd, 1, USHRT_MAX, GFP_NOWAIT); if (cmd_id < 0) { @@ -761,23 +761,38 @@ static int tcmu_setup_cmd_timer(struct tcmu_cmd *tcmu_cmd) } tcmu_cmd->cmd_id = cmd_id; - if (!tmo) - tmo = TCMU_TIME_OUT; - pr_debug("allocated cmd %u for dev %s tmo %lu\n", tcmu_cmd->cmd_id, udev->name, tmo / MSEC_PER_SEC); +setup_timer: + if (!tmo) + return 0; + tcmu_cmd->deadline = round_jiffies_up(jiffies + msecs_to_jiffies(tmo)); - mod_timer(&udev->timeout, tcmu_cmd->deadline); + mod_timer(timer, tcmu_cmd->deadline); return 0; } static int add_to_cmdr_queue(struct tcmu_cmd *tcmu_cmd) { struct tcmu_dev *udev = tcmu_cmd->tcmu_dev; + unsigned int tmo; int ret; - ret = tcmu_setup_cmd_timer(tcmu_cmd); + /* + * For backwards compat if qfull_time_out is not set use + * cmd_time_out and if that's not set use the default time out. + */ + if (!udev->qfull_time_out) + return -ETIMEDOUT; + else if (udev->qfull_time_out > 0) + tmo = udev->qfull_time_out; + else if (udev->cmd_time_out) + tmo = udev->cmd_time_out; + else + tmo = TCMU_TIME_OUT; + + ret = tcmu_setup_cmd_timer(tcmu_cmd, tmo, &udev->qfull_timer); if (ret) return ret; @@ -901,7 +916,8 @@ static sense_reason_t queue_cmd_ring(struct tcmu_cmd *tcmu_cmd, int *scsi_err) } entry->req.iov_bidi_cnt = iov_cnt; - ret = tcmu_setup_cmd_timer(tcmu_cmd); + ret = tcmu_setup_cmd_timer(tcmu_cmd, udev->cmd_time_out, + &udev->cmd_timer); if (ret) { tcmu_cmd_free_data(tcmu_cmd, tcmu_cmd->dbi_cnt); mutex_unlock(&udev->cmdr_lock); @@ -1049,14 +1065,19 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev) handled++; } - if (mb->cmd_tail == mb->cmd_head && list_empty(&udev->cmdr_queue)) { - del_timer(&udev->timeout); - /* - * not more pending or waiting commands so try to reclaim - * blocks if needed. - */ - if (atomic_read(&global_db_count) > TCMU_GLOBAL_MAX_BLOCKS) - schedule_delayed_work(&tcmu_unmap_work, 0); + if (mb->cmd_tail == mb->cmd_head) { + /* no more pending commands */ + del_timer(&udev->cmd_timer); + + if (list_empty(&udev->cmdr_queue)) { + /* + * no more pending or waiting commands so try to + * reclaim blocks if needed. + */ + if (atomic_read(&global_db_count) > + TCMU_GLOBAL_MAX_BLOCKS) + schedule_delayed_work(&tcmu_unmap_work, 0); + } } return handled; @@ -1077,13 +1098,15 @@ static int tcmu_check_expired_cmd(int id, void *p, void *data) return 0; is_running = list_empty(&cmd->cmdr_queue_entry); - pr_debug("Timing out cmd %u on dev %s that is %s.\n", - id, udev->name, is_running ? "inflight" : "queued"); - - se_cmd = cmd->se_cmd; - cmd->se_cmd = NULL; if (is_running) { + /* + * If cmd_time_out is disabled but qfull is set deadline + * will only reflect the qfull timeout. Ignore it. + */ + if (!udev->cmd_time_out) + return 0; + set_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags); /* * target_complete_cmd will translate this to LUN COMM FAILURE @@ -1096,16 +1119,18 @@ static int tcmu_check_expired_cmd(int id, void *p, void *data) tcmu_free_cmd(cmd); scsi_status = SAM_STAT_TASK_SET_FULL; } + + pr_debug("Timing out cmd %u on dev %s that is %s.\n", + id, udev->name, is_running ? "inflight" : "queued"); + + se_cmd = cmd->se_cmd; + cmd->se_cmd = NULL; target_complete_cmd(se_cmd, scsi_status); return 0; } -static void tcmu_device_timedout(struct timer_list *t) +static void tcmu_device_timedout(struct tcmu_dev *udev) { - struct tcmu_dev *udev = from_timer(udev, t, timeout); - - pr_debug("%s cmd timeout has expired\n", udev->name); - spin_lock(&timed_out_udevs_lock); if (list_empty(&udev->timedout_entry)) list_add_tail(&udev->timedout_entry, &timed_out_udevs); @@ -1114,6 +1139,22 @@ static void tcmu_device_timedout(struct timer_list *t) schedule_delayed_work(&tcmu_unmap_work, 0); } +static void tcmu_cmd_timedout(struct timer_list *t) +{ + struct tcmu_dev *udev = from_timer(udev, t, cmd_timer); + + pr_debug("%s cmd timeout has expired\n", udev->name); + tcmu_device_timedout(udev); +} + +static void tcmu_qfull_timedout(struct timer_list *t) +{ + struct tcmu_dev *udev = from_timer(udev, t, qfull_timer); + + pr_debug("%s qfull timeout has expired\n", udev->name); + tcmu_device_timedout(udev); +} + static int tcmu_attach_hba(struct se_hba *hba, u32 host_id) { struct tcmu_hba *tcmu_hba; @@ -1151,6 +1192,7 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name) udev->hba = hba; udev->cmd_time_out = TCMU_TIME_OUT; + udev->qfull_time_out = -1; mutex_init(&udev->cmdr_lock); @@ -1158,7 +1200,8 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name) INIT_LIST_HEAD(&udev->cmdr_queue); idr_init(&udev->commands); - timer_setup(&udev->timeout, tcmu_device_timedout, 0); + timer_setup(&udev->qfull_timer, tcmu_qfull_timedout, 0); + timer_setup(&udev->cmd_timer, tcmu_cmd_timedout, 0); init_waitqueue_head(&udev->nl_cmd_wq); spin_lock_init(&udev->nl_cmd_lock); @@ -1213,6 +1256,8 @@ static bool run_cmdr_queue(struct tcmu_dev *udev) goto done; } } + if (list_empty(&udev->cmdr_queue)) + del_timer(&udev->qfull_timer); done: return drained; } @@ -1712,7 +1757,8 @@ static void tcmu_destroy_device(struct se_device *dev) { struct tcmu_dev *udev = TCMU_DEV(dev); - del_timer_sync(&udev->timeout); + del_timer_sync(&udev->cmd_timer); + del_timer_sync(&udev->qfull_timer); mutex_lock(&root_udev_mutex); list_del(&udev->node); @@ -1893,6 +1939,40 @@ static ssize_t tcmu_cmd_time_out_store(struct config_item *item, const char *pag } CONFIGFS_ATTR(tcmu_, cmd_time_out); +static ssize_t tcmu_qfull_time_out_show(struct config_item *item, char *page) +{ + struct se_dev_attrib *da = container_of(to_config_group(item), + struct se_dev_attrib, da_group); + struct tcmu_dev *udev = TCMU_DEV(da->da_dev); + + return snprintf(page, PAGE_SIZE, "%ld\n", udev->qfull_time_out <= 0 ? + udev->qfull_time_out : + udev->qfull_time_out / MSEC_PER_SEC); +} + +static ssize_t tcmu_qfull_time_out_store(struct config_item *item, + const char *page, size_t count) +{ + struct se_dev_attrib *da = container_of(to_config_group(item), + struct se_dev_attrib, da_group); + struct tcmu_dev *udev = TCMU_DEV(da->da_dev); + s32 val; + int ret; + + ret = kstrtos32(page, 0, &val); + if (ret < 0) + return ret; + + if (val >= 0) { + udev->qfull_time_out = val * MSEC_PER_SEC; + } else { + printk(KERN_ERR "Invalid qfull timeout value %d\n", val); + return -EINVAL; + } + return count; +} +CONFIGFS_ATTR(tcmu_, qfull_time_out); + static ssize_t tcmu_dev_config_show(struct config_item *item, char *page) { struct se_dev_attrib *da = container_of(to_config_group(item), @@ -2038,6 +2118,7 @@ CONFIGFS_ATTR(tcmu_, emulate_write_cache); static struct configfs_attribute *tcmu_attrib_attrs[] = { &tcmu_attr_cmd_time_out, + &tcmu_attr_qfull_time_out, &tcmu_attr_dev_config, &tcmu_attr_dev_size, &tcmu_attr_emulate_write_cache, From 80eb876138a1adc7d30831ce275ea744c050d97e Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 28 Nov 2017 12:40:41 -0600 Subject: [PATCH 16/36] tcmu: allow max block and global max blocks to be settable Users might have a physical system to a target so they could have a lot more than 2 gigs of memory they want to devote to tcmu. OTOH, we could be running in a vm and so a 2 gig global and 1 gig per dev limit might be too high. This patch allows the user to specify the limits. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 143 ++++++++++++++++++++++++++---- 1 file changed, 124 insertions(+), 19 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index c6a0c3198ccc9e..bac08bc72e3b91 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -77,9 +77,13 @@ * the total size is 256K * PAGE_SIZE. */ #define DATA_BLOCK_SIZE PAGE_SIZE -#define DATA_BLOCK_BITS (256 * 1024) +#define DATA_BLOCK_SHIFT PAGE_SHIFT +#define DATA_BLOCK_BITS_DEF (256 * 1024) #define DATA_SIZE (DATA_BLOCK_BITS * DATA_BLOCK_SIZE) +#define TCMU_MBS_TO_BLOCKS(_mbs) (_mbs << (20 - DATA_BLOCK_SHIFT)) +#define TCMU_BLOCKS_TO_MBS(_blocks) (_blocks >> (20 - DATA_BLOCK_SHIFT)) + /* The total size of the ring is 8M + 256K * PAGE_SIZE */ #define TCMU_RING_SIZE (CMDR_SIZE + DATA_SIZE) @@ -87,7 +91,7 @@ * Default number of global data blocks(512K * PAGE_SIZE) * when the unmap thread will be started. */ -#define TCMU_GLOBAL_MAX_BLOCKS (512 * 1024) +#define TCMU_GLOBAL_MAX_BLOCKS_DEF (512 * 1024) static u8 tcmu_kern_cmd_reply_supported; @@ -131,13 +135,15 @@ struct tcmu_dev { /* Must add data_off and mb_addr to get the address */ size_t data_off; size_t data_size; + uint32_t max_blocks; + size_t ring_size; struct mutex cmdr_lock; struct list_head cmdr_queue; uint32_t dbi_max; uint32_t dbi_thresh; - DECLARE_BITMAP(data_bitmap, DATA_BLOCK_BITS); + unsigned long *data_bitmap; struct radix_tree_root data_blocks; struct idr commands; @@ -198,10 +204,51 @@ static LIST_HEAD(root_udev); static DEFINE_SPINLOCK(timed_out_udevs_lock); static LIST_HEAD(timed_out_udevs); +static struct kmem_cache *tcmu_cmd_cache; + static atomic_t global_db_count = ATOMIC_INIT(0); static struct delayed_work tcmu_unmap_work; +static int tcmu_global_max_blocks = TCMU_GLOBAL_MAX_BLOCKS_DEF; -static struct kmem_cache *tcmu_cmd_cache; +static int tcmu_set_global_max_data_area(const char *str, + const struct kernel_param *kp) +{ + int ret, max_area_mb; + + ret = kstrtoint(str, 10, &max_area_mb); + if (ret) + return -EINVAL; + + if (max_area_mb <= 0) { + pr_err("global_max_data_area must be larger than 0.\n"); + return -EINVAL; + } + + tcmu_global_max_blocks = TCMU_MBS_TO_BLOCKS(max_area_mb); + if (atomic_read(&global_db_count) > tcmu_global_max_blocks) + schedule_delayed_work(&tcmu_unmap_work, 0); + else + cancel_delayed_work_sync(&tcmu_unmap_work); + + return 0; +} + +static int tcmu_get_global_max_data_area(char *buffer, + const struct kernel_param *kp) +{ + return sprintf(buffer, "%d", TCMU_BLOCKS_TO_MBS(tcmu_global_max_blocks)); +} + +static const struct kernel_param_ops tcmu_global_max_data_area_op = { + .set = tcmu_set_global_max_data_area, + .get = tcmu_get_global_max_data_area, +}; + +module_param_cb(global_max_data_area_mb, &tcmu_global_max_data_area_op, NULL, + S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(global_max_data_area_mb, + "Max MBs allowed to be allocated to all the tcmu device's " + "data areas."); /* multicast group */ enum tcmu_multicast_groups { @@ -363,7 +410,7 @@ static inline bool tcmu_get_empty_block(struct tcmu_dev *udev, page = radix_tree_lookup(&udev->data_blocks, dbi); if (!page) { if (atomic_add_return(1, &global_db_count) > - TCMU_GLOBAL_MAX_BLOCKS) + tcmu_global_max_blocks) schedule_delayed_work(&tcmu_unmap_work, 0); /* try to get new page from the mm */ @@ -706,8 +753,8 @@ static bool is_ring_space_avail(struct tcmu_dev *udev, struct tcmu_cmd *cmd, /* try to check and get the data blocks as needed */ space = spc_bitmap_free(udev->data_bitmap, udev->dbi_thresh); if ((space * DATA_BLOCK_SIZE) < data_needed) { - unsigned long blocks_left = DATA_BLOCK_BITS - udev->dbi_thresh + - space; + unsigned long blocks_left = + (udev->max_blocks - udev->dbi_thresh) + space; if (blocks_left < blocks_needed) { pr_debug("no data space: only %lu available, but ask for %zu\n", @@ -717,8 +764,8 @@ static bool is_ring_space_avail(struct tcmu_dev *udev, struct tcmu_cmd *cmd, } udev->dbi_thresh += blocks_needed; - if (udev->dbi_thresh > DATA_BLOCK_BITS) - udev->dbi_thresh = DATA_BLOCK_BITS; + if (udev->dbi_thresh > udev->max_blocks) + udev->dbi_thresh = udev->max_blocks; } return tcmu_get_empty_blocks(udev, cmd); @@ -1075,7 +1122,7 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev) * reclaim blocks if needed. */ if (atomic_read(&global_db_count) > - TCMU_GLOBAL_MAX_BLOCKS) + tcmu_global_max_blocks) schedule_delayed_work(&tcmu_unmap_work, 0); } } @@ -1194,6 +1241,7 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name) udev->cmd_time_out = TCMU_TIME_OUT; udev->qfull_time_out = -1; + udev->max_blocks = DATA_BLOCK_BITS_DEF; mutex_init(&udev->cmdr_lock); INIT_LIST_HEAD(&udev->timedout_entry); @@ -1335,7 +1383,7 @@ static struct page *tcmu_try_get_block_page(struct tcmu_dev *udev, uint32_t dbi) /* * Since this case is rare in page fault routine, here we - * will allow the global_db_count >= TCMU_GLOBAL_MAX_BLOCKS + * will allow the global_db_count >= tcmu_global_max_blocks * to reduce possible page fault call trace. */ atomic_inc(&global_db_count); @@ -1396,7 +1444,7 @@ static int tcmu_mmap(struct uio_info *info, struct vm_area_struct *vma) vma->vm_private_data = udev; /* Ensure the mmap is exactly the right size */ - if (vma_pages(vma) != (TCMU_RING_SIZE >> PAGE_SHIFT)) + if (vma_pages(vma) != (udev->ring_size >> PAGE_SHIFT)) return -EINVAL; return 0; @@ -1478,6 +1526,7 @@ static void tcmu_dev_kref_release(struct kref *kref) WARN_ON(!all_expired); tcmu_blocks_release(&udev->data_blocks, 0, udev->dbi_max + 1); + kfree(udev->data_bitmap); mutex_unlock(&udev->cmdr_lock); call_rcu(&dev->rcu_head, tcmu_dev_call_rcu); @@ -1654,6 +1703,11 @@ static int tcmu_configure_device(struct se_device *dev) info = &udev->uio_info; + udev->data_bitmap = kzalloc(BITS_TO_LONGS(udev->max_blocks) * + sizeof(unsigned long), GFP_KERNEL); + if (!udev->data_bitmap) + goto err_bitmap_alloc; + udev->mb_addr = vzalloc(CMDR_SIZE); if (!udev->mb_addr) { ret = -ENOMEM; @@ -1663,7 +1717,7 @@ static int tcmu_configure_device(struct se_device *dev) /* mailbox fits in first part of CMDR space */ udev->cmdr_size = CMDR_SIZE - CMDR_OFF; udev->data_off = CMDR_SIZE; - udev->data_size = DATA_SIZE; + udev->data_size = udev->max_blocks * DATA_BLOCK_SIZE; udev->dbi_thresh = 0; /* Default in Idle state */ /* Initialise the mailbox of the ring buffer */ @@ -1681,7 +1735,7 @@ static int tcmu_configure_device(struct se_device *dev) info->mem[0].name = "tcm-user command & data buffer"; info->mem[0].addr = (phys_addr_t)(uintptr_t)udev->mb_addr; - info->mem[0].size = TCMU_RING_SIZE; + info->mem[0].size = udev->ring_size = udev->data_size + CMDR_SIZE; info->mem[0].memtype = UIO_MEM_NONE; info->irqcontrol = tcmu_irqcontrol; @@ -1734,6 +1788,9 @@ static int tcmu_configure_device(struct se_device *dev) vfree(udev->mb_addr); udev->mb_addr = NULL; err_vzalloc: + kfree(udev->data_bitmap); + udev->data_bitmap = NULL; +err_bitmap_alloc: kfree(info->name); info->name = NULL; @@ -1774,7 +1831,7 @@ static void tcmu_destroy_device(struct se_device *dev) enum { Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_hw_max_sectors, - Opt_nl_reply_supported, Opt_err, + Opt_nl_reply_supported, Opt_max_data_area_mb, Opt_err, }; static match_table_t tokens = { @@ -1783,6 +1840,7 @@ static match_table_t tokens = { {Opt_hw_block_size, "hw_block_size=%u"}, {Opt_hw_max_sectors, "hw_max_sectors=%u"}, {Opt_nl_reply_supported, "nl_reply_supported=%d"}, + {Opt_max_data_area_mb, "max_data_area_mb=%u"}, {Opt_err, NULL} }; @@ -1816,7 +1874,7 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev, struct tcmu_dev *udev = TCMU_DEV(dev); char *orig, *ptr, *opts, *arg_p; substring_t args[MAX_OPT_ARGS]; - int ret = 0, token; + int ret = 0, token, tmpval; opts = kstrdup(page, GFP_KERNEL); if (!opts) @@ -1868,6 +1926,39 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev, if (ret < 0) pr_err("kstrtoint() failed for nl_reply_supported=\n"); break; + case Opt_max_data_area_mb: + if (dev->export_count) { + pr_err("Unable to set max_data_area_mb while exports exist\n"); + ret = -EINVAL; + break; + } + + arg_p = match_strdup(&args[0]); + if (!arg_p) { + ret = -ENOMEM; + break; + } + ret = kstrtoint(arg_p, 0, &tmpval); + kfree(arg_p); + if (ret < 0) { + pr_err("kstrtoint() failed for max_data_area_mb=\n"); + break; + } + + if (tmpval <= 0) { + pr_err("Invalid max_data_area %d\n", tmpval); + ret = -EINVAL; + break; + } + + udev->max_blocks = TCMU_MBS_TO_BLOCKS(tmpval); + if (udev->max_blocks > tcmu_global_max_blocks) { + pr_err("%d is too large. Adjusting max_data_area_mb to global limit of %u\n", + tmpval, + TCMU_BLOCKS_TO_MBS(tcmu_global_max_blocks)); + udev->max_blocks = tcmu_global_max_blocks; + } + break; default: break; } @@ -1887,7 +1978,9 @@ static ssize_t tcmu_show_configfs_dev_params(struct se_device *dev, char *b) bl = sprintf(b + bl, "Config: %s ", udev->dev_config[0] ? udev->dev_config : "NULL"); - bl += sprintf(b + bl, "Size: %zu\n", udev->dev_size); + bl += sprintf(b + bl, "Size: %zu ", udev->dev_size); + bl += sprintf(b + bl, "MaxDataAreaMB: %u\n", + TCMU_BLOCKS_TO_MBS(udev->max_blocks)); return bl; } @@ -1973,6 +2066,17 @@ static ssize_t tcmu_qfull_time_out_store(struct config_item *item, } CONFIGFS_ATTR(tcmu_, qfull_time_out); +static ssize_t tcmu_max_data_area_mb_show(struct config_item *item, char *page) +{ + struct se_dev_attrib *da = container_of(to_config_group(item), + struct se_dev_attrib, da_group); + struct tcmu_dev *udev = TCMU_DEV(da->da_dev); + + return snprintf(page, PAGE_SIZE, "%u\n", + TCMU_BLOCKS_TO_MBS(udev->max_blocks)); +} +CONFIGFS_ATTR_RO(tcmu_, max_data_area_mb); + static ssize_t tcmu_dev_config_show(struct config_item *item, char *page) { struct se_dev_attrib *da = container_of(to_config_group(item), @@ -2119,6 +2223,7 @@ CONFIGFS_ATTR(tcmu_, emulate_write_cache); static struct configfs_attribute *tcmu_attrib_attrs[] = { &tcmu_attr_cmd_time_out, &tcmu_attr_qfull_time_out, + &tcmu_attr_max_data_area_mb, &tcmu_attr_dev_config, &tcmu_attr_dev_size, &tcmu_attr_emulate_write_cache, @@ -2152,7 +2257,7 @@ static void find_free_blocks(void) loff_t off; u32 start, end, block, total_freed = 0; - if (atomic_read(&global_db_count) <= TCMU_GLOBAL_MAX_BLOCKS) + if (atomic_read(&global_db_count) <= tcmu_global_max_blocks) return; mutex_lock(&root_udev_mutex); @@ -2200,7 +2305,7 @@ static void find_free_blocks(void) } mutex_unlock(&root_udev_mutex); - if (atomic_read(&global_db_count) > TCMU_GLOBAL_MAX_BLOCKS) + if (atomic_read(&global_db_count) > tcmu_global_max_blocks) schedule_delayed_work(&tcmu_unmap_work, msecs_to_jiffies(5000)); } From e0f3d4c23e2158beaa403b343a75dfb5381a5ebe Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sun, 10 Dec 2017 19:54:11 +0100 Subject: [PATCH 17/36] sbp-target: Delete an error message for a failed memory allocation in three functions Omit an extra message for a memory allocation failure in these functions. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Acked-by: Chris Boot Signed-off-by: Nicholas Bellinger --- drivers/target/sbp/sbp_target.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c index e5c3e5f827d0b8..fb1003921d85af 100644 --- a/drivers/target/sbp/sbp_target.c +++ b/drivers/target/sbp/sbp_target.c @@ -201,10 +201,9 @@ static struct sbp_session *sbp_session_create( snprintf(guid_str, sizeof(guid_str), "%016llx", guid); sess = kmalloc(sizeof(*sess), GFP_KERNEL); - if (!sess) { - pr_err("failed to allocate session descriptor\n"); + if (!sess) return ERR_PTR(-ENOMEM); - } + spin_lock_init(&sess->lock); INIT_LIST_HEAD(&sess->login_list); INIT_DELAYED_WORK(&sess->maint_work, session_maintenance_work); @@ -2029,10 +2028,8 @@ static struct se_portal_group *sbp_make_tpg( } tpg = kzalloc(sizeof(*tpg), GFP_KERNEL); - if (!tpg) { - pr_err("Unable to allocate struct sbp_tpg\n"); + if (!tpg) return ERR_PTR(-ENOMEM); - } tpg->tport = tport; tpg->tport_tpgt = tpgt; @@ -2088,10 +2085,8 @@ static struct se_wwn *sbp_make_tport( return ERR_PTR(-EINVAL); tport = kzalloc(sizeof(*tport), GFP_KERNEL); - if (!tport) { - pr_err("Unable to allocate struct sbp_tport\n"); + if (!tport) return ERR_PTR(-ENOMEM); - } tport->guid = guid; sbp_format_wwn(tport->tport_name, SBP_NAMELEN, guid); From cd3fb32a72c17ff45fd671b12e8b5f018df527b4 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sun, 10 Dec 2017 21:18:15 +0100 Subject: [PATCH 18/36] target: tcm_loop: Delete an error message for a failed memory allocation in four functions Omit an extra message for a memory allocation failure in these functions. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Nicholas Bellinger --- drivers/target/loopback/tcm_loop.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index b6a913e38b3018..6bd58a064924d9 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -184,7 +184,6 @@ static int tcm_loop_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc) tl_cmd = kmem_cache_zalloc(tcm_loop_cmd_cache, GFP_ATOMIC); if (!tl_cmd) { - pr_err("Unable to allocate struct tcm_loop_cmd\n"); set_host_byte(sc, DID_ERROR); sc->scsi_done(sc); return 0; @@ -221,10 +220,8 @@ static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg, } tl_cmd = kmem_cache_zalloc(tcm_loop_cmd_cache, GFP_KERNEL); - if (!tl_cmd) { - pr_err("Unable to allocate memory for tl_cmd\n"); + if (!tl_cmd) return ret; - } init_completion(&tl_cmd->tmr_done); @@ -773,10 +770,8 @@ static int tcm_loop_make_nexus( } tl_nexus = kzalloc(sizeof(struct tcm_loop_nexus), GFP_KERNEL); - if (!tl_nexus) { - pr_err("Unable to allocate struct tcm_loop_nexus\n"); + if (!tl_nexus) return -ENOMEM; - } tl_nexus->se_sess = target_alloc_session(&tl_tpg->tl_se_tpg, 0, 0, TARGET_PROT_DIN_PASS | TARGET_PROT_DOUT_PASS, @@ -1082,10 +1077,9 @@ static struct se_wwn *tcm_loop_make_scsi_hba( int ret, off = 0; tl_hba = kzalloc(sizeof(struct tcm_loop_hba), GFP_KERNEL); - if (!tl_hba) { - pr_err("Unable to allocate struct tcm_loop_hba\n"); + if (!tl_hba) return ERR_PTR(-ENOMEM); - } + /* * Determine the emulated Protocol Identifier and Target Port Name * based on the incoming configfs directory name. From a572dba9e0a72a51d3a624cbb511c39a702c9cf3 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Sun, 10 Dec 2017 21:23:43 +0100 Subject: [PATCH 19/36] target: tcm_loop: Improve a size determination in two functions Replace the specification of data structures by pointer dereferences as the parameter for the operator "sizeof" to make the corresponding size determination a bit safer according to the Linux coding style convention. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Nicholas Bellinger --- drivers/target/loopback/tcm_loop.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index 6bd58a064924d9..60e4185e75088e 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -769,7 +769,7 @@ static int tcm_loop_make_nexus( return -EEXIST; } - tl_nexus = kzalloc(sizeof(struct tcm_loop_nexus), GFP_KERNEL); + tl_nexus = kzalloc(sizeof(*tl_nexus), GFP_KERNEL); if (!tl_nexus) return -ENOMEM; @@ -1076,7 +1076,7 @@ static struct se_wwn *tcm_loop_make_scsi_hba( char *ptr; int ret, off = 0; - tl_hba = kzalloc(sizeof(struct tcm_loop_hba), GFP_KERNEL); + tl_hba = kzalloc(sizeof(*tl_hba), GFP_KERNEL); if (!tl_hba) return ERR_PTR(-ENOMEM); From c8d1f4b76be3361b881bd88b0ebf16b306a4cab8 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Mon, 11 Dec 2017 10:40:23 +0100 Subject: [PATCH 20/36] target: tcm_loop: Combine substrings for 26 messages The script "checkpatch.pl" pointed information out like the following. WARNING: quoted string split across lines Thus fix the affected source code places. Signed-off-by: Markus Elfring Signed-off-by: Nicholas Bellinger --- drivers/target/loopback/tcm_loop.c | 120 +++++++++++++---------------- 1 file changed, 52 insertions(+), 68 deletions(-) diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index 60e4185e75088e..d91109c1c4aee3 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -123,8 +123,8 @@ static void tcm_loop_submission_work(struct work_struct *work) } tl_nexus = tl_tpg->tl_nexus; if (!tl_nexus) { - scmd_printk(KERN_ERR, sc, "TCM_Loop I_T Nexus" - " does not exist\n"); + scmd_printk(KERN_ERR, sc, + "TCM_Loop I_T Nexus does not exist\n"); set_host_byte(sc, DID_ERROR); goto out_done; } @@ -177,10 +177,10 @@ static int tcm_loop_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc) { struct tcm_loop_cmd *tl_cmd; - pr_debug("tcm_loop_queuecommand() %d:%d:%d:%llu got CDB: 0x%02x" - " scsi_buf_len: %u\n", sc->device->host->host_no, - sc->device->id, sc->device->channel, sc->device->lun, - sc->cmnd[0], scsi_bufflen(sc)); + pr_debug("%s() %d:%d:%d:%llu got CDB: 0x%02x scsi_buf_len: %u\n", + __func__, sc->device->host->host_no, sc->device->id, + sc->device->channel, sc->device->lun, sc->cmnd[0], + scsi_bufflen(sc)); tl_cmd = kmem_cache_zalloc(tcm_loop_cmd_cache, GFP_ATOMIC); if (!tl_cmd) { @@ -214,8 +214,7 @@ static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg, */ tl_nexus = tl_tpg->tl_nexus; if (!tl_nexus) { - pr_err("Unable to perform device reset without" - " active I_T Nexus\n"); + pr_err("Unable to perform device reset without active I_T Nexus\n"); return ret; } @@ -295,8 +294,7 @@ static int tcm_loop_target_reset(struct scsi_cmnd *sc) */ tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host); if (!tl_hba) { - pr_err("Unable to perform device reset without" - " active I_T Nexus\n"); + pr_err("Unable to perform device reset without active I_T Nexus\n"); return FAILED; } /* @@ -414,8 +412,7 @@ static int tcm_loop_setup_hba_bus(struct tcm_loop_hba *tl_hba, int tcm_loop_host ret = device_register(&tl_hba->dev); if (ret) { - pr_err("device_register() failed for" - " tl_hba->dev: %d\n", ret); + pr_err("device_register() failed for tl_hba->dev: %d\n", ret); return -ENODEV; } @@ -444,8 +441,7 @@ static int tcm_loop_alloc_core_bus(void) ret = driver_register(&tcm_loop_driverfs); if (ret) { - pr_err("driver_register() failed for" - "tcm_loop_driverfs\n"); + pr_err("driver_register() failed for tcm_loop_driverfs\n"); goto bus_unreg; } @@ -584,8 +580,8 @@ static int tcm_loop_queue_data_in(struct se_cmd *se_cmd) struct tcm_loop_cmd, tl_se_cmd); struct scsi_cmnd *sc = tl_cmd->sc; - pr_debug("tcm_loop_queue_data_in() called for scsi_cmnd: %p" - " cdb: 0x%02x\n", sc, sc->cmnd[0]); + pr_debug("%s() called for scsi_cmnd: %p cdb: 0x%02x\n", + __func__, sc, sc->cmnd[0]); sc->result = SAM_STAT_GOOD; set_host_byte(sc, DID_OK); @@ -602,8 +598,8 @@ static int tcm_loop_queue_status(struct se_cmd *se_cmd) struct tcm_loop_cmd, tl_se_cmd); struct scsi_cmnd *sc = tl_cmd->sc; - pr_debug("tcm_loop_queue_status() called for scsi_cmnd: %p" - " cdb: 0x%02x\n", sc, sc->cmnd[0]); + pr_debug("%s() called for scsi_cmnd: %p cdb: 0x%02x\n", + __func__, sc, sc->cmnd[0]); if (se_cmd->sense_buffer && ((se_cmd->se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) || @@ -688,8 +684,8 @@ static void tcm_loop_port_unlink( sd = scsi_device_lookup(tl_hba->sh, 0, tl_tpg->tl_tpgt, se_lun->unpacked_lun); if (!sd) { - pr_err("Unable to locate struct scsi_device for %d:%d:" - "%llu\n", 0, tl_tpg->tl_tpgt, se_lun->unpacked_lun); + pr_err("Unable to locate struct scsi_device for %d:%d:%llu\n", + 0, tl_tpg->tl_tpgt, se_lun->unpacked_lun); return; } /* @@ -782,9 +778,8 @@ static int tcm_loop_make_nexus( return ret; } - pr_debug("TCM_Loop_ConfigFS: Established I_T Nexus to emulated" - " %s Initiator Port: %s\n", tcm_loop_dump_proto_id(tl_hba), - name); + pr_debug("TCM_Loop_ConfigFS: Established I_T Nexus to emulated %s Initiator Port: %s\n", + tcm_loop_dump_proto_id(tl_hba), name); return 0; } @@ -803,15 +798,14 @@ static int tcm_loop_drop_nexus( return -ENODEV; if (atomic_read(&tpg->tl_tpg_port_count)) { - pr_err("Unable to remove TCM_Loop I_T Nexus with" - " active TPG port count: %d\n", - atomic_read(&tpg->tl_tpg_port_count)); + pr_err("Unable to remove TCM_Loop I_T Nexus with active TPG port count: %d\n", + atomic_read(&tpg->tl_tpg_port_count)); return -EPERM; } - pr_debug("TCM_Loop_ConfigFS: Removing I_T Nexus to emulated" - " %s Initiator Port: %s\n", tcm_loop_dump_proto_id(tpg->tl_hba), - tl_nexus->se_sess->se_node_acl->initiatorname); + pr_debug("TCM_Loop_ConfigFS: Removing I_T Nexus to emulated %s Initiator Port: %s\n", + tcm_loop_dump_proto_id(tpg->tl_hba), + tl_nexus->se_sess->se_node_acl->initiatorname); /* * Release the SCSI I_T Nexus to the emulated Target Port */ @@ -863,8 +857,8 @@ static ssize_t tcm_loop_tpg_nexus_store(struct config_item *item, * tcm_loop_make_nexus() */ if (strlen(page) >= TL_WWN_ADDR_LEN) { - pr_err("Emulated NAA Sas Address: %s, exceeds" - " max: %d\n", page, TL_WWN_ADDR_LEN); + pr_err("Emulated NAA Sas Address: %s, exceeds max: %d\n", + page, TL_WWN_ADDR_LEN); return -EINVAL; } snprintf(&i_port[0], TL_WWN_ADDR_LEN, "%s", page); @@ -872,9 +866,8 @@ static ssize_t tcm_loop_tpg_nexus_store(struct config_item *item, ptr = strstr(i_port, "naa."); if (ptr) { if (tl_hba->tl_proto_id != SCSI_PROTOCOL_SAS) { - pr_err("Passed SAS Initiator Port %s does not" - " match target port protoid: %s\n", i_port, - tcm_loop_dump_proto_id(tl_hba)); + pr_err("Passed SAS Initiator Port %s does not match target port protoid: %s\n", + i_port, tcm_loop_dump_proto_id(tl_hba)); return -EINVAL; } port_ptr = &i_port[0]; @@ -883,9 +876,8 @@ static ssize_t tcm_loop_tpg_nexus_store(struct config_item *item, ptr = strstr(i_port, "fc."); if (ptr) { if (tl_hba->tl_proto_id != SCSI_PROTOCOL_FCP) { - pr_err("Passed FCP Initiator Port %s does not" - " match target port protoid: %s\n", i_port, - tcm_loop_dump_proto_id(tl_hba)); + pr_err("Passed FCP Initiator Port %s does not match target port protoid: %s\n", + i_port, tcm_loop_dump_proto_id(tl_hba)); return -EINVAL; } port_ptr = &i_port[3]; /* Skip over "fc." */ @@ -894,16 +886,15 @@ static ssize_t tcm_loop_tpg_nexus_store(struct config_item *item, ptr = strstr(i_port, "iqn."); if (ptr) { if (tl_hba->tl_proto_id != SCSI_PROTOCOL_ISCSI) { - pr_err("Passed iSCSI Initiator Port %s does not" - " match target port protoid: %s\n", i_port, - tcm_loop_dump_proto_id(tl_hba)); + pr_err("Passed iSCSI Initiator Port %s does not match target port protoid: %s\n", + i_port, tcm_loop_dump_proto_id(tl_hba)); return -EINVAL; } port_ptr = &i_port[0]; goto check_newline; } - pr_err("Unable to locate prefix for emulated Initiator Port:" - " %s\n", i_port); + pr_err("Unable to locate prefix for emulated Initiator Port: %s\n", + i_port); return -EINVAL; /* * Clear any trailing newline for the NAA WWN @@ -1005,16 +996,15 @@ static struct se_portal_group *tcm_loop_make_naa_tpg( unsigned long tpgt; if (strstr(name, "tpgt_") != name) { - pr_err("Unable to locate \"tpgt_#\" directory" - " group\n"); + pr_err("Unable to locate \"tpgt_#\" directory group\n"); return ERR_PTR(-EINVAL); } if (kstrtoul(name+5, 10, &tpgt)) return ERR_PTR(-EINVAL); if (tpgt >= TL_TPGS_PER_HBA) { - pr_err("Passed tpgt: %lu exceeds TL_TPGS_PER_HBA:" - " %u\n", tpgt, TL_TPGS_PER_HBA); + pr_err("Passed tpgt: %lu exceeds TL_TPGS_PER_HBA: %u\n", + tpgt, TL_TPGS_PER_HBA); return ERR_PTR(-EINVAL); } tl_tpg = &tl_hba->tl_hba_tpgs[tpgt]; @@ -1027,10 +1017,9 @@ static struct se_portal_group *tcm_loop_make_naa_tpg( if (ret < 0) return ERR_PTR(-ENOMEM); - pr_debug("TCM_Loop_ConfigFS: Allocated Emulated %s" - " Target Port %s,t,0x%04lx\n", tcm_loop_dump_proto_id(tl_hba), - config_item_name(&wwn->wwn_group.cg_item), tpgt); - + pr_debug("TCM_Loop_ConfigFS: Allocated Emulated %s Target Port %s,t,0x%04lx\n", + tcm_loop_dump_proto_id(tl_hba), + config_item_name(&wwn->wwn_group.cg_item), tpgt); return &tl_tpg->tl_se_tpg; } @@ -1057,9 +1046,9 @@ static void tcm_loop_drop_naa_tpg( tl_tpg->tl_hba = NULL; tl_tpg->tl_tpgt = 0; - pr_debug("TCM_Loop_ConfigFS: Deallocated Emulated %s" - " Target Port %s,t,0x%04x\n", tcm_loop_dump_proto_id(tl_hba), - config_item_name(&wwn->wwn_group.cg_item), tpgt); + pr_debug("TCM_Loop_ConfigFS: Deallocated Emulated %s Target Port %s,t,0x%04x\n", + tcm_loop_dump_proto_id(tl_hba), + config_item_name(&wwn->wwn_group.cg_item), tpgt); } /* End items for tcm_loop_naa_cit */ @@ -1097,8 +1086,8 @@ static struct se_wwn *tcm_loop_make_scsi_hba( } ptr = strstr(name, "iqn."); if (!ptr) { - pr_err("Unable to locate prefix for emulated Target " - "Port: %s\n", name); + pr_err("Unable to locate prefix for emulated Target Port: %s\n", + name); ret = -EINVAL; goto out; } @@ -1106,9 +1095,8 @@ static struct se_wwn *tcm_loop_make_scsi_hba( check_len: if (strlen(name) >= TL_WWN_ADDR_LEN) { - pr_err("Emulated NAA %s Address: %s, exceeds" - " max: %d\n", name, tcm_loop_dump_proto_id(tl_hba), - TL_WWN_ADDR_LEN); + pr_err("Emulated NAA %s Address: %s, exceeds max: %d\n", + name, tcm_loop_dump_proto_id(tl_hba), TL_WWN_ADDR_LEN); ret = -EINVAL; goto out; } @@ -1125,10 +1113,8 @@ static struct se_wwn *tcm_loop_make_scsi_hba( sh = tl_hba->sh; tcm_loop_hba_no_cnt++; - pr_debug("TCM_Loop_ConfigFS: Allocated emulated Target" - " %s Address: %s at Linux/SCSI Host ID: %d\n", - tcm_loop_dump_proto_id(tl_hba), name, sh->host_no); - + pr_debug("TCM_Loop_ConfigFS: Allocated emulated Target %s Address: %s at Linux/SCSI Host ID: %d\n", + tcm_loop_dump_proto_id(tl_hba), name, sh->host_no); return &tl_hba->tl_hba_wwn; out: kfree(tl_hba); @@ -1141,10 +1127,9 @@ static void tcm_loop_drop_scsi_hba( struct tcm_loop_hba *tl_hba = container_of(wwn, struct tcm_loop_hba, tl_hba_wwn); - pr_debug("TCM_Loop_ConfigFS: Deallocating emulated Target" - " %s Address: %s at Linux/SCSI Host ID: %d\n", - tcm_loop_dump_proto_id(tl_hba), tl_hba->tl_wwn_address, - tl_hba->sh->host_no); + pr_debug("TCM_Loop_ConfigFS: Deallocating emulated Target %s Address: %s at Linux/SCSI Host ID: %d\n", + tcm_loop_dump_proto_id(tl_hba), tl_hba->tl_wwn_address, + tl_hba->sh->host_no); /* * Call device_unregister() on the original tl_hba->dev. * tcm_loop_fabric_scsi.c:tcm_loop_release_adapter() will @@ -1217,8 +1202,7 @@ static int __init tcm_loop_fabric_init(void) __alignof__(struct tcm_loop_cmd), 0, NULL); if (!tcm_loop_cmd_cache) { - pr_debug("kmem_cache_create() for" - " tcm_loop_cmd_cache failed\n"); + pr_debug("kmem_cache_create() for tcm_loop_cmd_cache failed\n"); goto out_destroy_workqueue; } From 7deeceb4c7c53efeba54fbc7aab9e6b393f5d984 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Mon, 11 Dec 2017 10:56:42 +0100 Subject: [PATCH 21/36] target: tcm_loop: Delete two unnecessary variable initialisations in tcm_loop_issue_tmr() The variables "se_cmd" and "tl_cmd" will eventually be set to appropriate pointers a bit later. Thus omit the explicit initialisation at the beginning. Signed-off-by: Markus Elfring Signed-off-by: Nicholas Bellinger --- drivers/target/loopback/tcm_loop.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index d91109c1c4aee3..08dbf5b4ff15e6 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -203,10 +203,10 @@ static int tcm_loop_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc) static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg, u64 lun, int task, enum tcm_tmreq_table tmr) { - struct se_cmd *se_cmd = NULL; + struct se_cmd *se_cmd; struct se_session *se_sess; struct tcm_loop_nexus *tl_nexus; - struct tcm_loop_cmd *tl_cmd = NULL; + struct tcm_loop_cmd *tl_cmd; int ret = TMR_FUNCTION_FAILED, rc; /* From 2dfe3511ce8c15f25c121f30353dcf02b074ef15 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Mon, 11 Dec 2017 10:58:33 +0100 Subject: [PATCH 22/36] target: tcm_loop: Delete an unnecessary return statement in tcm_loop_submission_work() The script "checkpatch.pl" pointed information out like the following. WARNING: void function return statements are not generally useful Thus remove such a statement in the affected function. Signed-off-by: Markus Elfring Signed-off-by: Nicholas Bellinger --- drivers/target/loopback/tcm_loop.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index 08dbf5b4ff15e6..ec01070bbb40e3 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -166,7 +166,6 @@ static void tcm_loop_submission_work(struct work_struct *work) out_done: kmem_cache_free(tcm_loop_cmd_cache, tl_cmd); sc->scsi_done(sc); - return; } /* From 09f99a3dfadce47ba868cee318d55d249eb4e413 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Mon, 11 Dec 2017 11:01:57 +0100 Subject: [PATCH 23/36] target: tcm_loop: Use seq_puts() in tcm_loop_show_info() The script "checkpatch.pl" pointed information out like the following. WARNING: Prefer seq_puts to seq_printf Thus fix the affected source code place. Signed-off-by: Markus Elfring Signed-off-by: Nicholas Bellinger --- drivers/target/loopback/tcm_loop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index ec01070bbb40e3..9cd4ffe76c07ff 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -64,7 +64,7 @@ static void tcm_loop_release_cmd(struct se_cmd *se_cmd) static int tcm_loop_show_info(struct seq_file *m, struct Scsi_Host *host) { - seq_printf(m, "tcm_loop_proc_info()\n"); + seq_puts(m, "tcm_loop_proc_info()\n"); return 0; } From 093ec1430f41063d65dfc68d7eddec9d80f8efbb Mon Sep 17 00:00:00 2001 From: Varun Prakash Date: Mon, 11 Dec 2017 21:00:25 +0530 Subject: [PATCH 24/36] cxgbit: call neigh_event_send() to update MAC address If nud_state is not valid then call neigh_event_send() to update MAC address. Signed-off-by: Varun Prakash Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/cxgbit/cxgbit_cm.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index 92eb57e2adaf55..8de16016b6de90 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -893,6 +893,9 @@ cxgbit_offload_init(struct cxgbit_sock *csk, int iptype, __u8 *peer_ip, return -ENODEV; rcu_read_lock(); + if (!(n->nud_state & NUD_VALID)) + neigh_event_send(n, NULL); + ret = -ENOMEM; if (n->dev->flags & IFF_LOOPBACK) { if (iptype == 4) From ce512d79d0466a604793addb6b769d12ee326822 Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Wed, 13 Dec 2017 18:22:30 +0100 Subject: [PATCH 25/36] target/iscsi: avoid NULL dereference in CHAP auth error path If chap_server_compute_md5() fails early, e.g. via CHAP_N mismatch, then crypto_free_shash() is called with a NULL pointer which gets dereferenced in crypto_shash_tfm(). Fixes: 69110e3cedbb ("iscsi-target: Use shash and ahash") Suggested-by: Markus Elfring Signed-off-by: David Disseldorp Cc: stable@vger.kernel.org # 4.6+ Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target_auth.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c index f9bc8ec6fb6b5e..9518ffd8b8bac8 100644 --- a/drivers/target/iscsi/iscsi_target_auth.c +++ b/drivers/target/iscsi/iscsi_target_auth.c @@ -421,7 +421,8 @@ static int chap_server_compute_md5( auth_ret = 0; out: kzfree(desc); - crypto_free_shash(tfm); + if (tfm) + crypto_free_shash(tfm); kfree(challenge); kfree(challenge_binhex); return auth_ret; From 9960f85181dd08cda03fddcf0bc8d81190bec4eb Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Wed, 13 Dec 2017 13:55:13 -0800 Subject: [PATCH 26/36] target: don't call an unmap callback if a range length is zero If a length of a range is zero, it means there is nothing to unmap and we can skip this range. Here is one more reason, why we have to skip such ranges. An unmap callback calls file_operations->fallocate(), but the man page for the fallocate syscall says that fallocate(fd, mode, offset, let) returns EINVAL, if len is zero. It means that file_operations->fallocate() isn't obligated to handle zero ranges too. Signed-off-by: Andrei Vagin Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_sbc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 750a04ed0e93ac..b054682e974f91 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -1216,9 +1216,11 @@ sbc_execute_unmap(struct se_cmd *cmd) goto err; } - ret = ops->execute_unmap(cmd, lba, range); - if (ret) - goto err; + if (range) { + ret = ops->execute_unmap(cmd, lba, range); + if (ret) + goto err; + } ptr += 16; size -= 16; From c1c390ba53195aef36e94b2354bc0e603057c293 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 19 Dec 2017 04:03:54 -0600 Subject: [PATCH 27/36] tcmu: prevent corruption when invalid data page requested We will always have a page mapped for cmd data if it is valid command. If the mapping does not exist then something bad happened in userspace and it should not proceed. This has us return VM_FAULT_SIGBUS when this happens instead of returning a freshly allocated paged. The latter can cause corruption because userspace might write the pages data overwriting valid data or return it to the initiator. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 43 +++++-------------------------- 1 file changed, 6 insertions(+), 37 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index bac08bc72e3b91..a746fedbb4869f 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1342,7 +1342,6 @@ static int tcmu_find_mem_index(struct vm_area_struct *vma) static struct page *tcmu_try_get_block_page(struct tcmu_dev *udev, uint32_t dbi) { struct page *page; - int ret; mutex_lock(&udev->cmdr_lock); page = tcmu_get_block_page(udev, dbi); @@ -1352,42 +1351,12 @@ static struct page *tcmu_try_get_block_page(struct tcmu_dev *udev, uint32_t dbi) } /* - * Normally it shouldn't be here: - * Only when the userspace has touched the blocks which - * are out of the tcmu_cmd's data iov[], and will return - * one zeroed page. + * Userspace messed up and passed in a address not in the + * data iov passed to it. */ - pr_warn("Block(%u) out of cmd's iov[] has been touched!\n", dbi); - pr_warn("Mostly it will be a bug of userspace, please have a check!\n"); - - if (dbi >= udev->dbi_thresh) { - /* Extern the udev->dbi_thresh to dbi + 1 */ - udev->dbi_thresh = dbi + 1; - udev->dbi_max = dbi; - } - - page = radix_tree_lookup(&udev->data_blocks, dbi); - if (!page) { - page = alloc_page(GFP_KERNEL | __GFP_ZERO); - if (!page) { - mutex_unlock(&udev->cmdr_lock); - return NULL; - } - - ret = radix_tree_insert(&udev->data_blocks, dbi, page); - if (ret) { - mutex_unlock(&udev->cmdr_lock); - __free_page(page); - return NULL; - } - - /* - * Since this case is rare in page fault routine, here we - * will allow the global_db_count >= tcmu_global_max_blocks - * to reduce possible page fault call trace. - */ - atomic_inc(&global_db_count); - } + pr_err("Invalid addr to data block mapping (dbi %u) on device %s\n", + dbi, udev->name); + page = NULL; mutex_unlock(&udev->cmdr_lock); return page; @@ -1422,7 +1391,7 @@ static int tcmu_vma_fault(struct vm_fault *vmf) dbi = (offset - udev->data_off) / DATA_BLOCK_SIZE; page = tcmu_try_get_block_page(udev, dbi); if (!page) - return VM_FAULT_NOPAGE; + return VM_FAULT_SIGBUS; } get_page(page); From d120c7083f49d177e6765afad543faa0f243590e Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 19 Dec 2017 04:03:55 -0600 Subject: [PATCH 28/36] target: add SAM_STAT_BUSY sense reason Add SAM_STAT_BUSY sense_reason. The next patch will have target_core_user return this value while it is temporarily blocked and restarting. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_transport.c | 3 +++ include/target/target_core_base.h | 1 + 2 files changed, 4 insertions(+) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 58caacd54a3b2a..74b646f165d4e4 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -1774,6 +1774,9 @@ void transport_generic_request_failure(struct se_cmd *cmd, case TCM_OUT_OF_RESOURCES: cmd->scsi_status = SAM_STAT_TASK_SET_FULL; goto queue_status; + case TCM_LUN_BUSY: + cmd->scsi_status = SAM_STAT_BUSY; + goto queue_status; case TCM_RESERVATION_CONFLICT: /* * No SENSE Data payload for this case, set SCSI Status diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 2c8d8115469dce..00482f903decba 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -183,6 +183,7 @@ enum tcm_sense_reason_table { TCM_TOO_MANY_SEGMENT_DESCS = R(0x1b), TCM_UNSUPPORTED_SEGMENT_DESC_TYPE_CODE = R(0x1c), TCM_INSUFFICIENT_REGISTRATION_RESOURCES = R(0x1d), + TCM_LUN_BUSY = R(0x1e), #undef R }; From 88cf107325fc7e37925de1960bd34ab917924362 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 19 Dec 2017 04:03:56 -0600 Subject: [PATCH 29/36] target_core_user: add cmd id to broken ring message Log cmd id that was not found in the tcmu_handle_completions lookup failure path. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index a746fedbb4869f..1238480cd4c4f2 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1098,7 +1098,8 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev) cmd = idr_remove(&udev->commands, entry->hdr.cmd_id); if (!cmd) { - pr_err("cmd_id not found, ring is broken\n"); + pr_err("cmd_id %u not found, ring is broken\n", + entry->hdr.cmd_id); set_bit(TCMU_DEV_BIT_BROKEN, &udev->flags); break; } From a24e7917e1758daf9ac0da4fac0f7b48f0b4b624 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 11 Jan 2018 11:12:25 +0000 Subject: [PATCH 30/36] tcmu: fix error return code in tcmu_configure_device() Fix to return error code -ENOMEM from the kzalloc() error handling case instead of 0, as done elsewhere in this function. Fixes: 80eb876 ("tcmu: allow max block and global max blocks to be settable") Signed-off-by: Wei Yongjun Acked-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 1238480cd4c4f2..60c8a87b7a882a 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1675,8 +1675,10 @@ static int tcmu_configure_device(struct se_device *dev) udev->data_bitmap = kzalloc(BITS_TO_LONGS(udev->max_blocks) * sizeof(unsigned long), GFP_KERNEL); - if (!udev->data_bitmap) + if (!udev->data_bitmap) { + ret = -ENOMEM; goto err_bitmap_alloc; + } udev->mb_addr = vzalloc(CMDR_SIZE); if (!udev->mb_addr) { From 8dc31ff9298963425f5c4bb6011bc2bedb76b1e9 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 19 Dec 2017 04:03:57 -0600 Subject: [PATCH 31/36] target core: add device action configfs files This patch adds a new group of files that are to be used to have the kernel module execution some action. The next patch will have target_core_user use the group/files to be able to block a device and to reset its memory buffer used to pass commands between user/kernel space. This type of file is different from the existing device attributes in that they may be write only and when written to they result in the kernel module executing some function. These need to be separate from the normal device attributes which get/set device values so userspace can continue to loop over all the attribs and get/set them during initialization. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_configfs.c | 6 ++++++ drivers/target/target_core_internal.h | 1 + include/target/target_core_backend.h | 1 + include/target/target_core_base.h | 1 + 4 files changed, 9 insertions(+) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 72b1cd1bf9d9fd..3f4bf126eed066 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -1197,6 +1197,7 @@ struct configfs_attribute *passthrough_attrib_attrs[] = { EXPORT_SYMBOL(passthrough_attrib_attrs); TB_CIT_SETUP_DRV(dev_attrib, NULL, NULL); +TB_CIT_SETUP_DRV(dev_action, NULL, NULL); /* End functions for struct config_item_type tb_dev_attrib_cit */ @@ -2940,6 +2941,10 @@ static struct config_group *target_core_make_subdev( config_group_init_type_name(&dev->dev_group, name, &tb->tb_dev_cit); + config_group_init_type_name(&dev->dev_action_group, "action", + &tb->tb_dev_action_cit); + configfs_add_default_group(&dev->dev_action_group, &dev->dev_group); + config_group_init_type_name(&dev->dev_attrib.da_group, "attrib", &tb->tb_dev_attrib_cit); configfs_add_default_group(&dev->dev_attrib.da_group, &dev->dev_group); @@ -3200,6 +3205,7 @@ static const struct config_item_type target_core_cit = { void target_setup_backend_cits(struct target_backend *tb) { target_core_setup_dev_cit(tb); + target_core_setup_dev_action_cit(tb); target_core_setup_dev_attrib_cit(tb); target_core_setup_dev_pr_cit(tb); target_core_setup_dev_wwn_cit(tb); diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index 6d53d9fcb883b7..1d5afc3ae017cf 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -17,6 +17,7 @@ struct target_backend { struct config_item_type tb_dev_cit; struct config_item_type tb_dev_attrib_cit; + struct config_item_type tb_dev_action_cit; struct config_item_type tb_dev_pr_cit; struct config_item_type tb_dev_wwn_cit; struct config_item_type tb_dev_alua_tg_pt_gps_cit; diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index b6b3fb444a9204..34a15d59ed887e 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -53,6 +53,7 @@ struct target_backend_ops { void (*free_prot)(struct se_device *); struct configfs_attribute **tb_dev_attrib_attrs; + struct configfs_attribute **tb_dev_action_attrs; }; struct sbc_ops { diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 00482f903decba..9f9f5902af386b 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -809,6 +809,7 @@ struct se_device { /* T10 SPC-2 + SPC-3 Reservations */ struct t10_reservation t10_pr; struct se_dev_attrib dev_attrib; + struct config_group dev_action_group; struct config_group dev_group; struct config_group dev_pr_group; struct se_dev_stat_grps dev_stat_grps; From 892782caf19a97ccc95df51b3bb659ecacff986a Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Tue, 19 Dec 2017 04:03:58 -0600 Subject: [PATCH 32/36] tcmu: allow userspace to reset ring This patch adds 2 tcmu attrs to block/unblock a device and reset the ring buffer. They are used when the userspace daemon has crashed or forced to shutdown while IO is executing. On restart, the daemon can block the device so new IO is not sent to userspace while it puts the ring in a clean state. Notes: The reset ring opreation is specific to tcmu, but the block one could be generic. I kept it tcmu specific, because it requires some extra locking/state checks in the main IO path and since other backend modules did not need this functionality I thought only tcmu should take the perf hit. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 170 +++++++++++++++++++++++++++++- 1 file changed, 166 insertions(+), 4 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 60c8a87b7a882a..511168bec15931 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -121,6 +121,7 @@ struct tcmu_dev { #define TCMU_DEV_BIT_OPEN 0 #define TCMU_DEV_BIT_BROKEN 1 +#define TCMU_DEV_BIT_BLOCKED 2 unsigned long flags; struct uio_info uio_info; @@ -875,6 +876,11 @@ static sense_reason_t queue_cmd_ring(struct tcmu_cmd *tcmu_cmd, int *scsi_err) *scsi_err = TCM_NO_SENSE; + if (test_bit(TCMU_DEV_BIT_BLOCKED, &udev->flags)) { + *scsi_err = TCM_LUN_BUSY; + return -1; + } + if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags)) { *scsi_err = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; return -1; @@ -1260,7 +1266,7 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name) return &udev->se_dev; } -static bool run_cmdr_queue(struct tcmu_dev *udev) +static bool run_cmdr_queue(struct tcmu_dev *udev, bool fail) { struct tcmu_cmd *tcmu_cmd, *tmp_cmd; LIST_HEAD(cmds); @@ -1271,7 +1277,7 @@ static bool run_cmdr_queue(struct tcmu_dev *udev) if (list_empty(&udev->cmdr_queue)) return true; - pr_debug("running %s's cmdr queue\n", udev->name); + pr_debug("running %s's cmdr queue forcefail %d\n", udev->name, fail); list_splice_init(&udev->cmdr_queue, &cmds); @@ -1281,6 +1287,20 @@ static bool run_cmdr_queue(struct tcmu_dev *udev) pr_debug("removing cmd %u on dev %s from queue\n", tcmu_cmd->cmd_id, udev->name); + if (fail) { + idr_remove(&udev->commands, tcmu_cmd->cmd_id); + /* + * We were not able to even start the command, so + * fail with busy to allow a retry in case runner + * was only temporarily down. If the device is being + * removed then LIO core will do the right thing and + * fail the retry. + */ + target_complete_cmd(tcmu_cmd->se_cmd, SAM_STAT_BUSY); + tcmu_free_cmd(tcmu_cmd); + continue; + } + ret = queue_cmd_ring(tcmu_cmd, &scsi_ret); if (ret < 0) { pr_debug("cmd %u on dev %s failed with %u\n", @@ -1317,7 +1337,7 @@ static int tcmu_irqcontrol(struct uio_info *info, s32 irq_on) mutex_lock(&udev->cmdr_lock); tcmu_handle_completions(udev); - run_cmdr_queue(udev); + run_cmdr_queue(udev, false); mutex_unlock(&udev->cmdr_lock); return 0; @@ -1801,6 +1821,78 @@ static void tcmu_destroy_device(struct se_device *dev) kref_put(&udev->kref, tcmu_dev_kref_release); } +static void tcmu_unblock_dev(struct tcmu_dev *udev) +{ + mutex_lock(&udev->cmdr_lock); + clear_bit(TCMU_DEV_BIT_BLOCKED, &udev->flags); + mutex_unlock(&udev->cmdr_lock); +} + +static void tcmu_block_dev(struct tcmu_dev *udev) +{ + mutex_lock(&udev->cmdr_lock); + + if (test_and_set_bit(TCMU_DEV_BIT_BLOCKED, &udev->flags)) + goto unlock; + + /* complete IO that has executed successfully */ + tcmu_handle_completions(udev); + /* fail IO waiting to be queued */ + run_cmdr_queue(udev, true); + +unlock: + mutex_unlock(&udev->cmdr_lock); +} + +static void tcmu_reset_ring(struct tcmu_dev *udev, u8 err_level) +{ + struct tcmu_mailbox *mb; + struct tcmu_cmd *cmd; + int i; + + mutex_lock(&udev->cmdr_lock); + + idr_for_each_entry(&udev->commands, cmd, i) { + if (!list_empty(&cmd->cmdr_queue_entry)) + continue; + + pr_debug("removing cmd %u on dev %s from ring (is expired %d)\n", + cmd->cmd_id, udev->name, + test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)); + + idr_remove(&udev->commands, i); + if (!test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) { + if (err_level == 1) { + /* + * Userspace was not able to start the + * command or it is retryable. + */ + target_complete_cmd(cmd->se_cmd, SAM_STAT_BUSY); + } else { + /* hard failure */ + target_complete_cmd(cmd->se_cmd, + SAM_STAT_CHECK_CONDITION); + } + } + tcmu_cmd_free_data(cmd, cmd->dbi_cnt); + tcmu_free_cmd(cmd); + } + + mb = udev->mb_addr; + tcmu_flush_dcache_range(mb, sizeof(*mb)); + pr_debug("mb last %u head %u tail %u\n", udev->cmdr_last_cleaned, + mb->cmd_tail, mb->cmd_head); + + udev->cmdr_last_cleaned = 0; + mb->cmd_tail = 0; + mb->cmd_head = 0; + tcmu_flush_dcache_range(mb, sizeof(*mb)); + + del_timer(&udev->cmd_timer); + + mutex_unlock(&udev->cmdr_lock); +} + enum { Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_hw_max_sectors, Opt_nl_reply_supported, Opt_max_data_area_mb, Opt_err, @@ -2192,6 +2284,70 @@ static ssize_t tcmu_emulate_write_cache_store(struct config_item *item, } CONFIGFS_ATTR(tcmu_, emulate_write_cache); +static ssize_t tcmu_block_dev_show(struct config_item *item, char *page) +{ + struct se_device *se_dev = container_of(to_config_group(item), + struct se_device, + dev_action_group); + struct tcmu_dev *udev = TCMU_DEV(se_dev); + + if (test_bit(TCMU_DEV_BIT_BLOCKED, &udev->flags)) + return snprintf(page, PAGE_SIZE, "%s\n", "blocked"); + else + return snprintf(page, PAGE_SIZE, "%s\n", "unblocked"); +} + +static ssize_t tcmu_block_dev_store(struct config_item *item, const char *page, + size_t count) +{ + struct se_device *se_dev = container_of(to_config_group(item), + struct se_device, + dev_action_group); + struct tcmu_dev *udev = TCMU_DEV(se_dev); + u8 val; + int ret; + + ret = kstrtou8(page, 0, &val); + if (ret < 0) + return ret; + + if (val > 1) { + pr_err("Invalid block value %d\n", val); + return -EINVAL; + } + + if (!val) + tcmu_unblock_dev(udev); + else + tcmu_block_dev(udev); + return count; +} +CONFIGFS_ATTR(tcmu_, block_dev); + +static ssize_t tcmu_reset_ring_store(struct config_item *item, const char *page, + size_t count) +{ + struct se_device *se_dev = container_of(to_config_group(item), + struct se_device, + dev_action_group); + struct tcmu_dev *udev = TCMU_DEV(se_dev); + u8 val; + int ret; + + ret = kstrtou8(page, 0, &val); + if (ret < 0) + return ret; + + if (val != 1 && val != 2) { + pr_err("Invalid reset ring value %d\n", val); + return -EINVAL; + } + + tcmu_reset_ring(udev, val); + return count; +} +CONFIGFS_ATTR_WO(tcmu_, reset_ring); + static struct configfs_attribute *tcmu_attrib_attrs[] = { &tcmu_attr_cmd_time_out, &tcmu_attr_qfull_time_out, @@ -2205,6 +2361,12 @@ static struct configfs_attribute *tcmu_attrib_attrs[] = { static struct configfs_attribute **tcmu_attrs; +static struct configfs_attribute *tcmu_action_attrs[] = { + &tcmu_attr_block_dev, + &tcmu_attr_reset_ring, + NULL, +}; + static struct target_backend_ops tcmu_ops = { .name = "user", .owner = THIS_MODULE, @@ -2220,7 +2382,7 @@ static struct target_backend_ops tcmu_ops = { .show_configfs_dev_params = tcmu_show_configfs_dev_params, .get_device_type = sbc_get_device_type, .get_blocks = tcmu_get_blocks, - .tb_dev_attrib_attrs = NULL, + .tb_dev_action_attrs = tcmu_action_attrs, }; static void find_free_blocks(void) From c82b59e7c3f81059b1d280e21028c7ac8451dd52 Mon Sep 17 00:00:00 2001 From: tangwenji Date: Mon, 15 Jan 2018 20:09:37 +0800 Subject: [PATCH 33/36] target: fix destroy device in target_configure_device After dev->transport->configure_device succeeds, target_configure_device exits abnormally, dev_flags has not set DF_CONFIGURED yet, does not call destroy_device function in free_device. Signed-off-by: tangwenji Acked-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_device.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index e8dd6da164b285..e27db4d45a9d34 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -997,7 +997,7 @@ int target_configure_device(struct se_device *dev) ret = core_setup_alua(dev); if (ret) - goto out_free_index; + goto out_destroy_device; /* * Startup the struct se_device processing thread @@ -1041,6 +1041,8 @@ int target_configure_device(struct se_device *dev) out_free_alua: core_alua_free_lu_gp_mem(dev); +out_destroy_device: + dev->transport->destroy_device(dev); out_free_index: mutex_lock(&device_mutex); idr_remove(&devices_idr, dev->dev_index); From 45dc488c0ee19ba5cba7a67be473aeaf88a7447e Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 15 Jan 2018 14:37:59 -0600 Subject: [PATCH 34/36] tcmu: fix cmd user after free If we are failing the command due to a qfull timeout we are also freeing the tcmu command, so we cannot access it later to get the se_cmd. Note: The clearing of cmd->se_cmd is not needed. We do not check it later for something like determining if the command was failed due to a timeout. As a result I am dropping it. Signed-off-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 511168bec15931..3096257a00d913 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1152,6 +1152,7 @@ static int tcmu_check_expired_cmd(int id, void *p, void *data) return 0; is_running = list_empty(&cmd->cmdr_queue_entry); + se_cmd = cmd->se_cmd; if (is_running) { /* @@ -1177,8 +1178,6 @@ static int tcmu_check_expired_cmd(int id, void *p, void *data) pr_debug("Timing out cmd %u on dev %s that is %s.\n", id, udev->name, is_running ? "inflight" : "queued"); - se_cmd = cmd->se_cmd; - cmd->se_cmd = NULL; target_complete_cmd(se_cmd, scsi_status); return 0; } From 85fae482a9dd3560b9dcd35136fb29ccab6fae48 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Tue, 16 Jan 2018 15:34:32 +0000 Subject: [PATCH 35/36] tcmu: Fix trailing semicolon The trailing semicolon is an empty statement that does no operation. It is completely stripped out by the compiler. Removing it since it doesn't do anything. Signed-off-by: Luis de Bethencourt Acked-by: Mike Christie Signed-off-by: Nicholas Bellinger --- drivers/target/target_core_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 3096257a00d913..4ad89ea71a7011 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1583,7 +1583,7 @@ static int tcmu_wait_genl_cmd_reply(struct tcmu_dev *udev) wake_up_all(&udev->nl_cmd_wq); - return ret;; + return ret; } static int tcmu_netlink_event(struct tcmu_dev *udev, enum tcmu_genl_cmd cmd, From 1c130ae00b769a2e2df41bad3d6051ee8234b636 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 19 Jan 2018 14:36:29 +0100 Subject: [PATCH 36/36] iscsi-target: make sure to wake up sleeping login worker Mike Christie reports: Starting in 4.14 iscsi logins will fail around 50% of the time. Problem appears to be that iscsi_target_sk_data_ready() callback may return without doing anything in case it finds the login work queue is still blocked in sock_recvmsg(). Nicholas Bellinger says: It would indicate users providing their own ->sk_data_ready() callback must be responsible for waking up a kthread context blocked on sock_recvmsg(..., MSG_WAITALL), when a second ->sk_data_ready() is received before the first sock_recvmsg(..., MSG_WAITALL) completes. So, do this and invoke the original data_ready() callback -- in case of tcp sockets this takes care of waking the thread. Disclaimer: I do not understand why this problem did not show up before tcp prequeue removal. (Drop WARN_ON usage - nab) Reported-by: Mike Christie Bisected-by: Mike Christie Tested-by: Mike Christie Diagnosed-by: Nicholas Bellinger Fixes: e7942d0633c4 ("tcp: remove prequeue support") Signed-off-by: Florian Westphal Cc: stable@vger.kernel.org # 4.14+ Signed-off-by: Nicholas Bellinger --- drivers/target/iscsi/iscsi_target_nego.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index b686e2ce9c0e5d..8a5e8d17a94262 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -432,6 +432,9 @@ static void iscsi_target_sk_data_ready(struct sock *sk) if (test_and_set_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags)) { write_unlock_bh(&sk->sk_callback_lock); pr_debug("Got LOGIN_FLAGS_READ_ACTIVE=1, conn: %p >>>>\n", conn); + if (iscsi_target_sk_data_ready == conn->orig_data_ready) + return; + conn->orig_data_ready(sk); return; }