Commit 8603e1b
workqueue: fix hang involving racing cancel[_delayed]_work_sync()'s for PREEMPT_NONE
cancel[_delayed]_work_sync() are implemented using __cancel_work_timer()
which grabs the PENDING bit using try_to_grab_pending() and then flushes
the work item with PENDING set to prevent the on-going execution of the
work item from requeueing itself.

try_to_grab_pending() can always grab PENDING bit without blocking
except when someone else is doing the above flushing during
cancelation.  In that case, try_to_grab_pending() returns -ENOENT.  In
this case, __cancel_work_timer() currently invokes flush_work().  The
assumption is that the completion of the work item is what the other
canceling task would be waiting for too and thus waiting for the same
condition and retrying should allow forward progress without excessive
busy looping.

Unfortunately, this doesn't work if preemption is disabled or the
latter task has real time priority.  Let's say task A just got woken up
from flush_work() by the completion of the target work item.  If,
before task A starts executing, task B gets scheduled and invokes
__cancel_work_timer() on the same work item, its try_to_grab_pending()
will return -ENOENT as the work item is still being canceled by task A
and flush_work() will also immediately return false as the work item is
no longer executing.  This puts task B in a busy loop possibly
preventing task A from executing and clearing the canceling state on
the work item leading to a hang.

 task A                  task B                  worker

                                                 executing work
 __cancel_work_timer()
   try_to_grab_pending()
   set work CANCELING
   flush_work()
     block for work completion
                                                 completion, wakes up A
                         __cancel_work_timer()
                         while (forever) {
                           try_to_grab_pending()
                                   -ENOENT as work is being canceled
                           flush_work()
                                   false as work is no longer executing
                         }

This patch removes the possible hang by updating __cancel_work_timer()
to explicitly wait for clearing of CANCELING rather than invoking
flush_work() after try_to_grab_pending() fails with -ENOENT.

Link: http://lkml.kernel.org/g/[email protected]

v3: bit_waitqueue() can't be used for work items defined in vmalloc
    area.  Switched to custom wake function which matches the target
    work item and exclusive wait and wakeup.

v2: v1 used wake_up() on bit_waitqueue() which leads to NULL deref if
    the target bit waitqueue has wait_bit_queue's on it.  Use
    DEFINE_WAIT_BIT() and __wake_up_bit() instead.  Reported by Tomeu
    Vizoso.

Signed-off-by: Tejun Heo <[email protected]>
Reported-by: Rabin Vincent <[email protected]>
Cc: Tomeu Vizoso <[email protected]>
Cc: [email protected]
Tested-by: Jesper Nilsson <[email protected]>
Tested-by: Rabin Vincent <[email protected]>
1 parent c517d83 commit 8603e1b
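To make the race concrete, here is a minimal, hypothetical reproducer
sketch; the module, work function, and thread names are illustrative
and not part of this commit.  Two kthreads repeatedly queue and
synchronously cancel the same self-requeueing work item; on a
PREEMPT_NONE kernel without this fix, the canceler that loses the race
to set CANCELING can busy-loop in __cancel_work_timer() as described
above.

/*
 * Hypothetical reproducer sketch, not part of the commit: two kthreads
 * race cancel_work_sync() on one self-requeueing work item.  Error
 * handling is elided for brevity.
 */
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>

static struct work_struct racy_work;
static struct task_struct *cancelers[2];

static void racy_work_fn(struct work_struct *work)
{
	/* Requeue ourselves so there is always something to cancel. */
	schedule_work(work);
}

static int canceler_fn(void *unused)
{
	while (!kthread_should_stop()) {
		schedule_work(&racy_work);
		/* Both threads may see CANCELING set by the other one. */
		cancel_work_sync(&racy_work);
	}
	return 0;
}

static int __init racy_init(void)
{
	INIT_WORK(&racy_work, racy_work_fn);
	cancelers[0] = kthread_run(canceler_fn, NULL, "racy-canceler0");
	cancelers[1] = kthread_run(canceler_fn, NULL, "racy-canceler1");
	return 0;
}

static void __exit racy_exit(void)
{
	kthread_stop(cancelers[0]);
	kthread_stop(cancelers[1]);
	cancel_work_sync(&racy_work);
}

module_init(racy_init);
module_exit(racy_exit);
MODULE_LICENSE("GPL");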

File tree

2 files changed: +54 -5 lines changed

include/linux/workqueue.h

+2-1
@@ -70,7 +70,8 @@ enum {
 	/* data contains off-queue information when !WORK_STRUCT_PWQ */
 	WORK_OFFQ_FLAG_BASE	= WORK_STRUCT_COLOR_SHIFT,
 
-	WORK_OFFQ_CANCELING	= (1 << WORK_OFFQ_FLAG_BASE),
+	__WORK_OFFQ_CANCELING	= WORK_OFFQ_FLAG_BASE,
+	WORK_OFFQ_CANCELING	= (1 << __WORK_OFFQ_CANCELING),
 
 	/*
 	 * When a work item is off queue, its high bits point to the last
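For context, the CANCELING flag lives in the off-queue bits of
work->data.  The helper the new wait path checks, work_is_canceling()
in kernel/workqueue.c, tests it roughly as follows (a paraphrased
sketch of the contemporary helper, shown here for reference only):

/*
 * Paraphrased from kernel/workqueue.c of the same era: a work item is
 * "canceling" when it is off-queue (no pool_workqueue pointer in
 * ->data) and WORK_OFFQ_CANCELING is set.
 */
static bool work_is_canceling(struct work_struct *work)
{
	unsigned long data = atomic_long_read(&work->data);

	return !(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_CANCELING);
}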

kernel/workqueue.c

+52-4
@@ -2728,19 +2728,57 @@ bool flush_work(struct work_struct *work)
 }
 EXPORT_SYMBOL_GPL(flush_work);
 
+struct cwt_wait {
+	wait_queue_t		wait;
+	struct work_struct	*work;
+};
+
+static int cwt_wakefn(wait_queue_t *wait, unsigned mode, int sync, void *key)
+{
+	struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait);
+
+	if (cwait->work != key)
+		return 0;
+	return autoremove_wake_function(wait, mode, sync, key);
+}
+
 static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
 {
+	static DECLARE_WAIT_QUEUE_HEAD(cancel_waitq);
 	unsigned long flags;
 	int ret;
 
 	do {
 		ret = try_to_grab_pending(work, is_dwork, &flags);
 		/*
-		 * If someone else is canceling, wait for the same event it
-		 * would be waiting for before retrying.
+		 * If someone else is already canceling, wait for it to
+		 * finish.  flush_work() doesn't work for PREEMPT_NONE
+		 * because we may get scheduled between @work's completion
+		 * and the other canceling task resuming and clearing
+		 * CANCELING - flush_work() will return false immediately
+		 * as @work is no longer busy, try_to_grab_pending() will
+		 * return -ENOENT as @work is still being canceled and the
+		 * other canceling task won't be able to clear CANCELING as
+		 * we're hogging the CPU.
+		 *
+		 * Let's wait for completion using a waitqueue.  As this
+		 * may lead to the thundering herd problem, use a custom
+		 * wake function which matches @work along with exclusive
+		 * wait and wakeup.
 		 */
-		if (unlikely(ret == -ENOENT))
-			flush_work(work);
+		if (unlikely(ret == -ENOENT)) {
+			struct cwt_wait cwait;
+
+			init_wait(&cwait.wait);
+			cwait.wait.func = cwt_wakefn;
+			cwait.work = work;
+
+			prepare_to_wait_exclusive(&cancel_waitq, &cwait.wait,
+						  TASK_UNINTERRUPTIBLE);
+			if (work_is_canceling(work))
+				schedule();
+			finish_wait(&cancel_waitq, &cwait.wait);
+		}
 	} while (unlikely(ret < 0));
 
 	/* tell other tasks trying to grab @work to back off */
@@ -2749,6 +2787,16 @@ static bool __cancel_work_timer(struct work_struct *work, bool is_dwork)
 
 	flush_work(work);
 	clear_work_data(work);
+
+	/*
+	 * Paired with prepare_to_wait() above so that either
+	 * waitqueue_active() is visible here or !work_is_canceling() is
+	 * visible there.
+	 */
+	smp_mb();
+	if (waitqueue_active(&cancel_waitq))
+		__wake_up(&cancel_waitq, TASK_NORMAL, 1, work);
+
 	return ret;
 }
 
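Design note: cancel_waitq is a single, function-local static waitqueue
shared by every canceling task in the system, so a plain non-exclusive
wake_up() could wake all waiters at once (the thundering herd the
comment mentions), while a naive exclusive wakeup could spend its one
wakeup on a waiter for an unrelated work item.  Passing @work as the
wake key lets cwt_wakefn() return 0 for non-matching waiters, which
keeps them queued and lets __wake_up() continue until it wakes the one
exclusive waiter for the matching work item.  The smp_mb() pairs with
prepare_to_wait_exclusive() so that a waker either sees the waiter via
waitqueue_active() or the waiter sees !work_is_canceling() and skips
schedule().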
