Skip to content

Commit

Permalink
ocfs2: fix race between mount and delete node/cluster
Browse files Browse the repository at this point in the history
There is a race case between mount and delete node/cluster, which will
lead o2hb_thread to malfunctioning dead loop.

    o2hb_thread
    {
        o2nm_depend_this_node();
        <<<<<< race window, node may have already been deleted, and then
               enter the loop, o2hb thread will be malfunctioning
               because of no configured nodes found.
        while (!kthread_should_stop() &&
               !reg->hr_unclean_stop && !reg->hr_aborted_start) {
    }

So check the return value of o2nm_depend_this_node() is needed.  If node
has been deleted, do not enter the loop and let mount fail.

Signed-off-by: Joseph Qi <[email protected]>
Cc: Mark Fasheh <[email protected]>
Cc: Joel Becker <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
josephhz authored and torvalds committed Nov 6, 2015
1 parent 93d911f commit 0986fe9
Showing 1 changed file with 16 additions and 3 deletions.
19 changes: 16 additions & 3 deletions fs/ocfs2/cluster/heartbeat.c
Original file line number Diff line number Diff line change
Expand Up @@ -219,7 +219,8 @@ struct o2hb_region {
unsigned hr_unclean_stop:1,
hr_aborted_start:1,
hr_item_pinned:1,
hr_item_dropped:1;
hr_item_dropped:1,
hr_node_deleted:1;

/* protected by the hr_callback_sem */
struct task_struct *hr_task;
Expand Down Expand Up @@ -1078,7 +1079,13 @@ static int o2hb_thread(void *data)
set_user_nice(current, MIN_NICE);

/* Pin node */
o2nm_depend_this_node();
ret = o2nm_depend_this_node();
if (ret) {
mlog(ML_ERROR, "Node has been deleted, ret = %d\n", ret);
reg->hr_node_deleted = 1;
wake_up(&o2hb_steady_queue);
return 0;
}

while (!kthread_should_stop() &&
!reg->hr_unclean_stop && !reg->hr_aborted_start) {
Expand Down Expand Up @@ -1787,7 +1794,8 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
spin_unlock(&o2hb_live_lock);

ret = wait_event_interruptible(o2hb_steady_queue,
atomic_read(&reg->hr_steady_iterations) == 0);
atomic_read(&reg->hr_steady_iterations) == 0 ||
reg->hr_node_deleted);
if (ret) {
atomic_set(&reg->hr_steady_iterations, 0);
reg->hr_aborted_start = 1;
Expand All @@ -1798,6 +1806,11 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
goto out3;
}

if (reg->hr_node_deleted) {
ret = -EINVAL;
goto out3;
}

/* Ok, we were woken. Make sure it wasn't by drop_item() */
spin_lock(&o2hb_live_lock);
hb_task = reg->hr_task;
Expand Down

0 comments on commit 0986fe9

Please sign in to comment.