raft: Fix the problem of being stuck in candidate role forever.
Sometimes a server can stay in the candidate role forever, even if it
already sees the new leader and handles append requests normally.
Because of the wrong role, however, it appears disconnected from the
cluster, and so its clients are disconnected.

This problem happens when two servers become candidates in the same
term and one of them is elected leader in that term. It can be
reproduced by the test case added in this patch.

The root cause is that the current implementation changes the role to
follower only when a bigger term is observed (in raft_receive_term__()).
According to the Raft paper, if another candidate becomes leader in the
same term, the candidate should change to follower.

This patch fixes it by changing the role to follower when the leader
is updated in raft_update_leader().
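
To make the failure mode concrete, here is a minimal, self-contained
sketch (illustrative only, not the actual OVS code; all type and
function names are hypothetical) contrasting the old role handling,
which reacts only to a strictly larger term, with the fixed handling,
which also demotes a candidate that learns of a leader in its own term:

#include <stdio.h>

enum role { FOLLOWER, CANDIDATE, LEADER };

struct node {
    enum role role;
    int term;
};

/* Pre-fix logic: only a strictly larger term demotes the server. */
static void
receive_append_request_old(struct node *n, int leader_term)
{
    if (leader_term > n->term) {
        n->term = leader_term;
        n->role = FOLLOWER;
    }
    /* When leader_term == n->term, entries are still applied, but the
     * role stays CANDIDATE forever: the bug described above. */
}

/* Post-fix logic: a leader whose term is at least as large as the
 * candidate's also demotes it to follower. */
static void
receive_append_request_new(struct node *n, int leader_term)
{
    if (leader_term > n->term) {
        n->term = leader_term;
        n->role = FOLLOWER;
    } else if (leader_term == n->term && n->role == CANDIDATE) {
        n->role = FOLLOWER;
    }
}

int
main(void)
{
    /* s2 and s3 both became candidates in term 2; s2 won the election
     * and now sends append requests for term 2 to s3. */
    struct node s3 = { CANDIDATE, 2 };
    receive_append_request_old(&s3, 2);
    printf("old: %s\n",
           s3.role == CANDIDATE ? "stuck as candidate" : "follower");

    s3 = (struct node) { CANDIDATE, 2 };
    receive_append_request_new(&s3, 2);
    printf("new: %s\n",
           s3.role == CANDIDATE ? "stuck as candidate" : "follower");
    return 0;
}

Built with any C99 compiler, the first call leaves s3 stuck as a
candidate while the second demotes it to follower, mirroring the
behavior before and after this patch.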

Signed-off-by: Han Zhou <[email protected]>
Signed-off-by: Ben Pfaff <[email protected]>
hzhou8 authored and blp committed Mar 6, 2020
1 parent 315e88c commit 93ee420
Showing 2 changed files with 72 additions and 2 deletions.
ovsdb/raft.c: 19 changes (17 additions, 2 deletions)
@@ -73,7 +73,8 @@ enum raft_failure_test {
     FT_CRASH_BEFORE_SEND_EXEC_REQ,
     FT_CRASH_AFTER_SEND_EXEC_REQ,
     FT_CRASH_AFTER_RECV_APPEND_REQ_UPDATE,
-    FT_DELAY_ELECTION
+    FT_DELAY_ELECTION,
+    FT_DONT_SEND_VOTE_REQUEST
 };
 static enum raft_failure_test failure_test;

@@ -1647,6 +1648,7 @@ raft_start_election(struct raft *raft, bool leadership_transfer)
     }
 
     ovs_assert(raft->role != RAFT_LEADER);
+
     raft->role = RAFT_CANDIDATE;
     /* If there was no leader elected since last election, we know we are
      * retrying now. */
@@ -1690,7 +1692,9 @@ raft_start_election(struct raft *raft, bool leadership_transfer)
                 .leadership_transfer = leadership_transfer,
             },
         };
-        raft_send(raft, &rq);
+        if (failure_test != FT_DONT_SEND_VOTE_REQUEST) {
+            raft_send(raft, &rq);
+        }
     }
 
     /* Vote for ourselves. */
@@ -2965,6 +2969,15 @@ raft_update_leader(struct raft *raft, const struct uuid *sid)
         };
         ignore(ovsdb_log_write_and_free(raft->log, raft_record_to_json(&r)));
     }
+    if (raft->role == RAFT_CANDIDATE) {
+        /* Section 3.4: While waiting for votes, a candidate may
+         * receive an AppendEntries RPC from another server claiming to
+         * be leader. If the leader’s term (included in its RPC) is at
+         * least as large as the candidate’s current term, then the
+         * candidate recognizes the leader as legitimate and returns to
+         * follower state. */
+        raft->role = RAFT_FOLLOWER;
+    }
     return true;
 }

@@ -4673,6 +4686,8 @@ raft_unixctl_failure_test(struct unixctl_conn *conn OVS_UNUSED,
                 raft_reset_election_timer(raft);
             }
         }
+    } else if (!strcmp(test, "dont-send-vote-request")) {
+        failure_test = FT_DONT_SEND_VOTE_REQUEST;
     } else if (!strcmp(test, "clear")) {
         failure_test = FT_NO_TEST;
         unixctl_command_reply(conn, "test dismissed");
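
The new "dont-send-vote-request" failure test is enabled through the
same cluster/failure-test unixctl command as the existing tests, and it
is exercised by the cluster test case added below.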
tests/ovsdb-cluster.at: 55 changes (55 additions, 0 deletions)
@@ -527,6 +527,61 @@ AT_KEYWORDS([ovsdb server negative unix cluster pending-txn])
 ovsdb_cluster_failure_test 2 2 3 crash-after-receiving-append-request-update
 AT_CLEANUP
 
+
+AT_SETUP([OVSDB cluster - competing candidates])
+AT_KEYWORDS([ovsdb server negative unix cluster competing-candidates])
+
+n=3
+schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
+ordinal_schema > schema
+AT_CHECK([ovsdb-tool '-vPATTERN:console:%c|%p|%m' create-cluster s1.db $abs_srcdir/idltest.ovsschema unix:s1.raft], [0], [], [stderr])
+cid=`ovsdb-tool db-cid s1.db`
+schema_name=`ovsdb-tool schema-name $abs_srcdir/idltest.ovsschema`
+for i in `seq 2 $n`; do
+    AT_CHECK([ovsdb-tool join-cluster s$i.db $schema_name unix:s$i.raft unix:s1.raft])
+done
+
+on_exit 'kill `cat *.pid`'
+for i in `seq $n`; do
+    AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s$i.log --pidfile=s$i.pid --unixctl=s$i --remote=punix:s$i.ovsdb s$i.db])
+done
+for i in `seq $n`; do
+    AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
+done
+
+# We need to simulate the situation where two candidates start an election
+# in the same term.
+#
+# Before triggering leader election, tell follower s2 not to send vote
+# requests (simulating a vote request that is lost or not handled in time),
+# and tell follower s3 to delay its election timer to make sure s3 doesn't
+# send a vote request before s2 enters term 2.
+AT_CHECK([ovs-appctl -t "`pwd`"/s2 cluster/failure-test dont-send-vote-request], [0], [ignore])
+AT_CHECK([ovs-appctl -t "`pwd`"/s3 cluster/failure-test delay-election], [0], [ignore])
+
+# Restart the leader, which will become a follower, and both old followers
+# will start an election as candidates. The new follower (the old leader)
+# will vote for one of them, and the other candidate should step back to
+# follower again.
+kill -9 `cat s1.pid`
+AT_CHECK([ovsdb-server -v -vconsole:off -vsyslog:off --detach --no-chdir --log-file=s1.log --pidfile=s1.pid --unixctl=s1 --remote=punix:s1.ovsdb s1.db])
+
+# Tell s1 to delay its election timer so that it won't start an election
+# before s3 becomes a candidate.
+AT_CHECK([ovs-appctl -t "`pwd`"/s1 cluster/failure-test delay-election], [0], [ignore])
+
+OVS_WAIT_UNTIL([ovs-appctl -t "`pwd`"/s1 cluster/status $schema_name | grep "Term: 2"])
+
+for i in `seq $n`; do
+    OVS_WAIT_WHILE([ovs-appctl -t "`pwd`"/s$i cluster/status $schema_name | grep "candidate"])
+    AT_CHECK([ovsdb_client_wait unix:s$i.ovsdb $schema_name connected])
+done
+
+for i in `seq $n`; do
+    OVS_APP_EXIT_AND_WAIT_BY_TARGET([`pwd`/s$i], [s$i.pid])
+done
+
+AT_CLEANUP
+

 AT_BANNER([OVSDB - cluster tests])
