Skip to content

Commit

Permalink
raft: fix auto-transitioning out of joint config
Browse files Browse the repository at this point in the history
The code doing so was undertested and buggy: it would launch multiple
attempts to transition out when the conf change was not the last element
in the log.

This commit fixes the problem and adds a regression test. It also
reworks the code to handle a former untested edge case, in which the
auto-transition append is refused. This can't happen any more with the
current version of the code because this proposal has size zero and is
special cased in increaseUncommittedSize. Last but not least, the
auto-leave proposal now also bumps pendingConfIndex, which was not done
previously due to an oversight.
  • Loading branch information
tbg committed Feb 25, 2020
1 parent fee7213 commit bd3c759
Show file tree
Hide file tree
Showing 5 changed files with 271 additions and 31 deletions.
40 changes: 22 additions & 18 deletions raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -554,35 +554,34 @@ func (r *raft) bcastHeartbeatWithCtx(ctx []byte) {
}

func (r *raft) advance(rd Ready) {
r.reduceUncommittedSize(rd.CommittedEntries)

// If entries were applied (or a snapshot), update our cursor for
// the next Ready. Note that if the current HardState contains a
// new Commit index, this does not mean that we're also applying
// all of the new entries due to commit pagination by size.
if index := rd.appliedCursor(); index > 0 {
r.raftLog.appliedTo(index)
if r.prs.Config.AutoLeave && index >= r.pendingConfIndex && r.state == StateLeader {
if newApplied := rd.appliedCursor(); newApplied > 0 {
oldApplied := r.raftLog.applied
r.raftLog.appliedTo(newApplied)

if r.prs.Config.AutoLeave && oldApplied < r.pendingConfIndex && newApplied >= r.pendingConfIndex && r.state == StateLeader {
// If the current (and most recent, at least for this leader's term)
// configuration should be auto-left, initiate that now.
ccdata, err := (&pb.ConfChangeV2{}).Marshal()
if err != nil {
panic(err)
}
// configuration should be auto-left, initiate that now. We use a
// nil Data which unmarshals into an empty ConfChangeV2 and has the
// benefit that appendEntry can never refuse it based on its size
// (which registers as zero).
ent := pb.Entry{
Type: pb.EntryConfChangeV2,
Data: ccdata,
Data: nil,
}
// There's no way in which this proposal should be able to be rejected.
if !r.appendEntry(ent) {
// If we could not append the entry, bump the pending conf index
// so that we'll try again later.
//
// TODO(tbg): test this case.
r.pendingConfIndex = r.raftLog.lastIndex()
} else {
r.logger.Infof("initiating automatic transition out of joint configuration %s", r.prs.Config)
panic("refused un-refusable auto-leaving ConfChangeV2")
}
r.pendingConfIndex = r.raftLog.lastIndex()
r.logger.Infof("initiating automatic transition out of joint configuration %s", r.prs.Config)
}
}
r.reduceUncommittedSize(rd.CommittedEntries)

if len(rd.Entries) > 0 {
e := rd.Entries[len(rd.Entries)-1]
Expand Down Expand Up @@ -1607,16 +1606,21 @@ func (r *raft) abortLeaderTransfer() {
// If the new entries would exceed the limit, the method returns false. If not,
// the increase in uncommitted entry size is recorded and the method returns
// true.
// Configuration changes are never refused.
func (r *raft) increaseUncommittedSize(ents []pb.Entry) bool {
var s uint64
for _, e := range ents {
s += uint64(PayloadSize(e))
}

if r.uncommittedSize > 0 && r.uncommittedSize+s > r.maxUncommittedSize {
if r.uncommittedSize > 0 && s > 0 && r.uncommittedSize+s > r.maxUncommittedSize {
// If the uncommitted tail of the Raft log is empty, allow any size
// proposal. Otherwise, limit the size of the uncommitted tail of the
// log and drop any proposal that would push the size over the limit.
// Note the added requirement s>0 which is used to make sure that
// appending single empty entries to the log always succeeds, used both
// for replicating a new leader's initial empty entry, and for
// auto-leaving joint configurations.
return false
}
r.uncommittedSize += s
Expand Down
15 changes: 14 additions & 1 deletion raft_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,10 @@ func TestUncommittedEntryLimit(t *testing.T) {
testEntry := pb.Entry{Data: []byte("testdata")}
maxEntrySize := maxEntries * PayloadSize(testEntry)

if n := PayloadSize(pb.Entry{Data: nil}); n != 0 {
t.Fatal("entry with no Data must have zero payload size")
}

cfg := newTestConfig(1, []uint64{1, 2, 3}, 5, 1, NewMemoryStorage())
cfg.MaxUncommittedEntriesSize = uint64(maxEntrySize)
cfg.MaxInflightMsgs = 2 * 1024 // avoid interference
Expand Down Expand Up @@ -244,10 +248,19 @@ func TestUncommittedEntryLimit(t *testing.T) {
t.Fatalf("proposal not dropped: %v", err)
}

// But we can always append an entry with no Data. This is used both for the
// leader's first empty entry and for auto-transitioning out of joint config
// states.
if err := r.Step(
pb.Message{From: 1, To: 1, Type: pb.MsgProp, Entries: []pb.Entry{{}}},
); err != nil {
t.Fatal(err)
}

// Read messages and reduce the uncommitted size as if we had committed
// these entries.
ms = r.readMessages()
if e := 1 * numFollowers; len(ms) != e {
if e := 2 * numFollowers; len(ms) != e {
t.Fatalf("expected %d messages, got %d", e, len(ms))
}
r.reduceUncommittedSize(propEnts)
Expand Down
10 changes: 5 additions & 5 deletions raftpb/confchange.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ func (c ConfChangeV2) AsV1() (ConfChange, bool) { return ConfChange{}, false }
// than one change or if the use of Joint Consensus was requested explicitly.
// The first bool can only be true if second one is, and indicates whether the
// Joint State will be left automatically.
func (c *ConfChangeV2) EnterJoint() (autoLeave bool, ok bool) {
func (c ConfChangeV2) EnterJoint() (autoLeave bool, ok bool) {
// NB: in theory, more config changes could qualify for the "simple"
// protocol but it depends on the config on top of which the changes apply.
// For example, adding two learners is not OK if both nodes are part of the
Expand All @@ -100,10 +100,10 @@ func (c *ConfChangeV2) EnterJoint() (autoLeave bool, ok bool) {
// LeaveJoint is true if the configuration change leaves a joint configuration.
// This is the case if the ConfChangeV2 is zero, with the possible exception of
// the Context field.
func (c *ConfChangeV2) LeaveJoint() bool {
cpy := *c
cpy.Context = nil
return proto.Equal(&cpy, &ConfChangeV2{})
func (c ConfChangeV2) LeaveJoint() bool {
// NB: c is already a copy.
c.Context = nil
return proto.Equal(&c, &ConfChangeV2{})
}

// ConfChangesFromString parses a Space-delimited sequence of operations into a
Expand Down
27 changes: 20 additions & 7 deletions rawnode_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,11 +200,12 @@ func TestRawNodeProposeAndConfChange(t *testing.T) {
},
// Ditto implicit.
{
pb.ConfChangeV2{Changes: []pb.ConfChangeSingle{
{NodeID: 2, Type: pb.ConfChangeAddNode},
{NodeID: 1, Type: pb.ConfChangeAddLearnerNode},
{NodeID: 3, Type: pb.ConfChangeAddLearnerNode},
},
pb.ConfChangeV2{
Changes: []pb.ConfChangeSingle{
{NodeID: 2, Type: pb.ConfChangeAddNode},
{NodeID: 1, Type: pb.ConfChangeAddLearnerNode},
{NodeID: 3, Type: pb.ConfChangeAddLearnerNode},
},
Transition: pb.ConfChangeTransitionJointImplicit,
},
pb.ConfState{
Expand Down Expand Up @@ -282,7 +283,9 @@ func TestRawNodeProposeAndConfChange(t *testing.T) {
}

// Check that the last index is exactly the conf change we put in,
// down to the bits.
// down to the bits. Note that this comes from the Storage, which
// will not reflect any unstable entries that we'll only be presented
// with in the next Ready.
lastIndex, err = s.LastIndex()
if err != nil {
t.Fatal(err)
Expand Down Expand Up @@ -313,7 +316,17 @@ func TestRawNodeProposeAndConfChange(t *testing.T) {
t.Fatalf("exp:\n%+v\nact:\n%+v", exp, cs)
}

if exp, act := lastIndex, rawNode.raft.pendingConfIndex; exp != act {
var maybePlusOne uint64
if autoLeave, ok := tc.cc.AsV2().EnterJoint(); ok && autoLeave {
// If this is an auto-leaving joint conf change, it will have
// appended the entry that auto-leaves, so add one to the last
// index that forms the basis of our expectations on
// pendingConfIndex. (Recall that lastIndex was taken from stable
// storage, but this auto-leaving entry isn't on stable storage
// yet).
maybePlusOne = 1
}
if exp, act := lastIndex+maybePlusOne, rawNode.raft.pendingConfIndex; exp != act {
t.Fatalf("pendingConfIndex: expected %d, got %d", exp, act)
}

Expand Down
210 changes: 210 additions & 0 deletions testdata/confchange_v2_add_double_auto.txt
Original file line number Diff line number Diff line change
Expand Up @@ -195,3 +195,213 @@ stabilize 1 3
stabilize
----
ok

# Now remove two nodes. What's new here is that the leader will actually have
# to go to a quorum to commit the transition into the joint config.

propose-conf-change 1
r2 r3
----
ok

# n1 sends out MsgApps.
stabilize 1
----
> 1 handling Ready
Ready MustSync=true:
Entries:
1/6 EntryConfChangeV2 r2 r3
Messages:
1->2 MsgApp Term:1 Log:1/5 Commit:5 Entries:[1/6 EntryConfChangeV2 r2 r3]
1->3 MsgApp Term:1 Log:1/5 Commit:5 Entries:[1/6 EntryConfChangeV2 r2 r3]

# n2, n3 ack them.
stabilize 2 3
----
> 2 receiving messages
1->2 MsgApp Term:1 Log:1/5 Commit:5 Entries:[1/6 EntryConfChangeV2 r2 r3]
> 3 receiving messages
1->3 MsgApp Term:1 Log:1/5 Commit:5 Entries:[1/6 EntryConfChangeV2 r2 r3]
> 2 handling Ready
Ready MustSync=true:
Entries:
1/6 EntryConfChangeV2 r2 r3
Messages:
2->1 MsgAppResp Term:1 Log:0/6
> 3 handling Ready
Ready MustSync=true:
Entries:
1/6 EntryConfChangeV2 r2 r3
Messages:
3->1 MsgAppResp Term:1 Log:0/6

# n1 gets some more proposals. This is part of a regression test: There used to
# be a bug in which these proposals would prompt the leader to transition out of
# the same joint state multiple times, which would cause a panic.
propose 1 foo
----
ok

propose 1 bar
----
ok

# n1 switches to the joint config, then initiates a transition into the final
# config.
stabilize 1
----
> 1 handling Ready
Ready MustSync=true:
Entries:
1/7 EntryNormal "foo"
1/8 EntryNormal "bar"
Messages:
1->2 MsgApp Term:1 Log:1/6 Commit:5 Entries:[1/7 EntryNormal "foo"]
1->3 MsgApp Term:1 Log:1/6 Commit:5 Entries:[1/7 EntryNormal "foo"]
1->2 MsgApp Term:1 Log:1/7 Commit:5 Entries:[1/8 EntryNormal "bar"]
1->3 MsgApp Term:1 Log:1/7 Commit:5 Entries:[1/8 EntryNormal "bar"]
> 1 receiving messages
2->1 MsgAppResp Term:1 Log:0/6
3->1 MsgAppResp Term:1 Log:0/6
> 1 handling Ready
Ready MustSync=false:
HardState Term:1 Vote:1 Commit:6
CommittedEntries:
1/6 EntryConfChangeV2 r2 r3
Messages:
1->2 MsgApp Term:1 Log:1/8 Commit:6
1->3 MsgApp Term:1 Log:1/8 Commit:6
INFO 1 switched to configuration voters=(1)&&(1 2 3) autoleave
INFO initiating automatic transition out of joint configuration voters=(1)&&(1 2 3) autoleave
> 1 handling Ready
Ready MustSync=true:
Entries:
1/9 EntryConfChangeV2

# n2 and n3 also switch to the joint config, and ack the transition out of it.
stabilize 2 3
----
> 2 receiving messages
1->2 MsgApp Term:1 Log:1/6 Commit:5 Entries:[1/7 EntryNormal "foo"]
1->2 MsgApp Term:1 Log:1/7 Commit:5 Entries:[1/8 EntryNormal "bar"]
1->2 MsgApp Term:1 Log:1/8 Commit:6
> 3 receiving messages
1->3 MsgApp Term:1 Log:1/6 Commit:5 Entries:[1/7 EntryNormal "foo"]
1->3 MsgApp Term:1 Log:1/7 Commit:5 Entries:[1/8 EntryNormal "bar"]
1->3 MsgApp Term:1 Log:1/8 Commit:6
> 2 handling Ready
Ready MustSync=true:
HardState Term:1 Commit:6
Entries:
1/7 EntryNormal "foo"
1/8 EntryNormal "bar"
CommittedEntries:
1/6 EntryConfChangeV2 r2 r3
Messages:
2->1 MsgAppResp Term:1 Log:0/7
2->1 MsgAppResp Term:1 Log:0/8
2->1 MsgAppResp Term:1 Log:0/8
INFO 2 switched to configuration voters=(1)&&(1 2 3) autoleave
> 3 handling Ready
Ready MustSync=true:
HardState Term:1 Commit:6
Entries:
1/7 EntryNormal "foo"
1/8 EntryNormal "bar"
CommittedEntries:
1/6 EntryConfChangeV2 r2 r3
Messages:
3->1 MsgAppResp Term:1 Log:0/7
3->1 MsgAppResp Term:1 Log:0/8
3->1 MsgAppResp Term:1 Log:0/8
INFO 3 switched to configuration voters=(1)&&(1 2 3) autoleave

# n2 and n3 also leave the joint config and the dust settles. We see at the very
# end that n1 receives some messages from them that it refuses because it does
# not have them in its config any more.
stabilize
----
> 1 receiving messages
2->1 MsgAppResp Term:1 Log:0/7
2->1 MsgAppResp Term:1 Log:0/8
2->1 MsgAppResp Term:1 Log:0/8
3->1 MsgAppResp Term:1 Log:0/7
3->1 MsgAppResp Term:1 Log:0/8
3->1 MsgAppResp Term:1 Log:0/8
> 1 handling Ready
Ready MustSync=false:
HardState Term:1 Vote:1 Commit:8
CommittedEntries:
1/7 EntryNormal "foo"
1/8 EntryNormal "bar"
Messages:
1->2 MsgApp Term:1 Log:1/8 Commit:7 Entries:[1/9 EntryConfChangeV2]
1->3 MsgApp Term:1 Log:1/8 Commit:7 Entries:[1/9 EntryConfChangeV2]
1->2 MsgApp Term:1 Log:1/9 Commit:8
1->3 MsgApp Term:1 Log:1/9 Commit:8
> 2 receiving messages
1->2 MsgApp Term:1 Log:1/8 Commit:7 Entries:[1/9 EntryConfChangeV2]
1->2 MsgApp Term:1 Log:1/9 Commit:8
> 3 receiving messages
1->3 MsgApp Term:1 Log:1/8 Commit:7 Entries:[1/9 EntryConfChangeV2]
1->3 MsgApp Term:1 Log:1/9 Commit:8
> 2 handling Ready
Ready MustSync=true:
HardState Term:1 Commit:8
Entries:
1/9 EntryConfChangeV2
CommittedEntries:
1/7 EntryNormal "foo"
1/8 EntryNormal "bar"
Messages:
2->1 MsgAppResp Term:1 Log:0/9
2->1 MsgAppResp Term:1 Log:0/9
> 3 handling Ready
Ready MustSync=true:
HardState Term:1 Commit:8
Entries:
1/9 EntryConfChangeV2
CommittedEntries:
1/7 EntryNormal "foo"
1/8 EntryNormal "bar"
Messages:
3->1 MsgAppResp Term:1 Log:0/9
3->1 MsgAppResp Term:1 Log:0/9
> 1 receiving messages
2->1 MsgAppResp Term:1 Log:0/9
2->1 MsgAppResp Term:1 Log:0/9
3->1 MsgAppResp Term:1 Log:0/9
3->1 MsgAppResp Term:1 Log:0/9
> 1 handling Ready
Ready MustSync=false:
HardState Term:1 Vote:1 Commit:9
CommittedEntries:
1/9 EntryConfChangeV2
Messages:
1->2 MsgApp Term:1 Log:1/9 Commit:9
1->3 MsgApp Term:1 Log:1/9 Commit:9
INFO 1 switched to configuration voters=(1)
> 2 receiving messages
1->2 MsgApp Term:1 Log:1/9 Commit:9
> 3 receiving messages
1->3 MsgApp Term:1 Log:1/9 Commit:9
> 2 handling Ready
Ready MustSync=false:
HardState Term:1 Commit:9
CommittedEntries:
1/9 EntryConfChangeV2
Messages:
2->1 MsgAppResp Term:1 Log:0/9
INFO 2 switched to configuration voters=(1)
> 3 handling Ready
Ready MustSync=false:
HardState Term:1 Commit:9
CommittedEntries:
1/9 EntryConfChangeV2
Messages:
3->1 MsgAppResp Term:1 Log:0/9
INFO 3 switched to configuration voters=(1)
> 1 receiving messages
2->1 MsgAppResp Term:1 Log:0/9
raft: cannot step as peer not found3->1 MsgAppResp Term:1 Log:0/9
raft: cannot step as peer not found

0 comments on commit bd3c759

Please sign in to comment.