Skip to content

Commit

Permalink
peer: restore the htlcManager's logCommitTimer to a persistent ticker
Browse files Browse the repository at this point in the history
This commit patches a hole in our optimistic channel synchronization
logic by making the logCommitTimer a persistent ticker rather than one
that is activated after receiving a commitment, and disabled once we
send a new commitment ourselves. In the setting of batched full-duplex
channel updates, the prior approach could at times result in a benign
state desync caused by one side being one commitment ahead of the other
because one of the nodes failed to, or was unable to, provide the other
with a state update during the workflow.
  • Loading branch information
Roasbeef committed Apr 12, 2017
1 parent 3393f3a commit 178f26b
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 36 deletions.
3 changes: 2 additions & 1 deletion htlcswitch.go
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,8 @@ out:
payHash: payHash,
msg: &lnwire.UpdateFailHTLC{
Reason: []byte{uint8(lnwire.InsufficientCapacity)},
}, err: make(chan error, 1),
},
err: make(chan error, 1),
}

// Send the cancel message along the
Expand Down
45 changes: 10 additions & 35 deletions peer.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,9 +277,8 @@ func (p *peer) loadActiveChannels(chans []*channeldb.OpenChannel) error {
return nil
}

// Start starts all helper goroutines the peer needs for normal operations.
// In the case this peer has already been started, then this function is a
// loop.
// Start starts all helper goroutines the peer needs for normal operations. In
// the case this peer has already been started, then this function is a no-op.
func (p *peer) Start() error {
if atomic.AddInt32(&p.started, 1) != 1 {
return nil
Expand All @@ -294,7 +293,7 @@ func (p *peer) Start() error {
}

// Before we launch any of the helper goroutines off the peer struct,
// we'll first ensure proper adherance to the p2p protocl. The init
// we'll first ensure proper adherence to the p2p protocol. The init
// message MUST be sent before any other message.
readErr := make(chan error, 1)
msgChan := make(chan lnwire.Message, 1)
Expand All @@ -309,7 +308,7 @@ func (p *peer) Start() error {
}()

select {
// In order to avoid blocking indefinately, we'll give the other peer
// In order to avoid blocking indefinitely, we'll give the other peer
// an upper timeout of 5 seconds to respond before we bail out early.
case <-time.After(time.Second * 5):
return fmt.Errorf("peer did not complete handshake within 5 " +
Expand Down Expand Up @@ -1069,19 +1068,14 @@ type commitmentState struct {
// htlcSwitch, or subsystem that initiated the HTLC.
cancelReasons map[uint64]lnwire.FailCode

// pendingBatch is slice of payments which have been added to the
// channel update log, but not yet committed to latest commitment.
pendingBatch []*pendingPayment

// clearedHTCLs is a map of outgoing HTLCs we've committed to in our
// chain which have not yet been settled by the upstream peer.
clearedHTCLs map[uint64]*pendingPayment

// logCommitTimer is a timer which is sent upon if we go an interval
// without receiving/sending a commitment update. It's role is to
// ensure both chains converge to identical state in a timely manner.
// TODO(roasbeef): timer should be >> then RTT
logCommitTimer *time.Timer
logCommitTick <-chan time.Time

// switchChan is a channel used to send packets to the htlc switch for
// forwarding.
switchChan chan<- *htlcPacket
Expand Down Expand Up @@ -1143,7 +1137,6 @@ func (p *peer) htlcManager(channel *lnwallet.LightningChannel,
pendingCircuits: make(map[uint64]*sphinx.ProcessedPacket),
sphinx: p.server.sphinx,
switchChan: htlcPlex,
logCommitTimer: time.NewTimer(300 * time.Millisecond),
}

// TODO(roasbeef): check to see if able to settle any currently pending
Expand All @@ -1153,6 +1146,9 @@ func (p *peer) htlcManager(channel *lnwallet.LightningChannel,

batchTimer := time.NewTicker(50 * time.Millisecond)
defer batchTimer.Stop()

logCommitTimer := time.NewTicker(300 * time.Millisecond)
defer logCommitTimer.Stop()
out:
for {
select {
Expand All @@ -1176,7 +1172,7 @@ out:
state.chanPoint, p.id)
break out

case <-state.logCommitTick:
case <-logCommitTimer.C:
// If we haven't sent or received a new commitment
// update in some time, check to see if we have any
// pending updates we need to commit due to our
Expand Down Expand Up @@ -1508,16 +1504,6 @@ func (p *peer) handleUpstreamMsg(state *commitmentState, msg lnwire.Message) {
}
p.queueMsg(nextRevocation, nil)


if !state.logCommitTimer.Stop() {
select {
case <-state.logCommitTimer.C:
default:
}
}

state.logCommitTimer.Reset(300 * time.Millisecond)
state.logCommitTick = state.logCommitTimer.C
// If both commitment chains are fully synced from our PoV,
// then we don't need to reply with a signature as both sides
// already have a commitment with the latest accepted state.
Expand Down Expand Up @@ -1732,17 +1718,6 @@ func (p *peer) updateCommitTx(state *commitmentState) error {
state.clearedHTCLs[update.index] = update
}

// We've just initiated a state transition, attempt to stop the
// logCommitTimer. If the timer already ticked, then we'll consume the
// value, dropping
if state.logCommitTimer != nil && !state.logCommitTimer.Stop() {
select {
case <-state.logCommitTimer.C:
default:
}
}
state.logCommitTick = nil

// Finally, clear our the current batch, and flip the pendingUpdate
// bool to indicate were waiting for a commitment signature.
// TODO(roasbeef): re-slice instead to avoid GC?
Expand Down

0 comments on commit 178f26b

Please sign in to comment.