Skip to content

Commit

Permalink
In GetRemoteSignedCertificate, if the certificate hasn't been issued …
Browse files Browse the repository at this point in the history
…in 5 seconds it errors

with a context deadline exceeded and does not retry. Update it so that if the node has not
been updated within 5 seconds, it attempts to get the node status again after an exponential
backoff.

If NodeCertificateStatus errors with some other error (not context deadline exceeded),
GetRemoteSignedCertificate will try again with a different connection.

Signed-off-by: cyli <[email protected]>
  • Loading branch information
cyli committed Mar 29, 2017
1 parent 74a3d19 commit b727e68
Show file tree
Hide file tree
Showing 3 changed files with 345 additions and 19 deletions.
39 changes: 27 additions & 12 deletions ca/certificates.go
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,6 @@ func (rca *RootCA) RequestAndSaveNewCertificates(ctx context.Context, kw KeyWrit
// the local connection will not be returned by the connection
// broker anymore.
config.ForceRemote = true

}
if err != nil {
return nil, err
Expand Down Expand Up @@ -773,7 +772,6 @@ func GetRemoteSignedCertificate(ctx context.Context, csr []byte, rootCAPool *x50
if rootCAPool == nil {
return nil, errors.New("valid root CA pool required")
}

creds := config.Credentials

if creds == nil {
Expand Down Expand Up @@ -810,17 +808,29 @@ func GetRemoteSignedCertificate(ctx context.Context, csr []byte, rootCAPool *x50

// Exponential backoff with Max of 30 seconds to wait for a new retry
for {
timeout := 5 * time.Second
if config.NodeCertificateStatusRequestTimeout > 0 {
timeout = config.NodeCertificateStatusRequestTimeout
}
// Send the Request and retrieve the certificate
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
stateCtx, cancel := context.WithTimeout(ctx, timeout)
defer cancel()
statusResponse, err := caClient.NodeCertificateStatus(ctx, statusRequest)
if err != nil {
statusResponse, err := caClient.NodeCertificateStatus(stateCtx, statusRequest)
switch {
case err != nil && grpc.Code(err) != codes.DeadlineExceeded:
conn.Close(false)
return nil, err
}
// Because IssueNodeCertificate succeeded, if this call failed likely it is due to an issue with this
// particular connection, so we need to get another. We should try a remote connection - the local node
// may be a manager that was demoted, so the local connection (which is preferred) may not work.
config.ForceRemote = true
conn, err = getGRPCConnection(creds, config.ConnBroker, config.ForceRemote)
if err != nil {
return nil, err
}
caClient = api.NewNodeCAClient(conn.ClientConn)

// If the certificate was issued, return
if statusResponse.Status.State == api.IssuanceStateIssued {
// If there was no deadline exceeded error, and the certificate was issued, return
case err == nil && statusResponse.Status.State == api.IssuanceStateIssued:
if statusResponse.Certificate == nil {
conn.Close(false)
return nil, errors.New("no certificate in CertificateStatus response")
Expand All @@ -837,10 +847,15 @@ func GetRemoteSignedCertificate(ctx context.Context, csr []byte, rootCAPool *x50
}
}

// If we're still pending, the issuance failed, or the state is unknown
// let's continue trying.
// If NodeCertificateStatus timed out, we're still pending, the issuance failed, or
// the state is unknown, let's continue trying after an exponential backoff
expBackoff.Failure(nil, nil)
time.Sleep(expBackoff.Proceed(nil))
select {
case <-ctx.Done():
conn.Close(true)
return nil, err
case <-time.After(expBackoff.Proceed(nil)):
}
}
}

Expand Down
Loading

0 comments on commit b727e68

Please sign in to comment.