Skip to content

Commit

Permalink
Prevent deadlock when all nodes are dead
Browse files Browse the repository at this point in the history
If we don't rely on sniffing to find healthy nodes, a client might be
stuck with all nodes dead and never return.

To resolve this situation we mark all nodes as alive in that case.
The next call to `PerformRequest` will then find a node that is
marked as alive (not healthy) and will retry. If a node came back
in the meantime, it will be marked as healthy.

Such a deadlock situation will also get logged.

See discussion in olivere#192.
  • Loading branch information
olivere committed Jan 4, 2016
1 parent 0f746c8 commit 1f8cbcb
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 13 deletions.
13 changes: 11 additions & 2 deletions client.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import (

const (
// Version is the current version of Elastic.
Version = "3.0.16"
Version = "3.0.17"

// DefaultUrl is the default endpoint of Elasticsearch on the local machine.
// It is used e.g. when initializing a new Client without a specific URL.
Expand Down Expand Up @@ -906,7 +906,16 @@ func (c *Client) next() (*conn, error) {
}
}

// TODO(oe) As a last resort, we could try to awake a dead connection here.
// We have a deadlock here: All nodes are marked as dead.
// If sniffing is disabled, connections will never be marked alive again.
// So we are marking them as alive--if sniffing is disabled.
// They'll then be picked up in the next call to PerformRequest.
if !c.snifferEnabled {
c.errorf("elastic: all %d nodes marked as dead; resurrecting them to prevent deadlock", len(c.conns))
for _, conn := range c.conns {
conn.MarkAsAlive()
}
}

// We tried hard, but there is no node available
return nil, ErrNoClient
Expand Down
58 changes: 47 additions & 11 deletions client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,41 @@ func TestClientSniffDisabled(t *testing.T) {
}
}

func TestClientWillMarkConnectionsAsAliveWhenAllAreDead(t *testing.T) {
client, err := NewClient(SetURL("http://127.0.0.1:9201"),
SetSniff(false), SetHealthcheck(false), SetMaxRetries(0))
if err != nil {
t.Fatal(err)
}
// We should have a connection.
if len(client.conns) != 1 {
t.Fatalf("expected 1 node, got: %d (%v)", len(client.conns), client.conns)
}

// Make a request, so that the connections is marked as dead.
client.Flush().Do()

// The connection should now be marked as dead.
if i, found := findConn("http://127.0.0.1:9201", client.conns...); !found {
t.Fatalf("expected connection to %q to be found", "http://127.0.0.1:9201")
} else {
if conn := client.conns[i]; !conn.IsDead() {
t.Fatalf("expected connection to be dead, got: %v", conn)
}
}

// Now send another request and the connection should be marked as alive again.
client.Flush().Do()

if i, found := findConn("http://127.0.0.1:9201", client.conns...); !found {
t.Fatalf("expected connection to %q to be found", "http://127.0.0.1:9201")
} else {
if conn := client.conns[i]; conn.IsDead() {
t.Fatalf("expected connection to be alive, got: %v", conn)
}
}
}

func TestClientWithRequiredPlugins(t *testing.T) {
_, err := NewClient(SetRequiredPlugins("no-such-plugin"))
if err == nil {
Expand Down Expand Up @@ -486,29 +521,30 @@ func TestClientSelectConnAllDead(t *testing.T) {
client.conns[0].MarkAsDead()
client.conns[1].MarkAsDead()

// #1: Return ErrNoClient
// If all connections are dead, next should make them alive again, but
// still return ErrNoClient when it first finds out.
c, err := client.next()
if err != ErrNoClient {
t.Fatal(err)
}
if c != nil {
t.Fatalf("expected no connection; got: %v", c)
}
// #2: Return ErrNoClient again
// Return a connection
c, err = client.next()
if err != ErrNoClient {
t.Fatal(err)
if err != nil {
t.Fatalf("expected no error; got: %v", err)
}
if c != nil {
t.Fatalf("expected no connection; got: %v", c)
if c == nil {
t.Fatalf("expected connection; got: %v", c)
}
// #3: Return ErrNoClient again and again
// Return a connection
c, err = client.next()
if err != ErrNoClient {
t.Fatal(err)
if err != nil {
t.Fatalf("expected no error; got: %v", err)
}
if c != nil {
t.Fatalf("expected no connection; got: %v", c)
if c == nil {
t.Fatalf("expected connection; got: %v", c)
}
}

Expand Down

0 comments on commit 1f8cbcb

Please sign in to comment.