Skip to content

Commit

Permalink
Add Health Check Tags (ava-labs#1304)
Browse files Browse the repository at this point in the history
  • Loading branch information
ceyonur authored Apr 10, 2023
1 parent 1915ce6 commit 5fbcac0
Show file tree
Hide file tree
Showing 10 changed files with 427 additions and 109 deletions.
35 changes: 18 additions & 17 deletions api/health/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ var _ Client = (*client)(nil)
// AwaitHealthy, and AwaitAlive.
type Client interface {
// Readiness returns whether the node has finished initialization
Readiness(context.Context, ...rpc.Option) (*APIReply, error)
Readiness(ctx context.Context, tags []string, options ...rpc.Option) (*APIReply, error)
// Health returns a summation of the health of the node
Health(context.Context, ...rpc.Option) (*APIReply, error)
Health(ctx context.Context, tags []string, options ...rpc.Option) (*APIReply, error)
// Liveness returns whether the node is in need of a restart
Liveness(context.Context, ...rpc.Option) (*APIReply, error)
Liveness(ctx context.Context, tags []string, options ...rpc.Option) (*APIReply, error)
}

// Client implementation for Avalanche Health API Endpoint
Expand All @@ -36,53 +36,54 @@ func NewClient(uri string) Client {
)}
}

func (c *client) Readiness(ctx context.Context, options ...rpc.Option) (*APIReply, error) {
func (c *client) Readiness(ctx context.Context, tags []string, options ...rpc.Option) (*APIReply, error) {
res := &APIReply{}
err := c.requester.SendRequest(ctx, "health.readiness", struct{}{}, res, options...)
err := c.requester.SendRequest(ctx, "health.readiness", &APIArgs{Tags: tags}, res, options...)
return res, err
}

func (c *client) Health(ctx context.Context, options ...rpc.Option) (*APIReply, error) {
func (c *client) Health(ctx context.Context, tags []string, options ...rpc.Option) (*APIReply, error) {
res := &APIReply{}
err := c.requester.SendRequest(ctx, "health.health", struct{}{}, res, options...)
err := c.requester.SendRequest(ctx, "health.health", &APIArgs{Tags: tags}, res, options...)
return res, err
}

func (c *client) Liveness(ctx context.Context, options ...rpc.Option) (*APIReply, error) {
func (c *client) Liveness(ctx context.Context, tags []string, options ...rpc.Option) (*APIReply, error) {
res := &APIReply{}
err := c.requester.SendRequest(ctx, "health.liveness", struct{}{}, res, options...)
err := c.requester.SendRequest(ctx, "health.liveness", &APIArgs{Tags: tags}, res, options...)
return res, err
}

// AwaitReady polls the node every [freq] until the node reports ready.
// Only returns an error if [ctx] returns an error.
func AwaitReady(ctx context.Context, c Client, freq time.Duration, options ...rpc.Option) (bool, error) {
return await(ctx, freq, c.Readiness, options...)
func AwaitReady(ctx context.Context, c Client, freq time.Duration, tags []string, options ...rpc.Option) (bool, error) {
return await(ctx, freq, c.Readiness, tags, options...)
}

// AwaitHealthy polls the node every [freq] until the node reports healthy.
// Only returns an error if [ctx] returns an error.
func AwaitHealthy(ctx context.Context, c Client, freq time.Duration, options ...rpc.Option) (bool, error) {
return await(ctx, freq, c.Health, options...)
func AwaitHealthy(ctx context.Context, c Client, freq time.Duration, tags []string, options ...rpc.Option) (bool, error) {
return await(ctx, freq, c.Health, tags, options...)
}

// AwaitAlive polls the node every [freq] until the node reports liveness.
// Only returns an error if [ctx] returns an error.
func AwaitAlive(ctx context.Context, c Client, freq time.Duration, options ...rpc.Option) (bool, error) {
return await(ctx, freq, c.Liveness, options...)
func AwaitAlive(ctx context.Context, c Client, freq time.Duration, tags []string, options ...rpc.Option) (bool, error) {
return await(ctx, freq, c.Liveness, tags, options...)
}

func await(
ctx context.Context,
freq time.Duration,
check func(context.Context, ...rpc.Option) (*APIReply, error),
check func(ctx context.Context, tags []string, options ...rpc.Option) (*APIReply, error),
tags []string,
options ...rpc.Option,
) (bool, error) {
ticker := time.NewTicker(freq)
defer ticker.Stop()

for {
res, err := check(ctx, options...)
res, err := check(ctx, tags, options...)
if err == nil && res.Healthy {
return true, nil
}
Expand Down
18 changes: 9 additions & 9 deletions api/health/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,34 +48,34 @@ func TestClient(t *testing.T) {
}

{
readiness, err := c.Readiness(context.Background())
readiness, err := c.Readiness(context.Background(), nil)
require.NoError(err)
require.True(readiness.Healthy)
}

{
health, err := c.Health(context.Background())
health, err := c.Health(context.Background(), nil)
require.NoError(err)
require.True(health.Healthy)
}

{
liveness, err := c.Liveness(context.Background())
liveness, err := c.Liveness(context.Background(), nil)
require.NoError(err)
require.True(liveness.Healthy)
}

{
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
healthy, err := AwaitHealthy(ctx, c, time.Second)
healthy, err := AwaitHealthy(ctx, c, time.Second, nil)
cancel()
require.NoError(err)
require.True(healthy)
}

{
ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
healthy, err := AwaitReady(ctx, c, time.Second)
healthy, err := AwaitReady(ctx, c, time.Second, nil)
cancel()
require.NoError(err)
require.True(healthy)
Expand All @@ -85,15 +85,15 @@ func TestClient(t *testing.T) {

{
ctx, cancel := context.WithTimeout(context.Background(), 20*time.Microsecond)
healthy, err := AwaitHealthy(ctx, c, time.Microsecond)
healthy, err := AwaitHealthy(ctx, c, time.Microsecond, nil)
cancel()
require.Error(err)
require.False(healthy)
}

{
ctx, cancel := context.WithTimeout(context.Background(), 20*time.Microsecond)
healthy, err := AwaitReady(ctx, c, time.Microsecond)
healthy, err := AwaitReady(ctx, c, time.Microsecond, nil)
cancel()
require.Error(err)
require.False(healthy)
Expand All @@ -104,14 +104,14 @@ func TestClient(t *testing.T) {
}

{
healthy, err := AwaitHealthy(context.Background(), c, time.Microsecond)
healthy, err := AwaitHealthy(context.Background(), c, time.Microsecond, nil)
require.NoError(err)
require.True(healthy)
}

mc.reply.Healthy = false
{
healthy, err := AwaitReady(context.Background(), c, time.Microsecond)
healthy, err := AwaitReady(context.Background(), c, time.Microsecond, nil)
require.NoError(err)
require.True(healthy)
}
Expand Down
5 changes: 3 additions & 2 deletions api/health/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,13 @@ func NewGetAndPostHandler(log logging.Logger, reporter Reporter) (http.Handler,

// NewGetHandler returns a health handler that supports GET requests reporting
// the result of the provided [reporter].
func NewGetHandler(reporter func() (map[string]Result, bool)) http.Handler {
func NewGetHandler(reporter func(tags ...string) (map[string]Result, bool)) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
// Make sure the content type is set before writing the header.
w.Header().Set("Content-Type", "application/json")

checks, healthy := reporter()
tags := r.URL.Query()["tag"]
checks, healthy := reporter(tags...)
if !healthy {
// If a health check has failed, we should return a 503.
w.WriteHeader(http.StatusServiceUnavailable)
Expand Down
42 changes: 24 additions & 18 deletions api/health/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@ import (
"github.com/ava-labs/avalanchego/utils/logging"
)

// GlobalTag is the tag that marks a health check as applying to every query,
// regardless of the tags passed to the Reporter.
// Registering a health check with this tag will ensure that it is always
// included in the results.
const GlobalTag = "global"

var _ Health = (*health)(nil)

// Health defines the full health service interface for registering, reporting
Expand All @@ -28,16 +34,16 @@ type Health interface {

// Registerer defines how to register new components to check the health of.
type Registerer interface {
RegisterReadinessCheck(name string, checker Checker) error
RegisterHealthCheck(name string, checker Checker) error
RegisterLivenessCheck(name string, checker Checker) error
RegisterReadinessCheck(name string, checker Checker, tags ...string) error
RegisterHealthCheck(name string, checker Checker, tags ...string) error
RegisterLivenessCheck(name string, checker Checker, tags ...string) error
}

// Reporter returns the current health status.
type Reporter interface {
Readiness() (map[string]Result, bool)
Health() (map[string]Result, bool)
Liveness() (map[string]Result, bool)
Readiness(tags ...string) (map[string]Result, bool)
Health(tags ...string) (map[string]Result, bool)
Liveness(tags ...string) (map[string]Result, bool)
}

type health struct {
Expand Down Expand Up @@ -67,20 +73,20 @@ func New(log logging.Logger, registerer prometheus.Registerer) (Health, error) {
}, err
}

func (h *health) RegisterReadinessCheck(name string, checker Checker) error {
return h.readiness.RegisterMonotonicCheck(name, checker)
func (h *health) RegisterReadinessCheck(name string, checker Checker, tags ...string) error {
return h.readiness.RegisterMonotonicCheck(name, checker, tags...)
}

func (h *health) RegisterHealthCheck(name string, checker Checker) error {
return h.health.RegisterCheck(name, checker)
func (h *health) RegisterHealthCheck(name string, checker Checker, tags ...string) error {
return h.health.RegisterCheck(name, checker, tags...)
}

func (h *health) RegisterLivenessCheck(name string, checker Checker) error {
return h.liveness.RegisterCheck(name, checker)
func (h *health) RegisterLivenessCheck(name string, checker Checker, tags ...string) error {
return h.liveness.RegisterCheck(name, checker, tags...)
}

func (h *health) Readiness() (map[string]Result, bool) {
results, healthy := h.readiness.Results()
func (h *health) Readiness(tags ...string) (map[string]Result, bool) {
results, healthy := h.readiness.Results(tags...)
if !healthy {
h.log.Warn("failing readiness check",
zap.Reflect("reason", results),
Expand All @@ -89,8 +95,8 @@ func (h *health) Readiness() (map[string]Result, bool) {
return results, healthy
}

func (h *health) Health() (map[string]Result, bool) {
results, healthy := h.health.Results()
func (h *health) Health(tags ...string) (map[string]Result, bool) {
results, healthy := h.health.Results(tags...)
if !healthy {
h.log.Warn("failing health check",
zap.Reflect("reason", results),
Expand All @@ -99,8 +105,8 @@ func (h *health) Health() (map[string]Result, bool) {
return results, healthy
}

func (h *health) Liveness() (map[string]Result, bool) {
results, healthy := h.liveness.Results()
func (h *health) Liveness(tags ...string) (map[string]Result, bool) {
results, healthy := h.liveness.Results(tags...)
if !healthy {
h.log.Warn("failing liveness check",
zap.Reflect("reason", results),
Expand Down
Loading

0 comments on commit 5fbcac0

Please sign in to comment.