diff --git a/alert.go b/alert.go index 3f089f78b..3e330659f 100644 --- a/alert.go +++ b/alert.go @@ -5,7 +5,6 @@ import ( "encoding/json" "fmt" html "html/template" - "log" "os" "sync" text "text/template" @@ -14,6 +13,7 @@ import ( "github.com/influxdata/kapacitor/alert" "github.com/influxdata/kapacitor/edge" "github.com/influxdata/kapacitor/expvar" + "github.com/influxdata/kapacitor/keyvalue" "github.com/influxdata/kapacitor/models" "github.com/influxdata/kapacitor/pipeline" alertservice "github.com/influxdata/kapacitor/services/alert" @@ -75,9 +75,13 @@ type AlertNode struct { } // Create a new AlertNode which caches the most recent item and exposes it over the HTTP API. -func newAlertNode(et *ExecutingTask, n *pipeline.AlertNode, l *log.Logger) (an *AlertNode, err error) { +func newAlertNode(et *ExecutingTask, n *pipeline.AlertNode, d NodeDiagnostic) (an *AlertNode, err error) { + ctx := []keyvalue.T{ + keyvalue.KV("task", et.Task.ID), + } + an = &AlertNode{ - node: node{Node: n, et: et, logger: l}, + node: node{Node: n, et: et, diag: d}, a: n, } an.node.runF = an.runAlert @@ -126,7 +130,7 @@ func newAlertNode(et *ExecutingTask, n *pipeline.AlertNode, l *log.Logger) (an * c := alertservice.TCPHandlerConfig{ Address: tcp.Address, } - h := alertservice.NewTCPHandler(c, l) + h := alertservice.NewTCPHandler(c, an.diag) an.handlers = append(an.handlers, h) } @@ -134,12 +138,12 @@ func newAlertNode(et *ExecutingTask, n *pipeline.AlertNode, l *log.Logger) (an * c := smtp.HandlerConfig{ To: email.ToList, } - h := et.tm.SMTPService.Handler(c, l) + h := et.tm.SMTPService.Handler(c, ctx...) an.handlers = append(an.handlers, h) } if len(n.EmailHandlers) == 0 && (et.tm.SMTPService != nil && et.tm.SMTPService.Global()) { c := smtp.HandlerConfig{} - h := et.tm.SMTPService.Handler(c, l) + h := et.tm.SMTPService.Handler(c, ctx...) an.handlers = append(an.handlers, h) } // If email has been configured with state changes only set it. @@ -155,7 +159,7 @@ func newAlertNode(et *ExecutingTask, n *pipeline.AlertNode, l *log.Logger) (an * Args: e.Command[1:], Commander: et.tm.Commander, } - h := alertservice.NewExecHandler(c, l) + h := alertservice.NewExecHandler(c, an.diag) an.handlers = append(an.handlers, h) } @@ -165,7 +169,7 @@ func newAlertNode(et *ExecutingTask, n *pipeline.AlertNode, l *log.Logger) (an * if log.Mode != 0 { c.Mode = os.FileMode(log.Mode) } - h, err := alertservice.NewLogHandler(c, l) + h, err := alertservice.NewLogHandler(c, an.diag) if err != nil { return nil, errors.Wrap(err, "failed to create log alert handler") } @@ -176,12 +180,12 @@ func newAlertNode(et *ExecutingTask, n *pipeline.AlertNode, l *log.Logger) (an * c := victorops.HandlerConfig{ RoutingKey: vo.RoutingKey, } - h := et.tm.VictorOpsService.Handler(c, l) + h := et.tm.VictorOpsService.Handler(c, ctx...) an.handlers = append(an.handlers, h) } if len(n.VictorOpsHandlers) == 0 && (et.tm.VictorOpsService != nil && et.tm.VictorOpsService.Global()) { c := victorops.HandlerConfig{} - h := et.tm.VictorOpsService.Handler(c, l) + h := et.tm.VictorOpsService.Handler(c, ctx...) an.handlers = append(an.handlers, h) } @@ -189,12 +193,12 @@ func newAlertNode(et *ExecutingTask, n *pipeline.AlertNode, l *log.Logger) (an * c := pagerduty.HandlerConfig{ ServiceKey: pd.ServiceKey, } - h := et.tm.PagerDutyService.Handler(c, l) + h := et.tm.PagerDutyService.Handler(c, ctx...) 
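Note: every `Handler(c, l)` call in this constructor now becomes `Handler(c, ctx...)`, so each service handler inherits the task-scoped context (`keyvalue.KV("task", et.Task.ID)`) built at the top of `newAlertNode` rather than a shared `*log.Logger`. A minimal sketch of the receiving side, assuming a service whose diagnostic handle exposes `WithContext` (as the test changes later in this diff do); the `Diagnostic` interface, `Service`, and `handler` types here are illustrative, not the actual service API:

```go
package example

import "github.com/influxdata/kapacitor/keyvalue"

// Diagnostic is an assumed per-service diagnostic handle; the real ones
// are created by services/diagnostic (e.g. NewSlackHandler) and support
// WithContext, as seen in the streamer_test.go changes below.
type Diagnostic interface {
	WithContext(ctx ...keyvalue.T) Diagnostic
	Error(msg string, err error)
}

type HandlerConfig struct {
	RoutingKey string
}

type Service struct {
	diag Diagnostic
}

type handler struct {
	s    *Service
	c    HandlerConfig
	diag Diagnostic
}

// Handler mirrors the new signature: config plus variadic key-value
// context. The handler's diagnostic then tags every message it emits
// with e.g. task=<task ID>, with no logger threaded through.
func (s *Service) Handler(c HandlerConfig, ctx ...keyvalue.T) *handler {
	return &handler{s: s, c: c, diag: s.diag.WithContext(ctx...)}
}
```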
an.handlers = append(an.handlers, h) } if len(n.PagerDutyHandlers) == 0 && (et.tm.PagerDutyService != nil && et.tm.PagerDutyService.Global()) { c := pagerduty.HandlerConfig{} - h := et.tm.PagerDutyService.Handler(c, l) + h := et.tm.PagerDutyService.Handler(c, ctx...) an.handlers = append(an.handlers, h) } @@ -203,7 +207,7 @@ func newAlertNode(et *ExecutingTask, n *pipeline.AlertNode, l *log.Logger) (an * Source: s.Source, Handlers: s.HandlersList, } - h, err := et.tm.SensuService.Handler(c, l) + h, err := et.tm.SensuService.Handler(c, ctx...) if err != nil { return nil, errors.Wrap(err, "failed to create sensu alert handler") } @@ -216,11 +220,11 @@ func newAlertNode(et *ExecutingTask, n *pipeline.AlertNode, l *log.Logger) (an * Username: s.Username, IconEmoji: s.IconEmoji, } - h := et.tm.SlackService.Handler(c, l) + h := et.tm.SlackService.Handler(c, ctx...) an.handlers = append(an.handlers, h) } if len(n.SlackHandlers) == 0 && (et.tm.SlackService != nil && et.tm.SlackService.Global()) { - h := et.tm.SlackService.Handler(slack.HandlerConfig{}, l) + h := et.tm.SlackService.Handler(slack.HandlerConfig{}, ctx...) an.handlers = append(an.handlers, h) } // If slack has been configured with state changes only set it. @@ -237,7 +241,7 @@ func newAlertNode(et *ExecutingTask, n *pipeline.AlertNode, l *log.Logger) (an * DisableWebPagePreview: t.IsDisableWebPagePreview, DisableNotification: t.IsDisableNotification, } - h := et.tm.TelegramService.Handler(c, l) + h := et.tm.TelegramService.Handler(c, ctx...) an.handlers = append(an.handlers, h) } @@ -254,7 +258,7 @@ func newAlertNode(et *ExecutingTask, n *pipeline.AlertNode, l *log.Logger) (an * TrapOid: s.TrapOid, DataList: dataList, } - h, err := et.tm.SNMPTrapService.Handler(c, l) + h, err := et.tm.SNMPTrapService.Handler(c, ctx...) if err != nil { return nil, errors.Wrapf(err, "failed to create SNMP handler") } @@ -263,7 +267,7 @@ func newAlertNode(et *ExecutingTask, n *pipeline.AlertNode, l *log.Logger) (an * if len(n.TelegramHandlers) == 0 && (et.tm.TelegramService != nil && et.tm.TelegramService.Global()) { c := telegram.HandlerConfig{} - h := et.tm.TelegramService.Handler(c, l) + h := et.tm.TelegramService.Handler(c, ctx...) an.handlers = append(an.handlers, h) } // If telegram has been configured with state changes only set it. @@ -278,12 +282,12 @@ func newAlertNode(et *ExecutingTask, n *pipeline.AlertNode, l *log.Logger) (an * Room: hc.Room, Token: hc.Token, } - h := et.tm.HipChatService.Handler(c, l) + h := et.tm.HipChatService.Handler(c, ctx...) an.handlers = append(an.handlers, h) } if len(n.HipChatHandlers) == 0 && (et.tm.HipChatService != nil && et.tm.HipChatService.Global()) { c := hipchat.HandlerConfig{} - h := et.tm.HipChatService.Handler(c, l) + h := et.tm.HipChatService.Handler(c, ctx...) an.handlers = append(an.handlers, h) } // If HipChat has been configured with state changes only set it. @@ -322,7 +326,7 @@ func newAlertNode(et *ExecutingTask, n *pipeline.AlertNode, l *log.Logger) (an * if a.Timeout != 0 { c.Timeout = a.Timeout } - h, err := et.tm.AlertaService.Handler(c, l) + h, err := et.tm.AlertaService.Handler(c, ctx...) if err != nil { return nil, errors.Wrap(err, "failed to create Alerta handler") } @@ -346,7 +350,7 @@ func newAlertNode(et *ExecutingTask, n *pipeline.AlertNode, l *log.Logger) (an * if p.Sound != "" { c.Sound = p.Sound } - h := et.tm.PushoverService.Handler(c, l) + h := et.tm.PushoverService.Handler(c, ctx...) 
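Note: the `ctx` slice threaded through all of these handler constructors is plain data; `keyvalue.T`, introduced later in this diff as keyvalue/type.go, is just a `{Key, Value string}` pair. A tiny runnable sketch of how a line-oriented diagnostic sink might fold the pairs into its output; `renderKV` is a hypothetical helper, not part of kapacitor:

```go
package main

import (
	"fmt"

	"github.com/influxdata/kapacitor/keyvalue"
)

// renderKV flattens keyvalue.T pairs into a " k=v" suffix, roughly the
// shape a text log handler could append to each diagnostic message.
func renderKV(ctx ...keyvalue.T) string {
	var out string
	for _, kv := range ctx {
		out += fmt.Sprintf(" %s=%s", kv.Key, kv.Value)
	}
	return out
}

func main() {
	ctx := []keyvalue.T{keyvalue.KV("task", "cpu_alert")}
	fmt.Println("failed to get event state for topic" + renderKV(ctx...))
	// prints: failed to get event state for topic task=cpu_alert
}
```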
an.handlers = append(an.handlers, h) } @@ -356,7 +360,7 @@ func newAlertNode(et *ExecutingTask, n *pipeline.AlertNode, l *log.Logger) (an * Endpoint: p.Endpoint, Headers: p.Headers, } - h := et.tm.HTTPPostService.Handler(c, l) + h := et.tm.HTTPPostService.Handler(c, ctx...) an.handlers = append(an.handlers, h) } @@ -365,17 +369,17 @@ func newAlertNode(et *ExecutingTask, n *pipeline.AlertNode, l *log.Logger) (an * TeamsList: og.TeamsList, RecipientsList: og.RecipientsList, } - h := et.tm.OpsGenieService.Handler(c, l) + h := et.tm.OpsGenieService.Handler(c, ctx...) an.handlers = append(an.handlers, h) } if len(n.OpsGenieHandlers) == 0 && (et.tm.OpsGenieService != nil && et.tm.OpsGenieService.Global()) { c := opsgenie.HandlerConfig{} - h := et.tm.OpsGenieService.Handler(c, l) + h := et.tm.OpsGenieService.Handler(c, ctx...) an.handlers = append(an.handlers, h) } for range n.TalkHandlers { - h := et.tm.TalkService.Handler(l) + h := et.tm.TalkService.Handler(ctx...) an.handlers = append(an.handlers, h) } @@ -386,7 +390,7 @@ func newAlertNode(et *ExecutingTask, n *pipeline.AlertNode, l *log.Logger) (an * QoS: mqtt.QoSLevel(m.Qos), Retained: m.Retained, } - h := et.tm.MQTTService.Handler(c, l) + h := et.tm.MQTTService.Handler(c, ctx...) an.handlers = append(an.handlers, h) } // Parse level expressions @@ -560,8 +564,8 @@ func (n *AlertNode) restoreEvent(id string) (alert.Level, time.Time) { // Check for previous state on anonTopic if n.hasAnonTopic() { if state, ok, err := n.et.tm.AlertService.EventState(n.anonTopic, id); err != nil { - n.incrementErrorCount() - n.logger.Printf("E! failed to get event state for anonymous topic %s, event %s: %v", n.anonTopic, id, err) + n.diag.Error("failed to get event state for anonymous topic", err, + keyvalue.KV("topic", n.anonTopic), keyvalue.KV("event", id)) } else if ok { anonTopicState = state anonFound = true @@ -570,8 +574,8 @@ func (n *AlertNode) restoreEvent(id string) (alert.Level, time.Time) { // Check for previous state on topic. if n.hasTopic() { if state, ok, err := n.et.tm.AlertService.EventState(n.topic, id); err != nil { - n.incrementErrorCount() - n.logger.Printf("E! failed to get event state for topic %s, event %s: %v", n.topic, id, err) + n.diag.Error("failed to get event state for topic", err, + keyvalue.KV("topic", n.topic), keyvalue.KV("event", id)) } else if ok { topicState = state topicFound = true @@ -581,14 +585,12 @@ if anonFound && topicFound { // Anon topic takes precedence if err := n.et.tm.AlertService.UpdateEvent(n.topic, anonTopicState); err != nil { - n.incrementErrorCount() - n.logger.Printf("E! failed to update topic %q event state for event %q", n.topic, id) + n.diag.Error("failed to update topic event state", err, keyvalue.KV("topic", n.topic), keyvalue.KV("event", id)) } } else if topicFound && n.hasAnonTopic() { // Update event state for topic if err := n.et.tm.AlertService.UpdateEvent(n.anonTopic, topicState); err != nil { - n.incrementErrorCount() - n.logger.Printf("E! failed to update topic %q event state for event %q", n.topic, id) + n.diag.Error("failed to update topic event state", err, keyvalue.KV("topic", n.anonTopic), keyvalue.KV("event", id)) } } // else nothing was found, nothing to do } @@ -623,7 +625,7 @@ func (n *AlertNode) handleEvent(event alert.Event) { case alert.Critical: n.critsTriggered.Add(1) } - n.logger.Printf("D! 
%v alert triggered id:%s msg:%s data:%v", event.State.Level, event.State.ID, event.State.Message, event.Data.Result.Series[0]) + n.diag.AlertTriggered(event.State.Level, event.State.ID, event.State.Message, event.Data.Result.Series[0]) // If we have anon handlers, emit event to the anonTopic if n.hasAnonTopic() { @@ -631,8 +633,7 @@ func (n *AlertNode) handleEvent(event alert.Event) { err := n.et.tm.AlertService.Collect(event) if err != nil { n.eventsDropped.Add(1) - n.incrementErrorCount() - n.logger.Println("E!", err) + n.diag.Error("encountered error collecting event", err) } } @@ -642,8 +643,7 @@ func (n *AlertNode) handleEvent(event alert.Event) { err := n.et.tm.AlertService.Collect(event) if err != nil { n.eventsDropped.Add(1) - n.incrementErrorCount() - n.logger.Println("E!", err) + n.diag.Error("encountered error collecting event", err) } } } @@ -654,8 +654,7 @@ func (n *AlertNode) determineLevel(p edge.FieldsTagsTimeGetter, currentLevel ale } if rse := n.levelResets[currentLevel]; rse != nil { if pass, err := EvalPredicate(rse, n.lrScopePools[currentLevel], p); err != nil { - n.incrementErrorCount() - n.logger.Printf("E! error evaluating reset expression for current level %v: %s", currentLevel, err) + n.diag.Error("error evaluating reset expression for current level", err, keyvalue.KV("level", currentLevel.String())) } else if !pass { return currentLevel } @@ -676,8 +675,7 @@ func (n *AlertNode) findFirstMatchLevel(start alert.Level, stop alert.Level, p e continue } if pass, err := EvalPredicate(se, n.scopePools[l], p); err != nil { - n.incrementErrorCount() - n.logger.Printf("E! error evaluating expression for level %v: %s", alert.Level(l), err) + n.diag.Error("error evaluating expression for level", err, keyvalue.KV("level", alert.Level(l).String())) continue } else if pass { return alert.Level(l), true diff --git a/alert/topics.go b/alert/topics.go index b33c00803..7d2a22220 100644 --- a/alert/topics.go +++ b/alert/topics.go @@ -2,7 +2,6 @@ package alert import ( "fmt" - "log" "path" "sort" "sync" @@ -20,14 +19,11 @@ type Topics struct { mu sync.RWMutex topics map[string]*Topic - - logger *log.Logger } -func NewTopics(l *log.Logger) *Topics { +func NewTopics() *Topics { s := &Topics{ topics: make(map[string]*Topic), - logger: l, } return s } diff --git a/autoscale.go b/autoscale.go index 7c386655c..877f898b1 100644 --- a/autoscale.go +++ b/autoscale.go @@ -2,7 +2,6 @@ package kapacitor import ( "fmt" - "log" "time" "github.com/influxdata/kapacitor/edge" @@ -71,7 +70,7 @@ type AutoscaleNode struct { // Create a new AutoscaleNode which can trigger autoscale events. 
func newAutoscaleNode( et *ExecutingTask, - l *log.Logger, + d NodeDiagnostic, n pipeline.Node, a autoscaler, min, @@ -91,7 +90,7 @@ func newAutoscaleNode( } replicasScopePool := stateful.NewScopePool(ast.FindReferenceVariables(replicas.Expression)) kn := &AutoscaleNode{ - node: node{Node: n, et: et, logger: l}, + node: node{Node: n, et: et, diag: d}, resourceStates: make(map[string]resourceState), min: min, max: max, @@ -153,8 +152,7 @@ func (g *autoscaleGroup) BeginBatch(begin edge.BeginBatchMessage) (edge.Message, func (g *autoscaleGroup) BatchPoint(bp edge.BatchPointMessage) (edge.Message, error) { np, err := g.n.handlePoint(g.begin.Name(), g.begin.Dimensions(), bp, g.expr) if err != nil { - g.n.incrementErrorCount() - g.n.logger.Println("E!", err) + g.n.diag.Error("error batch handling point", err) } return np, nil } @@ -166,8 +164,7 @@ func (g *autoscaleGroup) EndBatch(end edge.EndBatchMessage) (edge.Message, error func (g *autoscaleGroup) Point(p edge.PointMessage) (edge.Message, error) { np, err := g.n.handlePoint(p.Name(), p.Dimensions(), p, g.expr) if err != nil { - g.n.incrementErrorCount() - g.n.logger.Println("E!", err) + g.n.diag.Error("error handling point", err) } return np, nil } @@ -284,7 +281,7 @@ func (n *AutoscaleNode) handlePoint(streamName string, dims models.Dimensions, p } func (n *AutoscaleNode) applyEvent(e event) error { - n.logger.Printf("D! setting replicas to %d was %d for %q", e.New, e.Old, e.ID) + n.diag.SettingReplicas(e.New, e.Old, e.ID.ID()) err := n.a.SetReplicas(e.ID, e.New) return errors.Wrapf(err, "failed to set new replica count for %q", e.ID) } @@ -333,7 +330,7 @@ type k8sAutoscaler struct { namespace string } -func newK8sAutoscaleNode(et *ExecutingTask, n *pipeline.K8sAutoscaleNode, l *log.Logger) (*AutoscaleNode, error) { +func newK8sAutoscaleNode(et *ExecutingTask, n *pipeline.K8sAutoscaleNode, d NodeDiagnostic) (*AutoscaleNode, error) { client, err := et.tm.K8sService.Client(n.Cluster) if err != nil { return nil, fmt.Errorf("cannot use the k8sAutoscale node, could not create kubernetes client: %v", err) @@ -350,7 +347,7 @@ func newK8sAutoscaleNode(et *ExecutingTask, n *pipeline.K8sAutoscaleNode, l *log } return newAutoscaleNode( et, - l, + d, n, a, int(n.Min), @@ -458,7 +455,7 @@ type swarmAutoscaler struct { outputServiceNameTag string } -func newSwarmAutoscaleNode(et *ExecutingTask, n *pipeline.SwarmAutoscaleNode, l *log.Logger) (*AutoscaleNode, error) { +func newSwarmAutoscaleNode(et *ExecutingTask, n *pipeline.SwarmAutoscaleNode, d NodeDiagnostic) (*AutoscaleNode, error) { client, err := et.tm.SwarmService.Client(n.Cluster) if err != nil { return nil, fmt.Errorf("cannot use the swarmAutoscale node, could not create swarm client: %v", err) @@ -475,7 +472,7 @@ func newSwarmAutoscaleNode(et *ExecutingTask, n *pipeline.SwarmAutoscaleNode, l } return newAutoscaleNode( et, - l, + d, n, a, int(n.Min), diff --git a/batch.go b/batch.go index 155a3fbe5..f1a475745 100644 --- a/batch.go +++ b/batch.go @@ -3,7 +3,6 @@ package kapacitor import ( "bytes" "fmt" - "log" "sync" "time" @@ -27,9 +26,9 @@ type BatchNode struct { idx int } -func newBatchNode(et *ExecutingTask, n *pipeline.BatchNode, l *log.Logger) (*BatchNode, error) { +func newBatchNode(et *ExecutingTask, n *pipeline.BatchNode, d NodeDiagnostic) (*BatchNode, error) { sn := &BatchNode{ - node: node{Node: n, et: et, logger: l}, + node: node{Node: n, et: et, diag: d}, s: n, } return sn, nil @@ -141,9 +140,9 @@ type QueryNode struct { byName bool } -func newQueryNode(et *ExecutingTask, n 
*pipeline.QueryNode, l *log.Logger) (*QueryNode, error) { +func newQueryNode(et *ExecutingTask, n *pipeline.QueryNode, d NodeDiagnostic) (*QueryNode, error) { bn := &QueryNode{ - node: node{Node: n, et: et, logger: l}, + node: node{Node: n, et: et, diag: d}, b: n, closing: make(chan struct{}), aborting: make(chan struct{}), @@ -295,7 +294,7 @@ func (n *QueryNode) doQuery(in edge.Edge) error { n.query.SetStopTime(stop) qStr := n.query.String() - n.logger.Println("D! starting next batch query:", qStr) + n.diag.StartingBatchQuery(qStr) // Execute query q := influxdb.Query{ @@ -303,8 +302,7 @@ func (n *QueryNode) doQuery(in edge.Edge) error { } resp, err := con.Query(q) if err != nil { - n.incrementErrorCount() - n.logger.Println("E!", err) + n.diag.Error("error executing query", err) n.timer.Stop() break } @@ -313,8 +311,7 @@ func (n *QueryNode) doQuery(in edge.Edge) error { for _, res := range resp.Results { batches, err := edge.ResultToBufferedBatches(res, n.byName) if err != nil { - n.incrementErrorCount() - n.logger.Println("E! failed to understand query result:", err) + n.diag.Error("failed to understand query result", err) continue } for _, bch := range batches { diff --git a/cmd/kapacitord/main.go b/cmd/kapacitord/main.go index a11d28e52..318a4e238 100644 --- a/cmd/kapacitord/main.go +++ b/cmd/kapacitord/main.go @@ -4,7 +4,6 @@ import ( "flag" "fmt" "io" - "log" "math/rand" "os" "os/signal" @@ -14,9 +13,10 @@ import ( "github.com/influxdata/kapacitor/cmd/kapacitord/help" "github.com/influxdata/kapacitor/cmd/kapacitord/run" - "github.com/influxdata/wlog" ) +type Diagnostic run.Diagnostic + // These variables are populated via the Go linker. var ( version string @@ -46,7 +46,7 @@ func main() { // Main represents the program execution. type Main struct { - Logger *log.Logger + Diag Diagnostic Stdin io.Reader Stdout io.Writer @@ -56,7 +56,6 @@ type Main struct { // NewMain return a new instance of Main. func NewMain() *Main { return &Main{ - Logger: wlog.New(os.Stderr, "[run] ", log.LstdFlags), Stdin: os.Stdin, Stdout: os.Stdout, Stderr: os.Stderr, @@ -79,22 +78,22 @@ func (m *Main) Run(args ...string) error { err := cmd.Run(args...) - // Use logger from cmd since it may have special config now. - if cmd.Logger != nil { - m.Logger = cmd.Logger + // Use diagnostic from cmd since it may have special config now. + if cmd.Diag != nil { + m.Diag = cmd.Diag } if err != nil { - m.Logger.Println("E!", err) + m.Diag.Error("encountered error", err) return fmt.Errorf("run: %s", err) } signalCh := make(chan os.Signal, 1) signal.Notify(signalCh, os.Interrupt, syscall.SIGTERM) - m.Logger.Println("I! Listening for signals") + m.Diag.Info("listening for signals") // Block until one of the signals above is received select { case <-signalCh: - m.Logger.Println("I! Signal received, initializing clean shutdown...") + m.Diag.Info("signal received, initializing clean shutdown...") go func() { cmd.Close() }() @@ -102,14 +101,14 @@ func (m *Main) Run(args ...string) error { // Block again until another signal is received, a shutdown timeout elapses, // or the Command is gracefully closed - m.Logger.Println("I! Waiting for clean shutdown...") + m.Diag.Info("waiting for clean shutdown...") select { case <-signalCh: - m.Logger.Println("I! second signal received, initializing hard shutdown") + m.Diag.Info("second signal received, initializing hard shutdown") case <-time.After(time.Second * 30): - m.Logger.Println("I! time limit reached, initializing hard shutdown") + m.Diag.Info("time limit reached, initializing hard shutdown") case <-cmd.Closed: - m.Logger.Println("I! 
server shutdown completed") + m.Diag.Info("server shutdown completed") } // goodbye. diff --git a/cmd/kapacitord/run/command.go b/cmd/kapacitord/run/command.go index 72e8186a9..fb930b862 100644 --- a/cmd/kapacitord/run/command.go +++ b/cmd/kapacitord/run/command.go @@ -8,13 +8,11 @@ import ( "log" "os" "path/filepath" - "runtime" "strconv" "github.com/BurntSushi/toml" "github.com/influxdata/kapacitor/server" - "github.com/influxdata/kapacitor/services/logging" - "github.com/influxdata/kapacitor/tick" + "github.com/influxdata/kapacitor/services/diagnostic" ) const logo = ` @@ -29,6 +27,13 @@ const logo = ` ` +type Diagnostic interface { + Error(msg string, err error) + KapacitorStarting(version, branch, commit string) + GoVersion() + Info(msg string) +} + // Command represents the command executed by "kapacitord run". type Command struct { Version string @@ -42,9 +47,10 @@ type Command struct { Stdout io.Writer Stderr io.Writer - Server *server.Server - Logger *log.Logger - logService *logging.Service + Server *server.Server + diagService *diagnostic.Service + + Diag Diagnostic } // NewCommand return a new instance of Command. @@ -96,20 +102,15 @@ func (cmd *Command) Run(args ...string) error { } // Initialize Logging Services - cmd.logService = logging.NewService(config.Logging, cmd.Stdout, cmd.Stderr) - err = cmd.logService.Open() - if err != nil { - return fmt.Errorf("init logging: %s", err) - } - // Initialize packages loggers - tick.SetLogger(cmd.logService.NewLogger("[tick] ", log.LstdFlags)) + cmd.diagService = diagnostic.NewService(config.Logging, cmd.Stdout, cmd.Stderr) + cmd.diagService.Open() - // Initialize cmd logger - cmd.Logger = cmd.logService.NewLogger("[run] ", log.LstdFlags) + // Initialize cmd diagnostic + cmd.Diag = cmd.diagService.NewCmdHandler() // Mark start-up in log., - cmd.Logger.Printf("I! Kapacitor starting, version %s, branch %s, commit %s", cmd.Version, cmd.Branch, cmd.Commit) - cmd.Logger.Printf("I! Go version %s", runtime.Version()) + cmd.Diag.KapacitorStarting(cmd.Version, cmd.Branch, cmd.Commit) + cmd.Diag.GoVersion() // Write the PID file. if err := cmd.writePIDFile(options.PIDFile); err != nil { @@ -118,7 +119,7 @@ func (cmd *Command) Run(args ...string) error { // Create server from config and start it. buildInfo := server.BuildInfo{Version: cmd.Version, Commit: cmd.Commit, Branch: cmd.Branch} - s, err := server.New(config, buildInfo, cmd.logService) + s, err := server.New(config, buildInfo, cmd.diagService) if err != nil { return fmt.Errorf("create server: %s", err) } @@ -142,8 +143,8 @@ func (cmd *Command) Close() error { if cmd.Server != nil { return cmd.Server.Close() } - if cmd.logService != nil { - return cmd.logService.Close() + if cmd.diagService != nil { + return cmd.diagService.Close() } return nil } @@ -153,7 +154,7 @@ func (cmd *Command) monitorServerErrors() { select { case err := <-cmd.Server.Err(): if err != nil { - cmd.Logger.Println("E! " + err.Error()) + cmd.Diag.Error("encountered error", err) } case <-cmd.closing: return diff --git a/combine.go b/combine.go index 5e40fe4b2..5c1e1f9a2 100644 --- a/combine.go +++ b/combine.go @@ -2,7 +2,6 @@ package kapacitor import ( "fmt" - "log" "time" "github.com/influxdata/kapacitor/edge" @@ -23,10 +22,10 @@ type CombineNode struct { } // Create a new CombineNode, which combines a stream with itself dynamically. 
-func newCombineNode(et *ExecutingTask, n *pipeline.CombineNode, l *log.Logger) (*CombineNode, error) { +func newCombineNode(et *ExecutingTask, n *pipeline.CombineNode, d NodeDiagnostic) (*CombineNode, error) { cn := &CombineNode{ c: n, - node: node{Node: n, et: et, logger: l}, + node: node{Node: n, et: et, diag: d}, combination: combination{max: n.Max}, } @@ -157,8 +156,7 @@ func (b *combineBuffer) combine() error { for i := range b.expressions { matched, err := EvalPredicate(b.expressions[i], b.n.scopePools[i], p) if err != nil { - b.n.incrementErrorCount() - b.n.logger.Println("E! evaluating lambda expression:", err) + b.n.diag.Error("error evaluating lambda expression", err) } matches[i][idx] = matched } diff --git a/default.go b/default.go index 3cba8b662..e6c505fa4 100644 --- a/default.go +++ b/default.go @@ -1,8 +1,6 @@ package kapacitor import ( - "log" - "github.com/influxdata/kapacitor/edge" "github.com/influxdata/kapacitor/expvar" "github.com/influxdata/kapacitor/models" @@ -23,9 +21,9 @@ type DefaultNode struct { } // Create a new DefaultNode which applies a transformation func to each point in a stream and returns a single point. -func newDefaultNode(et *ExecutingTask, n *pipeline.DefaultNode, l *log.Logger) (*DefaultNode, error) { +func newDefaultNode(et *ExecutingTask, n *pipeline.DefaultNode, d NodeDiagnostic) (*DefaultNode, error) { dn := &DefaultNode{ - node: node{Node: n, et: et, logger: l}, + node: node{Node: n, et: et, diag: d}, d: n, fieldsDefaulted: new(expvar.Int), tagsDefaulted: new(expvar.Int), diff --git a/delete.go b/delete.go index a51002adc..a4ca01b6b 100644 --- a/delete.go +++ b/delete.go @@ -1,8 +1,6 @@ package kapacitor import ( - "log" - "github.com/influxdata/kapacitor/edge" "github.com/influxdata/kapacitor/expvar" "github.com/influxdata/kapacitor/models" @@ -25,14 +23,14 @@ type DeleteNode struct { } // Create a new DeleteNode which applies a transformation func to each point in a stream and returns a single point. -func newDeleteNode(et *ExecutingTask, n *pipeline.DeleteNode, l *log.Logger) (*DeleteNode, error) { +func newDeleteNode(et *ExecutingTask, n *pipeline.DeleteNode, d NodeDiagnostic) (*DeleteNode, error) { tags := make(map[string]bool) for _, tag := range n.Tags { tags[tag] = true } dn := &DeleteNode{ - node: node{Node: n, et: et, logger: l}, + node: node{Node: n, et: et, diag: d}, d: n, fieldsDeleted: new(expvar.Int), tagsDeleted: new(expvar.Int), diff --git a/derivative.go b/derivative.go index ea0c4ed12..dd349937e 100644 --- a/derivative.go +++ b/derivative.go @@ -1,10 +1,12 @@ package kapacitor import ( - "log" + "errors" + "fmt" "time" "github.com/influxdata/kapacitor/edge" + "github.com/influxdata/kapacitor/keyvalue" "github.com/influxdata/kapacitor/models" "github.com/influxdata/kapacitor/pipeline" ) @@ -15,9 +17,9 @@ type DerivativeNode struct { } // Create a new derivative node. 
-func newDerivativeNode(et *ExecutingTask, n *pipeline.DerivativeNode, l *log.Logger) (*DerivativeNode, error) { +func newDerivativeNode(et *ExecutingTask, n *pipeline.DerivativeNode, d NodeDiagnostic) (*DerivativeNode, error) { dn := &DerivativeNode{ - node: node{Node: n, et: et, logger: l}, + node: node{Node: n, et: et, diag: d}, d: n, } // Create stateful expressions @@ -124,8 +126,11 @@ func (g *derivativeGroup) DeleteGroup(d edge.DeleteGroupMessage) (edge.Message, func (n *DerivativeNode) derivative(prev, curr models.Fields, prevTime, currTime time.Time) (float64, bool, bool) { f1, ok := numToFloat(curr[n.d.Field]) if !ok { - n.incrementErrorCount() - n.logger.Printf("E! cannot apply derivative to type %T", curr[n.d.Field]) + n.diag.Error("cannot perform derivative", + errors.New("field is the wrong type"), + keyvalue.KV("field", n.d.Field), + keyvalue.KV("type", fmt.Sprintf("%T", curr[n.d.Field])), + ) return 0, false, false } @@ -139,8 +144,7 @@ func (n *DerivativeNode) derivative(prev, curr models.Fields, prevTime, currTime elapsed := float64(currTime.Sub(prevTime)) if elapsed == 0 { - n.incrementErrorCount() - n.logger.Printf("E! cannot perform derivative elapsed time was 0") + n.diag.Error("cannot perform derivative", errors.New("elapsed time was 0")) return 0, true, false } diff := f1 - f0 diff --git a/edge.go b/edge.go index 46312f9b6..562466217 100644 --- a/edge.go +++ b/edge.go @@ -2,8 +2,6 @@ package kapacitor import ( "errors" - "fmt" - "log" "sync" "github.com/influxdata/kapacitor/edge" @@ -21,6 +19,10 @@ const ( var ErrAborted = errors.New("edged aborted") +type EdgeDiagnostic interface { + ClosingEdge(collected, emitted int64) +} + type Edge struct { edge.StatsEdge @@ -29,10 +31,10 @@ type Edge struct { statsKey string statMap *expvar.Map - logger *log.Logger + diag EdgeDiagnostic } -func newEdge(taskName, parentName, childName string, t pipeline.EdgeType, size int, logService LogService) edge.StatsEdge { +func newEdge(taskName, parentName, childName string, t pipeline.EdgeType, size int, d EdgeDiagnostic) edge.StatsEdge { e := edge.NewStatsEdge(edge.NewChannelEdge(t, defaultEdgeBufferSize)) tags := map[string]string{ "task": taskName, @@ -43,12 +45,11 @@ func newEdge(taskName, parentName, childName string, t pipeline.EdgeType, size i key, sm := vars.NewStatistic("edges", tags) sm.Set(statCollected, e.CollectedVar()) sm.Set(statEmitted, e.EmittedVar()) - name := fmt.Sprintf("%s|%s->%s", taskName, parentName, childName) return &Edge{ StatsEdge: e, statsKey: key, statMap: sm, - logger: logService.NewLogger(fmt.Sprintf("[edge:%s] ", name), log.LstdFlags), + diag: d, } } @@ -60,9 +61,6 @@ func (e *Edge) Close() error { } e.closed = true vars.DeleteStatistic(e.statsKey) - e.logger.Printf("D! closing c: %d e: %d", - e.Collected(), - e.Emitted(), - ) + e.diag.ClosingEdge(e.Collected(), e.Emitted()) return e.StatsEdge.Close() } diff --git a/edge/log.go b/edge/log.go index f1cfb1d44..c0ac4b31b 100644 --- a/edge/log.go +++ b/edge/log.go @@ -3,14 +3,17 @@ package edge import ( - "log" - "github.com/influxdata/kapacitor/pipeline" ) +type Diagnostic interface { + Collect(mtype MessageType) + Emit(mtype MessageType) +} + type logEdge struct { - e Edge - logger *log.Logger + e Edge + diag Diagnostic } // NewLogEdge creates an edge that logs the type of all collected and emitted messages. // This edge should only be used during debug sessions and not in production code. // As such by default build tags exclude this file from being compiled. 
// Add the `-tags debug` arguments to build or test commands in order to include this file for compilation. -func NewLogEdge(l *log.Logger, e Edge) Edge { +func NewLogEdge(d Diagnostic, e Edge) Edge { return &logEdge{ - e: e, - logger: l, + e: e, + diag: d, } } func (e *logEdge) Collect(m Message) error { - e.logger.Println("D! collect:", m.Type()) + e.diag.Collect(m.Type()) return e.e.Collect(m) } func (e *logEdge) Emit() (m Message, ok bool) { m, ok = e.e.Emit() if ok { - e.logger.Println("D! emit:", m.Type()) + e.diag.Emit(m.Type()) } return } diff --git a/eval.go b/eval.go index a3ee5dbc7..e1bd81b3d 100644 --- a/eval.go +++ b/eval.go @@ -3,7 +3,6 @@ package kapacitor import ( "errors" "fmt" - "log" "github.com/influxdata/kapacitor/edge" "github.com/influxdata/kapacitor/expvar" @@ -25,12 +24,12 @@ type EvalNode struct { } // Create a new EvalNode which applies a transformation func to each point in a stream and returns a single point. -func newEvalNode(et *ExecutingTask, n *pipeline.EvalNode, l *log.Logger) (*EvalNode, error) { +func newEvalNode(et *ExecutingTask, n *pipeline.EvalNode, d NodeDiagnostic) (*EvalNode, error) { if len(n.AsList) != len(n.Lambdas) { return nil, errors.New("must provide one name per expression via the 'As' property") } en := &EvalNode{ - node: node{Node: n, et: et, logger: l}, + node: node{Node: n, et: et, diag: d}, e: n, } @@ -210,9 +209,8 @@ func (g *evalGroup) Point(p edge.PointMessage) (edge.Message, error) { func (g *evalGroup) doEval(p edge.FieldsTagsTimeSetter) bool { err := g.n.eval(g.expressions, p) if err != nil { - g.n.incrementErrorCount() if !g.n.e.QuietFlag { - g.n.logger.Println("E!", err) + g.n.diag.Error("error evaluating expression", err) } // Skip bad point return false diff --git a/flatten.go b/flatten.go index 3189f9812..9bcb50155 100644 --- a/flatten.go +++ b/flatten.go @@ -2,7 +2,7 @@ package kapacitor import ( "bytes" - "log" + "fmt" "sync" "time" @@ -19,10 +19,10 @@ type FlattenNode struct { } // Create a new FlattenNode, which takes pairs from parent streams combines them into a single point. -func newFlattenNode(et *ExecutingTask, n *pipeline.FlattenNode, l *log.Logger) (*FlattenNode, error) { +func newFlattenNode(et *ExecutingTask, n *pipeline.FlattenNode, d NodeDiagnostic) (*FlattenNode, error) { fn := &FlattenNode{ f: n, - node: node{Node: n, et: et, logger: l}, + node: node{Node: n, et: et, diag: d}, bufPool: sync.Pool{ New: func() interface{} { return &bytes.Buffer{} }, }, } @@ -201,8 +201,7 @@ POINTS: } fieldPrefix.WriteString(v) } else { - n.incrementErrorCount() - n.logger.Printf("E! point missing tag %q for flatten operation", tag) + n.diag.Error("point missing tag for flatten operation", fmt.Errorf("tag %s is missing from point", tag)) continue POINTS } } diff --git a/group_by.go b/group_by.go index 8e12d6cb5..b39b7025d 100644 --- a/group_by.go +++ b/group_by.go @@ -1,7 +1,6 @@ package kapacitor import ( - "log" "sort" "sync" "time" @@ -31,9 +30,9 @@ type GroupByNode struct { } // Create a new GroupByNode which splits the stream dynamically based on the specified dimensions. 
-func newGroupByNode(et *ExecutingTask, n *pipeline.GroupByNode, l *log.Logger) (*GroupByNode, error) { +func newGroupByNode(et *ExecutingTask, n *pipeline.GroupByNode, d NodeDiagnostic) (*GroupByNode, error) { gn := &GroupByNode{ - node: node{Node: n, et: et, logger: l}, + node: node{Node: n, et: et, diag: d}, g: n, groups: make(map[models.GroupID]edge.BufferedBatchMessage), } diff --git a/http_out.go b/http_out.go index 002f75fa9..8ccf0d8b3 100644 --- a/http_out.go +++ b/http_out.go @@ -2,7 +2,7 @@ package kapacitor import ( "encoding/json" - "log" + "fmt" "net/http" "path" "sync" @@ -26,9 +26,9 @@ type HTTPOutNode struct { } // Create a new HTTPOutNode which caches the most recent item and exposes it over the HTTP API. -func newHTTPOutNode(et *ExecutingTask, n *pipeline.HTTPOutNode, l *log.Logger) (*HTTPOutNode, error) { +func newHTTPOutNode(et *ExecutingTask, n *pipeline.HTTPOutNode, d NodeDiagnostic) (*HTTPOutNode, error) { hn := &HTTPOutNode{ - node: node{Node: n, et: et, logger: l}, + node: node{Node: n, et: et, diag: d}, c: n, result: new(models.Result), } @@ -91,8 +91,8 @@ func (n *HTTPOutNode) updateResultWithRow(idx int, row *models.Row) { n.mu.Lock() defer n.mu.Unlock() if idx >= len(n.result.Series) { - n.incrementErrorCount() - n.logger.Printf("E! index out of range for row update %d", idx) + n.diag.Error("index out of range for row update", + fmt.Errorf("index %v is larger than number of series %v", idx, len(n.result.Series))) return } n.result.Series[idx] = row diff --git a/http_post.go b/http_post.go index c7a520e1b..b4b60c4b2 100644 --- a/http_post.go +++ b/http_post.go @@ -3,7 +3,6 @@ package kapacitor import ( "encoding/json" "fmt" - "log" "net/http" "sync" @@ -23,10 +22,10 @@ type HTTPPostNode struct { } // Create a new HTTPPostNode which submits received items via POST to an HTTP endpoint -func newHTTPPostNode(et *ExecutingTask, n *pipeline.HTTPPostNode, l *log.Logger) (*HTTPPostNode, error) { +func newHTTPPostNode(et *ExecutingTask, n *pipeline.HTTPPostNode, d NodeDiagnostic) (*HTTPPostNode, error) { hn := &HTTPPostNode{ - node: node{Node: n, et: et, logger: l}, + node: node{Node: n, et: et, diag: d}, c: n, bp: bufpool.New(), } @@ -117,14 +116,12 @@ func (n *HTTPPostNode) postRow(row *models.Row) { defer n.bp.Put(body) err := json.NewEncoder(body).Encode(result) if err != nil { - n.incrementErrorCount() - n.logger.Printf("E! failed to marshal row data json: %v", err) + n.diag.Error("failed to marshal row data json", err) return } req, err := n.endpoint.NewHTTPRequest(body) if err != nil { - n.incrementErrorCount() - n.logger.Printf("E! failed to marshal row data json: %v", err) + n.diag.Error("failed to marshal row data json", err) return } @@ -134,8 +131,7 @@ func (n *HTTPPostNode) postRow(row *models.Row) { } resp, err := http.DefaultClient.Do(req) if err != nil { - n.incrementErrorCount() - n.logger.Printf("E! 
failed to POST row data: %v", err) + n.diag.Error("failed to POST row data", err) return } resp.Body.Close() diff --git a/influxdb_out.go b/influxdb_out.go index 17a333039..7b284aa6b 100644 --- a/influxdb_out.go +++ b/influxdb_out.go @@ -2,7 +2,6 @@ package kapacitor import ( "bytes" - "log" "sync" "time" @@ -10,6 +9,7 @@ import ( "github.com/influxdata/kapacitor/edge" "github.com/influxdata/kapacitor/expvar" "github.com/influxdata/kapacitor/influxdb" + "github.com/influxdata/kapacitor/keyvalue" "github.com/influxdata/kapacitor/pipeline" "github.com/pkg/errors" ) @@ -30,7 +30,7 @@ type InfluxDBOutNode struct { batchBuffer *edge.BatchBuffer } -func newInfluxDBOutNode(et *ExecutingTask, n *pipeline.InfluxDBOutNode, l *log.Logger) (*InfluxDBOutNode, error) { +func newInfluxDBOutNode(et *ExecutingTask, n *pipeline.InfluxDBOutNode, d NodeDiagnostic) (*InfluxDBOutNode, error) { if et.tm.InfluxDBService == nil { return nil, errors.New("no InfluxDB cluster configured cannot use the InfluxDBOutNode") } @@ -39,7 +39,7 @@ func newInfluxDBOutNode(et *ExecutingTask, n *pipeline.InfluxDBOutNode, l *log.L return nil, errors.Wrap(err, "failed to get InfluxDB client") } in := &InfluxDBOutNode{ - node: node{Node: n, et: et, logger: l}, + node: node{Node: n, et: et, diag: d}, i: n, wb: newWriteBuffer(int(n.Buffer), n.FlushInterval, cli), batchBuffer: new(edge.BatchBuffer), @@ -81,8 +81,7 @@ func (n *InfluxDBOutNode) runOut([]byte) error { return nil }() if err != nil { - n.incrementErrorCount() - n.logger.Printf("E! failed to create database %q on cluster %q: %v", n.i.Database, n.i.Cluster, err) + n.diag.Error("failed to create database", err, keyvalue.KV("database", n.i.Database), keyvalue.KV("cluster", n.i.Cluster)) } } @@ -265,8 +264,7 @@ func (w *writeBuffer) run() { if !ok { bp, err = influxdb.NewBatchPoints(qe.bpc) if err != nil { - w.i.incrementErrorCount() - w.i.logger.Println("E! failed to write points to InfluxDB:", err) + w.i.diag.Error("failed to write points to InfluxDB", err) break } w.buffer[qe.bpc] = bp @@ -276,8 +274,7 @@ func (w *writeBuffer) run() { if len(bp.Points()) >= w.size { err = w.write(bp) if err != nil { - w.i.incrementErrorCount() - w.i.logger.Println("E! failed to write points to InfluxDB:", err) + w.i.diag.Error("failed to write points to InfluxDB", err) } delete(w.buffer, qe.bpc) } @@ -298,8 +295,7 @@ func (w *writeBuffer) writeAll() { for bpc, bp := range w.buffer { err := w.write(bp) if err != nil { - w.i.incrementErrorCount() - w.i.logger.Println("E! 
failed to write points to InfluxDB:", err) + w.i.diag.Error("failed to write points to InfluxDB", err) } delete(w.buffer, bpc) } diff --git a/influxql.go b/influxql.go index eb766bcdb..e04f31f3e 100644 --- a/influxql.go +++ b/influxql.go @@ -2,7 +2,6 @@ package kapacitor import ( "fmt" - "log" "reflect" "time" @@ -26,9 +25,9 @@ type InfluxQLNode struct { currentKind reflect.Kind } -func newInfluxQLNode(et *ExecutingTask, n *pipeline.InfluxQLNode, l *log.Logger) (*InfluxQLNode, error) { +func newInfluxQLNode(et *ExecutingTask, n *pipeline.InfluxQLNode, d NodeDiagnostic) (*InfluxQLNode, error) { m := &InfluxQLNode{ - node: node{Node: n, et: et, logger: l}, + node: node{Node: n, et: et, diag: d}, n: n, isStreamTransformation: n.ReduceCreater.IsStreamTransformation, } @@ -111,15 +110,13 @@ func (g *influxqlGroup) BeginBatch(begin edge.BeginBatchMessage) (edge.Message, func (g *influxqlGroup) BatchPoint(bp edge.BatchPointMessage) (edge.Message, error) { if g.rc == nil { if err := g.realizeReduceContextFromFields(bp.Fields()); err != nil { - g.n.incrementErrorCount() - g.n.logger.Println("E!", err) + g.n.diag.Error("failed to realize reduce context from fields", err) return nil, nil } } g.batchSize++ if err := g.rc.AggregatePoint(g.begin.Name(), bp); err != nil { - g.n.incrementErrorCount() - g.n.logger.Println("E! failed to aggregate point in batch:", err) + g.n.diag.Error("failed to aggregate point in batch", err) } return nil, nil } @@ -137,8 +134,7 @@ func (g *influxqlGroup) EndBatch(end edge.EndBatchMessage) (edge.Message, error) } m, err := g.n.emit(g.rc) if err != nil { - g.n.incrementErrorCount() - g.n.logger.Println("E! failed to emit batch:", err) + g.n.diag.Error("failed to emit batch", err) return nil, nil } return m, nil @@ -153,8 +149,7 @@ func (g *influxqlGroup) Point(p edge.PointMessage) (edge.Message, error) { if g.rc != nil { m, err := g.n.emit(g.rc) if err != nil { - g.n.incrementErrorCount() - g.n.logger.Println("E! failed to emit stream:", err) + g.n.diag.Error("failed to emit stream", err) } msg = m } @@ -175,15 +170,13 @@ func (g *influxqlGroup) Point(p edge.PointMessage) (edge.Message, error) { func (g *influxqlGroup) aggregatePoint(p edge.PointMessage) { if g.rc == nil { if err := g.realizeReduceContextFromFields(p.Fields()); err != nil { - g.n.incrementErrorCount() - g.n.logger.Println("E!", err) + g.n.diag.Error("failed to realize reduce context from fields", err) return } } err := g.rc.AggregatePoint(p.Name(), p) if err != nil { - g.n.incrementErrorCount() - g.n.logger.Println("E! failed to aggregate point:", err) + g.n.diag.Error("failed to aggregate point", err) } } @@ -234,18 +227,15 @@ func (g *influxqlStreamingTransformGroup) BeginBatch(begin edge.BeginBatchMessag func (g *influxqlStreamingTransformGroup) BatchPoint(bp edge.BatchPointMessage) (edge.Message, error) { if g.rc == nil { if err := g.realizeReduceContextFromFields(bp.Fields()); err != nil { - g.n.incrementErrorCount() - g.n.logger.Println("E!", err) + g.n.diag.Error("failed to realize reduce context from fields", err) return nil, nil } } if err := g.rc.AggregatePoint(g.begin.Name(), bp); err != nil { - g.n.incrementErrorCount() - g.n.logger.Println("E! failed to aggregate batch point:", err) + g.n.diag.Error("failed to aggregate batch point", err) } if ep, err := g.rc.EmitPoint(); err != nil { - g.n.incrementErrorCount() - g.n.logger.Println("E! 
failed to emit batch point:", err) + g.n.diag.Error("failed to emit batch point", err) } else if ep != nil { return edge.NewBatchPointMessage( ep.Fields(), @@ -263,22 +253,19 @@ func (g *influxqlStreamingTransformGroup) EndBatch(end edge.EndBatchMessage) (ed func (g *influxqlStreamingTransformGroup) Point(p edge.PointMessage) (edge.Message, error) { if g.rc == nil { if err := g.realizeReduceContextFromFields(p.Fields()); err != nil { - g.n.incrementErrorCount() - g.n.logger.Println("E!", err) + g.n.diag.Error("failed to realize reduce context from fields", err) // Skip point return nil, nil } } err := g.rc.AggregatePoint(p.Name(), p) if err != nil { - g.n.incrementErrorCount() - g.n.logger.Println("E! failed to aggregate point:", err) + g.n.diag.Error("failed to aggregate point", err) } m, err := g.n.emit(g.rc) if err != nil { - g.n.incrementErrorCount() - g.n.logger.Println("E! failed to emit stream:", err) + g.n.diag.Error("failed to emit stream", err) return nil, nil } return m, nil diff --git a/integrations/batcher_test.go b/integrations/batcher_test.go index cd15d3e40..892f0d135 100644 --- a/integrations/batcher_test.go +++ b/integrations/batcher_test.go @@ -4,7 +4,6 @@ import ( "encoding/json" "fmt" "io" - "log" "net/http" "net/http/httptest" "os" @@ -26,7 +25,8 @@ import ( func TestBatch_InvalidQuery(t *testing.T) { // Create a new execution env - tm := kapacitor.NewTaskMaster("invalidQuery", newServerInfo(), logService) + d := diagService.NewKapacitorHandler() + tm := kapacitor.NewTaskMaster("invalidQuery", newServerInfo(), d) tm.HTTPDService = newHTTPDService() tm.TaskStore = taskStore{} tm.DeadmanService = deadman{} @@ -2975,13 +2975,14 @@ func testBatcher(t *testing.T, name, script string) (clock.Setter, *kapacitor.Ex } // Create a new execution env - tm := kapacitor.NewTaskMaster("testBatcher", newServerInfo(), logService) + d := diagService.NewKapacitorHandler() + tm := kapacitor.NewTaskMaster("testBatcher", newServerInfo(), d) httpdService := newHTTPDService() tm.HTTPDService = httpdService tm.TaskStore = taskStore{} tm.DeadmanService = deadman{} - tm.HTTPPostService = httppost.NewService(nil, logService.NewLogger("[httppost] ", log.LstdFlags)) - as := alertservice.NewService(logService.NewLogger("[alert] ", log.LstdFlags)) + tm.HTTPPostService = httppost.NewService(nil, diagService.NewHTTPPostHandler()) + as := alertservice.NewService(diagService.NewAlertServiceHandler()) as.StorageService = storagetest.New() as.HTTPDService = httpdService if err := as.Open(); err != nil { diff --git a/integrations/helpers_test.go b/integrations/helpers_test.go index 2a8f5128d..b7cb698e7 100644 --- a/integrations/helpers_test.go +++ b/integrations/helpers_test.go @@ -3,7 +3,6 @@ package integrations import ( "errors" "fmt" - "log" "net/http" "net/http/httptest" "reflect" @@ -24,7 +23,7 @@ func newHTTPDService() *httpd.Service { config := httpd.NewConfig() config.BindAddress = ":0" // Choose port dynamically config.LogEnabled = testing.Verbose() - httpService := httpd.NewService(config, "localhost", logService.NewLogger("[http] ", log.LstdFlags), logService) + httpService := httpd.NewService(config, "localhost", diagService.NewHTTPDHandler()) err := httpService.Open() if err != nil { panic(err) @@ -136,7 +135,7 @@ func compareAlertData(exp, got alert.Data) (bool, string) { type UDFService struct { ListFunc func() []string InfoFunc func(name string) (udf.Info, bool) - CreateFunc func(name, taskID, nodeID string, l *log.Logger, abortCallback func()) (udf.Interface, error) + CreateFunc 
func(name, taskID, nodeID string, d udf.Diagnostic, abortCallback func()) (udf.Interface, error) } func (u UDFService) List() []string { @@ -147,8 +146,8 @@ func (u UDFService) Info(name string) (udf.Info, bool) { return u.InfoFunc(name) } -func (u UDFService) Create(name, taskID, nodeID string, l *log.Logger, abortCallback func()) (udf.Interface, error) { - return u.CreateFunc(name, taskID, nodeID, l, abortCallback) +func (u UDFService) Create(name, taskID, nodeID string, d udf.Diagnostic, abortCallback func()) (udf.Interface, error) { + return u.CreateFunc(name, taskID, nodeID, d, abortCallback) } type taskStore struct{} diff --git a/integrations/streamer_test.go b/integrations/streamer_test.go index 3cd015645..2eec8642c 100644 --- a/integrations/streamer_test.go +++ b/integrations/streamer_test.go @@ -7,7 +7,6 @@ import ( "fmt" "html" "io/ioutil" - "log" "net/http" "net/http/httptest" "net/mail" @@ -29,18 +28,19 @@ import ( "github.com/influxdata/kapacitor/clock" "github.com/influxdata/kapacitor/command" "github.com/influxdata/kapacitor/command/commandtest" + "github.com/influxdata/kapacitor/keyvalue" "github.com/influxdata/kapacitor/models" alertservice "github.com/influxdata/kapacitor/services/alert" "github.com/influxdata/kapacitor/services/alert/alerttest" "github.com/influxdata/kapacitor/services/alerta" "github.com/influxdata/kapacitor/services/alerta/alertatest" + "github.com/influxdata/kapacitor/services/diagnostic" "github.com/influxdata/kapacitor/services/hipchat" "github.com/influxdata/kapacitor/services/hipchat/hipchattest" "github.com/influxdata/kapacitor/services/httppost" "github.com/influxdata/kapacitor/services/httppost/httpposttest" k8s "github.com/influxdata/kapacitor/services/k8s/client" "github.com/influxdata/kapacitor/services/k8s/k8stest" - "github.com/influxdata/kapacitor/services/logging/loggingtest" "github.com/influxdata/kapacitor/services/opsgenie" "github.com/influxdata/kapacitor/services/opsgenie/opsgenietest" "github.com/influxdata/kapacitor/services/pagerduty" @@ -70,7 +70,12 @@ import ( "github.com/k-sone/snmpgo" ) -var logService = loggingtest.New() +var diagService *diagnostic.Service + +func init() { + diagService = diagnostic.NewService(diagnostic.NewConfig(), ioutil.Discard, ioutil.Discard) + diagService.Open() +} var dbrps = []kapacitor.DBRP{ { @@ -2557,7 +2562,7 @@ stream c := httppost.Config{} c.URL = ts.URL c.Endpoint = "test" - sl := httppost.NewService(httppost.Configs{c}, logService.NewLogger("[test_httppost_endpoint] ", log.LstdFlags)) + sl := httppost.NewService(httppost.Configs{c}, diagService.NewHTTPPostHandler()) tm.HTTPPostService = sl } @@ -5935,11 +5940,11 @@ stream return } uio := udf_test.NewIO() - udfService.CreateFunc = func(name, taskID, nodeID string, l *log.Logger, abortCallback func()) (udf.Interface, error) { + udfService.CreateFunc = func(name, taskID, nodeID string, d udf.Diagnostic, abortCallback func()) (udf.Interface, error) { if name != "customFunc" { return nil, fmt.Errorf("unknown function %s", name) } - return udf_test.New(taskID, nodeID, uio, l), nil + return udf_test.New(taskID, nodeID, uio, d), nil } tmInit := func(tm *kapacitor.TaskMaster) { @@ -7280,7 +7285,7 @@ stream c.Enabled = true c.Addr = ts.Addr c.Source = "Kapacitor" - sl := sensu.NewService(c, logService.NewLogger("[test_sensu] ", log.LstdFlags)) + sl := sensu.NewService(c, diagService.NewSensuHandler()) tm.SensuService = sl } testStreamerNoOutput(t, "TestStream_Alert", script, 13*time.Second, tmInit) @@ -7336,7 +7341,8 @@ stream c.Enabled = true c.URL 
= ts.URL + "/test/slack/url" c.Channel = "#channel" - sl, err := slack.NewService(c, logService.NewLogger("[test_slack] ", log.LstdFlags)) + d := diagService.NewSlackHandler().WithContext(keyvalue.KV("test", "slack")) + sl, err := slack.NewService(c, d) if err != nil { t.Error(err) } @@ -7424,7 +7430,7 @@ stream c.ChatId = "123456789" c.DisableWebPagePreview = true c.DisableNotification = false - tl := telegram.NewService(c, logService.NewLogger("[test_telegram] ", log.LstdFlags)) + tl := telegram.NewService(c, diagService.NewTelegramHandler()) tm.TelegramService = tl } testStreamerNoOutput(t, "TestStream_Alert", script, 13*time.Second, tmInit) @@ -7556,7 +7562,7 @@ stream c.URL = ts.URL c.Room = "1231234" c.Token = "testtoken1231234" - sl := hipchat.NewService(c, logService.NewLogger("[test_hipchat] ", log.LstdFlags)) + sl := hipchat.NewService(c, diagService.NewHipChatHandler()) tm.HipChatService = sl } testStreamerNoOutput(t, "TestStream_Alert", script, 13*time.Second, tmInit) @@ -7632,7 +7638,7 @@ stream c.Enabled = true c.URL = ts.URL c.Origin = "Kapacitor" - sl := alerta.NewService(c, logService.NewLogger("[test_alerta] ", log.LstdFlags)) + sl := alerta.NewService(c, diagService.NewAlertaHandler()) tm.AlertaService = sl } testStreamerNoOutput(t, "TestStream_Alert", script, 13*time.Second, tmInit) @@ -7716,7 +7722,7 @@ stream c.URL = ts.URL c.UserKey = "user" c.Token = "KzGDORePKggMaC0QOYAMyEEuzJnyUi" - sl := pushover.NewService(c, logService.NewLogger("[test_pushover] ", log.LstdFlags)) + sl := pushover.NewService(c, diagService.NewPushoverHandler()) tm.PushoverService = sl } testStreamerNoOutput(t, "TestStream_Alert", script, 13*time.Second, tmInit) @@ -7788,7 +7794,7 @@ stream c.Enabled = true c.URL = ts.URL c.APIKey = "api_key" - og := opsgenie.NewService(c, logService.NewLogger("[test_og] ", log.LstdFlags)) + og := opsgenie.NewService(c, diagService.NewOpsGenieHandler()) tm.OpsGenieService = og } testStreamerNoOutput(t, "TestStream_Alert", script, 13*time.Second, tmInit) @@ -7883,7 +7889,7 @@ stream c.Enabled = true c.URL = ts.URL c.ServiceKey = "service_key" - pd := pagerduty.NewService(c, logService.NewLogger("[test_pd] ", log.LstdFlags)) + pd := pagerduty.NewService(c, diagService.NewPagerDutyHandler()) pd.HTTPDService = tm.HTTPDService tm.PagerDutyService = pd @@ -8017,7 +8023,7 @@ stream c.URL = ts.URL c.Endpoint = "test" c.Headers = headers - sl := httppost.NewService(httppost.Configs{c}, logService.NewLogger("[test_pushover] ", log.LstdFlags)) + sl := httppost.NewService(httppost.Configs{c}, diagService.NewHTTPPostHandler()) tm.HTTPPostService = sl } testStreamerNoOutput(t, "TestStream_Alert", script, 13*time.Second, tmInit) @@ -8089,7 +8095,8 @@ stream c.URL = ts.URL c.APIKey = "api_key" c.RoutingKey = "routing_key" - vo := victorops.NewService(c, logService.NewLogger("[test_vo] ", log.LstdFlags)) + d := diagService.NewVictorOpsHandler().WithContext(keyvalue.KV("test", "vo")) + vo := victorops.NewService(c, d) tm.VictorOpsService = vo } testStreamerNoOutput(t, "TestStream_Alert", script, 13*time.Second, tmInit) @@ -8157,7 +8164,7 @@ stream c.Enabled = true c.URL = ts.URL c.AuthorName = "Kapacitor" - sl := talk.NewService(c, logService.NewLogger("[test_talk] ", log.LstdFlags)) + sl := talk.NewService(c, diagService.NewTalkHandler()) tm.TalkService = sl } testStreamerNoOutput(t, "TestStream_Alert", script, 13*time.Second, tmInit) @@ -8430,7 +8437,7 @@ Value: 10 Port: smtpServer.Port, From: "test@example.com", } - smtpService := smtp.NewService(sc, 
logService.NewLogger("[test-smtp] ", log.LstdFlags)) + smtpService := smtp.NewService(sc, diagService.NewSMTPHandler()) if err := smtpService.Open(); err != nil { t.Fatal(err) } @@ -8560,7 +8567,7 @@ stream c.Addr = snmpServer.Addr c.Community = snmpServer.Community c.Retries = 2 - st := snmptrap.NewService(c, logService.NewLogger("[test_snmptrap] ", log.LstdFlags)) + st := snmptrap.NewService(c, diagService.NewSNMPTrapHandler()) if err := st.Open(); err != nil { t.Fatal(err) } @@ -10880,13 +10887,14 @@ func compareListIgnoreOrder(got, exp []interface{}, cmpF func(got, exp interface } func createTaskMaster() (*kapacitor.TaskMaster, error) { - tm := kapacitor.NewTaskMaster("testStreamer", newServerInfo(), logService) + d := diagService.NewKapacitorHandler() + tm := kapacitor.NewTaskMaster("testStreamer", newServerInfo(), d) httpdService := newHTTPDService() tm.HTTPDService = httpdService tm.TaskStore = taskStore{} tm.DeadmanService = deadman{} - tm.HTTPPostService = httppost.NewService(nil, logService.NewLogger("[httppost] ", log.LstdFlags)) - as := alertservice.NewService(logService.NewLogger("[alert] ", log.LstdFlags)) + tm.HTTPPostService = httppost.NewService(nil, diagService.NewHTTPPostHandler()) + as := alertservice.NewService(diagService.NewAlertServiceHandler()) as.StorageService = storagetest.New() as.HTTPDService = httpdService if err := as.Open(); err != nil { diff --git a/join.go b/join.go index d18cdbaa2..6f41c13c8 100644 --- a/join.go +++ b/join.go @@ -2,7 +2,6 @@ package kapacitor import ( "fmt" - "log" "sync" "time" @@ -36,10 +35,10 @@ type JoinNode struct { } // Create a new JoinNode, which takes pairs from parent streams combines them into a single point. -func newJoinNode(et *ExecutingTask, n *pipeline.JoinNode, l *log.Logger) (*JoinNode, error) { +func newJoinNode(et *ExecutingTask, n *pipeline.JoinNode, d NodeDiagnostic) (*JoinNode, error) { jn := &JoinNode{ j: n, - node: node{Node: n, et: et, logger: l}, + node: node{Node: n, et: et, diag: d}, groups: make(map[models.GroupID]*joinGroup), matchGroupsBuffer: make(map[models.GroupID][]srcPoint), specificGroupsBuffer: make(map[models.GroupID][]srcPoint), @@ -362,7 +361,7 @@ func (g *joinGroup) newJoinset(t time.Time) *joinset { g.n.j.Delimiter, g.n.j.Tolerance, t, - g.n.logger, + g.n.diag, ) } @@ -462,7 +461,7 @@ type joinset struct { first int - logger *log.Logger + diag NodeDiagnostic } func newJoinset( @@ -474,7 +473,7 @@ func newJoinset( delimiter string, tolerance time.Duration, time time.Time, - l *log.Logger, + d NodeDiagnostic, ) *joinset { expected := len(prefixes) return &joinset{ @@ -489,7 +488,7 @@ func newJoinset( first: expected, time: time, tolerance: tolerance, - logger: l, + diag: d, } } diff --git a/kapacitor_loopback.go b/kapacitor_loopback.go index 9569c6848..00d2456a1 100644 --- a/kapacitor_loopback.go +++ b/kapacitor_loopback.go @@ -2,7 +2,6 @@ package kapacitor import ( "fmt" - "log" "github.com/influxdata/kapacitor/edge" "github.com/influxdata/kapacitor/expvar" @@ -23,9 +22,9 @@ type KapacitorLoopbackNode struct { begin edge.BeginBatchMessage } -func newKapacitorLoopbackNode(et *ExecutingTask, n *pipeline.KapacitorLoopbackNode, l *log.Logger) (*KapacitorLoopbackNode, error) { +func newKapacitorLoopbackNode(et *ExecutingTask, n *pipeline.KapacitorLoopbackNode, d NodeDiagnostic) (*KapacitorLoopbackNode, error) { kn := &KapacitorLoopbackNode{ - node: node{Node: n, et: et, logger: l}, + node: node{Node: n, et: et, diag: d}, k: n, } kn.node.runF = kn.runOut @@ -77,8 +76,8 @@ func (n 
*KapacitorLoopbackNode) Point(p edge.PointMessage) error { n.timer.Resume() if err != nil { - n.incrementErrorCount() - n.logger.Println("E! failed to write point over loopback") + n.diag.Error("failed to write point over loopback", err) + } else { n.pointsWritten.Add(1) } @@ -113,8 +112,7 @@ func (n *KapacitorLoopbackNode) BatchPoint(bp edge.BatchPointMessage) error { n.timer.Resume() if err != nil { - n.incrementErrorCount() - n.logger.Println("E! failed to write point over loopback") + n.diag.Error("failed to write point over loopback", err) } else { n.pointsWritten.Add(1) } diff --git a/keyvalue/type.go b/keyvalue/type.go new file mode 100644 index 000000000..951f7175b --- /dev/null +++ b/keyvalue/type.go @@ -0,0 +1,10 @@ +package keyvalue + +type T struct { + Key string + Value string +} + +func KV(k, v string) T { + return T{Key: k, Value: v} +} diff --git a/log.go b/log.go index b2423d2ea..b18aaed02 100644 --- a/log.go +++ b/log.go @@ -3,34 +3,30 @@ package kapacitor import ( "bytes" "encoding/json" - "fmt" - "log" "strings" "github.com/influxdata/kapacitor/edge" "github.com/influxdata/kapacitor/pipeline" - "github.com/influxdata/wlog" ) type LogNode struct { node - key string - buf bytes.Buffer - enc *json.Encoder + key string + level string + prefix string + buf bytes.Buffer + enc *json.Encoder batchBuffer *edge.BatchBuffer } // Create a new LogNode which logs all data it receives -func newLogNode(et *ExecutingTask, n *pipeline.LogNode, l *log.Logger) (*LogNode, error) { - level, ok := wlog.StringToLevel[strings.ToUpper(n.Level)] - if !ok { - return nil, fmt.Errorf("invalid log level %s", n.Level) - } +func newLogNode(et *ExecutingTask, n *pipeline.LogNode, d NodeDiagnostic) (*LogNode, error) { nn := &LogNode{ - node: node{Node: n, et: et, logger: l}, - key: fmt.Sprintf("%c! 
%s", wlog.ReverseLevels[level], n.Prefix), + node: node{Node: n, et: et, diag: d}, + level: strings.ToUpper(n.Level), + prefix: n.Prefix, batchBuffer: new(edge.BatchBuffer), } nn.enc = json.NewEncoder(&nn.buf) @@ -63,24 +59,12 @@ func (n *LogNode) EndBatch(end edge.EndBatchMessage) (edge.Message, error) { } func (n *LogNode) BufferedBatch(batch edge.BufferedBatchMessage) (edge.Message, error) { - n.buf.Reset() - if err := n.enc.Encode(batch); err != nil { - n.incrementErrorCount() - n.logger.Println("E!", err) - return batch, nil - } - n.logger.Println(n.key, n.buf.String()) + n.diag.LogBatchData(n.level, n.prefix, batch) return batch, nil } func (n *LogNode) Point(p edge.PointMessage) (edge.Message, error) { - n.buf.Reset() - if err := n.enc.Encode(p); err != nil { - n.incrementErrorCount() - n.logger.Println("E!", err) - return p, nil - } - n.logger.Println(n.key, n.buf.String()) + n.diag.LogPointData(n.level, n.prefix, p) return p, nil } diff --git a/node.go b/node.go index db09ab15a..05124a2d8 100644 --- a/node.go +++ b/node.go @@ -4,14 +4,15 @@ import ( "bytes" "expvar" "fmt" - "log" "runtime" "sync" "sync/atomic" "time" + "github.com/influxdata/kapacitor/alert" "github.com/influxdata/kapacitor/edge" kexpvar "github.com/influxdata/kapacitor/expvar" + "github.com/influxdata/kapacitor/keyvalue" "github.com/influxdata/kapacitor/models" "github.com/influxdata/kapacitor/pipeline" "github.com/influxdata/kapacitor/server/vars" @@ -25,6 +26,43 @@ const ( statAverageExecTime = "avg_exec_time_ns" ) +type NodeDiagnostic interface { + Error(msg string, err error, ctx ...keyvalue.T) + + // AlertNode + AlertTriggered(level alert.Level, id string, message string, rows *models.Row) + + // AutoscaleNode + SettingReplicas(new int, old int, id string) + + // QueryNode + StartingBatchQuery(q string) + + // LogNode + LogPointData(key, prefix string, data edge.PointMessage) + LogBatchData(key, prefix string, data edge.BufferedBatchMessage) + + //UDF + UDFLog(s string) +} + +type nodeDiagnostic struct { + NodeDiagnostic + node *node +} + +func newNodeDiagnostic(n *node, diag NodeDiagnostic) *nodeDiagnostic { + return &nodeDiagnostic{ + NodeDiagnostic: diag, + node: n, + } +} + +func (n *nodeDiagnostic) Error(msg string, err error, ctx ...keyvalue.T) { + n.node.incrementErrorCount() + n.NodeDiagnostic.Error(msg, err, ctx...) +} + // A node that can be in an executor. 
type Node interface { pipeline.Node @@ -81,7 +119,7 @@ type node struct { finished bool ins []edge.StatsEdge outs []edge.StatsEdge - logger *log.Logger + diag NodeDiagnostic timer timer.Timer statsKey string statMap *kexpvar.Map @@ -111,6 +149,7 @@ func (n *node) init() { n.statMap.Set(statAverageExecTime, avgExecVar) n.nodeErrors = &kexpvar.Int{} n.statMap.Set(statErrorCount, n.nodeErrors) + n.diag = newNodeDiagnostic(n, n.diag) n.statMap.Set(statCardinalityGauge, kexpvar.NewIntFuncGauge(nil)) n.timer = n.et.tm.TimingService.NewTimer(avgExecVar) n.errCh = make(chan error, 1) @@ -132,7 +171,8 @@ func (n *node) start(snapshot []byte) { err = fmt.Errorf("%s: Trace:%s", r, string(trace[:n])) } n.abortParentEdges() - n.logger.Println("E!", err) + n.diag.Error("node failed", err) + err = errors.Wrap(err, n.Name()) } n.errCh <- err @@ -174,7 +214,8 @@ func (n *node) addChild(c Node) (edge.StatsEdge, error) { } n.children = append(n.children, c) - edge := newEdge(n.et.Task.ID, n.Name(), c.Name(), n.Provides(), defaultEdgeBufferSize, n.et.tm.LogService) + d := n.et.tm.diag.WithEdgeContext(n.et.Task.ID, n.Name(), c.Name()) + edge := newEdge(n.et.Task.ID, n.Name(), c.Name(), n.Provides(), defaultEdgeBufferSize, d) if edge == nil { return nil, fmt.Errorf("unknown edge type %s", n.Provides()) } diff --git a/noop.go b/noop.go index 53c4cb3ee..d693c79d2 100644 --- a/noop.go +++ b/noop.go @@ -1,8 +1,6 @@ package kapacitor import ( - "log" - "github.com/influxdata/kapacitor/edge" "github.com/influxdata/kapacitor/pipeline" ) @@ -12,9 +10,9 @@ type NoOpNode struct { } // Create a new NoOpNode which does nothing with the data and just passes it through. -func newNoOpNode(et *ExecutingTask, n *pipeline.NoOpNode, l *log.Logger) (*NoOpNode, error) { +func newNoOpNode(et *ExecutingTask, n *pipeline.NoOpNode, d NodeDiagnostic) (*NoOpNode, error) { nn := &NoOpNode{ - node: node{Node: n, et: et, logger: l}, + node: node{Node: n, et: et, diag: d}, } nn.node.runF = nn.runNoOp return nn, nil diff --git a/sample.go b/sample.go index 254fe00ea..ca8087ddf 100644 --- a/sample.go +++ b/sample.go @@ -2,7 +2,6 @@ package kapacitor import ( "errors" - "log" "time" "github.com/influxdata/kapacitor/edge" @@ -19,9 +18,9 @@ type SampleNode struct { } // Create a new SampleNode which filters data from a source. 
-func newSampleNode(et *ExecutingTask, n *pipeline.SampleNode, l *log.Logger) (*SampleNode, error) { +func newSampleNode(et *ExecutingTask, n *pipeline.SampleNode, d NodeDiagnostic) (*SampleNode, error) { sn := &SampleNode{ - node: node{Node: n, et: et, logger: l}, + node: node{Node: n, et: et, diag: d}, s: n, counts: make(map[models.GroupID]int64), duration: n.Duration, diff --git a/server/config.go b/server/config.go index 1973b733f..031fc3cc0 100644 --- a/server/config.go +++ b/server/config.go @@ -17,6 +17,7 @@ import ( "github.com/influxdata/kapacitor/services/config" "github.com/influxdata/kapacitor/services/consul" "github.com/influxdata/kapacitor/services/deadman" + "github.com/influxdata/kapacitor/services/diagnostic" "github.com/influxdata/kapacitor/services/dns" "github.com/influxdata/kapacitor/services/ec2" "github.com/influxdata/kapacitor/services/file_discovery" @@ -26,7 +27,6 @@ import ( "github.com/influxdata/kapacitor/services/httppost" "github.com/influxdata/kapacitor/services/influxdb" "github.com/influxdata/kapacitor/services/k8s" - "github.com/influxdata/kapacitor/services/logging" "github.com/influxdata/kapacitor/services/marathon" "github.com/influxdata/kapacitor/services/mqtt" "github.com/influxdata/kapacitor/services/nerve" @@ -66,7 +66,7 @@ type Config struct { Storage storage.Config `toml:"storage"` Task task_store.Config `toml:"task"` InfluxDB []influxdb.Config `toml:"influxdb" override:"influxdb,element-key=name"` - Logging logging.Config `toml:"logging"` + Logging diagnostic.Config `toml:"logging"` ConfigOverride config.Config `toml:"config-override"` // Input services @@ -134,7 +134,7 @@ func NewConfig() *Config { c.Replay = replay.NewConfig() c.Task = task_store.NewConfig() c.InfluxDB = []influxdb.Config{influxdb.NewConfig()} - c.Logging = logging.NewConfig() + c.Logging = diagnostic.NewConfig() c.ConfigOverride = config.NewConfig() c.Collectd = collectd.NewConfig() diff --git a/server/server.go b/server/server.go index c3cb0caf6..aeeae1195 100644 --- a/server/server.go +++ b/server/server.go @@ -4,7 +4,6 @@ package server import ( "fmt" "io/ioutil" - "log" "os" "path/filepath" "runtime" @@ -20,6 +19,7 @@ import ( "github.com/influxdata/kapacitor/auth" "github.com/influxdata/kapacitor/command" iclient "github.com/influxdata/kapacitor/influxdb" + "github.com/influxdata/kapacitor/keyvalue" "github.com/influxdata/kapacitor/server/vars" "github.com/influxdata/kapacitor/services/alert" "github.com/influxdata/kapacitor/services/alerta" @@ -27,6 +27,7 @@ import ( "github.com/influxdata/kapacitor/services/config" "github.com/influxdata/kapacitor/services/consul" "github.com/influxdata/kapacitor/services/deadman" + "github.com/influxdata/kapacitor/services/diagnostic" "github.com/influxdata/kapacitor/services/dns" "github.com/influxdata/kapacitor/services/ec2" "github.com/influxdata/kapacitor/services/file_discovery" @@ -36,7 +37,6 @@ import ( "github.com/influxdata/kapacitor/services/httppost" "github.com/influxdata/kapacitor/services/influxdb" "github.com/influxdata/kapacitor/services/k8s" - "github.com/influxdata/kapacitor/services/logging" "github.com/influxdata/kapacitor/services/marathon" "github.com/influxdata/kapacitor/services/mqtt" "github.com/influxdata/kapacitor/services/nerve" @@ -79,6 +79,12 @@ type BuildInfo struct { Branch string } +type Diagnostic interface { + Debug(msg string, ctx ...keyvalue.T) + Info(msg string, ctx ...keyvalue.T) + Error(msg string, err error, ctx ...keyvalue.T) +} + // Server represents a container for the metadata and storage 
data and services. // It is built using a Config and it manages the startup and shutdown of all // services in the proper order. @@ -133,17 +139,17 @@ type Server struct { CPUProfile string MemProfile string - LogService logging.Interface - Logger *log.Logger + DiagService *diagnostic.Service + Diag Diagnostic } // New returns a new instance of Server built from a config. -func New(c *Config, buildInfo BuildInfo, logService logging.Interface) (*Server, error) { +func New(c *Config, buildInfo BuildInfo, diagService *diagnostic.Service) (*Server, error) { err := c.Validate() if err != nil { return nil, fmt.Errorf("%s. To generate a valid configuration file run `kapacitord config > kapacitor.generated.conf`.", err) } - l := logService.NewLogger("[srv] ", log.LstdFlags) + d := diagService.NewServerHandler() s := &Server{ config: c, BuildInfo: buildInfo, @@ -151,16 +157,16 @@ func New(c *Config, buildInfo BuildInfo, logService logging.Interface) (*Server, hostname: c.Hostname, err: make(chan error), configUpdates: make(chan config.ConfigUpdate, 100), - LogService: logService, + DiagService: diagService, MetaClient: &kapacitor.NoopMetaClient{}, QueryExecutor: &Queryexecutor{}, - Logger: l, + Diag: d, ServicesByName: make(map[string]int), DynamicServices: make(map[string]Updater), Commander: c.Commander, clusterIDChanged: waiter.NewGroup(), } - s.Logger.Println("I! Kapacitor hostname:", s.hostname) + s.Diag.Info("listing Kapacitor hostname", keyvalue.KV("hostname", s.hostname)) // Setup IDs err = s.setupIDs() @@ -174,11 +180,13 @@ func New(c *Config, buildInfo BuildInfo, logService logging.Interface) (*Server, vars.HostVar.Set(s.hostname) vars.ProductVar.Set(vars.Product) vars.VersionVar.Set(s.BuildInfo.Version) - s.Logger.Printf("I! ClusterID: %s ServerID: %s", s.ClusterID, s.ServerID) + s.Diag.Info("listing ClusterID and ServerID", + keyvalue.KV("cluster_id", s.ClusterID.String()), keyvalue.KV("server_id", s.ServerID.String())) // Start Task Master s.TaskMasterLookup = kapacitor.NewTaskMasterLookup() - s.TaskMaster = kapacitor.NewTaskMaster(kapacitor.MainTaskMaster, vars.Info, logService) + kd := diagService.NewKapacitorHandler() + s.TaskMaster = kapacitor.NewTaskMaster(kapacitor.MainTaskMaster, vars.Info, kd) s.TaskMaster.DefaultRetentionPolicy = c.DefaultRetentionPolicy s.TaskMaster.Commander = s.Commander s.TaskMasterLookup.Set(s.TaskMaster) @@ -233,7 +241,9 @@ func New(c *Config, buildInfo BuildInfo, logService logging.Interface) (*Server, // Append third-party integrations // Append extra input services - s.appendCollectdService() + if err := s.appendCollectdService(); err != nil { + return nil, errors.Wrap(err, "collectd service") + } s.appendUDPServices() if err := s.appendOpenTSDBService(); err != nil { return nil, errors.Wrap(err, "opentsdb service") @@ -297,8 +307,8 @@ func (s *Server) SetDynamicService(name string, srv dynamicService) { } func (s *Server) appendStorageService() { - l := s.LogService.NewLogger("[storage] ", log.LstdFlags) - srv := storage.NewService(s.config.Storage, l) + d := s.DiagService.NewStorageHandler() + srv := storage.NewService(s.config.Storage, d) srv.HTTPDService = s.HTTPDService @@ -307,8 +317,8 @@ func (s *Server) appendStorageService() { } func (s *Server) appendConfigOverrideService() { - l := s.LogService.NewLogger("[config-override] ", log.LstdFlags) - srv := config.NewService(s.config.ConfigOverride, s.config, l, s.configUpdates) + d := s.DiagService.NewConfigOverrideHandler() + srv := config.NewService(s.config.ConfigOverride, s.config, d, 
s.configUpdates) srv.HTTPDService = s.HTTPDService srv.StorageService = s.StorageService @@ -317,8 +327,8 @@ func (s *Server) appendConfigOverrideService() { } func (s *Server) initAlertService() { - l := s.LogService.NewLogger("[alert] ", log.LstdFlags) - srv := alert.NewService(l) + d := s.DiagService.NewAlertServiceHandler() + srv := alert.NewService(d) srv.Commander = s.Commander srv.HTTPDService = s.HTTPDService @@ -333,8 +343,7 @@ func (s *Server) appendAlertService() { } func (s *Server) appendTesterService() { - l := s.LogService.NewLogger("[service-tests] ", log.LstdFlags) - srv := servicetest.NewService(servicetest.NewConfig(), l) + srv := servicetest.NewService(servicetest.NewConfig()) srv.HTTPDService = s.HTTPDService s.TesterService = srv @@ -343,8 +352,8 @@ func (s *Server) appendTesterService() { func (s *Server) appendSMTPService() { c := s.config.SMTP - l := s.LogService.NewLogger("[smtp] ", log.LstdFlags) - srv := smtp.NewService(c, l) + d := s.DiagService.NewSMTPHandler() + srv := smtp.NewService(c, d) s.TaskMaster.SMTPService = srv s.AlertService.SMTPService = srv @@ -355,12 +364,12 @@ func (s *Server) appendSMTPService() { func (s *Server) appendInfluxDBService() error { c := s.config.InfluxDB - l := s.LogService.NewLogger("[influxdb] ", log.LstdFlags) + d := s.DiagService.NewInfluxDBHandler() httpPort, err := s.config.HTTP.Port() if err != nil { return errors.Wrap(err, "failed to get http port") } - srv, err := influxdb.NewService(c, httpPort, s.config.Hostname, vars.Info, s.config.HTTP.AuthEnabled, l) + srv, err := influxdb.NewService(c, httpPort, s.config.Hostname, vars.Info, s.config.HTTP.AuthEnabled, d) if err != nil { return err } @@ -372,7 +381,6 @@ func (s *Server) appendInfluxDBService() error { srv.HTTPDService = s.HTTPDService srv.PointsWriter = s.TaskMaster - srv.LogService = s.LogService srv.AuthService = s.AuthService srv.ClientCreator = iclient.ClientCreator{} @@ -384,10 +392,11 @@ func (s *Server) appendInfluxDBService() error { } func (s *Server) initHTTPDService() { - l := s.LogService.NewLogger("[httpd] ", log.LstdFlags) - srv := httpd.NewService(s.config.HTTP, s.hostname, l, s.LogService) + d := s.DiagService.NewHTTPDHandler() + srv := httpd.NewService(s.config.HTTP, s.hostname, d) srv.Handler.PointsWriter = s.TaskMaster + srv.Handler.DiagService = s.DiagService srv.Handler.Version = s.BuildInfo.Version s.HTTPDService = srv @@ -399,8 +408,8 @@ func (s *Server) appendHTTPDService() { } func (s *Server) appendTaskStoreService() { - l := s.LogService.NewLogger("[task_store] ", log.LstdFlags) - srv := task_store.NewService(s.config.Task, l) + d := s.DiagService.NewTaskStoreHandler() + srv := task_store.NewService(s.config.Task, d) srv.StorageService = s.StorageService srv.HTTPDService = s.HTTPDService srv.TaskMasterLookup = s.TaskMasterLookup @@ -411,8 +420,8 @@ func (s *Server) appendTaskStoreService() { } func (s *Server) appendReplayService() { - l := s.LogService.NewLogger("[replay] ", log.LstdFlags) - srv := replay.NewService(s.config.Replay, l) + d := s.DiagService.NewReplayHandler() + srv := replay.NewService(s.config.Replay, d) srv.StorageService = s.StorageService srv.TaskStore = s.TaskStore srv.HTTPDService = s.HTTPDService @@ -426,8 +435,8 @@ func (s *Server) appendReplayService() { func (s *Server) appendK8sService() error { c := s.config.Kubernetes - l := s.LogService.NewLogger("[kubernetes] ", log.LstdFlags) - srv, err := k8s.NewService(c, s.ScraperService, l) + d := s.DiagService.NewK8sHandler() + srv, err := k8s.NewService(c, 
s.ScraperService, d) if err != nil { return err } @@ -439,8 +448,8 @@ func (s *Server) appendK8sService() error { } func (s *Server) appendSwarmService() error { c := s.config.Swarm - l := s.LogService.NewLogger("[swarm] ", log.LstdFlags) - srv, err := swarm.NewService(c, l) + d := s.DiagService.NewSwarmHandler() + srv, err := swarm.NewService(c, d) if err != nil { return err } @@ -452,24 +461,24 @@ func (s *Server) appendSwarmService() error { } func (s *Server) appendDeadmanService() { - l := s.LogService.NewLogger("[deadman] ", log.LstdFlags) - srv := deadman.NewService(s.config.Deadman, l) + d := s.DiagService.NewDeadmanHandler() + srv := deadman.NewService(s.config.Deadman, d) s.TaskMaster.DeadmanService = srv s.AppendService("deadman", srv) } func (s *Server) appendUDFService() { - l := s.LogService.NewLogger("[udf] ", log.LstdFlags) - srv := udf.NewService(s.config.UDF, l) + d := s.DiagService.NewUDFServiceHandler() + srv := udf.NewService(s.config.UDF, d) s.TaskMaster.UDFService = srv s.AppendService("udf", srv) } func (s *Server) appendAuthService() { - l := s.LogService.NewLogger("[noauth] ", log.LstdFlags) - srv := noauth.NewService(l) + d := s.DiagService.NewNoAuthHandler() + srv := noauth.NewService(d) s.AuthService = srv s.HTTPDService.Handler.AuthService = srv @@ -478,8 +487,8 @@ func (s *Server) appendAuthService() { func (s *Server) appendMQTTService() error { cs := s.config.MQTT - l := s.LogService.NewLogger("[mqtt] ", log.LstdFlags) - srv, err := mqtt.NewService(cs, l) + d := s.DiagService.NewMQTTHandler() + srv, err := mqtt.NewService(cs, d) if err != nil { return err } @@ -494,8 +503,8 @@ func (s *Server) appendMQTTService() error { func (s *Server) appendOpsGenieService() { c := s.config.OpsGenie - l := s.LogService.NewLogger("[opsgenie] ", log.LstdFlags) - srv := opsgenie.NewService(c, l) + d := s.DiagService.NewOpsGenieHandler() + srv := opsgenie.NewService(c, d) s.TaskMaster.OpsGenieService = srv s.AlertService.OpsGenieService = srv @@ -506,8 +515,8 @@ func (s *Server) appendOpsGenieService() { func (s *Server) appendVictorOpsService() { c := s.config.VictorOps - l := s.LogService.NewLogger("[victorops] ", log.LstdFlags) - srv := victorops.NewService(c, l) + d := s.DiagService.NewVictorOpsHandler() + srv := victorops.NewService(c, d) s.TaskMaster.VictorOpsService = srv s.AlertService.VictorOpsService = srv @@ -518,8 +527,8 @@ func (s *Server) appendVictorOpsService() { func (s *Server) appendPagerDutyService() { c := s.config.PagerDuty - l := s.LogService.NewLogger("[pagerduty] ", log.LstdFlags) - srv := pagerduty.NewService(c, l) + d := s.DiagService.NewPagerDutyHandler() + srv := pagerduty.NewService(c, d) srv.HTTPDService = s.HTTPDService s.TaskMaster.PagerDutyService = srv @@ -531,8 +540,8 @@ func (s *Server) appendPagerDutyService() { func (s *Server) appendPushoverService() { c := s.config.Pushover - l := s.LogService.NewLogger("[pushover] ", log.LstdFlags) - srv := pushover.NewService(c, l) + d := s.DiagService.NewPushoverHandler() + srv := pushover.NewService(c, d) s.TaskMaster.PushoverService = srv s.AlertService.PushoverService = srv @@ -543,8 +552,8 @@ func (s *Server) appendPushoverService() { func (s *Server) appendHTTPPostService() { c := s.config.HTTPPost - l := s.LogService.NewLogger("[httppost] ", log.LstdFlags) - srv := httppost.NewService(c, l) + d := s.DiagService.NewHTTPPostHandler() + srv := httppost.NewService(c, d) s.TaskMaster.HTTPPostService = srv s.AlertService.HTTPPostService = srv @@ -555,8 +564,8 @@ func (s *Server) 
appendHTTPPostService() { func (s *Server) appendSensuService() { c := s.config.Sensu - l := s.LogService.NewLogger("[sensu] ", log.LstdFlags) - srv := sensu.NewService(c, l) + d := s.DiagService.NewSensuHandler() + srv := sensu.NewService(c, d) s.TaskMaster.SensuService = srv s.AlertService.SensuService = srv @@ -567,8 +576,8 @@ func (s *Server) appendSensuService() { func (s *Server) appendSlackService() error { c := s.config.Slack - l := s.LogService.NewLogger("[slack] ", log.LstdFlags) - srv, err := slack.NewService(c, l) + d := s.DiagService.NewSlackHandler() + srv, err := slack.NewService(c, d) if err != nil { return err } @@ -583,8 +592,8 @@ func (s *Server) appendSlackService() error { func (s *Server) appendSNMPTrapService() { c := s.config.SNMPTrap - l := s.LogService.NewLogger("[snmptrap] ", log.LstdFlags) - srv := snmptrap.NewService(c, l) + d := s.DiagService.NewSNMPTrapHandler() + srv := snmptrap.NewService(c, d) s.TaskMaster.SNMPTrapService = srv s.AlertService.SNMPTrapService = srv @@ -595,8 +604,8 @@ func (s *Server) appendSNMPTrapService() { func (s *Server) appendTelegramService() { c := s.config.Telegram - l := s.LogService.NewLogger("[telegram] ", log.LstdFlags) - srv := telegram.NewService(c, l) + d := s.DiagService.NewTelegramHandler() + srv := telegram.NewService(c, d) s.TaskMaster.TelegramService = srv s.AlertService.TelegramService = srv @@ -607,8 +616,8 @@ func (s *Server) appendTelegramService() { func (s *Server) appendHipChatService() { c := s.config.HipChat - l := s.LogService.NewLogger("[hipchat] ", log.LstdFlags) - srv := hipchat.NewService(c, l) + d := s.DiagService.NewHipChatHandler() + srv := hipchat.NewService(c, d) s.TaskMaster.HipChatService = srv s.AlertService.HipChatService = srv @@ -619,8 +628,8 @@ func (s *Server) appendHipChatService() { func (s *Server) appendAlertaService() { c := s.config.Alerta - l := s.LogService.NewLogger("[alerta] ", log.LstdFlags) - srv := alerta.NewService(c, l) + d := s.DiagService.NewAlertaHandler() + srv := alerta.NewService(c, d) s.TaskMaster.AlertaService = srv s.AlertService.AlertaService = srv @@ -631,8 +640,8 @@ func (s *Server) appendAlertaService() { func (s *Server) appendTalkService() { c := s.config.Talk - l := s.LogService.NewLogger("[talk] ", log.LstdFlags) - srv := talk.NewService(c, l) + d := s.DiagService.NewTalkHandler() + srv := talk.NewService(c, d) s.TaskMaster.TalkService = srv s.AlertService.TalkService = srv @@ -641,18 +650,23 @@ func (s *Server) appendTalkService() { s.AppendService("talk", srv) } -func (s *Server) appendCollectdService() { +func (s *Server) appendCollectdService() error { c := s.config.Collectd if !c.Enabled { - return + return nil } srv := collectd.NewService(c) - w := s.LogService.NewStaticLevelWriter(logging.INFO) + w, err := s.DiagService.NewStaticLevelHandler("info", "collectd") + if err != nil { + return fmt.Errorf("failed to create static level handler for collectd: %v", err) + } srv.SetLogOutput(w) srv.MetaClient = s.MetaClient srv.PointsWriter = s.TaskMaster s.AppendService("collectd", srv) + + return nil } func (s *Server) appendOpenTSDBService() error { @@ -664,7 +678,10 @@ func (s *Server) appendOpenTSDBService() error { if err != nil { return err } - w := s.LogService.NewStaticLevelWriter(logging.INFO) + w, err := s.DiagService.NewStaticLevelHandler("info", "opentsdb") + if err != nil { + return fmt.Errorf("failed to create static level handler for opentsdb: %v", err) + } srv.SetLogOutput(w) srv.PointsWriter = s.TaskMaster @@ -682,7 +699,10 @@ func (s *Server) 
appendGraphiteServices() error { if err != nil { return errors.Wrap(err, "creating new graphite service") } - w := s.LogService.NewStaticLevelWriter(logging.INFO) + w, err := s.DiagService.NewStaticLevelHandler("info", "graphite") + if err != nil { + return fmt.Errorf("failed to create static level handler for graphite: %v", err) + } srv.SetLogOutput(w) srv.PointsWriter = s.TaskMaster @@ -697,8 +717,8 @@ func (s *Server) appendUDPServices() { if !c.Enabled { continue } - l := s.LogService.NewLogger("[udp] ", log.LstdFlags) - srv := udp.NewService(c, l) + d := s.DiagService.NewUDPHandler() + srv := udp.NewService(c, d) srv.PointsWriter = s.TaskMaster s.AppendService(fmt.Sprintf("udp%d", i), srv) } @@ -707,8 +727,8 @@ func (s *Server) appendUDPServices() { func (s *Server) appendStatsService() { c := s.config.Stats if c.Enabled { - l := s.LogService.NewLogger("[stats] ", log.LstdFlags) - srv := stats.NewService(c, l) + d := s.DiagService.NewStatsHandler() + srv := stats.NewService(c, d) srv.TaskMaster = s.TaskMaster s.StatsService = srv @@ -720,8 +740,8 @@ func (s *Server) appendStatsService() { func (s *Server) appendReportingService() { c := s.config.Reporting if c.Enabled { - l := s.LogService.NewLogger("[reporting] ", log.LstdFlags) - srv := reporting.NewService(c, vars.Info, l) + d := s.DiagService.NewReportingHandler() + srv := reporting.NewService(c, vars.Info, d) s.AppendService("reporting", srv) } @@ -729,8 +749,8 @@ func (s *Server) appendReportingService() { func (s *Server) appendScraperService() { c := s.config.Scraper - l := s.LogService.NewLogger("[scrapers] ", log.LstdFlags) - srv := scraper.NewService(c, l) + d := s.DiagService.NewScraperHandler() + srv := scraper.NewService(c, d) srv.PointsWriter = s.TaskMaster s.ScraperService = srv s.SetDynamicService("scraper", srv) @@ -739,88 +759,88 @@ func (s *Server) appendScraperService() { func (s *Server) appendAzureService() { c := s.config.Azure - l := s.LogService.NewLogger("[azure] ", log.LstdFlags) - srv := azure.NewService(c, s.ScraperService, l) + d := s.DiagService.NewAzureHandler() + srv := azure.NewService(c, s.ScraperService, d) s.SetDynamicService("azure", srv) s.AppendService("azure", srv) } func (s *Server) appendConsulService() { c := s.config.Consul - l := s.LogService.NewLogger("[consul] ", log.LstdFlags) - srv := consul.NewService(c, s.ScraperService, l) + d := s.DiagService.NewConsulHandler() + srv := consul.NewService(c, s.ScraperService, d) s.SetDynamicService("consul", srv) s.AppendService("consul", srv) } func (s *Server) appendDNSService() { c := s.config.DNS - l := s.LogService.NewLogger("[dns] ", log.LstdFlags) - srv := dns.NewService(c, s.ScraperService, l) + d := s.DiagService.NewDNSHandler() + srv := dns.NewService(c, s.ScraperService, d) s.SetDynamicService("dns", srv) s.AppendService("dns", srv) } func (s *Server) appendEC2Service() { c := s.config.EC2 - l := s.LogService.NewLogger("[ec2] ", log.LstdFlags) - srv := ec2.NewService(c, s.ScraperService, l) + d := s.DiagService.NewEC2Handler() + srv := ec2.NewService(c, s.ScraperService, d) s.SetDynamicService("ec2", srv) s.AppendService("ec2", srv) } func (s *Server) appendFileService() { c := s.config.FileDiscovery - l := s.LogService.NewLogger("[file-discovery] ", log.LstdFlags) - srv := file_discovery.NewService(c, s.ScraperService, l) + d := s.DiagService.NewFileDiscoveryHandler() + srv := file_discovery.NewService(c, s.ScraperService, d) s.SetDynamicService("file-discovery", srv) s.AppendService("file-discovery", srv) } func (s *Server) 
appendGCEService() { c := s.config.GCE - l := s.LogService.NewLogger("[gce] ", log.LstdFlags) - srv := gce.NewService(c, s.ScraperService, l) + d := s.DiagService.NewGCEHandler() + srv := gce.NewService(c, s.ScraperService, d) s.SetDynamicService("gce", srv) s.AppendService("gce", srv) } func (s *Server) appendMarathonService() { c := s.config.Marathon - l := s.LogService.NewLogger("[marathon] ", log.LstdFlags) - srv := marathon.NewService(c, s.ScraperService, l) + d := s.DiagService.NewMarathonHandler() + srv := marathon.NewService(c, s.ScraperService, d) s.SetDynamicService("marathon", srv) s.AppendService("marathon", srv) } func (s *Server) appendNerveService() { c := s.config.Nerve - l := s.LogService.NewLogger("[nerve] ", log.LstdFlags) - srv := nerve.NewService(c, s.ScraperService, l) + d := s.DiagService.NewNerveHandler() + srv := nerve.NewService(c, s.ScraperService, d) s.SetDynamicService("nerve", srv) s.AppendService("nerve", srv) } func (s *Server) appendServersetService() { c := s.config.Serverset - l := s.LogService.NewLogger("[serverset] ", log.LstdFlags) - srv := serverset.NewService(c, s.ScraperService, l) + d := s.DiagService.NewServersetHandler() + srv := serverset.NewService(c, s.ScraperService, d) s.SetDynamicService("serverset", srv) s.AppendService("serverset", srv) } func (s *Server) appendStaticService() { c := s.config.StaticDiscovery - l := s.LogService.NewLogger("[static-discovery] ", log.LstdFlags) - srv := static_discovery.NewService(c, s.ScraperService, l) + d := s.DiagService.NewStaticDiscoveryHandler() + srv := static_discovery.NewService(c, s.ScraperService, d) s.SetDynamicService("static-discovery", srv) s.AppendService("static-discovery", srv) } func (s *Server) appendTritonService() { c := s.config.Triton - l := s.LogService.NewLogger("[triton] ", log.LstdFlags) - srv := triton.NewService(c, s.ScraperService, l) + d := s.DiagService.NewTritonHandler() + srv := triton.NewService(c, s.ScraperService, d) s.SetDynamicService("triton", srv) s.AppendService("triton", srv) } @@ -849,16 +869,16 @@ func (s *Server) Open() error { func (s *Server) startServices() error { for _, service := range s.Services { - s.Logger.Printf("D! opening service: %T", service) + s.Diag.Debug("opening service", keyvalue.KV("service", fmt.Sprintf("%T", service))) if err := service.Open(); err != nil { return fmt.Errorf("open service %T: %s", service, err) } - s.Logger.Printf("D! opened service: %T", service) + s.Diag.Debug("opened service", keyvalue.KV("service", fmt.Sprintf("%T", service))) // Apply config overrides after the config override service has been opened and before any dynamic services. if service == s.ConfigOverrideService && !s.config.SkipConfigOverrides && s.config.ConfigOverride.Enabled { // Apply initial config updates - s.Logger.Println("D! applying configuration overrides") + s.Diag.Debug("applying config overrides") configs, err := s.ConfigOverrideService.Config() if err != nil { return errors.Wrap(err, "failed to apply config overrides") @@ -867,7 +887,7 @@ func (s *Server) startServices() error { if srv, ok := s.DynamicServices[service]; !ok { return fmt.Errorf("found configuration override for unknown service %q", service) } else { - s.Logger.Println("D! 
applying configuration overrides for", service) + s.Diag.Debug("applying config overrides for service", keyvalue.KV("service", service)) if err := srv.Update(config); err != nil { return errors.Wrapf(err, "failed to update configuration for service %s", service) } @@ -904,11 +924,11 @@ func (s *Server) Close() error { // Close all services that write points first. if err := s.HTTPDService.Close(); err != nil { - s.Logger.Printf("E! error closing httpd service: %v", err) + s.Diag.Error("error closing httpd service", err) } if s.StatsService != nil { if err := s.StatsService.Close(); err != nil { - s.Logger.Printf("E! error closing stats service: %v", err) + s.Diag.Error("error closing stats service", err) } } @@ -919,12 +939,12 @@ // Close services now that all tasks are stopped. for i := len(s.Services) - 1; i >= 0; i-- { service := s.Services[i] - s.Logger.Printf("D! closing service: %T", service) + s.Diag.Debug("closing service", keyvalue.KV("service", fmt.Sprintf("%T", service))) err := service.Close() if err != nil { - s.Logger.Printf("E! error closing service %T: %v", service, err) + s.Diag.Error("error closing service", err, keyvalue.KV("service", fmt.Sprintf("%T", service))) } - s.Logger.Printf("D! closed service: %T", service) + s.Diag.Debug("closed service", keyvalue.KV("service", fmt.Sprintf("%T", service))) } // Finally close the task master @@ -1043,7 +1063,7 @@ func (s *Server) startProfile(cpuprofile, memprofile string) error { if err != nil { return fmt.Errorf("E! cpuprofile: %v", err) } - s.Logger.Printf("I! writing CPU profile to: %s\n", cpuprofile) + s.Diag.Info("writing CPU profile", keyvalue.KV("file", cpuprofile)) prof.cpu = f if err := pprof.StartCPUProfile(prof.cpu); err != nil { return fmt.Errorf("E! start cpu profile: %v", err) } @@ -1055,7 +1075,7 @@ if err != nil { return fmt.Errorf("E! memprofile: %v", err) } - s.Logger.Printf("I! writing mem profile to: %s\n", memprofile) + s.Diag.Info("writing mem profile", keyvalue.KV("file", memprofile)) prof.mem = f runtime.MemProfileRate = 4096 } @@ -1067,14 +1087,14 @@ func (s *Server) stopProfile() { if prof.cpu != nil { pprof.StopCPUProfile() prof.cpu.Close() - s.Logger.Println("I! CPU profile stopped") + s.Diag.Info("CPU profile stopped") } if prof.mem != nil { if err := pprof.Lookup("heap").WriteTo(prof.mem, 0); err != nil { - s.Logger.Printf("I! failed to write mem profile: %v\n", err) + s.Diag.Error("failed to write mem profile", err) } prof.mem.Close() - s.Logger.Println("I! mem profile stopped") + s.Diag.Info("mem profile stopped") } } diff --git a/server/server_helper_test.go b/server/server_helper_test.go index 276b68742..9cbb17113 100644 --- a/server/server_helper_test.go +++ b/server/server_helper_test.go @@ -18,8 +18,7 @@ import ( iclient "github.com/influxdata/influxdb/client/v2" "github.com/influxdata/kapacitor/client/v1" "github.com/influxdata/kapacitor/server" - "github.com/influxdata/kapacitor/services/logging" - "github.com/influxdata/kapacitor/services/logging/loggingtest" + "github.com/influxdata/kapacitor/services/diagnostic" "github.com/influxdata/wlog" ) @@ -28,7 +27,7 @@ type Server struct { *server.Server Config *server.Config buildInfo server.BuildInfo - ls logging.Interface + ds *diagnostic.Service } // NewServer returns a new instance of Server.
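[Editor's note] In the server_helper_test.go hunks that follow, the tests construct a real *diagnostic.Service writing to ioutil.Discard instead of the old loggingtest fake. A minimal sketch of that setup, using only constructors that appear in this diff (the error check on Open is an illustrative addition; the diff itself calls ds.Open() unchecked):

	ds := diagnostic.NewService(diagnostic.NewConfig(), ioutil.Discard, ioutil.Discard)
	if err := ds.Open(); err != nil {
		panic(err) // test helper: fail fast if the diagnostic service cannot open
	}
	srv, err := server.New(c, buildInfo, ds)

Note that Stop/Start below reuse the same *diagnostic.Service instance across server restarts rather than constructing a fresh one each time.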
@@ -40,8 +39,9 @@ func NewServer(c *server.Config) *Server { Branch: "testBranch", } c.HTTP.LogEnabled = testing.Verbose() - ls := loggingtest.New() - srv, err := server.New(c, buildInfo, ls) + ds := diagnostic.NewService(diagnostic.NewConfig(), ioutil.Discard, ioutil.Discard) + ds.Open() + srv, err := server.New(c, buildInfo, ds) if err != nil { panic(err) } @@ -49,7 +49,7 @@ func NewServer(c *server.Config) *Server { Server: srv, Config: c, buildInfo: buildInfo, - ls: ls, + ds: ds, } return &s } @@ -59,7 +59,7 @@ func (s *Server) Stop() { } func (s *Server) Start() { - srv, err := server.New(s.Config, s.buildInfo, s.ls) + srv, err := server.New(s.Config, s.buildInfo, s.ds) if err != nil { panic(err.Error()) } diff --git a/services/alert/api.go b/services/alert/api.go index 441819366..988fcd48e 100644 --- a/services/alert/api.go +++ b/services/alert/api.go @@ -5,7 +5,6 @@ import ( "fmt" "io" "io/ioutil" - "log" "net/http" "path" "sort" @@ -47,7 +46,7 @@ type apiServer struct { AddPreviewRoutes([]httpd.Route) error DelRoutes([]httpd.Route) } - logger *log.Logger + diag Diagnostic } func (s *apiServer) Open() error { diff --git a/services/alert/handlers.go b/services/alert/handlers.go index 01a1e7e85..dc5ed92a7 100644 --- a/services/alert/handlers.go +++ b/services/alert/handlers.go @@ -4,7 +4,6 @@ import ( "bytes" "encoding/json" "fmt" - "log" "net" "os" "path/filepath" @@ -16,11 +15,16 @@ import ( "github.com/influxdata/kapacitor/alert" "github.com/influxdata/kapacitor/bufpool" "github.com/influxdata/kapacitor/command" + "github.com/influxdata/kapacitor/keyvalue" "github.com/influxdata/kapacitor/tick/ast" "github.com/influxdata/kapacitor/tick/stateful" "github.com/pkg/errors" ) +type HandlerDiagnostic interface { + Error(msg string, err error, ctx ...keyvalue.T) +} + // Default log mode for file const defaultLogFileMode = 0600 @@ -42,7 +46,7 @@ func (c LogHandlerConfig) Validate() error { type logHandler struct { logpath string mode os.FileMode - logger *log.Logger + diag HandlerDiagnostic } func DefaultLogHandlerConfig() LogHandlerConfig { @@ -51,14 +55,14 @@ func DefaultLogHandlerConfig() LogHandlerConfig { } } -func NewLogHandler(c LogHandlerConfig, l *log.Logger) (alert.Handler, error) { +func NewLogHandler(c LogHandlerConfig, d HandlerDiagnostic) (alert.Handler, error) { if err := c.Validate(); err != nil { return nil, err } return &logHandler{ logpath: c.Path, mode: c.Mode, - logger: l, + diag: d, }, nil } @@ -67,14 +71,14 @@ func (h *logHandler) Handle(event alert.Event) { f, err := os.OpenFile(h.logpath, os.O_WRONLY|os.O_APPEND|os.O_CREATE, h.mode) if err != nil { - h.logger.Printf("E! failed to open file %s for alert logging: %v", h.logpath, err) + h.diag.Error("failed to open file for alert logging", err, keyvalue.KV("file", h.logpath)) return } defer f.Close() err = json.NewEncoder(f).Encode(ad) if err != nil { - h.logger.Printf("E! 
failed to marshal alert data json: %v", err) + h.diag.Error("failed to marshal alert data json", err) } } @@ -88,10 +92,10 @@ type execHandler struct { bp *bufpool.Pool s command.Spec commander command.Commander - logger *log.Logger + diag HandlerDiagnostic } -func NewExecHandler(c ExecHandlerConfig, l *log.Logger) alert.Handler { +func NewExecHandler(c ExecHandlerConfig, d HandlerDiagnostic) alert.Handler { s := command.Spec{ Prog: c.Prog, Args: c.Args, @@ -100,7 +104,7 @@ func NewExecHandler(c ExecHandlerConfig, l *log.Logger) alert.Handler { bp: bufpool.New(), s: s, commander: c.Commander, - logger: l, + diag: d, } } @@ -111,7 +115,7 @@ func (h *execHandler) Handle(event alert.Event) { err := json.NewEncoder(buf).Encode(ad) if err != nil { - h.logger.Printf("E! failed to marshal alert data json: %v", err) + h.diag.Error("failed to marshal alert data json", err) return } @@ -122,12 +126,12 @@ func (h *execHandler) Handle(event alert.Event) { cmd.Stderr(&out) err = cmd.Start() if err != nil { - h.logger.Printf("E! exec command failed: Output: %s: %v", out.String(), err) + h.diag.Error("exec command failed", err, keyvalue.KV("output", out.String())) return } err = cmd.Wait() if err != nil { - h.logger.Printf("E! exec command failed: Output: %s: %v", out.String(), err) + h.diag.Error("exec command failed", err, keyvalue.KV("output", out.String())) return } } @@ -137,16 +141,16 @@ type TCPHandlerConfig struct { } type tcpHandler struct { - bp *bufpool.Pool - addr string - logger *log.Logger + bp *bufpool.Pool + addr string + diag HandlerDiagnostic } -func NewTCPHandler(c TCPHandlerConfig, l *log.Logger) alert.Handler { +func NewTCPHandler(c TCPHandlerConfig, d HandlerDiagnostic) alert.Handler { return &tcpHandler{ - bp: bufpool.New(), - addr: c.Address, - logger: l, + bp: bufpool.New(), + addr: c.Address, + diag: d, } } @@ -157,13 +161,13 @@ func (h *tcpHandler) Handle(event alert.Event) { err := json.NewEncoder(buf).Encode(ad) if err != nil { - h.logger.Printf("E! failed to marshal alert data json: %v", err) + h.diag.Error("failed to marshal alert data json", err) return } conn, err := net.Dial("tcp", h.addr) if err != nil { - h.logger.Printf("E! 
tcp handler: failed to connect to %s: %v", h.addr, err) + h.diag.Error("tcp handler failed to connect", err, keyvalue.KV("address", h.addr)) return } defer conn.Close() @@ -200,14 +204,14 @@ type aggregateHandler struct { messageTmpl *text.Template - logger *log.Logger + diag HandlerDiagnostic events chan alert.Event closing chan struct{} wg sync.WaitGroup } -func NewAggregateHandler(c AggregateHandlerConfig, l *log.Logger) (alert.Handler, error) { +func NewAggregateHandler(c AggregateHandlerConfig, d HandlerDiagnostic) (alert.Handler, error) { // Parse and validate message template tmpl, err := text.New("message").Parse(c.Message) if err != nil { @@ -226,7 +230,7 @@ func NewAggregateHandler(c AggregateHandlerConfig, l *log.Logger) (alert.Handler topic: c.Topic, ec: c.ec, messageTmpl: tmpl, - logger: l, + diag: d, events: make(chan alert.Event), closing: make(chan struct{}), } @@ -310,14 +314,14 @@ type PublishHandlerConfig struct { ec EventCollector } type publishHandler struct { - c PublishHandlerConfig - logger *log.Logger + c PublishHandlerConfig + diag HandlerDiagnostic } -func NewPublishHandler(c PublishHandlerConfig, l *log.Logger) alert.Handler { +func NewPublishHandler(c PublishHandlerConfig, d HandlerDiagnostic) alert.Handler { return &publishHandler{ - c: c, - logger: l, + c: c, + diag: d, } } @@ -361,7 +365,7 @@ type matchHandler struct { vars []string - logger *log.Logger + diag HandlerDiagnostic } const ( @@ -379,7 +383,7 @@ var matchIdentifiers = map[string]interface{}{ "CRITICAL": int64(alert.Critical), } -func newMatchHandler(match string, h alert.Handler, l *log.Logger) (*matchHandler, error) { +func newMatchHandler(match string, h alert.Handler, d HandlerDiagnostic) (*matchHandler, error) { lambda, err := ast.ParseLambda(match) if err != nil { return nil, errors.Wrap(err, "invalid match expression") @@ -406,11 +410,11 @@ func newMatchHandler(match string, h alert.Handler, l *log.Logger) (*matchHandle } mh := &matchHandler{ - h: h, - expr: expr, - scope: stateful.NewScope(), - vars: ast.FindReferenceVariables(lambda), - logger: l, + h: h, + expr: expr, + scope: stateful.NewScope(), + vars: ast.FindReferenceVariables(lambda), + diag: d, } // Determine which functions are called @@ -437,7 +441,7 @@ func newMatchHandler(match string, h alert.Handler, l *log.Logger) (*matchHandle func (h *matchHandler) Handle(event alert.Event) { if ok, err := h.match(event); err != nil { - h.logger.Println("E! 
failed to evaluate match expression:", err) + h.diag.Error("failed to evaluate match expression", err) } else if ok { h.h.Handle(event) } diff --git a/services/alert/service.go b/services/alert/service.go index e3ea941c2..18f921e8f 100644 --- a/services/alert/service.go +++ b/services/alert/service.go @@ -4,7 +4,6 @@ import ( "encoding" "encoding/json" "fmt" - "log" "path" "reflect" "regexp" @@ -12,6 +11,7 @@ import ( "github.com/influxdata/kapacitor/alert" "github.com/influxdata/kapacitor/command" + "github.com/influxdata/kapacitor/keyvalue" "github.com/influxdata/kapacitor/services/alerta" "github.com/influxdata/kapacitor/services/hipchat" "github.com/influxdata/kapacitor/services/httpd" @@ -31,6 +31,18 @@ import ( "github.com/pkg/errors" ) +type Diagnostic interface { + WithHandlerContext(ctx ...keyvalue.T) HandlerDiagnostic + + MigratingHandlerSpecs() + FoundHandlerRows(length int) + FoundNewHandler(key string) + CreatingNewHandlers(length int) + MigratingOldHandlerSpec(id string) + + Error(msg string, err error, ctx ...keyvalue.T) +} + type Service struct { mu sync.RWMutex @@ -59,65 +71,65 @@ type Service struct { Commander command.Commander - logger *log.Logger + diag Diagnostic AlertaService interface { DefaultHandlerConfig() alerta.HandlerConfig - Handler(alerta.HandlerConfig, *log.Logger) (alert.Handler, error) + Handler(alerta.HandlerConfig, ...keyvalue.T) (alert.Handler, error) } HipChatService interface { - Handler(hipchat.HandlerConfig, *log.Logger) alert.Handler + Handler(hipchat.HandlerConfig, ...keyvalue.T) alert.Handler } MQTTService interface { - Handler(mqtt.HandlerConfig, *log.Logger) alert.Handler + Handler(mqtt.HandlerConfig, ...keyvalue.T) alert.Handler } OpsGenieService interface { - Handler(opsgenie.HandlerConfig, *log.Logger) alert.Handler + Handler(opsgenie.HandlerConfig, ...keyvalue.T) alert.Handler } PagerDutyService interface { - Handler(pagerduty.HandlerConfig, *log.Logger) alert.Handler + Handler(pagerduty.HandlerConfig, ...keyvalue.T) alert.Handler } PushoverService interface { - Handler(pushover.HandlerConfig, *log.Logger) alert.Handler + Handler(pushover.HandlerConfig, ...keyvalue.T) alert.Handler } HTTPPostService interface { - Handler(httppost.HandlerConfig, *log.Logger) alert.Handler + Handler(httppost.HandlerConfig, ...keyvalue.T) alert.Handler } SensuService interface { - Handler(sensu.HandlerConfig, *log.Logger) (alert.Handler, error) + Handler(sensu.HandlerConfig, ...keyvalue.T) (alert.Handler, error) } SlackService interface { - Handler(slack.HandlerConfig, *log.Logger) alert.Handler + Handler(slack.HandlerConfig, ...keyvalue.T) alert.Handler } SMTPService interface { - Handler(smtp.HandlerConfig, *log.Logger) alert.Handler + Handler(smtp.HandlerConfig, ...keyvalue.T) alert.Handler } SNMPTrapService interface { - Handler(snmptrap.HandlerConfig, *log.Logger) (alert.Handler, error) + Handler(snmptrap.HandlerConfig, ...keyvalue.T) (alert.Handler, error) } TalkService interface { - Handler(*log.Logger) alert.Handler + Handler(...keyvalue.T) alert.Handler } TelegramService interface { - Handler(telegram.HandlerConfig, *log.Logger) alert.Handler + Handler(telegram.HandlerConfig, ...keyvalue.T) alert.Handler } VictorOpsService interface { - Handler(victorops.HandlerConfig, *log.Logger) alert.Handler + Handler(victorops.HandlerConfig, ...keyvalue.T) alert.Handler } } -func NewService(l *log.Logger) *Service { +func NewService(d Diagnostic) *Service { s := &Service{ handlers: make(map[string]map[string]handler), closedTopics: make(map[string]bool), - 
topics: alert.NewTopics(l), - logger: l, + topics: alert.NewTopics(), + diag: d, } s.APIServer = &apiServer{ Registrar: s, Topics: s, Persister: s, - logger: l, + diag: d, } s.EventCollector = s return s @@ -195,7 +207,7 @@ func (s *Service) migrateHandlerSpecs(store storage.Interface) error { // Already migrated return nil } - s.logger.Println("D! migrating old v1.2 handler specs") + s.diag.MigratingHandlerSpecs() // v1.2 HandlerActionSpec type oldHandlerActionSpec struct { @@ -227,12 +239,12 @@ func (s *Service) migrateHandlerSpecs(store storage.Interface) error { if err != nil { return err } - s.logger.Printf("D! found %d handler rows", len(kvs)) + s.diag.FoundHandlerRows(len(kvs)) var oldKeys []string for _, kv := range kvs { if !oldKeyPattern.MatchString(kv.Key) { - s.logger.Println("D! found new handler skipping:", kv.Key) + s.diag.FoundNewHandler(kv.Key) continue } oldKeys = append(oldKeys, kv.Key) @@ -247,7 +259,7 @@ func (s *Service) migrateHandlerSpecs(store storage.Interface) error { return errors.Wrapf(err, "failed to read old handler spec data for %s", kv.Key) } - s.logger.Println("D! migrating old handler spec", old.ID) + s.diag.MigratingOldHandlerSpec(old.ID) // Create new handlers from the old hasStateChangesOnly := false @@ -289,7 +301,7 @@ func (s *Service) migrateHandlerSpecs(store storage.Interface) error { } } - s.logger.Printf("D! creating %d new handlers in place of old handlers", len(newHandlers)) + s.diag.CreatingNewHandlers(len(newHandlers)) // Create new handlers for _, handler := range newHandlers { @@ -317,7 +329,7 @@ func (s *Service) loadSavedHandlerSpecs() error { for _, spec := range specs { if err := s.loadHandlerSpec(spec); err != nil { - s.logger.Println("E! failed to load handler on startup", err) + s.diag.Error("failed to load handler on startup", err) } } @@ -717,6 +729,10 @@ func decodeStringToTextUnmarshaler(f, t reflect.Type, data interface{}) (interfa func (s *Service) createHandlerFromSpec(spec HandlerSpec) (handler, error) { var h alert.Handler var err error + ctx := []keyvalue.T{ + keyvalue.KV("handler", spec.ID), + keyvalue.KV("topic", spec.Topic), + } switch spec.Kind { case "aggregate": c := newDefaultAggregateHandlerConfig(s.EventCollector) @@ -724,7 +740,8 @@ func (s *Service) createHandlerFromSpec(spec HandlerSpec) (handler, error) { if err != nil { return handler{}, err } - h, err = NewAggregateHandler(c, s.logger) + handlerDiag := s.diag.WithHandlerContext(ctx...) + h, err = NewAggregateHandler(c, handlerDiag) if err != nil { return handler{}, err } @@ -734,7 +751,7 @@ func (s *Service) createHandlerFromSpec(spec HandlerSpec) (handler, error) { if err != nil { return handler{}, err } - h, err = s.AlertaService.Handler(c, s.logger) + h, err = s.AlertaService.Handler(c, ctx...) if err != nil { return handler{}, err } @@ -747,7 +764,8 @@ func (s *Service) createHandlerFromSpec(spec HandlerSpec) (handler, error) { if err != nil { return handler{}, err } - h = NewExecHandler(c, s.logger) + handlerDiag := s.diag.WithHandlerContext(ctx...) + h = NewExecHandler(c, handlerDiag) h = newExternalHandler(h) case "hipchat": c := hipchat.HandlerConfig{} @@ -755,7 +773,7 @@ func (s *Service) createHandlerFromSpec(spec HandlerSpec) (handler, error) { if err != nil { return handler{}, err } - h = s.HipChatService.Handler(c, s.logger) + h = s.HipChatService.Handler(c, ctx...) 
h = newExternalHandler(h) case "log": c := DefaultLogHandlerConfig() @@ -763,7 +781,8 @@ func (s *Service) createHandlerFromSpec(spec HandlerSpec) (handler, error) { if err != nil { return handler{}, err } - h, err = NewLogHandler(c, s.logger) + handlerDiag := s.diag.WithHandlerContext(ctx...) + h, err = NewLogHandler(c, handlerDiag) if err != nil { return handler{}, err } @@ -774,7 +793,7 @@ func (s *Service) createHandlerFromSpec(spec HandlerSpec) (handler, error) { if err != nil { return handler{}, err } - h = s.MQTTService.Handler(c, s.logger) + h = s.MQTTService.Handler(c, ctx...) h = newExternalHandler(h) case "opsgenie": c := opsgenie.HandlerConfig{} @@ -782,7 +801,7 @@ func (s *Service) createHandlerFromSpec(spec HandlerSpec) (handler, error) { if err != nil { return handler{}, err } - h = s.OpsGenieService.Handler(c, s.logger) + h = s.OpsGenieService.Handler(c, ctx...) h = newExternalHandler(h) case "pagerduty": c := pagerduty.HandlerConfig{} @@ -790,7 +809,7 @@ func (s *Service) createHandlerFromSpec(spec HandlerSpec) (handler, error) { if err != nil { return handler{}, err } - h = s.PagerDutyService.Handler(c, s.logger) + h = s.PagerDutyService.Handler(c, ctx...) h = newExternalHandler(h) case "pushover": c := pushover.HandlerConfig{} @@ -798,7 +817,7 @@ func (s *Service) createHandlerFromSpec(spec HandlerSpec) (handler, error) { if err != nil { return handler{}, err } - h = s.PushoverService.Handler(c, s.logger) + h = s.PushoverService.Handler(c, ctx...) h = newExternalHandler(h) case "post": c := httppost.HandlerConfig{} @@ -806,7 +825,7 @@ func (s *Service) createHandlerFromSpec(spec HandlerSpec) (handler, error) { if err != nil { return handler{}, err } - h = s.HTTPPostService.Handler(c, s.logger) + h = s.HTTPPostService.Handler(c, ctx...) h = newExternalHandler(h) case "publish": c := PublishHandlerConfig{ @@ -816,14 +835,15 @@ func (s *Service) createHandlerFromSpec(spec HandlerSpec) (handler, error) { if err != nil { return handler{}, err } - h = NewPublishHandler(c, s.logger) + handlerDiag := s.diag.WithHandlerContext(ctx...) + h = NewPublishHandler(c, handlerDiag) case "sensu": c := sensu.HandlerConfig{} err = decodeOptions(spec.Options, &c) if err != nil { return handler{}, err } - h, err = s.SensuService.Handler(c, s.logger) + h, err = s.SensuService.Handler(c, ctx...) if err != nil { return handler{}, err } @@ -834,7 +854,7 @@ func (s *Service) createHandlerFromSpec(spec HandlerSpec) (handler, error) { if err != nil { return handler{}, err } - h = s.SlackService.Handler(c, s.logger) + h = s.SlackService.Handler(c, ctx...) h = newExternalHandler(h) case "smtp": c := smtp.HandlerConfig{} @@ -842,7 +862,7 @@ func (s *Service) createHandlerFromSpec(spec HandlerSpec) (handler, error) { if err != nil { return handler{}, err } - h = s.SMTPService.Handler(c, s.logger) + h = s.SMTPService.Handler(c, ctx...) h = newExternalHandler(h) case "snmptrap": c := snmptrap.HandlerConfig{} @@ -850,13 +870,13 @@ func (s *Service) createHandlerFromSpec(spec HandlerSpec) (handler, error) { if err != nil { return handler{}, err } - h, err = s.SNMPTrapService.Handler(c, s.logger) + h, err = s.SNMPTrapService.Handler(c, ctx...) if err != nil { return handler{}, err } h = newExternalHandler(h) case "talk": - h = s.TalkService.Handler(s.logger) + h = s.TalkService.Handler(ctx...) 
h = newExternalHandler(h) case "tcp": c := TCPHandlerConfig{} @@ -864,7 +884,8 @@ func (s *Service) createHandlerFromSpec(spec HandlerSpec) (handler, error) { if err != nil { return handler{}, err } - h = NewTCPHandler(c, s.logger) + handlerDiag := s.diag.WithHandlerContext(ctx...) + h = NewTCPHandler(c, handlerDiag) h = newExternalHandler(h) case "telegram": c := telegram.HandlerConfig{} @@ -872,7 +893,7 @@ func (s *Service) createHandlerFromSpec(spec HandlerSpec) (handler, error) { if err != nil { return handler{}, err } - h = s.TelegramService.Handler(c, s.logger) + h = s.TelegramService.Handler(c, ctx...) h = newExternalHandler(h) case "victorops": c := victorops.HandlerConfig{} @@ -880,14 +901,15 @@ func (s *Service) createHandlerFromSpec(spec HandlerSpec) (handler, error) { if err != nil { return handler{}, err } - h = s.VictorOpsService.Handler(c, s.logger) + h = s.VictorOpsService.Handler(c, ctx...) h = newExternalHandler(h) default: err = fmt.Errorf("unsupported action kind %q", spec.Kind) } if spec.Match != "" { // Wrap handler in match handler - h, err = newMatchHandler(spec.Match, h, s.logger) + handlerDiag := s.diag.WithHandlerContext(ctx...) + h, err = newMatchHandler(spec.Match, h, handlerDiag) } return handler{Spec: spec, Handler: h}, err } diff --git a/services/alerta/service.go b/services/alerta/service.go index 887cd01f8..cea6d1c1c 100644 --- a/services/alerta/service.go +++ b/services/alerta/service.go @@ -6,7 +6,6 @@ import ( "encoding/json" "fmt" "io/ioutil" - "log" "net/http" "net/url" "path" @@ -15,6 +14,7 @@ import ( "time" "github.com/influxdata/kapacitor/alert" + "github.com/influxdata/kapacitor/keyvalue" "github.com/influxdata/kapacitor/models" "github.com/pkg/errors" ) @@ -27,15 +27,21 @@ const ( defaultTokenPrefix = "Bearer" ) +type Diagnostic interface { + WithContext(ctx ...keyvalue.T) Diagnostic + TemplateError(err error, kv keyvalue.T) + Error(msg string, err error) +} + type Service struct { configValue atomic.Value clientValue atomic.Value - logger *log.Logger + diag Diagnostic } -func NewService(c Config, l *log.Logger) *Service { +func NewService(c Config, d Diagnostic) *Service { s := &Service{ - logger: l, + diag: d, } s.configValue.Store(c) s.clientValue.Store(&http.Client{ @@ -283,9 +289,9 @@ type HandlerConfig struct { } type handler struct { - s *Service - c HandlerConfig - logger *log.Logger + s *Service + c HandlerConfig + diag Diagnostic resourceTmpl *text.Template eventTmpl *text.Template @@ -304,7 +310,7 @@ func (s *Service) DefaultHandlerConfig() HandlerConfig { } } -func (s *Service) Handler(c HandlerConfig, l *log.Logger) (alert.Handler, error) { +func (s *Service) Handler(c HandlerConfig, ctx ...keyvalue.T) (alert.Handler, error) { // Parse and validate alerta templates rtmpl, err := text.New("resource").Parse(c.Resource) if err != nil { @@ -339,7 +345,7 @@ func (s *Service) Handler(c HandlerConfig, l *log.Logger) (alert.Handler, error) return &handler{ s: s, c: c, - logger: l, + diag: s.diag.WithContext(ctx...), resourceTmpl: rtmpl, eventTmpl: evtmpl, environmentTmpl: etmpl, @@ -370,7 +376,7 @@ func (h *handler) Handle(event alert.Event) { var buf bytes.Buffer err := h.resourceTmpl.Execute(&buf, td) if err != nil { - h.logger.Printf("E! 
failed to evaluate Alerta Resource template %s: %v", h.c.Resource, err) + h.diag.TemplateError(err, keyvalue.KV("resource", h.c.Resource)) return } resource := buf.String() @@ -385,7 +391,7 @@ func (h *handler) Handle(event alert.Event) { } err = h.eventTmpl.Execute(&buf, data) if err != nil { - h.logger.Printf("E! failed to evaluate Alerta Event template %s: %v", h.c.Event, err) + h.diag.TemplateError(err, keyvalue.KV("event", h.c.Event)) return } eventStr := buf.String() @@ -393,7 +399,7 @@ func (h *handler) Handle(event alert.Event) { err = h.environmentTmpl.Execute(&buf, td) if err != nil { - h.logger.Printf("E! failed to evaluate Alerta Environment template %s: %v", h.c.Environment, err) + h.diag.TemplateError(err, keyvalue.KV("environment", h.c.Environment)) return } environment := buf.String() @@ -401,7 +407,7 @@ func (h *handler) Handle(event alert.Event) { err = h.groupTmpl.Execute(&buf, td) if err != nil { - h.logger.Printf("E! failed to evaluate Alerta Group template %s: %v", h.c.Group, err) + h.diag.TemplateError(err, keyvalue.KV("group", h.c.Group)) return } group := buf.String() @@ -409,7 +415,7 @@ func (h *handler) Handle(event alert.Event) { err = h.valueTmpl.Execute(&buf, td) if err != nil { - h.logger.Printf("E! failed to evaluate Alerta Value template %s: %v", h.c.Value, err) + h.diag.TemplateError(err, keyvalue.KV("value", h.c.Value)) return } value := buf.String() @@ -422,7 +428,7 @@ func (h *handler) Handle(event alert.Event) { for _, tmpl := range h.serviceTmpl { err = tmpl.Execute(&buf, td) if err != nil { - h.logger.Printf("E! failed to evaluate Alerta Service template: %v", err) + h.diag.TemplateError(err, keyvalue.KV("service", tmpl.Name())) return } service = append(service, buf.String()) @@ -461,6 +467,6 @@ func (h *handler) Handle(event alert.Event) { event.Data.Tags, event.Data.Result, ); err != nil { - h.logger.Printf("E! 
failed to send event to Alerta: %v", err) + h.diag.Error("failed to send event to Alerta", err) } } diff --git a/services/azure/service.go b/services/azure/service.go index 4e5d7f29d..d913dd6c4 100644 --- a/services/azure/service.go +++ b/services/azure/service.go @@ -3,7 +3,6 @@ package azure import ( "context" "fmt" - "log" "sync" "time" @@ -12,6 +11,8 @@ import ( pazure "github.com/prometheus/prometheus/discovery/azure" ) +type Diagnostic scraper.Diagnostic + // Service is the azure discovery service type Service struct { Configs []Config @@ -19,16 +20,16 @@ type Service struct { registry scraper.Registry - logger *log.Logger - open bool + diag Diagnostic + open bool } // NewService creates a new unopened Azure service -func NewService(c []Config, r scraper.Registry, l *log.Logger) *Service { +func NewService(c []Config, r scraper.Registry, d Diagnostic) *Service { return &Service{ Configs: c, registry: r, - logger: l, + diag: d, } } @@ -126,7 +127,7 @@ func (s *Service) Test(options interface{}) error { } sd := s.Configs[found].PromConfig() - discoverer := pazure.NewDiscovery(sd, scraper.NewLogger(s.logger)) + discoverer := pazure.NewDiscovery(sd, s.diag) ctx, cancel := context.WithCancel(context.Background()) updates := make(chan []*config.TargetGroup) diff --git a/services/config/service.go b/services/config/service.go index 00b813463..9af9b2828 100644 --- a/services/config/service.go +++ b/services/config/service.go @@ -3,7 +3,6 @@ package config import ( "encoding/json" "fmt" - "log" "net/http" "path" "regexp" @@ -26,6 +25,10 @@ const ( updateTimeout = 5 * time.Second ) +type Diagnostic interface { + Error(msg string, err error) +} + type ConfigUpdate struct { Name string NewConfig []interface{} @@ -35,7 +38,7 @@ type ConfigUpdate struct { type Service struct { enabled bool config interface{} - logger *log.Logger + diag Diagnostic updates chan<- ConfigUpdate routes []httpd.Route @@ -54,11 +57,11 @@ type Service struct { } } -func NewService(c Config, config interface{}, l *log.Logger, updates chan<- ConfigUpdate) *Service { +func NewService(c Config, config interface{}, d Diagnostic, updates chan<- ConfigUpdate) *Service { return &Service{ enabled: c.Enabled, config: config, - logger: l, + diag: d, updates: updates, } } @@ -306,7 +309,7 @@ func (s *Service) handleGetConfig(w http.ResponseWriter, r *http.Request) { if !hasSection { w.WriteHeader(http.StatusOK) if err := json.NewEncoder(w).Encode(config); err != nil { - s.logger.Println("E! failed to JSON encode configuration", err) + s.diag.Error("failed to JSON encode configuration", err) } } else if section != "" { sec, ok := config.Sections[section] @@ -329,7 +332,7 @@ func (s *Service) handleGetConfig(w http.ResponseWriter, r *http.Request) { if found { w.WriteHeader(http.StatusOK) if err := json.NewEncoder(w).Encode(elementEntry); err != nil { - s.logger.Println("E! failed to JSON encode element", err) + s.diag.Error("failed to JSON encode element", err) } } else { httpd.HttpError(w, fmt.Sprintf("unknown section/element: %s/%s", section, element), true, http.StatusNotFound) @@ -338,7 +341,7 @@ func (s *Service) handleGetConfig(w http.ResponseWriter, r *http.Request) { } else { w.WriteHeader(http.StatusOK) if err := json.NewEncoder(w).Encode(sec); err != nil { - s.logger.Println("E! 
failed to JSON encode sec", err) + s.diag.Error("failed to JSON encode sec", err) } } } diff --git a/services/config/service_test.go b/services/config/service_test.go index 7e470e292..1e6034e0b 100644 --- a/services/config/service_test.go +++ b/services/config/service_test.go @@ -5,9 +5,7 @@ import ( "errors" "fmt" "io/ioutil" - "log" "net/http" - "os" "reflect" "strings" "testing" @@ -15,11 +13,19 @@ import ( client "github.com/influxdata/kapacitor/client/v1" "github.com/influxdata/kapacitor/services/config" + "github.com/influxdata/kapacitor/services/diagnostic" "github.com/influxdata/kapacitor/services/httpd" "github.com/influxdata/kapacitor/services/httpd/httpdtest" "github.com/influxdata/kapacitor/services/storage/storagetest" ) +var diagService *diagnostic.Service + +func init() { + diagService = diagnostic.NewService(diagnostic.NewConfig(), ioutil.Discard, ioutil.Discard) + diagService.Open() +} + type SectionA struct { Option1 string `override:"option-1"` } @@ -49,7 +55,7 @@ type TestConfig struct { func OpenNewSerivce(testConfig interface{}, updates chan<- config.ConfigUpdate) (*config.Service, *httpdtest.Server) { c := config.NewConfig() - service := config.NewService(c, testConfig, log.New(os.Stderr, "[config] ", log.LstdFlags), updates) + service := config.NewService(c, testConfig, diagService.NewConfigOverrideHandler(), updates) service.StorageService = storagetest.New() server := httpdtest.NewServer(testing.Verbose()) service.HTTPDService = server diff --git a/services/consul/service.go b/services/consul/service.go index b6276b039..497666961 100644 --- a/services/consul/service.go +++ b/services/consul/service.go @@ -3,7 +3,6 @@ package consul import ( "context" "fmt" - "log" "sync" "time" @@ -12,6 +11,8 @@ import ( pconsul "github.com/prometheus/prometheus/discovery/consul" ) +type Diagnostic scraper.Diagnostic + // Service is the consul discovery service type Service struct { Configs []Config @@ -19,16 +20,16 @@ type Service struct { registry scraper.Registry - logger *log.Logger - open bool + diag Diagnostic + open bool } // NewService creates a new unopened service -func NewService(c []Config, r scraper.Registry, l *log.Logger) *Service { +func NewService(c []Config, r scraper.Registry, d Diagnostic) *Service { return &Service{ Configs: c, registry: r, - logger: l, + diag: d, } } @@ -126,7 +127,7 @@ func (s *Service) Test(options interface{}) error { } sd := s.Configs[found].PromConfig() - discoverer, err := pconsul.NewDiscovery(sd, scraper.NewLogger(s.logger)) + discoverer, err := pconsul.NewDiscovery(sd, s.diag) if err != nil { return err } diff --git a/services/deadman/service.go b/services/deadman/service.go index dc10210e5..54a7bc1b9 100644 --- a/services/deadman/service.go +++ b/services/deadman/service.go @@ -1,19 +1,22 @@ package deadman import ( - "log" "time" ) +type Diagnostic interface { + ConfiguredGlobally() +} + type Service struct { - c Config - logger *log.Logger + c Config + diag Diagnostic } -func NewService(c Config, l *log.Logger) *Service { +func NewService(c Config, d Diagnostic) *Service { return &Service{ - c: c, - logger: l, + c: c, + diag: d, } } @@ -39,7 +42,7 @@ func (s *Service) Global() bool { func (s *Service) Open() error { if s.Global() { - s.logger.Println("I! 
Deadman's switch is configured globally") + s.diag.ConfiguredGlobally() } return nil } diff --git a/services/logging/config.go b/services/diagnostic/config.go similarity index 80% rename from services/logging/config.go rename to services/diagnostic/config.go index 9ef160cd9..e755abc6a 100644 --- a/services/logging/config.go +++ b/services/diagnostic/config.go @@ -1,4 +1,4 @@ -package logging +package diagnostic type Config struct { File string `toml:"file"` @@ -8,6 +8,6 @@ type Config struct { func NewConfig() Config { return Config{ File: "STDERR", - Level: "INFO", + Level: "DEBUG", } } diff --git a/services/diagnostic/handlers.go b/services/diagnostic/handlers.go new file mode 100644 index 000000000..c12275330 --- /dev/null +++ b/services/diagnostic/handlers.go @@ -0,0 +1,1203 @@ +package diagnostic + +import ( + "bytes" + "errors" + "fmt" + "log" + "runtime" + "strconv" + "sync" + "time" + + "github.com/influxdata/kapacitor" + "github.com/influxdata/kapacitor/alert" + "github.com/influxdata/kapacitor/edge" + "github.com/influxdata/kapacitor/keyvalue" + "github.com/influxdata/kapacitor/models" + alertservice "github.com/influxdata/kapacitor/services/alert" + "github.com/influxdata/kapacitor/services/alerta" + klog "github.com/influxdata/kapacitor/services/diagnostic/internal/log" + "github.com/influxdata/kapacitor/services/hipchat" + "github.com/influxdata/kapacitor/services/httppost" + "github.com/influxdata/kapacitor/services/influxdb" + "github.com/influxdata/kapacitor/services/k8s" + "github.com/influxdata/kapacitor/services/mqtt" + "github.com/influxdata/kapacitor/services/opsgenie" + "github.com/influxdata/kapacitor/services/pagerduty" + "github.com/influxdata/kapacitor/services/pushover" + "github.com/influxdata/kapacitor/services/sensu" + "github.com/influxdata/kapacitor/services/slack" + "github.com/influxdata/kapacitor/services/smtp" + "github.com/influxdata/kapacitor/services/snmptrap" + "github.com/influxdata/kapacitor/services/swarm" + "github.com/influxdata/kapacitor/services/talk" + "github.com/influxdata/kapacitor/services/telegram" + "github.com/influxdata/kapacitor/services/udp" + "github.com/influxdata/kapacitor/services/victorops" + "github.com/influxdata/kapacitor/udf" + plog "github.com/prometheus/common/log" +) + +func Error(l *klog.Logger, msg string, err error, ctx []keyvalue.T) { + if len(ctx) == 0 { + l.Error(msg, klog.Error(err)) + return + } + + if len(ctx) == 1 { + el := ctx[0] + l.Error(msg, klog.Error(err), klog.String(el.Key, el.Value)) + return + } + + if len(ctx) == 2 { + x := ctx[0] + y := ctx[1] + l.Error(msg, klog.Error(err), klog.String(x.Key, x.Value), klog.String(y.Key, y.Value)) + return + } + + // This isn't great wrt to allocation, but should be rare. Currently + // no calls to Error use more than 2 ctx values. If a new call to + // Error uses more than 2, update this function + fields := make([]klog.Field, len(ctx)+1) // +1 for error + fields[0] = klog.Error(err) + for i := 1; i < len(fields); i++ { + kv := ctx[i-1] + fields[i] = klog.String(kv.Key, kv.Value) + } + + l.Error(msg, fields...) 
+}
+
+// Alert Service Handler
+
+type AlertServiceHandler struct {
+	l *klog.Logger
+}
+
+func logFieldsFromContext(ctx []keyvalue.T) []klog.Field {
+	fields := make([]klog.Field, len(ctx))
+	for i, kv := range ctx {
+		fields[i] = klog.String(kv.Key, kv.Value)
+	}
+
+	return fields
+}
+
+func (h *AlertServiceHandler) WithHandlerContext(ctx ...keyvalue.T) alertservice.HandlerDiagnostic {
+	fields := logFieldsFromContext(ctx)
+
+	return &AlertServiceHandler{
+		l: h.l.With(fields...),
+	}
+}
+
+func (h *AlertServiceHandler) MigratingHandlerSpecs() {
+	h.l.Debug("migrating old v1.2 handler specs")
+}
+
+func (h *AlertServiceHandler) MigratingOldHandlerSpec(spec string) {
+	h.l.Debug("migrating old handler spec", klog.String("handler", spec))
+}
+
+func (h *AlertServiceHandler) FoundHandlerRows(length int) {
+	h.l.Debug("found handler rows", klog.Int("handler_row_count", length))
+}
+
+func (h *AlertServiceHandler) CreatingNewHandlers(length int) {
+	h.l.Debug("creating new handlers in place of old handlers", klog.Int("handler_row_count", length))
+}
+
+func (h *AlertServiceHandler) FoundNewHandler(key string) {
+	h.l.Debug("found new handler, skipping", klog.String("handler", key))
+}
+
+func (h *AlertServiceHandler) Error(msg string, err error, ctx ...keyvalue.T) {
+	Error(h.l, msg, err, ctx)
+}
+
+// Kapacitor Handler
+
+type KapacitorHandler struct {
+	l *klog.Logger
+}
+
+func (h *KapacitorHandler) WithTaskContext(task string) kapacitor.TaskDiagnostic {
+	return &KapacitorHandler{
+		l: h.l.With(klog.String("task", task)),
+	}
+}
+
+func (h *KapacitorHandler) WithTaskMasterContext(tm string) kapacitor.Diagnostic {
+	return &KapacitorHandler{
+		l: h.l.With(klog.String("task_master", tm)),
+	}
+}
+
+func (h *KapacitorHandler) WithNodeContext(node string) kapacitor.NodeDiagnostic {
+	return &KapacitorHandler{
+		l: h.l.With(klog.String("node", node)),
+	}
+}
+
+func (h *KapacitorHandler) WithEdgeContext(task, parent, child string) kapacitor.EdgeDiagnostic {
+	return &KapacitorHandler{
+		l: h.l.With(klog.String("task", task), klog.String("parent", parent), klog.String("child", child)),
+	}
+}
+
+func (h *KapacitorHandler) TaskMasterOpened() {
+	h.l.Info("opened task master")
+}
+
+func (h *KapacitorHandler) TaskMasterClosed() {
+	h.l.Info("closed task master")
+}
+
+func (h *KapacitorHandler) StartingTask(task string) {
+	h.l.Debug("starting task", klog.String("task", task))
+}
+
+func (h *KapacitorHandler) StartedTask(task string) {
+	h.l.Info("started task", klog.String("task", task))
+}
+
+func (h *KapacitorHandler) StoppedTask(task string) {
+	h.l.Info("stopped task", klog.String("task", task))
+}
+
+func (h *KapacitorHandler) StoppedTaskWithError(task string, err error) {
+	h.l.Error("failed to stop task without error", klog.String("task", task), klog.Error(err))
+}
+
+func (h *KapacitorHandler) TaskMasterDot(d string) {
+	h.l.Debug("listing dot", klog.String("dot", d))
+}
+
+func (h *KapacitorHandler) ClosingEdge(collected int64, emitted int64) {
+	h.l.Debug("closing edge", klog.Int64("collected", collected), klog.Int64("emitted", emitted))
+}
+
+func (h *KapacitorHandler) Error(msg string, err error, ctx ...keyvalue.T) {
+	Error(h.l, msg, err, ctx)
+}
+
+func (h *KapacitorHandler) AlertTriggered(level alert.Level, id string, message string, rows *models.Row) {
+	h.l.Debug("alert triggered",
+		klog.Stringer("level", level),
+		klog.String("id", id),
+		klog.String("event_message", message),
+		klog.String("data", fmt.Sprintf("%v", rows)),
+	)
+}
+
+func (h *KapacitorHandler)
SettingReplicas(new int, old int, id string) { + h.l.Debug("setting replicas", + klog.Int("new", new), + klog.Int("old", old), + klog.String("event_id", id), + ) +} + +func (h *KapacitorHandler) StartingBatchQuery(q string) { + h.l.Debug("starting next batch query", klog.String("query", q)) +} + +func TagPairs(tags models.Tags) []klog.Field { + ts := []klog.Field{} + for k, v := range tags { + ts = append(ts, klog.String(k, v)) + } + + return ts +} + +func FieldPairs(tags models.Fields) []klog.Field { + ts := []klog.Field{} + for k, v := range tags { + var el klog.Field + switch t := v.(type) { + case int64: + el = klog.Int64(k, t) + case string: + el = klog.String(k, t) + case float64: + el = klog.Float64(k, t) + case bool: + el = klog.Bool(k, t) + default: + el = klog.String(k, fmt.Sprintf("%v", t)) + } + ts = append(ts, el) + } + + return ts +} + +func (h *KapacitorHandler) LogPointData(level, prefix string, point edge.PointMessage) { + fields := []klog.Field{ + klog.String("prefix", prefix), + klog.String("name", point.Name()), + klog.String("db", point.Database()), + klog.String("rp", point.RetentionPolicy()), + klog.String("group", string(point.GroupID())), + klog.Strings("dimension", point.Dimensions().TagNames), + klog.GroupedFields("tag", TagPairs(point.Tags())), + klog.GroupedFields("field", FieldPairs(point.Fields())), + klog.Time("time", point.Time()), + } + + var log func(string, ...klog.Field) + + switch level { + case "INFO": + log = h.l.Info + case "ERROR": + log = h.l.Error + case "DEBUG": + log = h.l.Debug + case "WARN": + log = h.l.Warn + default: + log = h.l.Info + } + + log("point", fields...) +} + +func (h *KapacitorHandler) LogBatchData(level, prefix string, batch edge.BufferedBatchMessage) { + var log func(string, ...klog.Field) + + switch level { + case "INFO": + log = h.l.Info + case "ERROR": + log = h.l.Error + case "DEBUG": + log = h.l.Debug + case "WARN": + log = h.l.Warn + default: + log = h.l.Info + } + + begin := batch.Begin() + log("begin batch", + klog.String("prefix", prefix), + klog.String("name", begin.Name()), + klog.String("group", string(begin.GroupID())), + klog.GroupedFields("tag", TagPairs(begin.Tags())), + klog.Time("time", begin.Time()), + ) + + for _, p := range batch.Points() { + log("batch point", + klog.String("prefix", prefix), + klog.String("name", begin.Name()), + klog.String("group", string(begin.GroupID())), + klog.GroupedFields("tag", TagPairs(p.Tags())), + klog.GroupedFields("field", FieldPairs(p.Fields())), + klog.Time("time", p.Time()), + ) + } + + log("end batch", + klog.String("prefix", prefix), + klog.String("name", begin.Name()), + klog.String("group", string(begin.GroupID())), + klog.GroupedFields("tag", TagPairs(begin.Tags())), + klog.Time("time", begin.Time()), + ) +} + +func (h *KapacitorHandler) UDFLog(s string) { + h.l.Info("UDF log", klog.String("text", s)) +} + +// Alerta handler + +type AlertaHandler struct { + l *klog.Logger +} + +func (h *AlertaHandler) WithContext(ctx ...keyvalue.T) alerta.Diagnostic { + fields := logFieldsFromContext(ctx) + + return &AlertaHandler{ + l: h.l.With(fields...), + } +} + +func (h *AlertaHandler) TemplateError(err error, kv keyvalue.T) { + h.l.Error("failed to evaluate Alerta template", klog.Error(err), klog.String(kv.Key, kv.Value)) +} + +func (h *AlertaHandler) Error(msg string, err error) { + h.l.Error(msg, klog.Error(err)) +} + +// HipChat handler +type HipChatHandler struct { + l *klog.Logger +} + +func (h *HipChatHandler) WithContext(ctx ...keyvalue.T) hipchat.Diagnostic { + fields 
:= logFieldsFromContext(ctx)
+
+	return &HipChatHandler{
+		l: h.l.With(fields...),
+	}
+}
+
+func (h *HipChatHandler) Error(msg string, err error) {
+	h.l.Error(msg, klog.Error(err))
+}
+
+// HTTPD handler
+
+type HTTPDHandler struct {
+	l *klog.Logger
+}
+
+func (h *HTTPDHandler) NewHTTPServerErrorLogger() *log.Logger {
+	s := &StaticLevelHandler{
+		l:     h.l.With(klog.String("service", "httpd_server_errors")),
+		level: llError,
+	}
+
+	return log.New(s, "", log.LstdFlags)
+}
+
+func (h *HTTPDHandler) StartingService() {
+	h.l.Info("starting HTTP service")
+}
+
+func (h *HTTPDHandler) StoppedService() {
+	h.l.Info("closed HTTP service")
+}
+
+func (h *HTTPDHandler) ShutdownTimeout() {
+	h.l.Error("shutdown timed out, forcefully closing all remaining connections")
+}
+
+func (h *HTTPDHandler) AuthenticationEnabled(enabled bool) {
+	h.l.Info("authentication", klog.Bool("enabled", enabled))
+}
+
+func (h *HTTPDHandler) ListeningOn(addr string, proto string) {
+	h.l.Info("listening on", klog.String("addr", addr), klog.String("protocol", proto))
+}
+
+func (h *HTTPDHandler) WriteBodyReceived(body string) {
+	h.l.Debug("write body received by handler", klog.String("body", body))
+}
+
+func (h *HTTPDHandler) HTTP(
+	host string,
+	username string,
+	start time.Time,
+	method string,
+	uri string,
+	proto string,
+	status int,
+	referer string,
+	userAgent string,
+	reqID string,
+	duration time.Duration,
+) {
+	h.l.Info("http request",
+		klog.String("host", host),
+		klog.String("username", username),
+		klog.Time("start", start),
+		klog.String("method", method),
+		klog.String("uri", uri),
+		klog.String("protocol", proto),
+		klog.Int("status", status),
+		klog.String("referer", referer),
+		klog.String("user-agent", userAgent),
+		klog.String("request-id", reqID),
+		klog.Duration("duration", duration),
+	)
+}
+
+func (h *HTTPDHandler) RecoveryError(
+	msg string,
+	err string,
+	host string,
+	username string,
+	start time.Time,
+	method string,
+	uri string,
+	proto string,
+	status int,
+	referer string,
+	userAgent string,
+	reqID string,
+	duration time.Duration,
+) {
+	h.l.Error(
+		msg,
+		klog.String("err", err),
+		klog.String("host", host),
+		klog.String("username", username),
+		klog.Time("start", start),
+		klog.String("method", method),
+		klog.String("uri", uri),
+		klog.String("protocol", proto),
+		klog.Int("status", status),
+		klog.String("referer", referer),
+		klog.String("user-agent", userAgent),
+		klog.String("request-id", reqID),
+		klog.Duration("duration", duration),
+	)
+}
+
+func (h *HTTPDHandler) Error(msg string, err error) {
+	h.l.Error(msg, klog.Error(err))
+}
+
+// Reporting handler
+type ReportingHandler struct {
+	l *klog.Logger
+}
+
+func (h *ReportingHandler) Error(msg string, err error) {
+	h.l.Error(msg, klog.Error(err))
+}
+
+// PagerDuty handler
+type PagerDutyHandler struct {
+	l *klog.Logger
+}
+
+func (h *PagerDutyHandler) WithContext(ctx ...keyvalue.T) pagerduty.Diagnostic {
+	fields := logFieldsFromContext(ctx)
+
+	return &PagerDutyHandler{
+		l: h.l.With(fields...),
+	}
+}
+
+func (h *PagerDutyHandler) Error(msg string, err error) {
+	h.l.Error(msg, klog.Error(err))
+}
+
+// Slack Handler
+
+type SlackHandler struct {
+	l *klog.Logger
+}
+
+func (h *SlackHandler) InsecureSkipVerify() {
+	h.l.Warn("service is configured to skip ssl verification")
+}
+
+func (h *SlackHandler) Error(msg string, err error) {
+	h.l.Error(msg, klog.Error(err))
+}
+
+func (h *SlackHandler) WithContext(ctx ...keyvalue.T) slack.Diagnostic {
+	fields := logFieldsFromContext(ctx)
+
+	return &SlackHandler{
+		l: h.l.With(fields...),
+	}
+}
+
+// Storage Handler
+
+type StorageHandler struct {
+	l *klog.Logger
+}
+
+func (h *StorageHandler) Error(msg string, err error) {
+	h.l.Error(msg, klog.Error(err))
+}
+
+// TaskStore Handler
+
+type TaskStoreHandler struct {
+	l *klog.Logger
+}
+
+func (h *TaskStoreHandler) StartingTask(taskID string) {
+	h.l.Debug("starting enabled task on startup", klog.String("task", taskID))
+}
+
+func (h *TaskStoreHandler) StartedTask(taskID string) {
+	h.l.Debug("started task during startup", klog.String("task", taskID))
+}
+
+func (h *TaskStoreHandler) FinishedTask(taskID string) {
+	h.l.Debug("task finished", klog.String("task", taskID))
+}
+
+func (h *TaskStoreHandler) Debug(msg string) {
+	h.l.Debug(msg)
+}
+
+func (h *TaskStoreHandler) Error(msg string, err error, ctx ...keyvalue.T) {
+	Error(h.l, msg, err, ctx)
+}
+
+func (h *TaskStoreHandler) AlreadyMigrated(entity, id string) {
+	h.l.Debug("entity has already been migrated, skipping", klog.String(entity, id))
+}
+
+func (h *TaskStoreHandler) Migrated(entity, id string) {
+	h.l.Debug("entity was migrated to new storage service", klog.String(entity, id))
+}
+
+// VictorOps Handler
+
+type VictorOpsHandler struct {
+	l *klog.Logger
+}
+
+func (h *VictorOpsHandler) Error(msg string, err error) {
+	h.l.Error(msg, klog.Error(err))
+}
+
+func (h *VictorOpsHandler) WithContext(ctx ...keyvalue.T) victorops.Diagnostic {
+	fields := logFieldsFromContext(ctx)
+
+	return &VictorOpsHandler{
+		l: h.l.With(fields...),
+	}
+}
+
+type SMTPHandler struct {
+	l *klog.Logger
+}
+
+func (h *SMTPHandler) Error(msg string, err error) {
+	h.l.Error(msg, klog.Error(err))
+}
+
+func (h *SMTPHandler) WithContext(ctx ...keyvalue.T) smtp.Diagnostic {
+	fields := logFieldsFromContext(ctx)
+
+	return &SMTPHandler{
+		l: h.l.With(fields...),
+	}
+}
+
+type OpsGenieHandler struct {
+	l *klog.Logger
+}
+
+func (h *OpsGenieHandler) Error(msg string, err error) {
+	h.l.Error(msg, klog.Error(err))
+}
+
+func (h *OpsGenieHandler) WithContext(ctx ...keyvalue.T) opsgenie.Diagnostic {
+	fields := logFieldsFromContext(ctx)
+
+	return &OpsGenieHandler{
+		l: h.l.With(fields...),
+	}
+}
+
+// UDF service handler
+
+type UDFServiceHandler struct {
+	l *klog.Logger
+}
+
+func (h *UDFServiceHandler) LoadedUDFInfo(udf string) {
+	h.l.Debug("loaded UDF info", klog.String("udf", udf))
+}
+
+func (h *UDFServiceHandler) WithUDFContext() udf.Diagnostic {
+	return &UDFServiceHandler{
+		l: h.l,
+	}
+}
+
+func (h *UDFServiceHandler) Error(msg string, err error, ctx ...keyvalue.T) {
+	Error(h.l, msg, err, ctx)
+}
+
+func (h *UDFServiceHandler) UDFLog(msg string) {
+	h.l.Info("UDF log", klog.String("text", msg))
+}
+
+// Pushover handler
+
+type PushoverHandler struct {
+	l *klog.Logger
+}
+
+func (h *PushoverHandler) Error(msg string, err error) {
+	h.l.Error(msg, klog.Error(err))
+}
+
+func (h *PushoverHandler) WithContext(ctx ...keyvalue.T) pushover.Diagnostic {
+	fields := logFieldsFromContext(ctx)
+
+	return &PushoverHandler{
+		l: h.l.With(fields...),
+	}
+}
+
+// HTTPPost handler
+
+type HTTPPostHandler struct {
+	l *klog.Logger
+}
+
+func (h *HTTPPostHandler) Error(msg string, err error) {
+	h.l.Error(msg, klog.Error(err))
+}
+
+func (h *HTTPPostHandler) WithContext(ctx ...keyvalue.T) httppost.Diagnostic {
+	fields := logFieldsFromContext(ctx)
+
+	return &HTTPPostHandler{
+		l: h.l.With(fields...),
+	}
+}
+
+// Sensu handler
+
+type SensuHandler struct {
+	l *klog.Logger
+}
+
+func (h *SensuHandler) Error(msg string, err error, ctx ...keyvalue.T)
{ + Error(h.l, msg, err, ctx) +} + +func (h *SensuHandler) WithContext(ctx ...keyvalue.T) sensu.Diagnostic { + fields := logFieldsFromContext(ctx) + + return &SensuHandler{ + l: h.l.With(fields...), + } +} + +// SNMPTrap handler + +type SNMPTrapHandler struct { + l *klog.Logger +} + +func (h *SNMPTrapHandler) Error(msg string, err error) { + h.l.Error(msg, klog.Error(err)) +} + +func (h *SNMPTrapHandler) WithContext(ctx ...keyvalue.T) snmptrap.Diagnostic { + fields := logFieldsFromContext(ctx) + + return &SNMPTrapHandler{ + l: h.l.With(fields...), + } +} + +// Telegram handler + +type TelegramHandler struct { + l *klog.Logger +} + +func (h *TelegramHandler) Error(msg string, err error) { + h.l.Error(msg, klog.Error(err)) +} + +func (h *TelegramHandler) WithContext(ctx ...keyvalue.T) telegram.Diagnostic { + fields := logFieldsFromContext(ctx) + + return &TelegramHandler{ + l: h.l.With(fields...), + } +} + +// MQTT handler + +type MQTTHandler struct { + l *klog.Logger +} + +func (h *MQTTHandler) Error(msg string, err error) { + h.l.Error(msg, klog.Error(err)) +} + +func (h *MQTTHandler) CreatingAlertHandler(c mqtt.HandlerConfig) { + qos, _ := c.QoS.MarshalText() + h.l.Debug("creating mqtt handler", + klog.String("broker_name", c.BrokerName), + klog.String("topic", c.Topic), + klog.Bool("retained", c.Retained), + klog.String("qos", string(qos)), + ) +} + +func (h *MQTTHandler) HandlingEvent() { + h.l.Debug("handling event") +} + +func (h *MQTTHandler) WithContext(ctx ...keyvalue.T) mqtt.Diagnostic { + fields := logFieldsFromContext(ctx) + + return &MQTTHandler{ + l: h.l.With(fields...), + } +} + +// Talk handler + +type TalkHandler struct { + l *klog.Logger +} + +func (h *TalkHandler) Error(msg string, err error) { + h.l.Error(msg, klog.Error(err)) +} + +func (h *TalkHandler) WithContext(ctx ...keyvalue.T) talk.Diagnostic { + fields := logFieldsFromContext(ctx) + + return &TalkHandler{ + l: h.l.With(fields...), + } +} + +// Config handler + +type ConfigOverrideHandler struct { + l *klog.Logger +} + +func (h *ConfigOverrideHandler) Error(msg string, err error) { + h.l.Error(msg, klog.Error(err)) +} + +type ServerHandler struct { + l *klog.Logger +} + +func (h *ServerHandler) Error(msg string, err error, ctx ...keyvalue.T) { + Error(h.l, msg, err, ctx) +} + +func (h *ServerHandler) Info(msg string, ctx ...keyvalue.T) { + if len(ctx) == 0 { + h.l.Info(msg) + return + } + + if len(ctx) == 1 { + el := ctx[0] + h.l.Info(msg, klog.String(el.Key, el.Value)) + return + } + + if len(ctx) == 2 { + x := ctx[0] + y := ctx[1] + h.l.Info(msg, klog.String(x.Key, x.Value), klog.String(y.Key, y.Value)) + return + } + + fields := make([]klog.Field, len(ctx)) + for i := 0; i < len(fields); i++ { + kv := ctx[i] + fields[i] = klog.String(kv.Key, kv.Value) + } + + h.l.Info(msg, fields...) +} + +func (h *ServerHandler) Debug(msg string, ctx ...keyvalue.T) { + if len(ctx) == 0 { + h.l.Debug(msg) + return + } + + if len(ctx) == 1 { + el := ctx[0] + h.l.Debug(msg, klog.String(el.Key, el.Value)) + return + } + + if len(ctx) == 2 { + x := ctx[0] + y := ctx[1] + h.l.Debug(msg, klog.String(x.Key, x.Value), klog.String(y.Key, y.Value)) + return + } + + fields := make([]klog.Field, len(ctx)) + for i := 0; i < len(fields); i++ { + kv := ctx[i] + fields[i] = klog.String(kv.Key, kv.Value) + } + + h.l.Debug(msg, fields...) 
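// Note: as with the package-level Error helper, the explicit zero-, one-,
// and two-pair cases above exist to avoid allocating a field slice for the
// most common call shapes; only longer context lists take this general path.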
+} + +type ReplayHandler struct { + l *klog.Logger +} + +func (h *ReplayHandler) Error(msg string, err error, ctx ...keyvalue.T) { + Error(h.l, msg, err, ctx) +} + +func (h *ReplayHandler) Debug(msg string, ctx ...keyvalue.T) { + if len(ctx) == 0 { + h.l.Debug(msg) + return + } + + if len(ctx) == 1 { + el := ctx[0] + h.l.Debug(msg, klog.String(el.Key, el.Value)) + return + } + + if len(ctx) == 2 { + x := ctx[0] + y := ctx[1] + h.l.Debug(msg, klog.String(x.Key, x.Value), klog.String(y.Key, y.Value)) + return + } + + fields := make([]klog.Field, len(ctx)) + for i := 0; i < len(fields); i++ { + kv := ctx[i] + fields[i] = klog.String(kv.Key, kv.Value) + } + + h.l.Debug(msg, fields...) +} + +// K8s handler + +type K8sHandler struct { + l *klog.Logger +} + +func (h *K8sHandler) WithClusterContext(cluster string) k8s.Diagnostic { + return &K8sHandler{ + l: h.l.With(klog.String("cluster_id", cluster)), + } +} + +// Swarm handler + +type SwarmHandler struct { + l *klog.Logger +} + +func (h *SwarmHandler) WithClusterContext(cluster string) swarm.Diagnostic { + return &SwarmHandler{ + l: h.l.With(klog.String("cluster_id", cluster)), + } +} + +// Deadman handler + +type DeadmanHandler struct { + l *klog.Logger +} + +func (h *DeadmanHandler) ConfiguredGlobally() { + h.l.Info("Deadman's switch is configured globally") +} + +// NoAuth handler + +type NoAuthHandler struct { + l *klog.Logger +} + +func (h *NoAuthHandler) FakedUserAuthentication(username string) { + h.l.Warn("using noauth auth backend. Faked Authentication for user", klog.String("user", username)) +} + +func (h *NoAuthHandler) FakedSubscriptionUserToken() { + h.l.Warn("using noauth auth backend. Faked authentication for subscription user token") +} + +// Stats handler + +type StatsHandler struct { + l *klog.Logger +} + +func (h *StatsHandler) Error(msg string, err error) { + h.l.Error(msg, klog.Error(err)) +} + +// UDP handler + +type UDPHandler struct { + l *klog.Logger +} + +func (h *UDPHandler) Error(msg string, err error, ctx ...keyvalue.T) { + Error(h.l, msg, err, ctx) +} + +func (h *UDPHandler) StartedListening(addr string) { + h.l.Info("started listening on UDP", klog.String("address", addr)) +} + +func (h *UDPHandler) ClosedService() { + h.l.Info("closed service") +} + +// InfluxDB handler + +type InfluxDBHandler struct { + l *klog.Logger +} + +func (h *InfluxDBHandler) Error(msg string, err error, ctx ...keyvalue.T) { + Error(h.l, msg, err, ctx) +} + +func (h *InfluxDBHandler) WithClusterContext(id string) influxdb.Diagnostic { + return &InfluxDBHandler{ + l: h.l.With(klog.String("cluster", id)), + } +} + +func (h *InfluxDBHandler) WithUDPContext(db string, rp string) udp.Diagnostic { + return &UDPHandler{ + l: h.l.With(klog.String("listener_id", fmt.Sprintf("udp:%s.%s", db, rp))), + } +} + +func (h *InfluxDBHandler) InsecureSkipVerify(urls []string) { + h.l.Warn("using InsecureSkipVerify when connecting to InfluxDB; this is insecure", klog.Strings("urls", urls)) +} + +func (h *InfluxDBHandler) UnlinkingSubscriptions(cluster string) { + h.l.Debug("unlinking subscription for cluster", klog.String("cluster", cluster)) +} + +func (h *InfluxDBHandler) LinkingSubscriptions(cluster string) { + h.l.Debug("linking subscription for cluster", klog.String("cluster", cluster)) +} + +func (h *InfluxDBHandler) StartedUDPListener(db string, rp string) { + h.l.Info("started UDP listener", klog.String("dbrp", fmt.Sprintf("%s.%s", db, rp))) +} + +// Scraper handler + +type ScraperHandler struct { + mu sync.Mutex + buf *bytes.Buffer + l *klog.Logger 
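// buf accumulates the variadic print-style arguments passed to the scraper
// logging methods below; mu serializes access, and the buffer is reset after
// each emitted line.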
+}
+
+func (h *ScraperHandler) Debug(ctx ...interface{}) {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+	defer h.buf.Reset()
+	fmt.Fprint(h.buf, ctx...)
+
+	h.l.Debug(h.buf.String())
+}
+
+func (h *ScraperHandler) Debugln(ctx ...interface{}) {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+	defer h.buf.Reset()
+	fmt.Fprintln(h.buf, ctx...)
+
+	h.l.Debug(strconv.Quote(h.buf.String()))
+}
+
+func (h *ScraperHandler) Debugf(s string, ctx ...interface{}) {
+	h.l.Debug(fmt.Sprintf(s, ctx...))
+}
+
+func (h *ScraperHandler) Info(ctx ...interface{}) {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+	defer h.buf.Reset()
+	fmt.Fprint(h.buf, ctx...)
+
+	h.l.Info(h.buf.String())
+}
+
+func (h *ScraperHandler) Infoln(ctx ...interface{}) {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+	defer h.buf.Reset()
+	fmt.Fprintln(h.buf, ctx...)
+
+	h.l.Info(strconv.Quote(h.buf.String()))
+}
+
+func (h *ScraperHandler) Infof(s string, ctx ...interface{}) {
+	h.l.Info(fmt.Sprintf(s, ctx...))
+}
+
+func (h *ScraperHandler) Warn(ctx ...interface{}) {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+	defer h.buf.Reset()
+	fmt.Fprint(h.buf, ctx...)
+
+	h.l.Warn(h.buf.String())
+}
+
+func (h *ScraperHandler) Warnln(ctx ...interface{}) {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+	defer h.buf.Reset()
+	fmt.Fprintln(h.buf, ctx...)
+
+	h.l.Warn(strconv.Quote(h.buf.String()))
+}
+
+func (h *ScraperHandler) Warnf(s string, ctx ...interface{}) {
+	h.l.Warn(fmt.Sprintf(s, ctx...))
+}
+
+func (h *ScraperHandler) Error(ctx ...interface{}) {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+	defer h.buf.Reset()
+	fmt.Fprint(h.buf, ctx...)
+
+	h.l.Error(h.buf.String())
+}
+
+func (h *ScraperHandler) Errorln(ctx ...interface{}) {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+	defer h.buf.Reset()
+	fmt.Fprintln(h.buf, ctx...)
+
+	h.l.Error(strconv.Quote(h.buf.String()))
+}
+
+func (h *ScraperHandler) Errorf(s string, ctx ...interface{}) {
+	h.l.Error(fmt.Sprintf(s, ctx...))
+}
+
+func (h *ScraperHandler) Fatal(ctx ...interface{}) {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+	defer h.buf.Reset()
+	fmt.Fprint(h.buf, ctx...)
+
+	h.l.Error(h.buf.String())
+}
+
+func (h *ScraperHandler) Fatalln(ctx ...interface{}) {
+	h.mu.Lock()
+	defer h.mu.Unlock()
+	defer h.buf.Reset()
+	fmt.Fprintln(h.buf, ctx...)
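// Note: the Fatal variants are logged at error level; the diagnostic logger
// has no fatal level and does not exit the process.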
+ + h.l.Error(h.buf.String()) +} + +func (h *ScraperHandler) Fatalf(s string, ctx ...interface{}) { + h.l.Error(fmt.Sprintf(s, ctx...)) +} + +func (h *ScraperHandler) With(key string, value interface{}) plog.Logger { + var field klog.Field + + switch value.(type) { + case int: + field = klog.Int(key, value.(int)) + case float64: + field = klog.Float64(key, value.(float64)) + case string: + field = klog.String(key, value.(string)) + case time.Duration: + field = klog.Duration(key, value.(time.Duration)) + default: + field = klog.String(key, fmt.Sprintf("%v", value)) + } + + return &ScraperHandler{ + l: h.l.With(field), + } +} + +func (h *ScraperHandler) SetFormat(string) error { + return nil +} + +func (h *ScraperHandler) SetLevel(string) error { + return nil +} + +// Edge Handler + +type EdgeHandler struct { + l *klog.Logger +} + +func (h *EdgeHandler) Collect(mtype edge.MessageType) { + h.l.Debug("collected message", klog.Stringer("message_type", mtype)) +} +func (h *EdgeHandler) Emit(mtype edge.MessageType) { + h.l.Debug("emitted message", klog.Stringer("message_type", mtype)) +} + +type logLevel int + +const ( + llInvalid logLevel = iota + llDebug + llError + llInfo + llWarn +) + +type StaticLevelHandler struct { + l *klog.Logger + level logLevel +} + +func (h *StaticLevelHandler) Write(buf []byte) (int, error) { + switch h.level { + case llDebug: + h.l.Debug(string(buf)) + case llError: + h.l.Error(string(buf)) + case llInfo: + h.l.Info(string(buf)) + case llWarn: + h.l.Warn(string(buf)) + default: + return 0, errors.New("invalid log level") + } + + return len(buf), nil +} + +// Cmd handler + +type CmdHandler struct { + l *klog.Logger +} + +func (h *CmdHandler) Error(msg string, err error) { + h.l.Error(msg, klog.Error(err)) +} + +func (h *CmdHandler) KapacitorStarting(version, branch, commit string) { + h.l.Info("kapacitor starting", klog.String("version", version), klog.String("branch", branch), klog.String("commit", commit)) +} + +func (h *CmdHandler) GoVersion() { + h.l.Info("go version", klog.String("version", runtime.Version())) +} + +func (h *CmdHandler) Info(msg string) { + h.l.Info(msg) +} diff --git a/services/diagnostic/internal/log/field.go b/services/diagnostic/internal/log/field.go new file mode 100644 index 000000000..0b5381d5b --- /dev/null +++ b/services/diagnostic/internal/log/field.go @@ -0,0 +1,478 @@ +package log + +import ( + "bufio" + "fmt" + "strconv" + "strings" + "time" +) + +func writeString(w *bufio.Writer, s string) (n int, err error) { + var m int + // TODO: revisit + if strings.ContainsAny(s, " \"") { + m, err = w.WriteString(strconv.Quote(s)) + n += m + if err != nil { + return + } + } else { + m, err = w.WriteString(s) + n += m + if err != nil { + return + } + } + + return +} + +type Field interface { + WriteTo(w *bufio.Writer) (n int64, err error) +} + +// String +type StringField struct { + key []byte + value string +} + +func String(key string, value string) Field { + return StringField{ + key: []byte(key), + value: value, + } +} + +func (s StringField) WriteTo(w *bufio.Writer) (n int64, err error) { + var m int + + m, err = w.Write(s.key) + n += int64(m) + if err != nil { + return + } + + err = w.WriteByte('=') + n += 1 + if err != nil { + return + } + + m, err = writeString(w, s.value) + n += int64(m) + if err != nil { + return + } + + return +} + +// Stringer +type StringerField struct { + key []byte + value fmt.Stringer +} + +func Stringer(key string, value fmt.Stringer) Field { + return StringerField{ + key: []byte(key), + value: value, + } +} + 
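// Editorial sketch (not part of the original change): an illustration of how
// these Field values compose. Each WriteTo emits a single logfmt key=value
// token, and writeString quotes any value containing a space or double quote.
// The function name below is hypothetical; given a caller-owned *bufio.Writer:
func exampleFieldUsage(w *bufio.Writer) {
	String("service", "alerta").WriteTo(w) // service=alerta
	w.WriteByte(' ')
	String("msg", "template failed").WriteTo(w) // msg="template failed"
	w.WriteByte(' ')
	Duration("retry_in", 5*time.Second).WriteTo(w) // retry_in=5s
	w.Flush()
}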
+func (s StringerField) WriteTo(w *bufio.Writer) (n int64, err error) { + var m int + + m, err = w.Write(s.key) + n += int64(m) + if err != nil { + return + } + + err = w.WriteByte('=') + n += 1 + if err != nil { + return + } + + m, err = writeString(w, s.value.String()) + n += int64(m) + if err != nil { + return + } + + return +} + +type GroupedField struct { + key []byte + values []Field +} + +func GroupedFields(key string, fields []Field) Field { + return GroupedField{ + key: []byte(key), + values: fields, + } +} +func (s GroupedField) WriteTo(w *bufio.Writer) (n int64, err error) { + var m int + var k int64 + + for i, value := range s.values { + if i != 0 { + err = w.WriteByte(' ') + n += 1 + if err != nil { + return + } + } + + m, err = w.Write(s.key) + n += int64(m) + if err != nil { + return + } + + err = w.WriteByte('_') + n += 1 + if err != nil { + return + } + + k, err = value.WriteTo(w) + n += k + if err != nil { + return + } + + } + + return +} + +// Strings +type StringsField struct { + key []byte + values []string +} + +func Strings(key string, values []string) Field { + return StringsField{ + key: []byte(key), + values: values, + } +} + +func (s StringsField) WriteTo(w *bufio.Writer) (n int64, err error) { + var m int + + for i, value := range s.values { + if i != 0 { + err = w.WriteByte(' ') + n += 1 + if err != nil { + return + } + } + + m, err = w.Write(s.key) + n += int64(m) + if err != nil { + return + } + + err = w.WriteByte('_') + n += 1 + if err != nil { + return + } + + m, err = w.WriteString(strconv.Itoa(i)) + n += int64(m) + if err != nil { + return + } + + err = w.WriteByte('=') + n += 1 + if err != nil { + return + } + + m, err = writeString(w, value) + n += int64(m) + if err != nil { + return + } + } + + return +} + +// Int +type IntField struct { + key []byte + value int +} + +func Int(key string, value int) Field { + return IntField{ + key: []byte(key), + value: value, + } +} + +func (s IntField) WriteTo(w *bufio.Writer) (n int64, err error) { + var m int + + m, err = w.Write(s.key) + n += int64(m) + if err != nil { + return + } + + err = w.WriteByte('=') + n += 1 + if err != nil { + return + } + + m, err = writeString(w, strconv.Itoa(s.value)) + n += int64(m) + if err != nil { + return + } + + return +} + +// Int64 +type Int64Field struct { + key []byte + value int64 +} + +func Int64(key string, value int64) Field { + return Int64Field{ + key: []byte(key), + value: value, + } +} + +func (s Int64Field) WriteTo(w *bufio.Writer) (n int64, err error) { + var m int + + m, err = w.Write(s.key) + n += int64(m) + if err != nil { + return + } + + err = w.WriteByte('=') + n += 1 + if err != nil { + return + } + + m, err = writeString(w, strconv.FormatInt(s.value, 10)) + n += int64(m) + if err != nil { + return + } + + return +} + +// Float64 +type Float64Field struct { + key []byte + value float64 +} + +func Float64(key string, value float64) Field { + return Float64Field{ + key: []byte(key), + value: value, + } +} + +func (s Float64Field) WriteTo(w *bufio.Writer) (n int64, err error) { + var m int + + m, err = w.Write(s.key) + n += int64(m) + if err != nil { + return + } + + err = w.WriteByte('=') + n += 1 + if err != nil { + return + } + + m, err = writeString(w, strconv.FormatFloat(s.value, 'f', -1, 64)) + n += int64(m) + if err != nil { + return + } + + return +} + +// Bool +type BoolField struct { + key []byte + value bool +} + +func Bool(key string, value bool) Field { + return BoolField{ + key: []byte(key), + value: value, + } +} + +func (s BoolField) WriteTo(w 
*bufio.Writer) (n int64, err error) { + var m int + + m, err = w.Write(s.key) + n += int64(m) + if err != nil { + return + } + + err = w.WriteByte('=') + n += 1 + if err != nil { + return + } + + if s.value { + m, err = w.Write([]byte("true")) + n += int64(m) + if err != nil { + return + } + } else { + m, err = w.Write([]byte("false")) + n += int64(m) + if err != nil { + return + } + } + + return +} + +// Error +type ErrorField struct { + err error +} + +func Error(err error) Field { + return ErrorField{ + err: err, + } +} + +func (s ErrorField) WriteTo(w *bufio.Writer) (n int64, err error) { + var m int + + m, err = w.Write([]byte("err")) + n += int64(m) + if err != nil { + return + } + + err = w.WriteByte('=') + n += 1 + if err != nil { + return + } + + m, err = writeString(w, s.err.Error()) + n += int64(m) + if err != nil { + return + } + + return +} + +// Time +type TimeField struct { + key []byte + value time.Time +} + +func Time(key string, value time.Time) Field { + return TimeField{ + key: []byte(key), + value: value, + } +} + +func (s TimeField) WriteTo(w *bufio.Writer) (n int64, err error) { + var m int + + m, err = w.Write(s.key) + n += int64(m) + if err != nil { + return + } + + err = w.WriteByte('=') + n += 1 + if err != nil { + return + } + + m, err = writeString(w, s.value.Format(time.RFC3339Nano)) + n += int64(m) + if err != nil { + return + } + + return +} + +// Duration +type DurationField struct { + key []byte + value time.Duration +} + +func Duration(key string, value time.Duration) Field { + return DurationField{ + key: []byte(key), + value: value, + } +} + +func (s DurationField) WriteTo(w *bufio.Writer) (n int64, err error) { + var m int + + m, err = w.Write(s.key) + n += int64(m) + if err != nil { + return + } + + err = w.WriteByte('=') + n += 1 + if err != nil { + return + } + + m, err = writeString(w, s.value.String()) + n += int64(m) + if err != nil { + return + } + + return +} diff --git a/services/diagnostic/internal/log/log.go b/services/diagnostic/internal/log/log.go new file mode 100644 index 000000000..ab431ab5f --- /dev/null +++ b/services/diagnostic/internal/log/log.go @@ -0,0 +1,137 @@ +package log + +import ( + "bufio" + "io" + "sync" + "time" +) + +const RFC3339Milli = "2006-01-02T15:04:05.000Z07:00" + +type Level int + +const ( + DebugLevel Level = iota + InfoLevel + WarnLevel + ErrorLevel +) + +func defaultLevelF(lvl Level) bool { + return true +} + +type Logger struct { + mu *sync.Mutex + context []Field + w *bufio.Writer + + levelMu sync.RWMutex + levelF func(lvl Level) bool +} + +func NewLogger(w io.Writer) *Logger { + var mu sync.Mutex + return &Logger{ + mu: &mu, + w: bufio.NewWriter(w), + levelF: defaultLevelF, + } +} + +// LevelF set on parent applies to self and any future children +func (l *Logger) SetLevelF(f func(Level) bool) { + l.levelMu.Lock() + defer l.levelMu.Unlock() + l.levelF = f +} + +func (l *Logger) With(ctx ...Field) *Logger { + l.mu.Lock() + defer l.mu.Unlock() + return &Logger{ + mu: l.mu, + context: append(l.context, ctx...), + w: l.w, + levelF: l.levelF, + } +} + +func (l *Logger) Error(msg string, ctx ...Field) { + l.levelMu.RLock() + logLine := l.levelF(ErrorLevel) + l.levelMu.RUnlock() + if logLine { + l.Log(time.Now(), "error", msg, ctx) + } +} + +func (l *Logger) Debug(msg string, ctx ...Field) { + l.levelMu.RLock() + logLine := l.levelF(DebugLevel) + l.levelMu.RUnlock() + if logLine { + l.Log(time.Now(), "debug", msg, ctx) + } +} + +func (l *Logger) Warn(msg string, ctx ...Field) { + l.levelMu.RLock() + logLine := 
l.levelF(WarnLevel) + l.levelMu.RUnlock() + if logLine { + l.Log(time.Now(), "warn", msg, ctx) + } +} + +func (l *Logger) Info(msg string, ctx ...Field) { + l.levelMu.RLock() + logLine := l.levelF(InfoLevel) + l.levelMu.RUnlock() + if logLine { + l.Log(time.Now(), "info", msg, ctx) + } +} + +// TODO: actually care about errors? +func (l *Logger) Log(now time.Time, level string, msg string, ctx []Field) { + l.mu.Lock() + defer l.mu.Unlock() + + writeTimestamp(l.w, now) + l.w.WriteByte(' ') + writeLevel(l.w, level) + l.w.WriteByte(' ') + writeMessage(l.w, msg) + + for _, f := range l.context { + l.w.WriteByte(' ') + f.WriteTo(l.w) + } + + for _, f := range ctx { + l.w.WriteByte(' ') + f.WriteTo(l.w) + } + + l.w.WriteByte('\n') + + l.w.Flush() +} + +func writeTimestamp(w *bufio.Writer, now time.Time) { + w.Write([]byte("ts=")) + // TODO: UTC? + w.WriteString(now.Format(RFC3339Milli)) +} + +func writeLevel(w *bufio.Writer, lvl string) { + w.Write([]byte("lvl=")) + w.WriteString(lvl) +} + +func writeMessage(w *bufio.Writer, msg string) { + w.Write([]byte("msg=")) + writeString(w, msg) +} diff --git a/services/diagnostic/internal/log/log_test.go b/services/diagnostic/internal/log/log_test.go new file mode 100644 index 000000000..c97fa9b97 --- /dev/null +++ b/services/diagnostic/internal/log/log_test.go @@ -0,0 +1,422 @@ +package log_test + +import ( + "bytes" + "errors" + "fmt" + "strconv" + "testing" + "time" + + "github.com/influxdata/kapacitor/services/diagnostic/internal/log" +) + +var defaultTime = time.Date(2009, time.November, 10, 23, 0, 0, 0, time.UTC) + +type testStringer string + +func (t testStringer) String() string { + return string(t) +} + +func TestLoggerWithoutContext(t *testing.T) { + now := time.Now() + nowStr := now.Format(log.RFC3339Milli) + buf := bytes.NewBuffer(nil) + l := log.NewLogger(buf) + + tests := []struct { + name string + exp string + lvl string + msg string + fields []log.Field + }{ + { + name: "no fields simple message", + exp: fmt.Sprintf("ts=%s lvl=error msg=this\n", nowStr), + lvl: "error", + msg: "this", + }, + { + name: "no fields less simple message", + exp: fmt.Sprintf("ts=%s lvl=error msg=this/is/a/test\n", nowStr), + lvl: "error", + msg: "this/is/a/test", + }, + { + name: "no fields complex message", + exp: fmt.Sprintf("ts=%s lvl=error msg=\"this is \\\" a test/yeah\"\n", nowStr), + lvl: "error", + msg: "this is \" a test/yeah", + }, + { + name: "simple string field", + exp: fmt.Sprintf("ts=%s lvl=error msg=test test=this\n", nowStr), + lvl: "error", + msg: "test", + fields: []log.Field{ + log.String("test", "this"), + }, + }, + { + name: "complex string field", + exp: fmt.Sprintf("ts=%s lvl=error msg=test test=\"this is \\\" a test/yeah\"\n", nowStr), + lvl: "error", + msg: "test", + fields: []log.Field{ + log.String("test", "this is \" a test/yeah"), + }, + }, + { + name: "simple stringer field", + exp: fmt.Sprintf("ts=%s lvl=error msg=test test=this\n", nowStr), + lvl: "error", + msg: "test", + fields: []log.Field{ + log.Stringer("test", testStringer("this")), + }, + }, + { + name: "simple single grouped field", + exp: fmt.Sprintf("ts=%s lvl=error msg=test test_a=this\n", nowStr), + lvl: "error", + msg: "test", + fields: []log.Field{ + log.GroupedFields("test", []log.Field{ + log.String("a", "this"), + }), + }, + }, + { + name: "simple double grouped field", + exp: fmt.Sprintf("ts=%s lvl=error msg=test test_a=this test_b=other\n", nowStr), + lvl: "error", + msg: "test", + fields: []log.Field{ + log.GroupedFields("test", []log.Field{ + log.String("a", 
"this"), + log.String("b", "other"), + }), + }, + }, + { + name: "simple single strings field", + exp: fmt.Sprintf("ts=%s lvl=error msg=test test_0=this\n", nowStr), + lvl: "error", + msg: "test", + fields: []log.Field{ + log.Strings("test", []string{"this"}), + }, + }, + { + name: "simple double strings field", + exp: fmt.Sprintf("ts=%s lvl=error msg=test test_0=this test_1=other\n", nowStr), + lvl: "error", + msg: "test", + fields: []log.Field{ + log.Strings("test", []string{"this", "other"}), + }, + }, + { + name: "int field", + exp: fmt.Sprintf("ts=%s lvl=error msg=test test=10\n", nowStr), + lvl: "error", + msg: "test", + fields: []log.Field{ + log.Int("test", 10), + }, + }, + { + name: "int64 field", + exp: fmt.Sprintf("ts=%s lvl=error msg=test test=10\n", nowStr), + lvl: "error", + msg: "test", + fields: []log.Field{ + log.Int64("test", 10), + }, + }, + { + name: "float64 field", + exp: fmt.Sprintf("ts=%s lvl=error msg=test test=3.1415926535\n", nowStr), + lvl: "error", + msg: "test", + fields: []log.Field{ + log.Float64("test", 3.1415926535), + }, + }, + { + name: "bool true field", + exp: fmt.Sprintf("ts=%s lvl=error msg=test test=true\n", nowStr), + lvl: "error", + msg: "test", + fields: []log.Field{ + log.Bool("test", true), + }, + }, + { + name: "bool false field", + exp: fmt.Sprintf("ts=%s lvl=error msg=test test=false\n", nowStr), + lvl: "error", + msg: "test", + fields: []log.Field{ + log.Bool("test", false), + }, + }, + { + name: "simple error field", + exp: fmt.Sprintf("ts=%s lvl=error msg=test err=this\n", nowStr), + lvl: "error", + msg: "test", + fields: []log.Field{ + log.Error(errors.New("this")), + }, + }, + { + name: "complex error field", + exp: fmt.Sprintf("ts=%s lvl=error msg=test err=\"this is \\\" a test/yeah\"\n", nowStr), + lvl: "error", + msg: "test", + fields: []log.Field{ + log.Error(errors.New("this is \" a test/yeah")), + }, + }, + { + name: "time field", + exp: fmt.Sprintf("ts=%s lvl=error msg=test time=%s\n", nowStr, defaultTime.Format(time.RFC3339Nano)), + lvl: "error", + msg: "test", + fields: []log.Field{ + log.Time("time", defaultTime), + }, + }, + { + name: "duration field", + exp: fmt.Sprintf("ts=%s lvl=error msg=test test=1s\n", nowStr), + lvl: "error", + msg: "test", + fields: []log.Field{ + log.Duration("test", time.Second), + }, + }, + { + name: "two fields", + exp: fmt.Sprintf("ts=%s lvl=error msg=test testing=\"that this\" works=1s\n", nowStr), + lvl: "error", + msg: "test", + fields: []log.Field{ + log.String("testing", "that this"), + log.Duration("works", time.Second), + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + defer buf.Reset() + l.Log(now, test.lvl, test.msg, test.fields) + if exp, got := test.exp, buf.String(); exp != got { + t.Fatalf("bad log line:\nexp: `%v`\ngot: `%v`", strconv.Quote(exp), strconv.Quote(got)) + } + }) + } +} + +func TestLoggerWithContext(t *testing.T) { + now := time.Now() + nowStr := now.Format(log.RFC3339Milli) + buf := bytes.NewBuffer(nil) + l := log.NewLogger(buf).With(log.String("a", "tag"), log.Int("id", 10)) + + tests := []struct { + name string + exp string + lvl string + msg string + fields []log.Field + }{ + { + name: "no fields simple message", + exp: fmt.Sprintf("ts=%s lvl=error msg=this a=tag id=10\n", nowStr), + lvl: "error", + msg: "this", + }, + { + name: "simple double grouped field", + exp: fmt.Sprintf("ts=%s lvl=error msg=test a=tag id=10 test_a=this test_b=other\n", nowStr), + lvl: "error", + msg: "test", + fields: []log.Field{ + 
log.GroupedFields("test", []log.Field{
+					log.String("a", "this"),
+					log.String("b", "other"),
+				}),
+			},
+		},
+		{
+			name: "simple double strings field",
+			exp:  fmt.Sprintf("ts=%s lvl=error msg=test a=tag id=10 test_0=this test_1=other\n", nowStr),
+			lvl:  "error",
+			msg:  "test",
+			fields: []log.Field{
+				log.Strings("test", []string{"this", "other"}),
+			},
+		},
+		{
+			name: "two fields",
+			exp:  fmt.Sprintf("ts=%s lvl=error msg=test a=tag id=10 testing=\"that this\" works=1s\n", nowStr),
+			lvl:  "error",
+			msg:  "test",
+			fields: []log.Field{
+				log.String("testing", "that this"),
+				log.Duration("works", time.Second),
+			},
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			defer buf.Reset()
+			l.Log(now, test.lvl, test.msg, test.fields)
+			if exp, got := test.exp, buf.String(); exp != got {
+				t.Fatalf("bad log line:\nexp: `%v`\ngot: `%v`", strconv.Quote(exp), strconv.Quote(got))
+			}
+		})
+	}
+}
+
+// TODO: is there something better than this?
+func TestLogger_SetLevelF(t *testing.T) {
+	var logLine string
+	buf := bytes.NewBuffer(nil)
+	l := log.NewLogger(buf)
+	msg := "the message"
+
+	l.SetLevelF(func(lvl log.Level) bool {
+		return lvl >= log.DebugLevel
+	})
+	l.Debug(msg)
+	logLine = buf.String()
+	buf.Reset()
+	if logLine == "" {
+		t.Fatal("expected debug log")
+		return
+	}
+	l.Info(msg)
+	logLine = buf.String()
+	buf.Reset()
+	if logLine == "" {
+		t.Fatal("expected info log")
+		return
+	}
+	buf.Reset()
+	l.Warn(msg)
+	logLine = buf.String()
+	if logLine == "" {
+		t.Fatal("expected warn log")
+		return
+	}
+	l.Error(msg)
+	logLine = buf.String()
+	buf.Reset()
+	if logLine == "" {
+		t.Fatal("expected error log")
+		return
+	}
+
+	l.SetLevelF(func(lvl log.Level) bool {
+		return lvl >= log.InfoLevel
+	})
+	l.Debug(msg)
+	logLine = buf.String()
+	buf.Reset()
+	if logLine != "" {
+		t.Fatal("expected no debug log")
+		return
+	}
+	l.Info(msg)
+	logLine = buf.String()
+	buf.Reset()
+	if logLine == "" {
+		t.Fatal("expected info log")
+		return
+	}
+	buf.Reset()
+	l.Warn(msg)
+	logLine = buf.String()
+	if logLine == "" {
+		t.Fatal("expected warn log")
+		return
+	}
+	l.Error(msg)
+	logLine = buf.String()
+	buf.Reset()
+	if logLine == "" {
+		t.Fatal("expected error log")
+		return
+	}
+
+	l.SetLevelF(func(lvl log.Level) bool {
+		return lvl >= log.WarnLevel
+	})
+	l.Debug(msg)
+	logLine = buf.String()
+	buf.Reset()
+	if logLine != "" {
+		t.Fatal("expected no debug log")
+		return
+	}
+	l.Info(msg)
+	logLine = buf.String()
+	buf.Reset()
+	if logLine != "" {
+		t.Fatal("expected no info log")
+		return
+	}
+	buf.Reset()
+	l.Warn(msg)
+	logLine = buf.String()
+	if logLine == "" {
+		t.Fatal("expected warn log")
+		return
+	}
+	l.Error(msg)
+	logLine = buf.String()
+	buf.Reset()
+	if logLine == "" {
+		t.Fatal("expected error log")
+		return
+	}
+
+	l.SetLevelF(func(lvl log.Level) bool {
+		return lvl >= log.ErrorLevel
+	})
+	l.Debug(msg)
+	logLine = buf.String()
+	buf.Reset()
+	if logLine != "" {
+		t.Fatal("expected no debug log")
+		return
+	}
+	l.Info(msg)
+	logLine = buf.String()
+	buf.Reset()
+	if logLine != "" {
+		t.Fatal("expected no info log")
+		return
+	}
+	buf.Reset()
+	l.Warn(msg)
+	logLine = buf.String()
+	if logLine != "" {
+		t.Fatal("expected no warn log")
+		return
+	}
+	l.Error(msg)
+	logLine = buf.String()
+	buf.Reset()
+	if logLine == "" {
+		t.Fatal("expected error log")
+		return
+	}
+}
diff --git a/services/diagnostic/service.go b/services/diagnostic/service.go
new file mode 100644
index 000000000..3e4d9a23c
--- /dev/null
+++ b/services/diagnostic/service.go
@@ -0,0 +1,417
@@ +package diagnostic + +import ( + "bytes" + "errors" + "io" + "os" + "path" + "strings" + "sync" + + klog "github.com/influxdata/kapacitor/services/diagnostic/internal/log" +) + +type nopCloser struct { + f io.Writer +} + +func (c *nopCloser) Write(b []byte) (int, error) { return c.f.Write(b) } +func (c *nopCloser) Close() error { return nil } + +type Service struct { + c Config + + logger *klog.Logger + + f io.WriteCloser + stdout io.Writer + stderr io.Writer + + mu sync.RWMutex + level string +} + +func NewService(c Config, stdout, stderr io.Writer) *Service { + return &Service{ + c: c, + stdout: stdout, + stderr: stderr, + } +} + +func (s *Service) SetLogLevelFromName(lvl string) error { + s.mu.Lock() + defer s.mu.Unlock() + level := strings.ToUpper(lvl) + switch level { + case "INFO", "ERROR", "WARN", "DEBUG": + s.level = level + default: + return errors.New("invalid log level") + } + + return nil +} + +func logLevelFromName(lvl string) klog.Level { + var level klog.Level + switch lvl { + case "INFO": + level = klog.InfoLevel + case "ERROR": + level = klog.ErrorLevel + case "WARN": + level = klog.WarnLevel + case "DEBUG": + level = klog.DebugLevel + } + + return level +} + +func (s *Service) Open() error { + s.mu.Lock() + s.level = s.c.Level + s.mu.Unlock() + + levelF := func(lvl klog.Level) bool { + s.mu.RLock() + defer s.mu.RUnlock() + return lvl >= logLevelFromName(s.level) + } + + switch s.c.File { + case "STDERR": + s.f = &nopCloser{f: s.stderr} + case "STDOUT": + s.f = &nopCloser{f: s.stdout} + default: + dir := path.Dir(s.c.File) + if _, err := os.Stat(dir); os.IsNotExist(err) { + err := os.MkdirAll(dir, 0755) + if err != nil { + return err + } + } + + f, err := os.OpenFile(s.c.File, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0640) + if err != nil { + return err + } + s.f = f + } + + s.logger = klog.NewLogger(s.f) + s.logger.SetLevelF(levelF) + return nil +} + +func (s *Service) Close() error { + if s.f != nil { + return s.f.Close() + } + return nil +} + +func (s *Service) NewVictorOpsHandler() *VictorOpsHandler { + return &VictorOpsHandler{ + l: s.logger.With(klog.String("service", "victorops")), + } +} + +func (s *Service) NewSlackHandler() *SlackHandler { + return &SlackHandler{ + l: s.logger.With(klog.String("service", "slack")), + } +} + +func (s *Service) NewTaskStoreHandler() *TaskStoreHandler { + return &TaskStoreHandler{ + l: s.logger.With(klog.String("service", "task_store")), + } +} + +func (s *Service) NewReportingHandler() *ReportingHandler { + return &ReportingHandler{ + l: s.logger.With(klog.String("service", "reporting")), + } +} + +func (s *Service) NewStorageHandler() *StorageHandler { + return &StorageHandler{ + l: s.logger.With(klog.String("service", "storage")), + } +} + +func (s *Service) NewHTTPDHandler() *HTTPDHandler { + return &HTTPDHandler{ + l: s.logger.With(klog.String("service", "http")), + } +} + +func (s *Service) NewAlertaHandler() *AlertaHandler { + return &AlertaHandler{ + l: s.logger.With(klog.String("service", "alerta")), + } +} + +func (s *Service) NewKapacitorHandler() *KapacitorHandler { + return &KapacitorHandler{ + l: s.logger.With(klog.String("service", "kapacitor")), + } +} + +func (s *Service) NewAlertServiceHandler() *AlertServiceHandler { + return &AlertServiceHandler{ + l: s.logger.With(klog.String("service", "alert")), + } +} + +func (s *Service) NewHipChatHandler() *HipChatHandler { + return &HipChatHandler{ + l: s.logger.With(klog.String("service", "hipchat")), + } +} + +func (s *Service) NewPagerDutyHandler() *PagerDutyHandler { + 
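// Like the other New*Handler constructors in this file, this stamps the
// shared logger with a fixed service key; the returned handler is a
// lightweight view over the same underlying buffered writer.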
return &PagerDutyHandler{ + l: s.logger.With(klog.String("service", "pagerduty")), + } +} + +func (s *Service) NewSMTPHandler() *SMTPHandler { + return &SMTPHandler{ + l: s.logger.With(klog.String("service", "smtp")), + } +} + +func (s *Service) NewUDFServiceHandler() *UDFServiceHandler { + return &UDFServiceHandler{ + l: s.logger.With(klog.String("service", "udf")), + } +} + +func (s *Service) NewOpsGenieHandler() *OpsGenieHandler { + return &OpsGenieHandler{ + l: s.logger.With(klog.String("service", "opsgenie")), + } +} + +func (s *Service) NewPushoverHandler() *PushoverHandler { + return &PushoverHandler{ + l: s.logger.With(klog.String("service", "pushover")), + } +} + +func (s *Service) NewHTTPPostHandler() *HTTPPostHandler { + return &HTTPPostHandler{ + l: s.logger.With(klog.String("service", "httppost")), + } +} + +func (s *Service) NewSensuHandler() *SensuHandler { + return &SensuHandler{ + l: s.logger.With(klog.String("service", "sensu")), + } +} + +func (s *Service) NewSNMPTrapHandler() *SNMPTrapHandler { + return &SNMPTrapHandler{ + l: s.logger.With(klog.String("service", "snmp")), + } +} + +func (s *Service) NewTelegramHandler() *TelegramHandler { + return &TelegramHandler{ + l: s.logger.With(klog.String("service", "telegram")), + } +} + +func (s *Service) NewMQTTHandler() *MQTTHandler { + return &MQTTHandler{ + l: s.logger.With(klog.String("service", "mqtt")), + } +} + +func (s *Service) NewTalkHandler() *TalkHandler { + return &TalkHandler{ + l: s.logger.With(klog.String("service", "talk")), + } +} + +func (s *Service) NewConfigOverrideHandler() *ConfigOverrideHandler { + return &ConfigOverrideHandler{ + l: s.logger.With(klog.String("service", "config-override")), + } +} + +func (s *Service) NewServerHandler() *ServerHandler { + return &ServerHandler{ + l: s.logger.With(klog.String("source", "srv")), + } +} + +func (s *Service) NewReplayHandler() *ReplayHandler { + return &ReplayHandler{ + l: s.logger.With(klog.String("service", "replay")), + } +} + +func (s *Service) NewK8sHandler() *K8sHandler { + return &K8sHandler{ + l: s.logger.With(klog.String("service", "kubernetes")), + } +} + +func (s *Service) NewSwarmHandler() *SwarmHandler { + return &SwarmHandler{ + l: s.logger.With(klog.String("service", "swarm")), + } +} + +func (s *Service) NewDeadmanHandler() *DeadmanHandler { + return &DeadmanHandler{ + l: s.logger.With(klog.String("service", "deadman")), + } +} + +func (s *Service) NewNoAuthHandler() *NoAuthHandler { + return &NoAuthHandler{ + l: s.logger.With(klog.String("service", "noauth")), + } +} + +func (s *Service) NewStatsHandler() *StatsHandler { + return &StatsHandler{ + l: s.logger.With(klog.String("service", "stats")), + } +} + +func (s *Service) NewUDPHandler() *UDPHandler { + return &UDPHandler{ + l: s.logger.With(klog.String("service", "udp")), + } +} + +func (s *Service) NewInfluxDBHandler() *InfluxDBHandler { + return &InfluxDBHandler{ + l: s.logger.With(klog.String("service", "influxdb")), + } +} + +func (s *Service) NewScraperHandler() *ScraperHandler { + return &ScraperHandler{ + l: s.logger.With(klog.String("service", "scraper")), + buf: bytes.NewBuffer(nil), + } +} + +func (s *Service) NewAzureHandler() *ScraperHandler { + return &ScraperHandler{ + l: s.logger.With(klog.String("service", "azure")), + buf: bytes.NewBuffer(nil), + } +} + +func (s *Service) NewConsulHandler() *ScraperHandler { + return &ScraperHandler{ + l: s.logger.With(klog.String("service", "consul")), + buf: bytes.NewBuffer(nil), + } +} + +func (s *Service) NewDNSHandler() 
*ScraperHandler { + return &ScraperHandler{ + l: s.logger.With(klog.String("service", "dns")), + buf: bytes.NewBuffer(nil), + } +} + +func (s *Service) NewEC2Handler() *ScraperHandler { + return &ScraperHandler{ + l: s.logger.With(klog.String("service", "ec2")), + buf: bytes.NewBuffer(nil), + } +} + +func (s *Service) NewFileDiscoveryHandler() *ScraperHandler { + return &ScraperHandler{ + l: s.logger.With(klog.String("service", "file-discovery")), + buf: bytes.NewBuffer(nil), + } +} + +func (s *Service) NewGCEHandler() *ScraperHandler { + return &ScraperHandler{ + l: s.logger.With(klog.String("service", "gce")), + buf: bytes.NewBuffer(nil), + } +} + +func (s *Service) NewMarathonHandler() *ScraperHandler { + return &ScraperHandler{ + l: s.logger.With(klog.String("service", "marathon")), + buf: bytes.NewBuffer(nil), + } +} + +func (s *Service) NewNerveHandler() *ScraperHandler { + return &ScraperHandler{ + l: s.logger.With(klog.String("service", "nerve")), + buf: bytes.NewBuffer(nil), + } +} + +func (s *Service) NewServersetHandler() *ScraperHandler { + return &ScraperHandler{ + l: s.logger.With(klog.String("service", "serverset")), + buf: bytes.NewBuffer(nil), + } +} + +func (s *Service) NewStaticDiscoveryHandler() *ScraperHandler { + return &ScraperHandler{ + l: s.logger.With(klog.String("service", "static-discovery")), + buf: bytes.NewBuffer(nil), + } +} + +func (s *Service) NewTritonHandler() *ScraperHandler { + return &ScraperHandler{ + l: s.logger.With(klog.String("service", "triton")), + buf: bytes.NewBuffer(nil), + } +} + +func (s *Service) NewStaticLevelHandler(level string, service string) (*StaticLevelHandler, error) { + var ll logLevel + + switch level { + case "debug": + ll = llDebug + case "error": + ll = llError + case "info": + ll = llInfo + case "warn": + ll = llWarn + default: + ll = llInvalid + } + + if ll == llInvalid { + return nil, errors.New("invalid log level") + } + + return &StaticLevelHandler{ + l: s.logger.With(klog.String("service", service)), + level: ll, + }, nil +} + +func (s *Service) NewCmdHandler() *CmdHandler { + return &CmdHandler{ + l: s.logger.With(klog.String("service", "run")), + } +} diff --git a/services/dns/service.go b/services/dns/service.go index a5deac9b3..c57314368 100644 --- a/services/dns/service.go +++ b/services/dns/service.go @@ -3,7 +3,6 @@ package dns import ( "context" "fmt" - "log" "sync" "time" @@ -12,6 +11,8 @@ import ( pdns "github.com/prometheus/prometheus/discovery/dns" ) +type Diagnostic scraper.Diagnostic + // Service is the dns discovery service type Service struct { Configs []Config @@ -19,16 +20,17 @@ type Service struct { registry scraper.Registry - logger *log.Logger - open bool + diag Diagnostic + + open bool } // NewService creates a new unopened service -func NewService(c []Config, r scraper.Registry, l *log.Logger) *Service { +func NewService(c []Config, r scraper.Registry, d Diagnostic) *Service { return &Service{ Configs: c, registry: r, - logger: l, + diag: d, } } @@ -126,7 +128,7 @@ func (s *Service) Test(options interface{}) error { } sd := s.Configs[found].PromConfig() - discoverer := pdns.NewDiscovery(sd, scraper.NewLogger(s.logger)) + discoverer := pdns.NewDiscovery(sd, s.diag) ctx, cancel := context.WithCancel(context.Background()) updates := make(chan []*config.TargetGroup) diff --git a/services/ec2/service.go b/services/ec2/service.go index 71edc8df1..238cc75b8 100644 --- a/services/ec2/service.go +++ b/services/ec2/service.go @@ -3,7 +3,6 @@ package ec2 import ( "context" "fmt" - "log" "sync" "time" @@ 
-12,6 +11,8 @@ import ( pec2 "github.com/prometheus/prometheus/discovery/ec2" ) +type Diagnostic scraper.Diagnostic + // Service is the ec2 discovery service type Service struct { Configs []Config @@ -19,16 +20,16 @@ type Service struct { registry scraper.Registry - logger *log.Logger - open bool + diag Diagnostic + open bool } // NewService creates a new unopened service -func NewService(c []Config, r scraper.Registry, l *log.Logger) *Service { +func NewService(c []Config, r scraper.Registry, d Diagnostic) *Service { return &Service{ Configs: c, registry: r, - logger: l, + diag: d, } } @@ -126,7 +127,7 @@ func (s *Service) Test(options interface{}) error { } sd := s.Configs[found].PromConfig() - discoverer := pec2.NewDiscovery(sd, scraper.NewLogger(s.logger)) + discoverer := pec2.NewDiscovery(sd, s.diag) ctx, cancel := context.WithCancel(context.Background()) updates := make(chan []*config.TargetGroup) diff --git a/services/file_discovery/service.go b/services/file_discovery/service.go index d3089840f..9b65a2e28 100644 --- a/services/file_discovery/service.go +++ b/services/file_discovery/service.go @@ -3,7 +3,6 @@ package file_discovery import ( "context" "fmt" - "log" "sync" "time" @@ -12,6 +11,8 @@ import ( pfile "github.com/prometheus/prometheus/discovery/file" ) +type Diagnostic scraper.Diagnostic + // Service is the file discovery service type Service struct { Configs []Config @@ -19,16 +20,16 @@ type Service struct { registry scraper.Registry - logger *log.Logger - open bool + diag Diagnostic + open bool } // NewService creates a new unopened service -func NewService(c []Config, r scraper.Registry, l *log.Logger) *Service { +func NewService(c []Config, r scraper.Registry, d Diagnostic) *Service { return &Service{ Configs: c, registry: r, - logger: l, + diag: d, } } @@ -126,7 +127,7 @@ func (s *Service) Test(options interface{}) error { } sd := s.Configs[found].PromConfig() - discoverer := pfile.NewDiscovery(sd, scraper.NewLogger(s.logger)) + discoverer := pfile.NewDiscovery(sd, s.diag) ctx, cancel := context.WithCancel(context.Background()) updates := make(chan []*config.TargetGroup) diff --git a/services/gce/service.go b/services/gce/service.go index 8ee73ac8f..b9399f634 100644 --- a/services/gce/service.go +++ b/services/gce/service.go @@ -3,7 +3,6 @@ package gce import ( "context" "fmt" - "log" "sync" "time" @@ -12,6 +11,8 @@ import ( pgce "github.com/prometheus/prometheus/discovery/gce" ) +type Diagnostic scraper.Diagnostic + // Service is the gce discovery service type Service struct { Configs []Config @@ -19,16 +20,16 @@ type Service struct { registry scraper.Registry - logger *log.Logger - open bool + diag Diagnostic + open bool } // NewService creates a new unopened service -func NewService(c []Config, r scraper.Registry, l *log.Logger) *Service { +func NewService(c []Config, r scraper.Registry, d Diagnostic) *Service { return &Service{ Configs: c, registry: r, - logger: l, + diag: d, } } @@ -126,7 +127,7 @@ func (s *Service) Test(options interface{}) error { } sd := s.Configs[found].PromConfig() - discoverer, err := pgce.NewDiscovery(sd, scraper.NewLogger(s.logger)) + discoverer, err := pgce.NewDiscovery(sd, s.diag) if err != nil { return err } diff --git a/services/hipchat/service.go b/services/hipchat/service.go index 4ffb925ea..aa7d925a2 100644 --- a/services/hipchat/service.go +++ b/services/hipchat/service.go @@ -7,23 +7,28 @@ import ( "fmt" "io" "io/ioutil" - "log" "net/http" "net/url" "path" "sync/atomic" "github.com/influxdata/kapacitor/alert" + 
"github.com/influxdata/kapacitor/keyvalue" ) +type Diagnostic interface { + WithContext(ctx ...keyvalue.T) Diagnostic + Error(msg string, err error) +} + type Service struct { configValue atomic.Value - logger *log.Logger + diag Diagnostic } -func NewService(c Config, l *log.Logger) *Service { +func NewService(c Config, d Diagnostic) *Service { s := &Service{ - logger: l, + diag: d, } s.configValue.Store(c) return s @@ -174,16 +179,16 @@ type HandlerConfig struct { } type handler struct { - s *Service - c HandlerConfig - logger *log.Logger + s *Service + c HandlerConfig + diag Diagnostic } -func (s *Service) Handler(c HandlerConfig, l *log.Logger) alert.Handler { +func (s *Service) Handler(c HandlerConfig, ctx ...keyvalue.T) alert.Handler { return &handler{ - s: s, - c: c, - logger: l, + s: s, + c: c, + diag: s.diag.WithContext(ctx...), } } @@ -194,6 +199,6 @@ func (h *handler) Handle(event alert.Event) { event.State.Message, event.State.Level, ); err != nil { - h.logger.Println("E! failed to send event to HipChat", err) + h.diag.Error("failed to send event to Alerta", err) } } diff --git a/services/httpd/handler.go b/services/httpd/handler.go index 020745a9c..994dbc024 100644 --- a/services/httpd/handler.go +++ b/services/httpd/handler.go @@ -8,7 +8,6 @@ import ( "fmt" "io" "io/ioutil" - "log" "net/http" "net/http/pprof" "strings" @@ -21,8 +20,6 @@ import ( "github.com/influxdata/influxdb/uuid" "github.com/influxdata/kapacitor/auth" "github.com/influxdata/kapacitor/client/v1" - "github.com/influxdata/kapacitor/services/logging" - "github.com/influxdata/wlog" ) // statistics gathered by the httpd package. @@ -84,16 +81,15 @@ type Handler struct { WritePoints(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error } - // Normal wlog logger - logger *log.Logger + DiagService interface { + SetLogLevelFromName(lvl string) error + } + + diag Diagnostic // Detailed logging of write path // Uses normal logger writeTrace bool - // Common log format logger. - // This logger does not use log levels with wlog. - // Its simply a binary on off from the config. - clfLogger *log.Logger // Log every HTTP access. 
loggingEnabled bool @@ -108,8 +104,7 @@ func NewHandler( writeTrace, allowGzip bool, statMap *expvar.Map, - l *log.Logger, - li logging.Interface, + d Diagnostic, sharedSecret string, ) *Handler { h := &Handler{ @@ -118,9 +113,8 @@ func NewHandler( exposePprof: pprofEnabled, sharedSecret: sharedSecret, allowGzip: allowGzip, - logger: l, + diag: d, writeTrace: writeTrace, - clfLogger: li.NewRawLogger("[httpd] ", 0), loggingEnabled: loggingEnabled, statMap: statMap, } @@ -326,9 +320,9 @@ func (h *Handler) addRawRoute(r Route) error { handler = requestID(handler) if h.loggingEnabled { - handler = logHandler(handler, h.clfLogger) + handler = logHandler(handler, h.diag) } - handler = recovery(handler, h.logger) // make sure recovery is always last + handler = recovery(handler, h.diag) // make sure recovery is always last mux, ok := h.methodMux[r.Method] if !ok { @@ -391,7 +385,7 @@ func (h *Handler) serveLogLevel(w http.ResponseWriter, r *http.Request) { HttpError(w, "invalid json: "+err.Error(), true, http.StatusBadRequest) return } - err = wlog.SetLevelFromName(opt.Level) + err = h.DiagService.SetLogLevelFromName(opt.Level) if err != nil { HttpError(w, err.Error(), true, http.StatusBadRequest) return @@ -453,14 +447,14 @@ func (h *Handler) serveWrite(w http.ResponseWriter, r *http.Request, user auth.U b, err := ioutil.ReadAll(body) if err != nil { if h.writeTrace { - h.logger.Print("E! write handler unable to read bytes from request body") + h.diag.Error("write handler unable to read bytes from request body", err) } h.writeError(w, influxql.Result{Err: err}, http.StatusBadRequest) return } h.statMap.Add(statWriteRequestBytesReceived, int64(len(b))) if h.writeTrace { - h.logger.Printf("D! write body received by handler: %s", string(b)) + h.diag.WriteBodyReceived(string(b)) } h.serveWriteLine(w, r, b, user) @@ -885,24 +879,22 @@ func requestID(inner http.Handler) http.Handler { }) } -func logHandler(inner http.Handler, weblog *log.Logger) http.Handler { +func logHandler(inner http.Handler, d Diagnostic) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { start := time.Now() l := &responseLogger{w: w} inner.ServeHTTP(l, r) - weblog.Println(buildLogLine(l, r, start)) + buildLogLine(d, l, r, start) }) } -func recovery(inner http.Handler, weblog *log.Logger) http.Handler { +func recovery(inner http.Handler, d Diagnostic) http.Handler { return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { start := time.Now() l := &responseLogger{w: w} inner.ServeHTTP(l, r) if err := recover(); err != nil { - logLine := buildLogLine(l, r, start) - logLine = fmt.Sprintf("E!
%s [err:%s]", logLine, err) - weblog.Println(logLine) + buildLogLineError(d, l, r, start, fmt.Sprintf("%v", err)) } }) } diff --git a/services/httpd/httpdtest/server.go b/services/httpd/httpdtest/server.go index fa5373a19..37077a609 100644 --- a/services/httpd/httpdtest/server.go +++ b/services/httpd/httpdtest/server.go @@ -2,11 +2,11 @@ package httpdtest import ( "expvar" - "log" + "io/ioutil" "net/http/httptest" + "github.com/influxdata/kapacitor/services/diagnostic" "github.com/influxdata/kapacitor/services/httpd" - "github.com/influxdata/kapacitor/services/logging/loggingtest" ) type Server struct { @@ -17,7 +17,8 @@ type Server struct { func NewServer(verbose bool) *Server { statMap := &expvar.Map{} statMap.Init() - ls := loggingtest.New() + ds := diagnostic.NewService(diagnostic.NewConfig(), ioutil.Discard, ioutil.Discard) + ds.Open() s := &Server{ Handler: httpd.NewHandler( false, @@ -26,8 +27,7 @@ func NewServer(verbose bool) *Server { verbose, false, statMap, - ls.NewLogger("[httpdtest] ", log.LstdFlags), - ls, + ds.NewHTTPDHandler(), "", ), } diff --git a/services/httpd/logger.go b/services/httpd/logger.go index 266462904..49a4f7126 100644 --- a/services/httpd/logger.go +++ b/services/httpd/logger.go @@ -1,10 +1,8 @@ package httpd import ( - "fmt" "net" "net/http" - "strconv" "strings" "time" ) @@ -74,7 +72,7 @@ func redactPassword(r *http.Request) { // %h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"" %L %D // // Common Log Format: http://en.wikipedia.org/wiki/Common_Log_Format -func buildLogLine(l *responseLogger, r *http.Request, start time.Time) string { +func buildLogLine(d Diagnostic, l *responseLogger, r *http.Request, start time.Time) { redactPassword(r) @@ -92,25 +90,55 @@ func buildLogLine(l *responseLogger, r *http.Request, start time.Time) string { userAgent := r.UserAgent() - fields := []string{ + d.HTTP( host, - "-", detect(username, "-"), - fmt.Sprintf("[%s]", start.Format("02/Jan/2006:15:04:05 -0700")), - `"` + r.Method, + start, + r.Method, uri, - r.Proto + `"`, - detect(strconv.Itoa(l.Status()), "-"), - strconv.Itoa(l.Size()), - `"` + detect(referer, "-") + `"`, - `"` + detect(userAgent, "-") + `"`, + r.Proto, + l.Status(), + detect(referer, "-"), + detect(userAgent, "-"), r.Header.Get("Request-Id"), - // response time, report in microseconds because this is consistent - // with apache's %D parameter in mod_log_config - strconv.FormatInt(time.Since(start).Nanoseconds()/1000, 10), + time.Since(start), + ) + +} + +func buildLogLineError(d Diagnostic, l *responseLogger, r *http.Request, start time.Time, e string) { + + redactPassword(r) + + username := parseUsername(r) + + host, _, err := net.SplitHostPort(r.RemoteAddr) + + if err != nil { + host = r.RemoteAddr } - return strings.Join(fields, " ") + uri := r.URL.RequestURI() + + referer := r.Referer() + + userAgent := r.UserAgent() + + d.RecoveryError( + "encountered error", + e, + host, + detect(username, "-"), + start, + r.Method, + uri, + r.Proto, + l.Status(), + detect(referer, "-"), + detect(userAgent, "-"), + r.Header.Get("Request-Id"), + time.Since(start), + ) } // detect detects the first presence of a non blank string and returns it diff --git a/services/httpd/service.go b/services/httpd/service.go index 1eb957e58..d9deb54a1 100644 --- a/services/httpd/service.go +++ b/services/httpd/service.go @@ -11,10 +11,52 @@ import ( "strings" "sync" "time" - - "github.com/influxdata/kapacitor/services/logging" ) +type Diagnostic interface { + NewHTTPServerErrorLogger() *log.Logger + + StartingService() + 
StoppedService() + ShutdownTimeout() + AuthenticationEnabled(enabled bool) + + ListeningOn(addr string, proto string) + + WriteBodyReceived(body string) + + HTTP( + host string, + username string, + start time.Time, + method string, + uri string, + proto string, + status int, + referer string, + userAgent string, + reqID string, + duration time.Duration, + ) + + Error(msg string, err error) + RecoveryError( + msg string, + err string, + host string, + username string, + start time.Time, + method string, + uri string, + proto string, + status int, + referer string, + userAgent string, + reqID string, + duration time.Duration, + ) +} + type Service struct { ln net.Listener addr string @@ -37,11 +79,11 @@ type Service struct { Handler *Handler - logger *log.Logger - httpServerLogger *log.Logger + diag Diagnostic + httpServerErrorLogger *log.Logger } -func NewService(c Config, hostname string, l *log.Logger, li logging.Interface) *Service { +func NewService(c Config, hostname string, d Diagnostic) *Service { statMap := &expvar.Map{} statMap.Init() port, _ := c.Port() @@ -66,12 +108,11 @@ func NewService(c Config, hostname string, l *log.Logger, li logging.Interface) c.WriteTracing, c.GZIP, statMap, - l, - li, + d, c.SharedSecret, ), - logger: l, - httpServerLogger: li.NewStaticLevelLogger("[httpd]", log.LstdFlags, logging.ERROR), + diag: d, + httpServerErrorLogger: d.NewHTTPServerErrorLogger(), } return s } @@ -80,8 +121,8 @@ func NewService(c Config, hostname string, l *log.Logger, li logging.Interface) func (s *Service) Open() error { s.mu.Lock() defer s.mu.Unlock() - s.logger.Println("I! Starting HTTP service") - s.logger.Println("I! Authentication enabled:", s.Handler.requireAuthentication) + s.diag.StartingService() + s.diag.AuthenticationEnabled(s.Handler.requireAuthentication) // Open listener. if s.https { @@ -97,7 +138,7 @@ func (s *Service) Open() error { return err } - s.logger.Println("I! Listening on HTTPS:", listener.Addr().String()) + s.diag.ListeningOn(listener.Addr().String(), "https") s.ln = listener } else { listener, err := net.Listen("tcp", s.addr) @@ -105,7 +146,7 @@ func (s *Service) Open() error { return err } - s.logger.Println("I! Listening on HTTP:", listener.Addr().String()) + s.diag.ListeningOn(listener.Addr().String(), "http") s.ln = listener } @@ -113,7 +154,7 @@ func (s *Service) Open() error { s.server = &http.Server{ Handler: s.Handler, ConnState: s.connStateHandler, - ErrorLog: s.httpServerLogger, + ErrorLog: s.httpServerErrorLogger, } s.new = make(chan net.Conn) @@ -132,7 +173,7 @@ func (s *Service) Open() error { // Close closes the underlying listener. func (s *Service) Close() error { - defer s.logger.Println("I! Closed HTTP service") + defer s.diag.StoppedService() s.mu.Lock() defer s.mu.Unlock() // If server is not set we were never started @@ -228,7 +269,7 @@ func (s *Service) manage() { // continue the loop and wait for all the ConnState updates which will // eventually close(stopDone) and return from this goroutine. case <-timeout: - s.logger.Println("E! shutdown timedout, forcefully closing all remaining connections") + s.diag.ShutdownTimeout() // Connections didn't close in time. // Forcefully close all connections. 
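The `HTTP` method in the interface above replaces the old Common Log Format string that `buildLogLine` assembled by hand: the handler now passes the raw request attributes and the diagnostic decides how to render them. A sketch of one possible rendering (the logfmt-style field names are illustrative, not the exact output of `services/diagnostic`):

```go
package main

import (
	"fmt"
	"os"
	"time"
)

// httpDiag sketches only the access-log half of the httpd Diagnostic
// interface; the output format here is illustrative.
type httpDiag struct{}

// HTTP emits one structured line per request, carrying the same fields the
// old Common Log Format line encoded positionally.
func (httpDiag) HTTP(
	host, username string,
	start time.Time,
	method, uri, proto string,
	status int,
	referer, userAgent, reqID string,
	duration time.Duration,
) {
	fmt.Fprintf(os.Stdout,
		"ts=%s lvl=info msg=http-request host=%s user=%s method=%s uri=%q proto=%s status=%d referer=%q agent=%q request_id=%s duration=%s\n",
		start.Format(time.RFC3339), host, username, method, uri, proto,
		status, referer, userAgent, reqID, duration)
}

func main() {
	d := httpDiag{}
	start := time.Now().Add(-42 * time.Millisecond)
	d.HTTP("127.0.0.1", "-", start, "GET", "/kapacitor/v1/ping", "HTTP/1.1",
		204, "-", "curl/7.54.0", "a1b2c3", time.Since(start))
}
```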
for c := range conns { diff --git a/services/httppost/service.go b/services/httppost/service.go index 8a14088f0..2b6f89705 100644 --- a/services/httppost/service.go +++ b/services/httppost/service.go @@ -6,14 +6,19 @@ import ( "errors" "fmt" "io" - "log" "net/http" "sync" "github.com/influxdata/kapacitor/alert" "github.com/influxdata/kapacitor/bufpool" + "github.com/influxdata/kapacitor/keyvalue" ) +type Diagnostic interface { + WithContext(ctx ...keyvalue.T) Diagnostic + Error(msg string, err error) +} + // Only one of name and url should be non-empty type Endpoint struct { mu sync.RWMutex @@ -71,12 +76,12 @@ func (e *Endpoint) NewHTTPRequest(body io.Reader) (req *http.Request, err error) type Service struct { mu sync.RWMutex endpoints map[string]*Endpoint - logger *log.Logger + diag Diagnostic } -func NewService(c Configs, l *log.Logger) *Service { +func NewService(c Configs, d Diagnostic) *Service { s := &Service{ - logger: l, + diag: d, endpoints: c.index(), } return s @@ -191,11 +196,11 @@ type handler struct { s *Service bp *bufpool.Pool endpoint *Endpoint - logger *log.Logger + diag Diagnostic headers map[string]string } -func (s *Service) Handler(c HandlerConfig, l *log.Logger) alert.Handler { +func (s *Service) Handler(c HandlerConfig, ctx ...keyvalue.T) alert.Handler { e, ok := s.Endpoint(c.Endpoint) if !ok { e = NewEndpoint(c.URL, nil, BasicAuth{}) @@ -205,7 +210,7 @@ func (s *Service) Handler(c HandlerConfig, l *log.Logger) alert.Handler { s: s, bp: bufpool.New(), endpoint: e, - logger: l, + diag: s.diag.WithContext(ctx...), headers: c.Headers, } } @@ -233,13 +238,13 @@ func (h *handler) Handle(event alert.Event) { err = json.NewEncoder(body).Encode(ad) if err != nil { - h.logger.Printf("E! failed to marshal alert data json: %v", err) + h.diag.Error("failed to marshal alert data json", err) return } req, err := h.NewHTTPRequest(body) if err != nil { - h.logger.Printf("E! fail to create HTTP request: %v", err) + h.diag.Error("failed to create HTTP request", err) return } @@ -247,7 +252,7 @@ func (h *handler) Handle(event alert.Event) { req.Header.Set("Content-Type", "application/json") resp, err := http.DefaultClient.Do(req) if err != nil { - h.logger.Printf("E! 
failed to POST alert data: %v", err) + h.diag.Error("failed to POST alert data", err) return } resp.Body.Close() diff --git a/services/influxdb/service.go b/services/influxdb/service.go index 9a01422fc..7388c9ef4 100644 --- a/services/influxdb/service.go +++ b/services/influxdb/service.go @@ -10,7 +10,6 @@ import ( "fmt" "io" "io/ioutil" - "log" "net" "net/http" "net/url" @@ -23,6 +22,7 @@ import ( "github.com/influxdata/influxdb/influxql" "github.com/influxdata/influxdb/models" "github.com/influxdata/kapacitor/influxdb" + "github.com/influxdata/kapacitor/keyvalue" "github.com/influxdata/kapacitor/server/vars" "github.com/influxdata/kapacitor/services/httpd" "github.com/influxdata/kapacitor/services/udp" @@ -52,6 +52,17 @@ type IDer interface { ServerID() uuid.UUID } +type Diagnostic interface { + WithClusterContext(cluster string) Diagnostic + WithUDPContext(db string, rp string) udp.Diagnostic + Error(msg string, err error, ctx ...keyvalue.T) + + InsecureSkipVerify(urls []string) + UnlinkingSubscriptions(cluster string) + LinkingSubscriptions(cluster string) + StartedUDPListener(db string, rp string) +} + // Handles requests to write or read from an InfluxDB cluster type Service struct { wg sync.WaitGroup @@ -76,9 +87,6 @@ type Service struct { PointsWriter interface { WritePoints(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error } - LogService interface { - NewLogger(string, int) *log.Logger - } HTTPDService interface { AddRoutes([]httpd.Route) error DelRoutes([]httpd.Route) @@ -92,17 +100,17 @@ type Service struct { RevokeSubscriptionAccess(token string) error } RandReader io.Reader - logger *log.Logger + diag Diagnostic } -func NewService(configs []Config, httpPort int, hostname string, ider IDer, useTokens bool, l *log.Logger) (*Service, error) { +func NewService(configs []Config, httpPort int, hostname string, ider IDer, useTokens bool, d Diagnostic) (*Service, error) { s := &Service{ clusters: make(map[string]*influxdbCluster), hostname: hostname, ider: ider, httpPort: httpPort, useTokens: useTokens, - logger: l, + diag: d, RandReader: rand.Reader, } if err := s.updateConfigs(configs); err != nil { @@ -225,7 +233,7 @@ func (s *Service) updateConfigs(configs []Config) error { } } else { var err error - cluster, err = newInfluxDBCluster(c, s.hostname, s.ider, s.httpPort, s.useTokens, s.logger) + cluster, err = newInfluxDBCluster(c, s.hostname, s.ider, s.httpPort, s.useTokens, s.diag.WithClusterContext(c.Name)) if err != nil { return err } @@ -270,10 +278,10 @@ func (s *Service) updateConfigs(configs []Config) error { // Unlink/Close/Delete all removed clusters for name, cluster := range removedClusters { if err := cluster.UnlinkSubscriptions(); err != nil { - s.logger.Printf("E! failed to unlink subscriptions for cluster %s: %s", name, err) + s.diag.Error("failed to unlink subscriptions for cluster", err, keyvalue.KV("cluster", name)) } if err := cluster.Close(); err != nil { - s.logger.Printf("E! 
failed to close cluster %s: %s", name, err) + s.diag.Error("failed to close cluster", err, keyvalue.KV("cluster", name)) } delete(s.clusters, name) } @@ -282,7 +290,6 @@ func (s *Service) updateConfigs(configs []Config) error { func (s *Service) assignServiceToCluster(cluster *influxdbCluster) { cluster.PointsWriter = s.PointsWriter - cluster.LogService = s.LogService cluster.AuthService = s.AuthService cluster.ClientCreator = s.ClientCreator cluster.randReader = s.RandReader @@ -385,7 +392,7 @@ type influxdbCluster struct { exConfigSubs map[subEntry]bool hostname string httpPort int - logger *log.Logger + diag Diagnostic protocol string udpBind string udpBuffer int @@ -410,9 +417,6 @@ type influxdbCluster struct { PointsWriter interface { WritePoints(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error } - LogService interface { - NewLogger(string, int) *log.Logger - } ClientCreator interface { Create(influxdb.Config) (influxdb.ClientUpdater, error) } @@ -445,9 +449,9 @@ type subInfo struct { Destinations []string } -func newInfluxDBCluster(c Config, hostname string, ider IDer, httpPort int, useTokens bool, l *log.Logger) (*influxdbCluster, error) { +func newInfluxDBCluster(c Config, hostname string, ider IDer, httpPort int, useTokens bool, d Diagnostic) (*influxdbCluster, error) { if c.InsecureSkipVerify { - l.Printf("W! Using InsecureSkipVerify when connecting to InfluxDB @ %v this is insecure!", c.URLs) + d.InsecureSkipVerify(c.URLs) } config, err := httpConfig(c) if err != nil { @@ -474,7 +478,6 @@ func newInfluxDBCluster(c Config, hostname string, ider IDer, httpPort int, useT exConfigSubs: exSubs, hostname: host, httpPort: port, - logger: l, udpBind: c.UDPBind, udpBuffer: c.UDPBuffer, udpReadBuffer: c.UDPReadBuffer, @@ -489,6 +492,7 @@ func newInfluxDBCluster(c Config, hostname string, ider IDer, httpPort int, useT services: make(map[subEntry]openCloser, len(c.Subscriptions)), // Do not use tokens for non http protocols useTokens: useTokens && (c.SubscriptionProtocol == "http" || c.SubscriptionProtocol == "https"), + diag: d, }, nil } @@ -612,7 +616,7 @@ func (c *influxdbCluster) Update(conf Config) error { c.subscriptionMode = conf.SubscriptionMode if conf.InsecureSkipVerify { - c.logger.Printf("W! Using InsecureSkipVerify when connecting to InfluxDB @ %v this is insecure!", conf.URLs) + c.diag.InsecureSkipVerify(conf.URLs) } if conf.HTTPPort != 0 { c.httpPort = conf.HTTPPort @@ -700,7 +704,7 @@ func (c *influxdbCluster) Update(conf Config) error { err := c.linkSubscriptions(ctx, newSubName) if err != nil { - c.logger.Printf("E! failed to link subscriptions for cluster %s: %v", c.clusterName, err) + c.diag.Error("failed to link subscriptions for cluster", err, keyvalue.KV("cluster", c.clusterName)) } }() } @@ -747,7 +751,7 @@ func (c *influxdbCluster) validateClientWithBackoff(ctx context.Context) error { } _, _, err := c.client.Ping(ctx) if err != nil { - c.logger.Println("D! failed to connect to InfluxDB, retrying... ", err) + c.diag.Error("failed to connect to InfluxDB, retrying...", err) continue } return nil @@ -764,7 +768,7 @@ func (c *influxdbCluster) UnlinkSubscriptions() error { // unlinkSubscriptions, you must have the lock to call this function. func (c *influxdbCluster) unlinkSubscriptions(subName string) error { - c.logger.Println("D!
unlinking subscriptions for cluster", c.clusterName) + c.diag.UnlinkingSubscriptions(c.clusterName) // Get all existing subscriptions resp, err := c.execQuery(&influxql.ShowSubscriptionsStatement{}) if err != nil { @@ -835,7 +839,7 @@ func (c *influxdbCluster) linkSubscriptions(ctx context.Context, subName string) return nil } - c.logger.Println("D! linking subscriptions for cluster", c.clusterName) + c.diag.LinkingSubscriptions(c.clusterName) err := c.validateClientWithBackoff(ctx) if err != nil { return err @@ -954,19 +958,19 @@ func (c *influxdbCluster) linkSubscriptions(ctx context.Context, subName string) for _, dest := range si.Destinations { u, err := url.Parse(dest) if err != nil { - c.logger.Println("E! invalid URL in subscription destinations:", err) + c.diag.Error("invalid URL in subscription destinations", err) continue } host, port, err := net.SplitHostPort(u.Host) if err != nil { - c.logger.Println("E! invalid host in subscription:", err) + c.diag.Error("invalid host in subscription", err) continue } if host == c.hostname { if u.Scheme == "udp" { _, err := c.startUDPListener(se, port) if err != nil { - c.logger.Println("E! failed to start UDP listener:", err) + c.diag.Error("failed to start UDP listener", err) } } c.runningSubs[se] = true @@ -1015,7 +1019,7 @@ func (c *influxdbCluster) linkSubscriptions(ctx context.Context, subName string) case "udp": addr, err := c.startUDPListener(se, "0") if err != nil { - c.logger.Println("E! failed to start UDP listener:", err) + c.diag.Error("failed to start UDP listener", err) } destination = fmt.Sprintf("udp://%s:%d", c.hostname, addr.Port) } @@ -1075,7 +1079,10 @@ func (c *influxdbCluster) linkSubscriptions(ctx context.Context, subName string) if _, exists := existingSubs[se]; !exists { err := c.closeSub(se) if err != nil { - c.logger.Printf("E! failed to close service for %v: %s", se, err) + c.diag.Error("failed to close service", err, + keyvalue.KV("dbrp", fmt.Sprintf("%s.%s", se.db, se.rp)), + keyvalue.KV("name", se.name), + ) } } } @@ -1225,15 +1232,15 @@ func (c *influxdbCluster) startUDPListener(se subEntry, port string) (*net.UDPAd conf.Buffer = c.udpBuffer conf.ReadBuffer = c.udpReadBuffer - l := c.LogService.NewLogger(fmt.Sprintf("[udp:%s.%s] ", se.db, se.rp), log.LstdFlags) - service := udp.NewService(conf, l) + d := c.diag.WithUDPContext(se.db, se.rp) + service := udp.NewService(conf, d) service.PointsWriter = c.PointsWriter err := service.Open() if err != nil { return nil, err } c.services[se] = service - c.logger.Println("I! 
started UDP listener for", se.db, se.rp) + c.diag.StartedUDPListener(se.db, se.rp) return service.Addr(), nil } diff --git a/services/influxdb/service_test.go b/services/influxdb/service_test.go index c881a2647..4b9d78de9 100644 --- a/services/influxdb/service_test.go +++ b/services/influxdb/service_test.go @@ -5,6 +5,7 @@ import ( "encoding/base64" "errors" "fmt" + "io/ioutil" "log" "net/url" "os" @@ -15,13 +16,12 @@ import ( "github.com/influxdata/influxdb/influxql" "github.com/influxdata/influxdb/models" influxcli "github.com/influxdata/kapacitor/influxdb" + "github.com/influxdata/kapacitor/services/diagnostic" "github.com/influxdata/kapacitor/services/httpd" "github.com/influxdata/kapacitor/services/influxdb" "github.com/influxdata/kapacitor/uuid" ) -var ls = logSerivce{} - const ( randomTokenData = "test random data that is 64 bytes long xxxxxxxxxxxxxxxxxxxxxxxxx" testClusterName = "testcluster0" @@ -38,6 +38,13 @@ var ( testSubName = "kapacitor-" + testKapacitorClusterID.String() ) +var diagService *diagnostic.Service + +func init() { + diagService = diagnostic.NewService(diagnostic.NewConfig(), ioutil.Discard, ioutil.Discard) + diagService.Open() +} + func init() { if len(randomTokenData) != tokenSize { panic(fmt.Sprintf("invalid randomTokenData: got %d exp %d", len(randomTokenData), tokenSize)) @@ -1157,17 +1164,16 @@ func NewDefaultTestConfigs(clusters []string) []influxdb.Config { func NewTestService(configs []influxdb.Config, hostname string, useTokens bool) (*influxdb.Service, *authService, *clientCreator) { httpPort := 9092 - l := ls.NewLogger("[test-influxdb] ", log.LstdFlags) + d := diagService.NewInfluxDBHandler() s, err := influxdb.NewService( configs, httpPort, hostname, ider{clusterID: testKapacitorClusterID, serverID: uuid.New()}, - useTokens, l) + useTokens, d) if err != nil { panic(err) } - s.LogService = ls s.HTTPDService = httpdService{} as := &authService{} s.AuthService = as diff --git a/services/k8s/k8s.go b/services/k8s/k8s.go index 01ef73132..9c6a808ae 100644 --- a/services/k8s/k8s.go +++ b/services/k8s/k8s.go @@ -1,7 +1,6 @@ package k8s import ( - "log" "sync/atomic" "github.com/influxdata/kapacitor/services/k8s/client" @@ -11,10 +10,10 @@ import ( type Cluster struct { configValue atomic.Value // Config client client.Client - logger *log.Logger + diag Diagnostic } -func NewCluster(c Config, l *log.Logger) (*Cluster, error) { +func NewCluster(c Config, d Diagnostic) (*Cluster, error) { clientConfig, err := c.ClientConfig() if err != nil { return nil, errors.Wrap(err, "failed to create k8s client config") @@ -26,7 +25,7 @@ func NewCluster(c Config, l *log.Logger) (*Cluster, error) { s := &Cluster{ client: cli, - logger: l, + diag: d, } s.configValue.Store(c) return s, nil } diff --git a/services/k8s/service.go b/services/k8s/service.go index 805ac6b34..b050c1523 100644 --- a/services/k8s/service.go +++ b/services/k8s/service.go @@ -2,27 +2,31 @@ package k8s import ( "fmt" - "log" "sync" "github.com/influxdata/kapacitor/services/k8s/client" "github.com/influxdata/kapacitor/services/scraper" ) +// Doesn't actually get used yet, but it's good to have a template here already +type Diagnostic interface { + WithClusterContext(cluster string) Diagnostic +} + // Service is the kubernetes discovery and autoscale service type Service struct { mu sync.Mutex configs []Config clusters map[string]*Cluster registry scraper.Registry - logger *log.Logger + diag Diagnostic } // NewService creates a new unopened k8s service -func NewService(c []Config, r scraper.Registry, l *log.Logger) 
(*Service, error) { +func NewService(c []Config, r scraper.Registry, d Diagnostic) (*Service, error) { clusters := make(map[string]*Cluster, len(c)) for i := range c { - cluster, err := NewCluster(c[i], l) + cluster, err := NewCluster(c[i], d.WithClusterContext(c[i].ID)) if err != nil { return nil, err } @@ -32,7 +36,7 @@ func NewService(c []Config, r scraper.Registry, l *log.Logger) (*Service, error) return &Service{ clusters: clusters, configs: c, - logger: l, + diag: d, registry: r, }, nil } @@ -90,7 +94,7 @@ func (s *Service) Update(newConfigs []interface{}) error { cluster, ok := s.clusters[c.ID] if !ok { var err error - cluster, err = NewCluster(c, s.logger) + cluster, err = NewCluster(c, s.diag.WithClusterContext(c.ID)) if err != nil { return err } diff --git a/services/logging/loggingtest/logging.go b/services/logging/loggingtest/logging.go deleted file mode 100644 index 9ba6f568b..000000000 --- a/services/logging/loggingtest/logging.go +++ /dev/null @@ -1,42 +0,0 @@ -package loggingtest - -import ( - "io" - "log" - "os" - - "github.com/influxdata/kapacitor/services/logging" - "github.com/influxdata/wlog" -) - -func init() { - wlog.SetLevel(wlog.DEBUG) -} - -type TestLogService struct { - prefix string -} - -func New() TestLogService { - return NewWithPrefix("") -} -func NewWithPrefix(prefix string) TestLogService { - return TestLogService{ - prefix: prefix, - } -} - -func (l TestLogService) NewLogger(prefix string, flag int) *log.Logger { - return wlog.New(os.Stderr, l.prefix+prefix, flag) -} -func (l TestLogService) NewRawLogger(prefix string, flag int) *log.Logger { - return log.New(os.Stderr, l.prefix+prefix, flag) -} - -func (l TestLogService) NewStaticLevelLogger(prefix string, flag int, level logging.Level) *log.Logger { - return log.New(wlog.NewStaticLevelWriter(os.Stderr, wlog.Level(level)), l.prefix+prefix, flag) -} - -func (l TestLogService) NewStaticLevelWriter(level logging.Level) io.Writer { - return wlog.NewStaticLevelWriter(os.Stderr, wlog.Level(level)) -} diff --git a/services/logging/service.go b/services/logging/service.go deleted file mode 100644 index 2850593cf..000000000 --- a/services/logging/service.go +++ /dev/null @@ -1,105 +0,0 @@ -package logging - -import ( - "io" - "log" - "os" - "path" - - "github.com/influxdata/wlog" -) - -type Level wlog.Level - -const ( - _ Level = iota - DEBUG - INFO - WARN - ERROR - OFF -) - -// Interface for creating new loggers -type Interface interface { - NewLogger(prefix string, flag int) *log.Logger - NewRawLogger(prefix string, flag int) *log.Logger - NewStaticLevelLogger(prefix string, flag int, l Level) *log.Logger - NewStaticLevelWriter(l Level) io.Writer -} - -type Service struct { - f io.WriteCloser - c Config - stdout io.Writer - stderr io.Writer -} - -func NewService(c Config, stdout, stderr io.Writer) *Service { - return &Service{ - c: c, - stdout: stdout, - stderr: stderr, - } -} - -func (s *Service) Open() error { - switch s.c.File { - case "STDERR": - s.f = &nopCloser{f: s.stderr} - case "STDOUT": - s.f = &nopCloser{f: s.stdout} - default: - dir := path.Dir(s.c.File) - if _, err := os.Stat(dir); os.IsNotExist(err) { - err := os.MkdirAll(dir, 0755) - if err != nil { - return err - } - } - - f, err := os.OpenFile(s.c.File, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0640) - if err != nil { - return err - } - s.f = f - } - - // Configure default logger - log.SetPrefix("[log] ") - log.SetFlags(log.LstdFlags) - log.SetOutput(wlog.NewWriter(s.f)) - - wlog.SetLevelFromName(s.c.Level) - return nil -} - -func (s *Service) 
Close() error { - if s.f != nil { - return s.f.Close() - } - return nil -} - -func (s *Service) NewLogger(prefix string, flag int) *log.Logger { - return wlog.New(s.f, prefix, flag) -} - -func (s *Service) NewRawLogger(prefix string, flag int) *log.Logger { - return log.New(s.f, prefix, flag) -} - -func (s *Service) NewStaticLevelLogger(prefix string, flag int, l Level) *log.Logger { - return log.New(wlog.NewStaticLevelWriter(s.f, wlog.Level(l)), prefix, flag) -} - -func (s *Service) NewStaticLevelWriter(l Level) io.Writer { - return wlog.NewStaticLevelWriter(s.f, wlog.Level(l)) -} - -type nopCloser struct { - f io.Writer -} - -func (c *nopCloser) Write(b []byte) (int, error) { return c.f.Write(b) } -func (c *nopCloser) Close() error { return nil } diff --git a/services/marathon/service.go b/services/marathon/service.go index fa048385c..a5f1e88f0 100644 --- a/services/marathon/service.go +++ b/services/marathon/service.go @@ -3,7 +3,6 @@ package marathon import ( "context" "fmt" - "log" "sync" "time" @@ -12,6 +11,8 @@ import ( pmarathon "github.com/prometheus/prometheus/discovery/marathon" ) +type Diagnostic scraper.Diagnostic + // Service is the marathon discovery service type Service struct { Configs []Config @@ -19,16 +20,16 @@ type Service struct { registry scraper.Registry - logger *log.Logger - open bool + diag Diagnostic + open bool } // NewService creates a new unopened service -func NewService(c []Config, r scraper.Registry, l *log.Logger) *Service { +func NewService(c []Config, r scraper.Registry, d Diagnostic) *Service { return &Service{ Configs: c, registry: r, - logger: l, + diag: d, } } @@ -126,7 +127,7 @@ func (s *Service) Test(options interface{}) error { } sd := s.Configs[found].PromConfig() - discoverer, err := pmarathon.NewDiscovery(sd, scraper.NewLogger(s.logger)) + discoverer, err := pmarathon.NewDiscovery(sd, s.diag) if err != nil { return err } diff --git a/services/mqtt/service.go b/services/mqtt/service.go index fec01cf6e..c14eb7225 100644 --- a/services/mqtt/service.go +++ b/services/mqtt/service.go @@ -6,9 +6,17 @@ import ( "sync" "github.com/influxdata/kapacitor/alert" + "github.com/influxdata/kapacitor/keyvalue" "github.com/pkg/errors" ) +type Diagnostic interface { + WithContext(ctx ...keyvalue.T) Diagnostic + Error(msg string, err error) + CreatingAlertHandler(c HandlerConfig) + HandlingEvent() +} + // QoSLevel indicates the quality of service for messages delivered to a // broker. type QoSLevel byte @@ -54,7 +62,7 @@ const ( ) type Service struct { - logger *log.Logger + diag Diagnostic mu sync.RWMutex clients map[string]Client @@ -63,7 +71,7 @@ type Service struct { defaultBrokerName string } -func NewService(cs Configs, l *log.Logger) (*Service, error) { +func NewService(cs Configs, d Diagnostic) (*Service, error) { configs := cs.index() clients := make(map[string]Client, len(cs)) @@ -85,7 +93,7 @@ func NewService(cs Configs, l *log.Logger) (*Service, error) { } return &Service{ - logger: l, + diag: d, configs: configs, clients: clients, defaultBrokerName: defaultBrokerName, @@ -200,12 +208,13 @@ func (s *Service) update(cs Configs) error { return nil } -func (s *Service) Handler(c HandlerConfig, l *log.Logger) alert.Handler { - s.logger.Println("D! create Handler", c) +func (s *Service) Handler(c HandlerConfig, ctx ...keyvalue.T) alert.Handler { + d := s.diag.WithContext(ctx...) 
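This hunk also shows the shift from free-form debug prints (`"D! create Handler"`, `"D! HANDLE"`) to named diagnostic events with typed payloads, which keeps the call sites terse and the formatting in one place. Roughly, assuming an illustrative `mqttDiag` and a trimmed-down `HandlerConfig`:

```go
package main

import "fmt"

// HandlerConfig mirrors a subset of the MQTT handler options (illustrative).
type HandlerConfig struct {
	BrokerName string
	Topic      string
}

// mqttDiag is a stand-in for the diagnostic the MQTT service receives; the
// method set matches the Diagnostic interface declared above.
type mqttDiag struct{}

// CreatingAlertHandler replaces logger.Println("D! create Handler", c) with a
// typed event, so the config is rendered consistently in one place.
func (mqttDiag) CreatingAlertHandler(c HandlerConfig) {
	fmt.Printf("lvl=debug msg=creating-alert-handler broker=%s topic=%s\n",
		c.BrokerName, c.Topic)
}

// HandlingEvent replaces logger.Println("D! HANDLE").
func (mqttDiag) HandlingEvent() {
	fmt.Println("lvl=debug msg=handling-event")
}

func main() {
	d := mqttDiag{}
	d.CreatingAlertHandler(HandlerConfig{BrokerName: "default", Topic: "alerts"})
	d.HandlingEvent()
}
```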
+ d.CreatingAlertHandler(c) return &handler{ - s: s, - c: c, - logger: l, + s: s, + c: c, + diag: d, } } @@ -217,15 +226,15 @@ type HandlerConfig struct { } type handler struct { - s *Service - c HandlerConfig - logger *log.Logger + s *Service + c HandlerConfig + diag Diagnostic } func (h *handler) Handle(event alert.Event) { - h.logger.Println("D! HANDLE") + h.diag.HandlingEvent() if err := h.s.Alert(h.c.BrokerName, h.c.Topic, h.c.QoS, h.c.Retained, event.State.Message); err != nil { - h.logger.Println("E! failed to post message to MQTT broker", err) + h.diag.Error("failed to post message to MQTT broker", err) } } diff --git a/services/nerve/service.go b/services/nerve/service.go index 1cd29fdb6..9932e6d5d 100644 --- a/services/nerve/service.go +++ b/services/nerve/service.go @@ -3,7 +3,6 @@ package nerve import ( "context" "fmt" - "log" "sync" "time" @@ -12,6 +11,8 @@ import ( pnerve "github.com/prometheus/prometheus/discovery/zookeeper" ) +type Diagnostic scraper.Diagnostic + // Service is the nerve discovery service type Service struct { Configs []Config @@ -19,16 +20,16 @@ type Service struct { registry scraper.Registry - logger *log.Logger - open bool + diag Diagnostic + open bool } // NewService creates a new unopened service -func NewService(c []Config, r scraper.Registry, l *log.Logger) *Service { +func NewService(c []Config, r scraper.Registry, d Diagnostic) *Service { return &Service{ Configs: c, registry: r, - logger: l, + diag: d, } } @@ -126,7 +127,7 @@ func (s *Service) Test(options interface{}) error { } sd := s.Configs[found].PromConfig() - discoverer := pnerve.NewNerveDiscovery(sd, scraper.NewLogger(s.logger)) + discoverer := pnerve.NewNerveDiscovery(sd, s.diag) ctx, cancel := context.WithCancel(context.Background()) updates := make(chan []*config.TargetGroup) diff --git a/services/noauth/service.go b/services/noauth/service.go index 89c7075c9..8a9e77d32 100644 --- a/services/noauth/service.go +++ b/services/noauth/service.go @@ -1,21 +1,24 @@ package noauth import ( - "log" - "github.com/influxdata/kapacitor/auth" ) +type Diagnostic interface { + FakedUserAuthentication(username string) + FakedSubscriptionUserToken() +} + // Provide an implentation of an Authentication service. // NOTE: This service provides no real authentication but rather // returns admin users for all requests. type Service struct { - logger *log.Logger + diag Diagnostic } -func NewService(l *log.Logger) *Service { +func NewService(d Diagnostic) *Service { return &Service{ - logger: l, + diag: d, } } @@ -34,13 +37,13 @@ func (s *Service) Authenticate(username, password string) (auth.User, error) { // Return a user will all privileges and given username. func (s *Service) User(username string) (auth.User, error) { - s.logger.Println("W! using noauth auth backend. Faked authentication for user", username) + s.diag.FakedUserAuthentication(username) return auth.NewUser(username, nil, true, nil), nil } // Return a user will all privileges. func (s *Service) SubscriptionUser(token string) (auth.User, error) { - s.logger.Println("W! using noauth auth backend. 
Faked authentication for subscription user token") + s.diag.FakedSubscriptionUserToken() return auth.NewUser("subscription-user", nil, true, nil), nil } diff --git a/services/opsgenie/service.go b/services/opsgenie/service.go index f6e11982a..c209a0cb8 100644 --- a/services/opsgenie/service.go +++ b/services/opsgenie/service.go @@ -7,23 +7,29 @@ import ( "fmt" "io" "io/ioutil" - "log" "net/http" "sync/atomic" "time" "github.com/influxdata/kapacitor/alert" + "github.com/influxdata/kapacitor/keyvalue" "github.com/influxdata/kapacitor/models" ) +type Diagnostic interface { + WithContext(ctx ...keyvalue.T) Diagnostic + + Error(msg string, err error) +} + type Service struct { configValue atomic.Value - logger *log.Logger + diag Diagnostic } -func NewService(c Config, l *log.Logger) *Service { +func NewService(c Config, d Diagnostic) *Service { s := &Service{ - logger: l, + diag: d, } s.configValue.Store(c) return s @@ -192,16 +198,16 @@ type HandlerConfig struct { } type handler struct { - s *Service - c HandlerConfig - logger *log.Logger + s *Service + c HandlerConfig + diag Diagnostic } -func (s *Service) Handler(c HandlerConfig, l *log.Logger) alert.Handler { +func (s *Service) Handler(c HandlerConfig, ctx ...keyvalue.T) alert.Handler { return &handler{ - s: s, - c: c, - logger: l, + s: s, + c: c, + diag: s.diag.WithContext(ctx...), } } @@ -222,6 +228,6 @@ func (h *handler) Handle(event alert.Event) { event.State.Time, event.Data.Result, ); err != nil { - h.logger.Println("E! failed to send event to OpsGenie", err) + h.diag.Error("failed to send event to OpsGenie", err) } } diff --git a/services/pagerduty/service.go b/services/pagerduty/service.go index 10428cc42..228dac3c9 100644 --- a/services/pagerduty/service.go +++ b/services/pagerduty/service.go @@ -7,25 +7,30 @@ import ( "fmt" "io" "io/ioutil" - "log" "net/http" "sync/atomic" "github.com/influxdata/kapacitor/alert" + "github.com/influxdata/kapacitor/keyvalue" ) +type Diagnostic interface { + WithContext(ctx ...keyvalue.T) Diagnostic + Error(msg string, err error) +} + type Service struct { configValue atomic.Value HTTPDService interface { URL() string } - logger *log.Logger + diag Diagnostic } -func NewService(c Config, l *log.Logger) *Service { +func NewService(c Config, d Diagnostic) *Service { s := &Service{ - logger: l, + diag: d, } s.configValue.Store(c) return s @@ -167,16 +172,16 @@ type HandlerConfig struct { } type handler struct { - s *Service - c HandlerConfig - logger *log.Logger + s *Service + c HandlerConfig + diag Diagnostic } -func (s *Service) Handler(c HandlerConfig, l *log.Logger) alert.Handler { +func (s *Service) Handler(c HandlerConfig, ctx ...keyvalue.T) alert.Handler { return &handler{ - s: s, - c: c, - logger: l, + s: s, + c: c, + diag: s.diag.WithContext(ctx...), } } @@ -188,6 +193,6 @@ func (h *handler) Handle(event alert.Event) { event.State.Level, event.State.Details, ); err != nil { - h.logger.Println("E! 
failed to send event to PagerDuty", err) + h.diag.Error("failed to send event to PagerDuty", err) } } diff --git a/services/pushover/service.go b/services/pushover/service.go index b0e576972..8fe9c1329 100644 --- a/services/pushover/service.go +++ b/services/pushover/service.go @@ -6,7 +6,6 @@ import ( "errors" "fmt" "io/ioutil" - "log" "net/http" "net/url" "strconv" @@ -14,16 +13,22 @@ import ( "sync/atomic" "github.com/influxdata/kapacitor/alert" + "github.com/influxdata/kapacitor/keyvalue" ) +type Diagnostic interface { + WithContext(ctx ...keyvalue.T) Diagnostic + Error(msg string, err error) +} + type Service struct { configValue atomic.Value - logger *log.Logger + diag Diagnostic } -func NewService(c Config, l *log.Logger) *Service { +func NewService(c Config, d Diagnostic) *Service { s := &Service{ - logger: l, + diag: d, } s.configValue.Store(c) return s @@ -235,16 +240,16 @@ type HandlerConfig struct { } type handler struct { - s *Service - c HandlerConfig - logger *log.Logger + s *Service + c HandlerConfig + diag Diagnostic } -func (s *Service) Handler(c HandlerConfig, l *log.Logger) alert.Handler { +func (s *Service) Handler(c HandlerConfig, ctx ...keyvalue.T) alert.Handler { return &handler{ - s: s, - c: c, - logger: l, + s: s, + c: c, + diag: s.diag.WithContext(ctx...), } } @@ -258,6 +263,6 @@ func (h *handler) Handle(event alert.Event) { h.c.Sound, event.State.Level, ); err != nil { - h.logger.Println("E! failed to send event to Pushover", err) + h.diag.Error("failed to send event to Pushover", err) } } diff --git a/services/replay/service.go b/services/replay/service.go index 18fec6137..a01533bdf 100644 --- a/services/replay/service.go +++ b/services/replay/service.go @@ -7,7 +7,6 @@ import ( "fmt" "io" "io/ioutil" - "log" "net/http" "net/url" "os" @@ -24,6 +23,7 @@ import ( "github.com/influxdata/kapacitor/clock" "github.com/influxdata/kapacitor/edge" "github.com/influxdata/kapacitor/influxdb" + "github.com/influxdata/kapacitor/keyvalue" "github.com/influxdata/kapacitor/models" "github.com/influxdata/kapacitor/services/httpd" "github.com/influxdata/kapacitor/services/storage" @@ -51,6 +51,11 @@ const ( var validID = regexp.MustCompile(`^[-\._\p{L}0-9]+$`) +type Diagnostic interface { + Error(msg string, err error, ctx ...keyvalue.T) + Debug(msg string, ctx ...keyvalue.T) +} + // Handles recording, starting, and waiting on replays type Service struct { saveDir string @@ -86,14 +91,14 @@ type Service struct { Stream(name string) (kapacitor.StreamCollector, error) } - logger *log.Logger + diag Diagnostic } // Create a new replay master. -func NewService(conf Config, l *log.Logger) *Service { +func NewService(conf Config, d Diagnostic) *Service { return &Service{ saveDir: conf.Dir, - logger: l, + diag: d, } } @@ -229,7 +234,7 @@ func (s *Service) syncRecordingMetadata() error { name := info.Name() i := strings.LastIndex(name, ".") if i == -1 { - s.logger.Println("E! file without extension in replay dir", name) + s.diag.Error("file without extension in replay dir", fmt.Errorf("file %s is missing file extension", name)) continue } ext := name[i:] @@ -242,7 +247,7 @@ func (s *Service) syncRecordingMetadata() error { case batchEXT: typ = BatchRecording default: - s.logger.Println("E! 
unknown file in replay dir", name) + s.diag.Error("unknown file type in replay dir", fmt.Errorf("%s has unknown file type", name)) continue } dataUrl := url.URL{ @@ -264,7 +269,7 @@ func (s *Service) syncRecordingMetadata() error { if err != nil { return errors.Wrap(err, "creating recording metadata") } - s.logger.Printf("D! recording %s metadata synced", id) + s.diag.Debug("recording metadata synced", keyvalue.KV("recording_id", id)) } else if err != nil { return errors.Wrap(err, "checking for existing recording metadata") } else if err == nil { @@ -274,9 +279,9 @@ if err != nil { return errors.Wrap(err, "updating recording metadata") } - s.logger.Printf("D! recording %s data url fixed", id) + s.diag.Debug("recording data url fixed", keyvalue.KV("recording_id", id)) } else { - s.logger.Printf("D! skipping recording %s, metadata already correct", id) + s.diag.Debug("skipping recording, metadata is already correct", keyvalue.KV("recording_id", id)) } } } @@ -289,7 +294,7 @@ func (s *Service) markFailedRecordings() { for { recordings, err := s.recordings.List("", offset, limit) if err != nil { - s.logger.Println("E! failed to retrieve recordings:", err) + s.diag.Error("failed to retrieve recordings", err) } for _, recording := range recordings { if recording.Status == Running { @@ -297,7 +302,7 @@ recording.Error = "unexpected Kapacitor shutdown" err := s.recordings.Replace(recording) if err != nil { - s.logger.Println("E! failed to set recording status to failed:", err) + s.diag.Error("failed to set recording status to failed", err) } } } @@ -314,7 +319,7 @@ func (s *Service) markFailedReplays() { for { replays, err := s.replays.List("", offset, limit) if err != nil { - s.logger.Println("E! failed to retrieve replays:", err) + s.diag.Error("failed to retrieve replays", err) } for _, replay := range replays { if replay.Status == Running { @@ -322,7 +327,7 @@ replay.Error = "unexpected Kapacitor shutdown" err := s.replays.Replace(replay) if err != nil { - s.logger.Println("E! failed to set replay status to failed:", err) + s.diag.Error("failed to set replay status to failed", err) } } } @@ -745,15 +750,15 @@ func (s *Service) updateRecordingResult(recording Recording, ds DataSource, err recording.Progress = 1.0 recording.Size, err = ds.Size() if err != nil { - s.logger.Println("E! failed to determine size of recording", recording.ID, err) + s.diag.Error("failed to determine size of recording", err, keyvalue.KV("recording_id", recording.ID)) } err = s.recordings.Replace(recording) if err != nil { - s.logger.Println("E! failed to save recording info", recording.ID, err) + s.diag.Error("failed to save recording info", err, keyvalue.KV("recording_id", recording.ID)) } } -func (r *Service) updateReplayResult(replay Replay, err error) { +func (s *Service) updateReplayResult(replay Replay, err error) { replay.Status = Finished if err != nil { replay.Status = Failed @@ -761,9 +766,9 @@ } replay.Progress = 1.0 replay.Date = time.Now() - err = r.replays.Replace(replay) + err = s.replays.Replace(replay) if err != nil { - r.logger.Println("E! failed to save replay results:", err) + s.diag.Error("failed to save replay results", err) } } @@ -1365,7 +1370,7 @@ func (s *Service) startRecordBatch(t *kapacitor.Task, start, stop time.Time) ([] } // Run queries for _, q := range queries { - s.logger.Println("D!
Runing batch query for replay", q) + s.diag.Debug("running batch query for replay", keyvalue.KV("query", q.String())) query := influxdb.Query{ Command: q.String(), diff --git a/services/reporting/service.go b/services/reporting/service.go index f5d2e6f4f..fa19c194e 100644 --- a/services/reporting/service.go +++ b/services/reporting/service.go @@ -2,7 +2,6 @@ package reporting import ( - "log" "runtime" "sync" "time" @@ -13,6 +12,10 @@ import ( const reportingInterval = time.Hour * 12 +type Diagnostic interface { + Error(msg string, err error) +} + // Sends anonymous usage information every 12 hours. type Service struct { tags client.Tags @@ -24,17 +27,17 @@ type Service struct { statsTicker *time.Ticker usageTicker *time.Ticker closing chan struct{} - logger *log.Logger + diag Diagnostic wg sync.WaitGroup } -func NewService(c Config, info vars.Infoer, l *log.Logger) *Service { +func NewService(c Config, info vars.Infoer, d Diagnostic) *Service { client := client.New("") client.URL = c.URL return &Service{ client: client, info: info, - logger: l, + diag: d, } } @@ -55,7 +58,7 @@ func (s *Service) Open() error { defer s.wg.Done() err := s.sendUsageReport() if err != nil { - s.logger.Println("E! error while sending usage report on startup:", err) + s.diag.Error("error while sending usage report on startup", err) } }() @@ -91,7 +94,7 @@ func (s *Service) usage() { case <-s.usageTicker.C: err := s.sendUsageReport() if err != nil { - s.logger.Println("E! error while sending usage report:", err) + s.diag.Error("error while sending usage report", err) } } } diff --git a/services/scraper/log.go b/services/scraper/log.go deleted file mode 100644 index 21c218b85..000000000 --- a/services/scraper/log.go +++ /dev/null @@ -1,99 +0,0 @@ -package scraper - -import ( - "log" - - plog "github.com/prometheus/common/log" -) - -// Logger wraps kapacitor logging for prometheus -type Logger struct { - *log.Logger -} - -// NewLogger wraps a logger to be used for prometheus -func NewLogger(l *log.Logger) *Logger { - return &Logger{ - Logger: l, - } -} - -// Debug logs a message at level Debug on the standard logger. -func (l *Logger) Debug(v ...interface{}) { - l.Logger.Print("D! ", v) -} - -// Debugln logs a message at level Debug on the standard logger. -func (l *Logger) Debugln(v ...interface{}) { - l.Logger.Println("D! ", v) -} - -// Debugf logs a message at level Debug on the standard logger. -func (l *Logger) Debugf(s string, v ...interface{}) { - l.Logger.Printf("D! "+s, v) -} - -// Info logs a message at level Info on the standard logger. -func (l *Logger) Info(v ...interface{}) { - l.Logger.Print("I! ", v) -} - -// Infoln logs a message at level Info on the standard logger. -func (l *Logger) Infoln(v ...interface{}) { - l.Logger.Println("I! ", v) -} - -// Infof logs a message at level Info on the standard logger. -func (l *Logger) Infof(s string, v ...interface{}) { - l.Logger.Printf("I! "+s, v) -} - -// Warn logs a message at level Warn on the standard logger. -func (l *Logger) Warn(v ...interface{}) { - l.Logger.Print("W! ", v) -} - -// Warnln logs a message at level Warn on the standard logger. -func (l *Logger) Warnln(v ...interface{}) { - l.Logger.Println("W! ", v) -} - -// Warnf logs a message at level Warn on the standard logger. -func (l *Logger) Warnf(s string, v ...interface{}) { - l.Logger.Printf("W! "+s, v) -} - -// Error logs a message at level Error on the standard logger. -func (l *Logger) Error(v ...interface{}) { - l.Logger.Print("E! 
", v) -} - -// Errorln logs a message at level Error on the standard logger. -func (l *Logger) Errorln(v ...interface{}) { - l.Logger.Println("E! ", v) -} - -// Errorf logs a message at level Error on the standard logger. -func (l *Logger) Errorf(s string, v ...interface{}) { - l.Logger.Printf("E! "+s, v) -} - -// Fatal logs a message at level Fatal on the standard logger. -func (l *Logger) Fatal(v ...interface{}) { - l.Logger.Fatal(v) -} - -// Fatalln logs a message at level Fatal on the standard logger. -func (l *Logger) Fatalln(v ...interface{}) { - l.Logger.Fatalln(v) -} - -// Fatalf logs a message at level Fatal on the standard logger. -func (l *Logger) Fatalf(s string, v ...interface{}) { - l.Logger.Fatalf(s, v) -} - -// With adds a field to the logger. -func (l *Logger) With(key string, value interface{}) plog.Logger { - return l -} diff --git a/services/scraper/service.go b/services/scraper/service.go index 4f18d4c1d..437b06ec9 100644 --- a/services/scraper/service.go +++ b/services/scraper/service.go @@ -2,13 +2,13 @@ package scraper import ( "fmt" - "log" "math" "sync" "sync/atomic" "github.com/influxdata/kapacitor/edge" "github.com/influxdata/kapacitor/models" + plog "github.com/prometheus/common/log" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/config" "github.com/prometheus/prometheus/retrieval" @@ -20,6 +20,9 @@ var ( _ storage.SampleAppender = &Service{} ) +// Prometheus logger +type Diagnostic plog.Logger + // Service represents the scraper manager type Service struct { PointsWriter interface { @@ -35,7 +38,7 @@ type Service struct { configs atomic.Value // []Config - logger *log.Logger + diag Diagnostic discoverers []Discoverer @@ -49,12 +52,12 @@ type Service struct { } // NewService creates a new scraper service -func NewService(c []Config, l *log.Logger) *Service { +func NewService(c []Config, d Diagnostic) *Service { s := &Service{ - logger: l, + diag: d, } s.storeConfigs(c) - s.mgr = retrieval.NewTargetManager(s, NewLogger(l)) + s.mgr = retrieval.NewTargetManager(s, d) return s } diff --git a/services/sensu/service.go b/services/sensu/service.go index cae17a8fa..db4451338 100644 --- a/services/sensu/service.go +++ b/services/sensu/service.go @@ -6,25 +6,30 @@ import ( "errors" "fmt" "io/ioutil" - "log" "net" "regexp" "sync/atomic" text "text/template" "github.com/influxdata/kapacitor/alert" + "github.com/influxdata/kapacitor/keyvalue" ) +type Diagnostic interface { + WithContext(ctx ...keyvalue.T) Diagnostic + Error(msg string, err error, kvs ...keyvalue.T) +} + type Service struct { configValue atomic.Value - logger *log.Logger + diag Diagnostic } var validNamePattern = regexp.MustCompile(`^[\w\.-]+$`) -func NewService(c Config, l *log.Logger) *Service { +func NewService(c Config, d Diagnostic) *Service { s := &Service{ - logger: l, + diag: d, } s.configValue.Store(c) return s @@ -171,14 +176,14 @@ type HandlerConfig struct { } type handler struct { - s *Service - c HandlerConfig - logger *log.Logger + s *Service + c HandlerConfig + diag Diagnostic sourceTmpl *text.Template } -func (s *Service) Handler(c HandlerConfig, l *log.Logger) (alert.Handler, error) { +func (s *Service) Handler(c HandlerConfig, ctx ...keyvalue.T) (alert.Handler, error) { srcTmpl, err := text.New("source").Parse(c.Source) if err != nil { return nil, err @@ -186,7 +191,7 @@ func (s *Service) Handler(c HandlerConfig, l *log.Logger) (alert.Handler, error) return &handler{ s: s, c: c, - logger: l, + diag: s.diag.WithContext(ctx...), sourceTmpl: srcTmpl, }, nil } @@ -196,7 
+201,7 @@ func (h *handler) Handle(event alert.Event) { var buf bytes.Buffer err := h.sourceTmpl.Execute(&buf, td) if err != nil { - h.logger.Printf("E! failed to evaluate Sensu source template %s: %v", h.c.Source, err) + h.diag.Error("failed to evaluate Sensu source template", err, keyvalue.KV("source", h.c.Source)) return } sourceStr := buf.String() @@ -208,6 +213,6 @@ func (h *handler) Handle(event alert.Event) { h.c.Handlers, event.State.Level, ); err != nil { - h.logger.Println("E! failed to send event to Sensu", err) + h.diag.Error("failed to send event to Sensu", err) } } diff --git a/services/serverset/service.go b/services/serverset/service.go index ad12ea475..5cfa7d06d 100644 --- a/services/serverset/service.go +++ b/services/serverset/service.go @@ -3,7 +3,6 @@ package serverset import ( "context" "fmt" - "log" "sync" "time" @@ -12,6 +11,8 @@ import ( pzookeeper "github.com/prometheus/prometheus/discovery/zookeeper" ) +type Diagnostic scraper.Diagnostic + // Service is the serverset discovery service type Service struct { Configs []Config @@ -19,16 +20,16 @@ type Service struct { registry scraper.Registry - logger *log.Logger - open bool + diag Diagnostic + open bool } // NewService creates a new unopened service -func NewService(c []Config, r scraper.Registry, l *log.Logger) *Service { +func NewService(c []Config, r scraper.Registry, d Diagnostic) *Service { return &Service{ Configs: c, registry: r, - logger: l, + diag: d, } } @@ -126,7 +127,7 @@ func (s *Service) Test(options interface{}) error { } sd := s.Configs[found].PromConfig() - discoverer := pzookeeper.NewServersetDiscovery(sd, scraper.NewLogger(s.logger)) + discoverer := pzookeeper.NewServersetDiscovery(sd, s.diag) ctx, cancel := context.WithCancel(context.Background()) updates := make(chan []*config.TargetGroup) diff --git a/services/servicetest/service.go b/services/servicetest/service.go index 6faa95596..385a6c3aa 100644 --- a/services/servicetest/service.go +++ b/services/servicetest/service.go @@ -3,7 +3,6 @@ package servicetest import ( "encoding/json" "fmt" - "log" "net/http" "path" "path/filepath" @@ -41,7 +40,7 @@ type Service struct { } } -func NewService(c Config, l *log.Logger) *Service { +func NewService(c Config) *Service { return &Service{ testers: make(map[string]Tester), } } diff --git a/services/slack/service.go b/services/slack/service.go index fee66e714..ee546dc80 100644 --- a/services/slack/service.go +++ b/services/slack/service.go @@ -6,32 +6,40 @@ import ( "fmt" "io" "io/ioutil" - "log" "net/http" "sync/atomic" "github.com/influxdata/kapacitor/alert" + "github.com/influxdata/kapacitor/keyvalue" "github.com/influxdata/kapacitor/tlsconfig" "github.com/pkg/errors" ) +type Diagnostic interface { + WithContext(ctx ...keyvalue.T) Diagnostic + + InsecureSkipVerify() + + Error(msg string, err error) +} + type Service struct { configValue atomic.Value clientValue atomic.Value - logger *log.Logger + diag Diagnostic client *http.Client } -func NewService(c Config, l *log.Logger) (*Service, error) { +func NewService(c Config, d Diagnostic) (*Service, error) { tlsConfig, err := tlsconfig.Create(c.SSLCA, c.SSLCert, c.SSLKey, c.InsecureSkipVerify) if err != nil { return nil, err } if tlsConfig.InsecureSkipVerify { - l.Println("W! 
Slack service is configured to skip ssl verification") + d.InsecureSkipVerify() } s := &Service{ - logger: l, + diag: d, } s.configValue.Store(c) s.clientValue.Store(&http.Client{ @@ -67,7 +75,7 @@ func (s *Service) Update(newConfig []interface{}) error { return err } if tlsConfig.InsecureSkipVerify { - s.logger.Println("W! Slack service is configured to skip ssl verification") + s.diag.InsecureSkipVerify() } s.configValue.Store(c) s.clientValue.Store(&http.Client{ @@ -216,16 +224,16 @@ type HandlerConfig struct { } type handler struct { - s *Service - c HandlerConfig - logger *log.Logger + s *Service + c HandlerConfig + diag Diagnostic } -func (s *Service) Handler(c HandlerConfig, l *log.Logger) alert.Handler { +func (s *Service) Handler(c HandlerConfig, ctx ...keyvalue.T) alert.Handler { return &handler{ - s: s, - c: c, - logger: l, + s: s, + c: c, + diag: s.diag.WithContext(ctx...), } } @@ -237,6 +245,6 @@ func (h *handler) Handle(event alert.Event) { h.c.IconEmoji, event.State.Level, ); err != nil { - h.logger.Println("E! failed to send event to Slack", err) + h.diag.Error("failed to send event", err) } } diff --git a/services/smtp/service.go b/services/smtp/service.go index 20974b55b..c3bc7fa92 100644 --- a/services/smtp/service.go +++ b/services/smtp/service.go @@ -4,32 +4,37 @@ import ( "crypto/tls" "errors" "fmt" - "log" "sync" "sync/atomic" "time" "github.com/influxdata/kapacitor/alert" + "github.com/influxdata/kapacitor/keyvalue" "gopkg.in/gomail.v2" ) var ErrNoRecipients = errors.New("not sending email, no recipients defined") +type Diagnostic interface { + WithContext(ctx ...keyvalue.T) Diagnostic + Error(msg string, err error) +} + type Service struct { mu sync.Mutex configValue atomic.Value mail chan *gomail.Message updates chan bool - logger *log.Logger + diag Diagnostic wg sync.WaitGroup opened bool } -func NewService(c Config, l *log.Logger) *Service { +func NewService(c Config, d Diagnostic) *Service { s := &Service{ updates: make(chan bool), - logger: l, + diag: d, } s.configValue.Store(c) return s @@ -136,7 +141,7 @@ func (s *Service) runMailer() { // Close old connection if conn != nil { if err := conn.Close(); err != nil { - s.logger.Println("E! error closing connection to old SMTP server:", err) + s.diag.Error("error closing connection to old SMTP server", err) } conn = nil } @@ -149,20 +154,20 @@ func (s *Service) runMailer() { } if !open { if conn, err = d.Dial(); err != nil { - s.logger.Println("E! error connecting to SMTP server", err) + s.diag.Error("error closing connection to SMTP server", err) break } open = true } if err := gomail.Send(conn, m); err != nil { - s.logger.Println("E!", err) + s.diag.Error("error sending", err) } // Close the connection to the SMTP server if no email was sent in // the last IdleTimeout duration. case <-timer.C: if open { if err := conn.Close(); err != nil { - s.logger.Println("E! 
error closing connection to SMTP server:", err) + s.diag.Error("error closing connection to SMTP server", err) } open = false } @@ -232,16 +237,16 @@ type HandlerConfig struct { } type handler struct { - s *Service - c HandlerConfig - logger *log.Logger + s *Service + c HandlerConfig + diag Diagnostic } -func (s *Service) Handler(c HandlerConfig, l *log.Logger) alert.Handler { +func (s *Service) Handler(c HandlerConfig, ctx ...keyvalue.T) alert.Handler { return &handler{ - s: s, - c: c, - logger: l, + s: s, + c: c, + diag: s.diag.WithContext(ctx...), } } @@ -251,6 +256,6 @@ func (h *handler) Handle(event alert.Event) { event.State.Message, event.State.Details, ); err != nil { - h.logger.Println("E! failed to send email", err) + h.diag.Error("failed to send email", err) } } diff --git a/services/snmptrap/service.go b/services/snmptrap/service.go index 3ee4f1ab9..29a507e50 100644 --- a/services/snmptrap/service.go +++ b/services/snmptrap/service.go @@ -3,27 +3,32 @@ package snmptrap import ( "bytes" "fmt" - "log" "strconv" "sync" "sync/atomic" text "text/template" "github.com/influxdata/kapacitor/alert" + "github.com/influxdata/kapacitor/keyvalue" "github.com/k-sone/snmpgo" "github.com/pkg/errors" ) +type Diagnostic interface { + WithContext(ctx ...keyvalue.T) Diagnostic + Error(msg string, err error) +} + type Service struct { configValue atomic.Value clientMu sync.Mutex client *snmpgo.SNMP - logger *log.Logger + diag Diagnostic } -func NewService(c Config, l *log.Logger) *Service { +func NewService(c Config, d Diagnostic) *Service { s := &Service{ - logger: l, + diag: d, } s.configValue.Store(c) return s @@ -204,13 +209,13 @@ type Data struct { // handler provides the implementation of the alert.Handler interface for the Foo service. type handler struct { - s *Service - c HandlerConfig - logger *log.Logger + s *Service + c HandlerConfig + diag Diagnostic } // Handler creates a handler from the config. -func (s *Service) Handler(c HandlerConfig, l *log.Logger) (alert.Handler, error) { +func (s *Service) Handler(c HandlerConfig, ctx ...keyvalue.T) (alert.Handler, error) { // Compile data value templates for i, d := range c.DataList { tmpl, err := text.New("data").Parse(d.Value) @@ -220,9 +225,9 @@ func (s *Service) Handler(c HandlerConfig, l *log.Logger) (alert.Handler, error) c.DataList[i].tmpl = tmpl } return &handler{ - s: s, - c: c, - logger: l, + s: s, + c: c, + diag: s.diag.WithContext(ctx...), }, nil } @@ -234,13 +239,13 @@ func (h *handler) Handle(event alert.Event) { for i, d := range h.c.DataList { err := d.tmpl.Execute(&buf, td) if err != nil { - h.logger.Println("E! failed to handle event", err) + h.diag.Error("failed to handle event", err) return } h.c.DataList[i].Value = buf.String() buf.Reset() } if err := h.s.Trap(h.c.TrapOid, h.c.DataList); err != nil { - h.logger.Println("E! 
failed to handle event", err) + h.diag.Error("failed to handle event", err) } } diff --git a/services/static_discovery/service.go b/services/static_discovery/service.go index 87f4a12ab..8b67487cf 100644 --- a/services/static_discovery/service.go +++ b/services/static_discovery/service.go @@ -3,7 +3,6 @@ package static_discovery import ( "context" "fmt" - "log" "sync" "time" @@ -12,6 +11,8 @@ import ( "github.com/prometheus/prometheus/discovery" ) +type Diagnostic scraper.Diagnostic + // Service is the static discovery service type Service struct { Configs []Config @@ -19,16 +20,16 @@ type Service struct { registry scraper.Registry - logger *log.Logger - open bool + diag Diagnostic + open bool } // NewService creates a new unopened service -func NewService(c []Config, r scraper.Registry, l *log.Logger) *Service { +func NewService(c []Config, r scraper.Registry, d Diagnostic) *Service { return &Service{ Configs: c, registry: r, - logger: l, + diag: d, } } diff --git a/services/stats/service.go b/services/stats/service.go index 7e20634a0..26c9e5b7d 100644 --- a/services/stats/service.go +++ b/services/stats/service.go @@ -23,7 +23,6 @@ package stats import ( - "log" "sync" "time" @@ -34,6 +33,10 @@ import ( "github.com/influxdata/kapacitor/timer" ) +type Diagnostic interface { + Error(msg string, err error) +} + // Sends internal stats back into the Kapacitor stream. // Internal stats come from running tasks and other // services running within Kapacitor. @@ -56,17 +59,17 @@ type Service struct { mu sync.Mutex wg sync.WaitGroup - logger *log.Logger + diag Diagnostic } -func NewService(c Config, l *log.Logger) *Service { +func NewService(c Config, d Diagnostic) *Service { return &Service{ interval: time.Duration(c.StatsInterval), db: c.Database, rp: c.RetentionPolicy, timingSampleRate: c.TimingSampleRate, timingMovingAvgSize: c.TimingMovingAverageSize, - logger: l, + diag: d, } } @@ -115,7 +118,7 @@ func (s *Service) reportStats() { now := time.Now().UTC() data, err := vars.GetStatsData() if err != nil { - s.logger.Println("E! error getting stats data:", err) + s.diag.Error("error getting stats data", err) return } for _, stat := range data { diff --git a/services/storage/api.go b/services/storage/api.go index aaa6dc593..27e991fd4 100644 --- a/services/storage/api.go +++ b/services/storage/api.go @@ -3,7 +3,6 @@ package storage import ( "encoding/json" "fmt" - "log" "net/http" "path" "strconv" @@ -28,7 +27,7 @@ type APIServer struct { Registrar StoreActionerRegistrar DB *bolt.DB routes []httpd.Route - logger *log.Logger + diag Diagnostic HTTPDService interface { AddRoutes([]httpd.Route) error @@ -83,7 +82,7 @@ func (s *APIServer) handleBackup(w http.ResponseWriter, r *http.Request) { // since the headers have already been sent. // The client can simply check that the Content-Length matches // the amount of data to ensure a successful backup was performed. - s.logger.Println("E! 
failed to send backup data:", err) + s.diag.Error("failed to send backup data", err) } } diff --git a/services/storage/service.go b/services/storage/service.go index c7e2446b1..377ec8020 100644 --- a/services/storage/service.go +++ b/services/storage/service.go @@ -1,7 +1,6 @@ package storage import ( - "log" "os" "path" "sync" @@ -11,6 +10,10 @@ import ( "github.com/pkg/errors" ) +type Diagnostic interface { + Error(msg string, err error) +} + type Service struct { dbpath string @@ -28,13 +31,13 @@ type Service struct { DelRoutes([]httpd.Route) } - logger *log.Logger + diag Diagnostic } -func NewService(conf Config, l *log.Logger) *Service { +func NewService(conf Config, d Diagnostic) *Service { return &Service{ dbpath: conf.BoltDBPath, - logger: l, + diag: d, stores: make(map[string]Interface), } } @@ -61,7 +64,7 @@ func (s *Service) Open() error { DB: s.boltdb, Registrar: s.registrar, HTTPDService: s.HTTPDService, - logger: s.logger, + diag: s.diag, } if err := s.apiServer.Open(); err != nil { diff --git a/services/swarm/cluster.go b/services/swarm/cluster.go index c878fb468..224dce199 100644 --- a/services/swarm/cluster.go +++ b/services/swarm/cluster.go @@ -1,7 +1,6 @@ package swarm import ( - "log" "sync/atomic" "github.com/influxdata/kapacitor/services/swarm/client" @@ -11,10 +10,10 @@ import ( type Cluster struct { configValue atomic.Value // Config client client.Client - logger *log.Logger + diag Diagnostic } -func NewCluster(c Config, l *log.Logger) (*Cluster, error) { +func NewCluster(c Config, d Diagnostic) (*Cluster, error) { clientConfig, err := c.ClientConfig() if err != nil { return nil, errors.Wrap(err, "failed to create swarm client config") @@ -26,7 +25,7 @@ func NewCluster(c Config, l *log.Logger) (*Cluster, error) { s := &Cluster{ client: cli, - logger: l, + diag: d, } s.configValue.Store(c) return s, nil diff --git a/services/swarm/service.go b/services/swarm/service.go index 47d51d03e..7e79072ef 100644 --- a/services/swarm/service.go +++ b/services/swarm/service.go @@ -2,23 +2,26 @@ package swarm import ( "fmt" - "log" "sync" "github.com/influxdata/kapacitor/services/swarm/client" "github.com/pkg/errors" ) +type Diagnostic interface { + WithClusterContext(cluster string) Diagnostic +} + type Service struct { mu sync.Mutex clusters map[string]*Cluster - logger *log.Logger + diag Diagnostic } -func NewService(cs Configs, l *log.Logger) (*Service, error) { +func NewService(cs Configs, d Diagnostic) (*Service, error) { clusters := make(map[string]*Cluster, len(cs)) for _, c := range cs { - cluster, err := NewCluster(c, l) + cluster, err := NewCluster(c, d.WithClusterContext(c.ID)) if err != nil { return nil, errors.Wrapf(err, "failed to create cluster for %q", c.ID) } @@ -26,7 +29,7 @@ func NewService(cs Configs, l *log.Logger) (*Service, error) { } return &Service{ clusters: clusters, - logger: l, + diag: d, }, nil } @@ -50,7 +53,7 @@ func (s *Service) Update(newConfigs []interface{}) error { cluster, ok := s.clusters[c.ID] if !ok { var err error - cluster, err = NewCluster(c, s.logger) + cluster, err = NewCluster(c, s.diag.WithClusterContext(c.ID)) if err != nil { return err } diff --git a/services/talk/service.go b/services/talk/service.go index 4461d656e..10b5217c5 100644 --- a/services/talk/service.go +++ b/services/talk/service.go @@ -7,21 +7,26 @@ import ( "fmt" "io" "io/ioutil" - "log" "net/http" "sync/atomic" "github.com/influxdata/kapacitor/alert" + "github.com/influxdata/kapacitor/keyvalue" ) +type Diagnostic interface { + WithContext(ctx ...keyvalue.T) 
Diagnostic + Error(msg string, err error) +} + type Service struct { configValue atomic.Value - logger *log.Logger + diag Diagnostic } -func NewService(c Config, l *log.Logger) *Service { +func NewService(c Config, d Diagnostic) *Service { s := &Service{ - logger: l, + diag: d, } s.configValue.Store(c) return s @@ -120,14 +125,14 @@ func (s *Service) preparePost(title, text string) (string, io.Reader, error) { } type handler struct { - s *Service - logger *log.Logger + s *Service + diag Diagnostic } -func (s *Service) Handler(l *log.Logger) alert.Handler { +func (s *Service) Handler(ctx ...keyvalue.T) alert.Handler { return &handler{ - s: s, - logger: l, + s: s, + diag: s.diag.WithContext(ctx...), } } @@ -136,6 +141,6 @@ func (h *handler) Handle(event alert.Event) { event.State.ID, event.State.Message, ); err != nil { - h.logger.Println("E! failed to send event to Talk", err) + h.diag.Error("failed to send event to Talk", err) } } diff --git a/services/task_store/service.go b/services/task_store/service.go index 1c137531b..8e4dce967 100644 --- a/services/task_store/service.go +++ b/services/task_store/service.go @@ -5,7 +5,6 @@ import ( "encoding/gob" "encoding/json" "fmt" - "log" "net/http" "path" "path/filepath" @@ -16,6 +15,7 @@ import ( "github.com/boltdb/bolt" "github.com/influxdata/kapacitor" "github.com/influxdata/kapacitor/client/v1" + "github.com/influxdata/kapacitor/keyvalue" "github.com/influxdata/kapacitor/server/vars" "github.com/influxdata/kapacitor/services/httpd" "github.com/influxdata/kapacitor/services/storage" @@ -33,6 +33,20 @@ const ( templatesPathAnchored = "/templates/" ) +type Diagnostic interface { + StartingTask(taskID string) + StartedTask(taskID string) + + FinishedTask(taskID string) + + Error(msg string, err error, ctx ...keyvalue.T) + + Debug(msg string) + + AlreadyMigrated(entity, id string) + Migrated(entity, id string) +} + type Service struct { oldDBDir string tasks TaskDAO @@ -55,7 +69,7 @@ type Service struct { Delete(*kapacitor.TaskMaster) } - logger *log.Logger + diag Diagnostic } type taskStore struct { @@ -64,10 +78,10 @@ type taskStore struct { TICKScript string } -func NewService(conf Config, l *log.Logger) *Service { +func NewService(conf Config, d Diagnostic) *Service { return &Service{ snapshotInterval: time.Duration(conf.SnapshotInterval), - logger: l, + diag: d, oldDBDir: conf.Dir, } } @@ -183,12 +197,12 @@ func (ts *Service) Open() error { numTasks++ if task.Status == Enabled { numEnabledTasks++ - ts.logger.Println("D! starting enabled task on startup", task.ID) + ts.diag.StartingTask(task.ID) err = ts.startTask(task) if err != nil { - ts.logger.Printf("E! error starting enabled task %s, err: %s\n", task.ID, err) + ts.diag.Error("failed to start enabled task", err, keyvalue.KV("task", task.ID)) } else { - ts.logger.Println("D! started task during startup", task.ID) + ts.diag.StartedTask(task.ID) } } } @@ -224,7 +238,7 @@ func (ts *Service) migrate() error { // Connect to old boltdb db, err := bolt.Open(filepath.Join(ts.oldDBDir, "task.db"), 0600, &bolt.Options{ReadOnly: true}) if err != nil { - ts.logger.Println("D! could not open old boltd for task_store. Not performing migration. Remove the `task_store.dir` configuration to disable migration.") + ts.diag.Debug("could not open old boltd for task_store. Not performing migration. 
Remove the `task_store.dir` configuration to disable migration.") return nil } @@ -256,7 +270,7 @@ func (ts *Service) migrate() error { task := &rawTask{} err = dec.Decode(task) if err != nil { - ts.logger.Println("E! corrupt data in old task_store boltdb tasks:", err) + ts.diag.Error("corrupt data in old task_store boltdb tasks", err) return nil } @@ -305,10 +319,10 @@ func (ts *Service) migrate() error { // Failed to migrate task stop process return err } else { - ts.logger.Printf("D! task %s has already been migrated skipping", task.Name) + ts.diag.AlreadyMigrated("task", task.Name) } } else { - ts.logger.Printf("D! task %s was migrated to new storage service", task.Name) + ts.diag.Migrated("task", task.Name) } return nil }) @@ -329,7 +343,7 @@ func (ts *Service) migrate() error { snapshot := &kapacitor.TaskSnapshot{} err = dec.Decode(snapshot) if err != nil { - ts.logger.Println("E! corrupt data in old task_store boltdb snapshots:", err) + ts.diag.Error("corrupt data in old task_store boltdb snapshots", err) return nil } @@ -344,9 +358,9 @@ func (ts *Service) migrate() error { // Failed to migrate snapshot stop process. return err } - ts.logger.Printf("D! snapshot %s was migrated to new storage service", id) + ts.diag.Migrated("snapshot", id) } else { - ts.logger.Printf("D! snapshot %s skipped, already migrated to new storage service", id) + ts.diag.AlreadyMigrated("snapshot", id) } } else if err != nil { return err @@ -383,7 +397,7 @@ func (ts *Service) SaveSnapshot(id string, snapshot *kapacitor.TaskSnapshot) err func (ts *Service) HasSnapshot(id string) bool { exists, err := ts.snapshots.Exists(id) if err != nil { - ts.logger.Println("E! error checking for snapshot", err) + ts.diag.Error("error checking for snapshot", err) return false } return exists @@ -614,7 +628,7 @@ func (ts *Service) handleListTasks(w http.ResponseWriter, r *http.Request) { if executing { s, err := tm.ExecutionStats(task.ID) if err != nil { - ts.logger.Printf("E! failed to retrieve stats for task %s: %v", task.ID, err) + ts.diag.Error("failed to retrieve stats for task", err, keyvalue.KV("task", task.ID)) } else { value = client.ExecutionStats{ TaskStats: s.TaskStats, @@ -640,7 +654,7 @@ func (ts *Service) handleListTasks(w http.ResponseWriter, r *http.Request) { case "vars": vars, err := ts.convertToClientVars(task.Vars) if err != nil { - ts.logger.Printf("E! failed to get vars for task %s: %s", task.ID, err) + ts.diag.Error("failed to get vars for task", err, keyvalue.KV("task", task.ID)) break } value = vars @@ -920,7 +934,12 @@ func (ts *Service) handleUpdateTask(w http.ResponseWriter, r *http.Request) { return } if err := ts.tasks.Delete(original.ID); err != nil { - ts.logger.Printf("E! failed to delete old task definition during ID change: old ID: %s new ID: %s, %s", original.ID, updated.ID, err.Error()) + ts.diag.Error( + "failed to delete old task definition during ID change", + err, + keyvalue.KV("oldID", original.ID), + keyvalue.KV("newID", updated.ID), + ) } if original.Status == Enabled && updated.Status == Enabled { // Stop task and start it under new name @@ -984,7 +1003,7 @@ func (ts *Service) convertTask(t Task, scriptFormat, dotView string, tm *kapacit dot = tm.ExecutingDot(t.ID, dotView == "labels") s, err := tm.ExecutionStats(t.ID) if err != nil { - ts.logger.Printf("E! 
failed to retrieve stats for task %s: %v", t.ID, err) + ts.diag.Error("failed to retrieve stats for task", err, keyvalue.KV("task", t.ID)) } else { stats.TaskStats = s.TaskStats stats.NodeStats = s.NodeStats @@ -1326,7 +1345,8 @@ func (ts *Service) deleteTask(id string) error { } if task.TemplateID != "" { if err := ts.templates.DisassociateTask(task.TemplateID, task.ID); err != nil { - ts.logger.Printf("E! failed to disassociate task %s from template %s", task.TemplateID, task.ID) + ts.diag.Error("failed to disassociate task from template", err, + keyvalue.KV("template", task.TemplateID), keyvalue.KV("task", task.ID)) } } vars.NumTasksVar.Add(-1) @@ -1525,7 +1545,7 @@ func (ts *Service) handleListTemplates(w http.ResponseWriter, r *http.Request) { case "vars": vars, err := ts.convertToClientVarsFromTick(task.Vars()) if err != nil { - ts.logger.Printf("E! failed to get vars for template %s: %s", template.ID, err) + ts.diag.Error("failed to get vars for template", err, keyvalue.KV("template", template.ID)) break } value = vars @@ -1689,7 +1709,8 @@ func (ts *Service) handleUpdateTemplate(w http.ResponseWriter, r *http.Request) return } if err := ts.templates.Delete(original.ID); err != nil { - ts.logger.Printf("E! failed to delete old template during ID change, old ID: %s new ID: %s, %s", original.ID, updated.ID, err.Error()) + ts.diag.Error("failed to delete old template during ID change", err, + keyvalue.KV("oldID", original.ID), keyvalue.KV("newID", updated.ID)) } } else { if err := ts.templates.Replace(updated); err != nil { @@ -1732,7 +1753,7 @@ func (ts *Service) updateAllAssociatedTasks(old, new Template, taskIds []string) task, err := ts.tasks.Get(taskId) if err != nil { if err != ErrNoTaskExists { - ts.logger.Printf("E! error rolling back associated task %s: %s", taskId, err) + ts.diag.Error("error rolling back associated task", err, keyvalue.KV("task", taskId)) } continue } @@ -1740,13 +1761,13 @@ func (ts *Service) updateAllAssociatedTasks(old, new Template, taskIds []string) task.TICKscript = old.TICKscript task.Type = old.Type if err := ts.tasks.Replace(task); err != nil { - ts.logger.Printf("E! error rolling back associated task %s: %s", taskId, err) + ts.diag.Error("error rolling back associated task", err, keyvalue.KV("task", taskId)) } if task.Status == Enabled { ts.stopTask(taskId) err := ts.startTask(task) if err != nil { - ts.logger.Printf("E! error rolling back associated task %s: %s", taskId, err) + ts.diag.Error("error rolling back associated task", err, keyvalue.KV("task", taskId)) } } } @@ -1873,17 +1894,17 @@ func (ts *Service) startTask(task Task) error { go func() { // Wait for task to finish err := et.Wait() - ts.logger.Printf("D! task %s finished", et.Task.ID) + ts.diag.FinishedTask(et.Task.ID) if err != nil { // Stop task tm.StopTask(t.ID) - ts.logger.Printf("E! task %s finished with error: %s", et.Task.ID, err) + ts.diag.Error("task finished with error", err, keyvalue.KV("task", et.Task.ID)) // Save last error from task. err = ts.saveLastError(t.ID, err.Error()) if err != nil { - ts.logger.Println("E! 
failed to save last error for task", et.Task.ID) + ts.diag.Error("failed to save last error for task", err, keyvalue.KV("task", et.Task.ID)) } } }() diff --git a/services/telegram/service.go b/services/telegram/service.go index 177399366..d0ef7cbda 100644 --- a/services/telegram/service.go +++ b/services/telegram/service.go @@ -6,25 +6,31 @@ import ( "fmt" "io" "io/ioutil" - "log" "net/http" "net/url" "path" "sync/atomic" + "strings" + "github.com/influxdata/kapacitor/alert" + "github.com/influxdata/kapacitor/keyvalue" "github.com/pkg/errors" - "strings" ) +type Diagnostic interface { + WithContext(ctx ...keyvalue.T) Diagnostic + Error(msg string, err error) +} + type Service struct { configValue atomic.Value - logger *log.Logger + diag Diagnostic } -func NewService(c Config, l *log.Logger) *Service { +func NewService(c Config, d Diagnostic) *Service { s := &Service{ - logger: l, + diag: d, } s.configValue.Store(c) return s @@ -198,16 +204,16 @@ type HandlerConfig struct { } type handler struct { - s *Service - c HandlerConfig - logger *log.Logger + s *Service + c HandlerConfig + diag Diagnostic } -func (s *Service) Handler(c HandlerConfig, l *log.Logger) alert.Handler { +func (s *Service) Handler(c HandlerConfig, ctx ...keyvalue.T) alert.Handler { return &handler{ - s: s, - c: c, - logger: l, + s: s, + c: c, + diag: s.diag.WithContext(ctx...), } } @@ -219,6 +225,6 @@ func (h *handler) Handle(event alert.Event) { h.c.DisableWebPagePreview, h.c.DisableNotification, ); err != nil { - h.logger.Println("E! failed to send event to Telegram", err) + h.diag.Error("failed to send event to Telegram", err) } } diff --git a/services/triton/service.go b/services/triton/service.go index df0ce8cb1..48ad3efdb 100644 --- a/services/triton/service.go +++ b/services/triton/service.go @@ -3,7 +3,6 @@ package triton import ( "context" "fmt" - "log" "sync" "time" @@ -12,6 +11,8 @@ import ( ptriton "github.com/prometheus/prometheus/discovery/triton" ) +type Diagnostic scraper.Diagnostic + // Service is the triton discovery service type Service struct { Configs []Config @@ -19,16 +20,16 @@ type Service struct { registry scraper.Registry - logger *log.Logger - open bool + diag Diagnostic + open bool } // NewService creates a new unopened service -func NewService(c []Config, r scraper.Registry, l *log.Logger) *Service { +func NewService(c []Config, r scraper.Registry, d Diagnostic) *Service { return &Service{ Configs: c, registry: r, - logger: l, + diag: d, } } @@ -126,7 +127,7 @@ func (s *Service) Test(options interface{}) error { } sd := s.Configs[found].PromConfig() - discoverer, err := ptriton.New(scraper.NewLogger(s.logger), sd) + discoverer, err := ptriton.New(s.diag, sd) if err != nil { return err } diff --git a/services/udf/service.go b/services/udf/service.go index 777e5f2fb..62cf83fe5 100644 --- a/services/udf/service.go +++ b/services/udf/service.go @@ -2,7 +2,6 @@ package udf import ( "fmt" - "log" "os" "sync" "time" @@ -12,18 +11,24 @@ import ( "github.com/influxdata/kapacitor/udf" ) +type Diagnostic interface { + LoadedUDFInfo(udf string) + + WithUDFContext() udf.Diagnostic +} + type Service struct { configs map[string]FunctionConfig infos map[string]udf.Info - logger *log.Logger + diag Diagnostic mu sync.RWMutex } -func NewService(c Config, l *log.Logger) *Service { +func NewService(c Config, d Diagnostic) *Service { return &Service{ configs: c.Functions, infos: make(map[string]udf.Info), - logger: l, + diag: d, } } @@ -60,7 +65,7 @@ func (s *Service) Info(name string) (udf.Info, bool) { func (s 
*Service) Create( name, taskID, nodeID string, - l *log.Logger, + d udf.Diagnostic, abortCallback func(), ) (udf.Interface, error) { conf, ok := s.configs[name] @@ -72,7 +77,7 @@ func (s *Service) Create( return kapacitor.NewUDFSocket( taskID, nodeID, kapacitor.NewSocketConn(conf.Socket), - l, + d, time.Duration(conf.Timeout), abortCallback, ), nil @@ -91,7 +96,7 @@ func (s *Service) Create( taskID, nodeID, command.ExecCommander, cmdSpec, - l, + d, time.Duration(conf.Timeout), abortCallback, ), nil @@ -106,7 +111,7 @@ func (s *Service) Refresh(name string) error { return fmt.Errorf("failed to load process info for %q: %v", name, err) } s.infos[name] = info - s.logger.Printf("D! loaded UDF info %q", name) + s.diag.LoadedUDFInfo(name) return nil } @@ -114,7 +119,7 @@ func (s *Service) loadUDFInfo(name string) (udf.Info, error) { // loadUDFInfo creates a UDF connection outside the context of a task or node // because it only makes the Info request and never makes an Init request. // As such it does not need to provide actual task and node IDs. - u, err := s.Create(name, "", "", s.logger, nil) + u, err := s.Create(name, "", "", s.diag.WithUDFContext(), nil) if err != nil { return udf.Info{}, err } diff --git a/services/udp/service.go b/services/udp/service.go index d9edfaa4d..d4cebe839 100644 --- a/services/udp/service.go +++ b/services/udp/service.go @@ -2,13 +2,14 @@ package udp import ( "errors" - "log" + "fmt" "net" "strings" "sync" "github.com/influxdata/influxdb/models" "github.com/influxdata/kapacitor/expvar" + "github.com/influxdata/kapacitor/keyvalue" "github.com/influxdata/kapacitor/server/vars" ) @@ -26,6 +27,12 @@ const ( statTransmitFail = "tx_fail" ) +type Diagnostic interface { + Error(msg string, err error, ctx ...keyvalue.T) + StartedListening(addr string) + ClosedService() +} + // // Service represents here an UDP service // that will listen for incoming packets @@ -44,17 +51,17 @@ type Service struct { WritePoints(database, retentionPolicy string, consistencyLevel models.ConsistencyLevel, points []models.Point) error } - Logger *log.Logger + Diag Diagnostic statMap *expvar.Map statKey string } -func NewService(c Config, l *log.Logger) *Service { +func NewService(c Config, diag Diagnostic) *Service { d := *c.WithDefaults() return &Service{ config: d, done: make(chan struct{}), - Logger: l, + Diag: diag, } } @@ -69,13 +76,13 @@ func (s *Service) Open() (err error) { s.addr, err = net.ResolveUDPAddr("udp", s.config.BindAddress) if err != nil { - s.Logger.Printf("E! Failed to resolve UDP address %s: %s", s.config.BindAddress, err) + s.Diag.Error("failed to resolve UDP address", err, keyvalue.KV("bind_address", s.config.BindAddress)) return err } s.conn, err = net.ListenUDP("udp", s.addr) if err != nil { - s.Logger.Printf("E! Failed to set up UDP listener at address %s: %s", s.addr, err) + s.Diag.Error("failed to set up UDP listener at address", err, keyvalue.KV("address", s.addr.String())) return err } @@ -90,12 +97,12 @@ func (s *Service) Open() (err error) { if s.config.ReadBuffer != 0 { err = s.conn.SetReadBuffer(s.config.ReadBuffer) if err != nil { - s.Logger.Printf("E! Failed to set UDP read buffer to %d: %s", s.config.ReadBuffer, err) + s.Diag.Error("failed to set UDP read buffer", err, keyvalue.KV("read_buffer", fmt.Sprintf("%v", s.config.ReadBuffer))) return err } } - s.Logger.Printf("I! 
Started listening on UDP: %s", s.addr.String()) + s.Diag.StartedListening(s.addr.String()) // Start reading and processing packets s.packets = make(chan []byte, s.config.Buffer) @@ -126,7 +133,7 @@ func (s *Service) serve() { if err != nil { if !strings.Contains(err.Error(), "use of closed network connection") { s.statMap.Add(statReadFail, 1) - s.Logger.Printf("E! Failed to read UDP message: %s", err) + s.Diag.Error("failed to read UDP message", err) } continue } @@ -144,7 +151,7 @@ func (s *Service) processPackets() { points, err := models.ParsePoints(p) if err != nil { s.statMap.Add(statPointsParseFail, 1) - s.Logger.Printf("E! Failed to parse points: %s", err) + s.Diag.Error("failed to parse points", err) continue } @@ -156,7 +163,7 @@ func (s *Service) processPackets() { ); err == nil { s.statMap.Add(statPointsTransmitted, int64(len(points))) } else { - s.Logger.Printf("E! failed to write points to database %q: %s", s.config.Database, err) + s.Diag.Error("failed to write points to database", err, keyvalue.KV("database", s.config.Database)) s.statMap.Add(statTransmitFail, 1) } @@ -179,7 +186,7 @@ func (s *Service) Close() error { s.conn = nil s.packets = nil - s.Logger.Print("I! Service closed") + s.Diag.ClosedService() return nil } diff --git a/services/victorops/service.go b/services/victorops/service.go index 5583fad30..a9cc8da8c 100644 --- a/services/victorops/service.go +++ b/services/victorops/service.go @@ -6,7 +6,6 @@ import ( "fmt" "io" "io/ioutil" - "log" "net/http" "net/url" "path" @@ -14,18 +13,25 @@ import ( "time" "github.com/influxdata/kapacitor/alert" + "github.com/influxdata/kapacitor/keyvalue" "github.com/influxdata/kapacitor/models" "github.com/pkg/errors" ) +type Diagnostic interface { + WithContext(ctx ...keyvalue.T) Diagnostic + + Error(msg string, err error) +} + type Service struct { configValue atomic.Value - logger *log.Logger + diag Diagnostic } -func NewService(c Config, l *log.Logger) *Service { +func NewService(c Config, d Diagnostic) *Service { s := &Service{ - logger: l, + diag: d, } s.configValue.Store(c) return s @@ -168,16 +174,16 @@ type HandlerConfig struct { } type handler struct { - s *Service - c HandlerConfig - logger *log.Logger + s *Service + c HandlerConfig + diag Diagnostic } -func (s *Service) Handler(c HandlerConfig, l *log.Logger) alert.Handler { +func (s *Service) Handler(c HandlerConfig, ctx ...keyvalue.T) alert.Handler { return &handler{ - s: s, - c: c, - logger: l, + s: s, + c: c, + diag: s.diag.WithContext(ctx...), } } @@ -197,6 +203,6 @@ func (h *handler) Handle(event alert.Event) { event.State.Time, event.Data.Result, ); err != nil { - h.logger.Println("E! failed to send event to VictorOps", err) + h.diag.Error("failed to send event", err) } } diff --git a/shift.go b/shift.go index b003c08c6..6902df715 100644 --- a/shift.go +++ b/shift.go @@ -2,7 +2,6 @@ package kapacitor import ( "errors" - "log" "time" "github.com/influxdata/kapacitor/edge" @@ -17,9 +16,9 @@ type ShiftNode struct { } // Create a new ShiftNode which shifts points and batches in time. 
-func newShiftNode(et *ExecutingTask, n *pipeline.ShiftNode, l *log.Logger) (*ShiftNode, error) { +func newShiftNode(et *ExecutingTask, n *pipeline.ShiftNode, d NodeDiagnostic) (*ShiftNode, error) { sn := &ShiftNode{ - node: node{Node: n, et: et, logger: l}, + node: node{Node: n, et: et, diag: d}, s: n, shift: n.Shift, } diff --git a/state_tracking.go b/state_tracking.go index cff6bb228..5f0cacfa6 100644 --- a/state_tracking.go +++ b/state_tracking.go @@ -2,7 +2,6 @@ package kapacitor import ( "fmt" - "log" "time" "github.com/influxdata/kapacitor/edge" @@ -69,8 +68,7 @@ func (g *stateTrackingGroup) BatchPoint(bp edge.BatchPointMessage) (edge.Message bp = bp.ShallowCopy() err := g.track(bp) if err != nil { - g.n.incrementErrorCount() - g.n.logger.Println("E! error while evaluating expression:", err) + g.n.diag.Error("error while evaluating expression", err) return nil, nil } return bp, nil @@ -84,8 +82,7 @@ func (g *stateTrackingGroup) Point(p edge.PointMessage) (edge.Message, error) { p = p.ShallowCopy() err := g.track(p) if err != nil { - g.n.incrementErrorCount() - g.n.logger.Println("E! error while evaluating expression:", err) + g.n.diag.Error("error while evaluating expression", err) return nil, nil } return p, nil @@ -132,7 +129,7 @@ func (sdt *stateDurationTracker) track(t time.Time, inState bool) interface{} { return float64(t.Sub(sdt.startTime)) / float64(sdt.sd.Unit) } -func newStateDurationNode(et *ExecutingTask, sd *pipeline.StateDurationNode, l *log.Logger) (*StateTrackingNode, error) { +func newStateDurationNode(et *ExecutingTask, sd *pipeline.StateDurationNode, d NodeDiagnostic) (*StateTrackingNode, error) { if sd.Lambda == nil { return nil, fmt.Errorf("nil expression passed to StateDurationNode") } @@ -142,7 +139,7 @@ func newStateDurationNode(et *ExecutingTask, sd *pipeline.StateDurationNode, l * return nil, err } n := &StateTrackingNode{ - node: node{Node: sd, et: et, logger: l}, + node: node{Node: sd, et: et, diag: d}, as: sd.As, newTracker: func() stateTracker { return &stateDurationTracker{sd: sd} }, expr: expr, @@ -170,7 +167,7 @@ func (sct *stateCountTracker) track(t time.Time, inState bool) interface{} { return sct.count } -func newStateCountNode(et *ExecutingTask, sc *pipeline.StateCountNode, l *log.Logger) (*StateTrackingNode, error) { +func newStateCountNode(et *ExecutingTask, sc *pipeline.StateCountNode, d NodeDiagnostic) (*StateTrackingNode, error) { if sc.Lambda == nil { return nil, fmt.Errorf("nil expression passed to StateCountNode") } @@ -180,7 +177,7 @@ func newStateCountNode(et *ExecutingTask, sc *pipeline.StateCountNode, l *log.Lo return nil, err } n := &StateTrackingNode{ - node: node{Node: sc, et: et, logger: l}, + node: node{Node: sc, et: et, diag: d}, as: sc.As, newTracker: func() stateTracker { return &stateCountTracker{} }, expr: expr, diff --git a/stats.go b/stats.go index ade34e2d0..87056a4b0 100644 --- a/stats.go +++ b/stats.go @@ -2,7 +2,6 @@ package kapacitor import ( "fmt" - "log" "sync" "time" @@ -20,14 +19,14 @@ type StatsNode struct { } // Create a new FromNode which filters data from a source. -func newStatsNode(et *ExecutingTask, n *pipeline.StatsNode, l *log.Logger) (*StatsNode, error) { +func newStatsNode(et *ExecutingTask, n *pipeline.StatsNode, d NodeDiagnostic) (*StatsNode, error) { // Lookup the executing node for stats. 
en := et.lookup[n.SourceNode.ID()] if en == nil { return nil, fmt.Errorf("no node found for %s", n.SourceNode.Name()) } sn := &StatsNode{ - node: node{Node: n, et: et, logger: l}, + node: node{Node: n, et: et, diag: d}, s: n, en: en, closing: make(chan struct{}), diff --git a/stream.go b/stream.go index 58196a46b..9dbdeb318 100644 --- a/stream.go +++ b/stream.go @@ -3,7 +3,6 @@ package kapacitor import ( "errors" "fmt" - "log" "github.com/influxdata/kapacitor/edge" "github.com/influxdata/kapacitor/models" @@ -18,9 +17,9 @@ type StreamNode struct { } // Create a new StreamNode which copies all data to children -func newStreamNode(et *ExecutingTask, n *pipeline.StreamNode, l *log.Logger) (*StreamNode, error) { +func newStreamNode(et *ExecutingTask, n *pipeline.StreamNode, d NodeDiagnostic) (*StreamNode, error) { sn := &StreamNode{ - node: node{Node: n, et: et, logger: l}, + node: node{Node: n, et: et, diag: d}, s: n, } sn.node.runF = sn.runSourceStream @@ -52,9 +51,9 @@ type FromNode struct { } // Create a new FromNode which filters data from a source. -func newFromNode(et *ExecutingTask, n *pipeline.FromNode, l *log.Logger) (*FromNode, error) { +func newFromNode(et *ExecutingTask, n *pipeline.FromNode, d NodeDiagnostic) (*FromNode, error) { sn := &FromNode{ - node: node{Node: n, et: et, logger: l}, + node: node{Node: n, et: et, diag: d}, s: n, db: n.Database, rp: n.RetentionPolicy, @@ -133,8 +132,7 @@ func (n *FromNode) matches(p edge.PointMessage) bool { } if n.expression != nil { if pass, err := EvalPredicate(n.expression, n.scopePool, p); err != nil { - n.incrementErrorCount() - n.logger.Println("E! error while evaluating WHERE expression:", err) + n.diag.Error("failed to evaluate WHERE expression", err) return false } else { return pass diff --git a/task.go b/task.go index aeb1ed045..426c9909b 100644 --- a/task.go +++ b/task.go @@ -4,15 +4,21 @@ import ( "bytes" "errors" "fmt" - "log" "math/rand" "sync" "time" "github.com/influxdata/kapacitor/edge" + "github.com/influxdata/kapacitor/keyvalue" "github.com/influxdata/kapacitor/pipeline" ) +type TaskDiagnostic interface { + WithNodeContext(node string) NodeDiagnostic + + Error(msg string, err error, ctx ...keyvalue.T) +} + // The type of a task type TaskType int @@ -107,7 +113,7 @@ type ExecutingTask struct { nodes []Node stopping chan struct{} wg sync.WaitGroup - logger *log.Logger + diag TaskDiagnostic // Mutex for throughput var tmu sync.RWMutex @@ -116,13 +122,13 @@ type ExecutingTask struct { // Create a new task from a defined kapacitor. func NewExecutingTask(tm *TaskMaster, t *Task) (*ExecutingTask, error) { - l := tm.LogService.NewLogger(fmt.Sprintf("[task:%s] ", t.ID), log.LstdFlags) + d := tm.diag.WithTaskContext(t.ID) et := &ExecutingTask{ tm: tm, Task: t, outputs: make(map[string]Output), lookup: make(map[pipeline.ID]Node), - logger: l, + diag: d, } err := et.link() if err != nil { @@ -158,11 +164,8 @@ func (et *ExecutingTask) link() error { // Walk Pipeline and create equivalent executing nodes err := et.Task.Pipeline.Walk(func(n pipeline.Node) error { - l := et.tm.LogService.NewLogger( - fmt.Sprintf("[%s:%s] ", et.Task.ID, n.Name()), - log.LstdFlags, - ) - en, err := et.createNode(n, l) + d := et.diag.WithNodeContext(n.Name()) + en, err := et.createNode(n, d) if err != nil { return err } @@ -441,70 +444,70 @@ func (et *ExecutingTask) calcThroughput() { } // Create a node from a given pipeline node. 
-func (et *ExecutingTask) createNode(p pipeline.Node, l *log.Logger) (n Node, err error) { +func (et *ExecutingTask) createNode(p pipeline.Node, d NodeDiagnostic) (n Node, err error) { switch t := p.(type) { case *pipeline.FromNode: - n, err = newFromNode(et, t, l) + n, err = newFromNode(et, t, d) case *pipeline.StreamNode: - n, err = newStreamNode(et, t, l) + n, err = newStreamNode(et, t, d) case *pipeline.BatchNode: - n, err = newBatchNode(et, t, l) + n, err = newBatchNode(et, t, d) case *pipeline.QueryNode: - n, err = newQueryNode(et, t, l) + n, err = newQueryNode(et, t, d) case *pipeline.WindowNode: - n, err = newWindowNode(et, t, l) + n, err = newWindowNode(et, t, d) case *pipeline.HTTPOutNode: - n, err = newHTTPOutNode(et, t, l) + n, err = newHTTPOutNode(et, t, d) case *pipeline.HTTPPostNode: - n, err = newHTTPPostNode(et, t, l) + n, err = newHTTPPostNode(et, t, d) case *pipeline.InfluxDBOutNode: - n, err = newInfluxDBOutNode(et, t, l) + n, err = newInfluxDBOutNode(et, t, d) case *pipeline.KapacitorLoopbackNode: - n, err = newKapacitorLoopbackNode(et, t, l) + n, err = newKapacitorLoopbackNode(et, t, d) case *pipeline.AlertNode: - n, err = newAlertNode(et, t, l) + n, err = newAlertNode(et, t, d) case *pipeline.GroupByNode: - n, err = newGroupByNode(et, t, l) + n, err = newGroupByNode(et, t, d) case *pipeline.UnionNode: - n, err = newUnionNode(et, t, l) + n, err = newUnionNode(et, t, d) case *pipeline.JoinNode: - n, err = newJoinNode(et, t, l) + n, err = newJoinNode(et, t, d) case *pipeline.FlattenNode: - n, err = newFlattenNode(et, t, l) + n, err = newFlattenNode(et, t, d) case *pipeline.EvalNode: - n, err = newEvalNode(et, t, l) + n, err = newEvalNode(et, t, d) case *pipeline.WhereNode: - n, err = newWhereNode(et, t, l) + n, err = newWhereNode(et, t, d) case *pipeline.SampleNode: - n, err = newSampleNode(et, t, l) + n, err = newSampleNode(et, t, d) case *pipeline.DerivativeNode: - n, err = newDerivativeNode(et, t, l) + n, err = newDerivativeNode(et, t, d) case *pipeline.UDFNode: - n, err = newUDFNode(et, t, l) + n, err = newUDFNode(et, t, d) case *pipeline.StatsNode: - n, err = newStatsNode(et, t, l) + n, err = newStatsNode(et, t, d) case *pipeline.ShiftNode: - n, err = newShiftNode(et, t, l) + n, err = newShiftNode(et, t, d) case *pipeline.NoOpNode: - n, err = newNoOpNode(et, t, l) + n, err = newNoOpNode(et, t, d) case *pipeline.InfluxQLNode: - n, err = newInfluxQLNode(et, t, l) + n, err = newInfluxQLNode(et, t, d) case *pipeline.LogNode: - n, err = newLogNode(et, t, l) + n, err = newLogNode(et, t, d) case *pipeline.DefaultNode: - n, err = newDefaultNode(et, t, l) + n, err = newDefaultNode(et, t, d) case *pipeline.DeleteNode: - n, err = newDeleteNode(et, t, l) + n, err = newDeleteNode(et, t, d) case *pipeline.CombineNode: - n, err = newCombineNode(et, t, l) + n, err = newCombineNode(et, t, d) case *pipeline.K8sAutoscaleNode: - n, err = newK8sAutoscaleNode(et, t, l) + n, err = newK8sAutoscaleNode(et, t, d) case *pipeline.SwarmAutoscaleNode: - n, err = newSwarmAutoscaleNode(et, t, l) + n, err = newSwarmAutoscaleNode(et, t, d) case *pipeline.StateDurationNode: - n, err = newStateDurationNode(et, t, l) + n, err = newStateDurationNode(et, t, d) case *pipeline.StateCountNode: - n, err = newStateCountNode(et, t, l) + n, err = newStateCountNode(et, t, d) default: return nil, fmt.Errorf("unknown pipeline node type %T", p) } @@ -551,7 +554,7 @@ func (et *ExecutingTask) runSnapshotter() { case <-ticker.C: snapshot, err := et.Snapshot() if err != nil { - et.logger.Println("E! 
failed to snapshot task", et.Task.ID, err) + et.diag.Error("failed to snapshot task", err) break } size := 0 @@ -562,7 +565,7 @@ func (et *ExecutingTask) runSnapshotter() { if size > 0 { err = et.tm.TaskStore.SaveSnapshot(et.Task.ID, snapshot) if err != nil { - et.logger.Println("E! failed to save task snapshot", et.Task.ID, err) + et.diag.Error("failed to save task snapshot", err) } } case <-et.stopping: diff --git a/task_master.go b/task_master.go index d1962568f..eff1b668f 100644 --- a/task_master.go +++ b/task_master.go @@ -13,6 +13,7 @@ import ( "github.com/influxdata/kapacitor/edge" "github.com/influxdata/kapacitor/expvar" "github.com/influxdata/kapacitor/influxdb" + "github.com/influxdata/kapacitor/keyvalue" "github.com/influxdata/kapacitor/models" "github.com/influxdata/kapacitor/pipeline" "github.com/influxdata/kapacitor/server/vars" @@ -48,10 +49,28 @@ type LogService interface { NewLogger(prefix string, flag int) *log.Logger } +type Diagnostic interface { + WithTaskContext(task string) TaskDiagnostic + WithTaskMasterContext(tm string) Diagnostic + WithNodeContext(node string) NodeDiagnostic + WithEdgeContext(task, parent, child string) EdgeDiagnostic + + TaskMasterOpened() + TaskMasterClosed() + + StartingTask(id string) + StartedTask(id string) + + StoppedTask(id string) + StoppedTaskWithError(id string, err error) + + TaskMasterDot(d string) +} + type UDFService interface { List() []string Info(name string) (udf.Info, bool) - Create(name, taskID, nodeID string, l *log.Logger, abortCallback func()) (udf.Interface, error) + Create(name, taskID, nodeID string, d udf.Diagnostic, abortCallback func()) (udf.Interface, error) } var ErrTaskMasterClosed = errors.New("TaskMaster is closed") @@ -91,58 +110,58 @@ type TaskMaster struct { SMTPService interface { Global() bool StateChangesOnly() bool - Handler(smtp.HandlerConfig, *log.Logger) alert.Handler + Handler(smtp.HandlerConfig, ...keyvalue.T) alert.Handler } MQTTService interface { - Handler(mqtt.HandlerConfig, *log.Logger) alert.Handler + Handler(mqtt.HandlerConfig, ...keyvalue.T) alert.Handler } OpsGenieService interface { Global() bool - Handler(opsgenie.HandlerConfig, *log.Logger) alert.Handler + Handler(opsgenie.HandlerConfig, ...keyvalue.T) alert.Handler } VictorOpsService interface { Global() bool - Handler(victorops.HandlerConfig, *log.Logger) alert.Handler + Handler(victorops.HandlerConfig, ...keyvalue.T) alert.Handler } PagerDutyService interface { Global() bool - Handler(pagerduty.HandlerConfig, *log.Logger) alert.Handler + Handler(pagerduty.HandlerConfig, ...keyvalue.T) alert.Handler } PushoverService interface { - Handler(pushover.HandlerConfig, *log.Logger) alert.Handler + Handler(pushover.HandlerConfig, ...keyvalue.T) alert.Handler } HTTPPostService interface { - Handler(httppost.HandlerConfig, *log.Logger) alert.Handler + Handler(httppost.HandlerConfig, ...keyvalue.T) alert.Handler Endpoint(string) (*httppost.Endpoint, bool) } SlackService interface { Global() bool StateChangesOnly() bool - Handler(slack.HandlerConfig, *log.Logger) alert.Handler + Handler(slack.HandlerConfig, ...keyvalue.T) alert.Handler } SNMPTrapService interface { - Handler(snmptrap.HandlerConfig, *log.Logger) (alert.Handler, error) + Handler(snmptrap.HandlerConfig, ...keyvalue.T) (alert.Handler, error) } TelegramService interface { Global() bool StateChangesOnly() bool - Handler(telegram.HandlerConfig, *log.Logger) alert.Handler + Handler(telegram.HandlerConfig, ...keyvalue.T) alert.Handler } HipChatService interface { Global() bool 
StateChangesOnly() bool - Handler(hipchat.HandlerConfig, *log.Logger) alert.Handler + Handler(hipchat.HandlerConfig, ...keyvalue.T) alert.Handler } AlertaService interface { DefaultHandlerConfig() alerta.HandlerConfig - Handler(alerta.HandlerConfig, *log.Logger) (alert.Handler, error) + Handler(alerta.HandlerConfig, ...keyvalue.T) (alert.Handler, error) } SensuService interface { - Handler(sensu.HandlerConfig, *log.Logger) (alert.Handler, error) + Handler(sensu.HandlerConfig, ...keyvalue.T) (alert.Handler, error) } TalkService interface { - Handler(*log.Logger) alert.Handler + Handler(...keyvalue.T) alert.Handler } TimingService interface { NewTimer(timer.Setter) timer.Timer @@ -153,7 +172,6 @@ type TaskMaster struct { SwarmService interface { Client(string) (swarm.Client, error) } - LogService LogService Commander command.Commander @@ -186,7 +204,7 @@ type TaskMaster struct { // DeleteHooks for tasks deleteHooks map[string][]deleteHook - logger *log.Logger + diag Diagnostic closed bool drained bool @@ -201,7 +219,7 @@ type forkKey struct { } // Create a new Executor with a given clock. -func NewTaskMaster(id string, info vars.Infoer, l LogService) *TaskMaster { +func NewTaskMaster(id string, info vars.Infoer, d Diagnostic) *TaskMaster { return &TaskMaster{ id: id, forks: make(map[forkKey]map[string]edge.Edge), @@ -210,17 +228,17 @@ func NewTaskMaster(id string, info vars.Infoer, l LogService) *TaskMaster { batches: make(map[string][]BatchCollector), tasks: make(map[string]*ExecutingTask), deleteHooks: make(map[string][]deleteHook), - LogService: l, ServerInfo: info, - logger: l.NewLogger(fmt.Sprintf("[task_master:%s] ", id), log.LstdFlags), - closed: true, - TimingService: noOpTimingService{}, + diag: d.WithTaskMasterContext(id), + + closed: true, + TimingService: noOpTimingService{}, } } // Returns a new TaskMaster instance with the same services as the current one. func (tm *TaskMaster) New(id string) *TaskMaster { - n := NewTaskMaster(id, tm.ServerInfo, tm.LogService) + n := NewTaskMaster(id, tm.ServerInfo, tm.diag) n.DefaultRetentionPolicy = tm.DefaultRetentionPolicy n.HTTPDService = tm.HTTPDService n.TaskStore = tm.TaskStore @@ -264,7 +282,7 @@ func (tm *TaskMaster) Open() (err error) { tm.closed = true return } - tm.logger.Println("I! opened") + tm.diag.TaskMasterOpened() return } @@ -293,7 +311,7 @@ func (tm *TaskMaster) Close() error { for _, et := range tm.tasks { _ = tm.stopTask(et.Task.ID) } - tm.logger.Println("I! closed") + tm.diag.TaskMasterClosed() return nil } @@ -432,7 +450,7 @@ func (tm *TaskMaster) StartTask(t *Task) (*ExecutingTask, error) { if tm.closed { return nil, errors.New("task master is closed cannot start a task") } - tm.logger.Println("D! Starting task:", t.ID) + tm.diag.StartingTask(t.ID) et, err := NewExecutingTask(tm, t) if err != nil { return nil, err @@ -453,7 +471,8 @@ func (tm *TaskMaster) StartTask(t *Task) (*ExecutingTask, error) { } ins = make([]edge.StatsEdge, count) for i := 0; i < count; i++ { - in := newEdge(t.ID, "batch", fmt.Sprintf("batch%d", i), pipeline.BatchEdge, defaultEdgeBufferSize, tm.LogService) + d := tm.diag.WithEdgeContext(t.ID, "batch", fmt.Sprintf("batch%d", i)) + in := newEdge(t.ID, "batch", fmt.Sprintf("batch%d", i), pipeline.BatchEdge, defaultEdgeBufferSize, d) ins[i] = in tm.batches[t.ID] = append(tm.batches[t.ID], &batchCollector{edge: in}) } @@ -473,8 +492,8 @@ func (tm *TaskMaster) StartTask(t *Task) (*ExecutingTask, error) { } tm.tasks[et.Task.ID] = et - tm.logger.Println("I! 
Started task:", t.ID) - tm.logger.Println("D!", string(t.Dot())) + tm.diag.StartedTask(t.ID) + tm.diag.TaskMasterDot(string(t.Dot())) return et, nil } @@ -515,9 +534,9 @@ func (tm *TaskMaster) stopTask(id string) (err error) { err = et.stop() if err != nil { - tm.logger.Println("E! Stopped task:", id, err) + tm.diag.StoppedTaskWithError(id, err) } else { - tm.logger.Println("I! Stopped task:", id) + tm.diag.StoppedTask(id) } } return @@ -576,7 +595,8 @@ func (tm *TaskMaster) stream(name string) (StreamCollector, error) { if tm.closed { return nil, ErrTaskMasterClosed } - in := newEdge(fmt.Sprintf("task_master:%s", tm.id), name, "stream", pipeline.StreamEdge, defaultEdgeBufferSize, tm.LogService) + d := tm.diag.WithEdgeContext(fmt.Sprintf("task_master:%s", tm.id), name, "stream") + in := newEdge(fmt.Sprintf("task_master:%s", tm.id), name, "stream", pipeline.StreamEdge, defaultEdgeBufferSize, d) se := &streamEdge{edge: in} tm.wg.Add(1) go func() { @@ -755,7 +775,8 @@ func (tm *TaskMaster) newFork(taskName string, dbrps []DBRP, measurements []stri return nil, ErrTaskMasterClosed } - e := newEdge(taskName, "stream", "stream0", pipeline.StreamEdge, defaultEdgeBufferSize, tm.LogService) + d := tm.diag.WithEdgeContext(taskName, "stream", "stream0") + e := newEdge(taskName, "stream", "stream0", pipeline.StreamEdge, defaultEdgeBufferSize, d) for _, key := range forkKeys(dbrps, measurements) { tm.taskToForkKeys[taskName] = append(tm.taskToForkKeys[taskName], key) diff --git a/tick/eval.go b/tick/eval.go index b2f94b56c..d56cd25f7 100644 --- a/tick/eval.go +++ b/tick/eval.go @@ -4,12 +4,9 @@ import ( "errors" "fmt" goast "go/ast" - "log" - "os" "reflect" "runtime" "strings" - "sync" "time" "unicode" "unicode/utf8" @@ -18,20 +15,6 @@ import ( "github.com/influxdata/kapacitor/tick/stateful" ) -var mu sync.Mutex -var logger = log.New(os.Stderr, "[tick] ", log.LstdFlags) - -func getLogger() *log.Logger { - mu.Lock() - defer mu.Unlock() - return logger -} -func SetLogger(l *log.Logger) { - mu.Lock() - defer mu.Unlock() - logger = l -} - type unboundFunc func(obj interface{}) (interface{}, error) type Var struct { diff --git a/udf.go b/udf.go index 1cf1063e7..be51784f6 100644 --- a/udf.go +++ b/udf.go @@ -4,7 +4,6 @@ import ( "bufio" "fmt" "io" - "log" "net" "sync" "time" @@ -31,9 +30,9 @@ type UDFNode struct { } // Create a new UDFNode that sends incoming data to child udf -func newUDFNode(et *ExecutingTask, n *pipeline.UDFNode, l *log.Logger) (*UDFNode, error) { +func newUDFNode(et *ExecutingTask, n *pipeline.UDFNode, d NodeDiagnostic) (*UDFNode, error) { un := &UDFNode{ - node: node{Node: n, et: et, logger: l}, + node: node{Node: n, et: et, diag: d}, u: n, aborted: make(chan struct{}), } @@ -42,7 +41,7 @@ func newUDFNode(et *ExecutingTask, n *pipeline.UDFNode, l *log.Logger) (*UDFNode n.UDFName, et.Task.ID, n.Name(), - l, + d, un.abortedCallback, ) if err != nil { @@ -161,7 +160,7 @@ type UDFProcess struct { mu sync.Mutex - logger *log.Logger + diag udf.Diagnostic timeout time.Duration abortCallback func() } @@ -170,7 +169,7 @@ func NewUDFProcess( taskName, nodeName string, commander command.Commander, cmdSpec command.Spec, - l *log.Logger, + d udf.Diagnostic, timeout time.Duration, abortCallback func(), ) *UDFProcess { @@ -178,8 +177,8 @@ func NewUDFProcess( taskName: taskName, nodeName: nodeName, commander: commander, + diag: d, cmdSpec: cmdSpec, - logger: l, timeout: timeout, abortCallback: abortCallback, } @@ -220,7 +219,7 @@ func (p *UDFProcess) Open() error { p.nodeName, outBuf, stdin, - p.logger, 
+ p.diag, p.timeout, p.abortCallback, cmd.Kill, @@ -266,7 +265,7 @@ func (p *UDFProcess) logStdErr() { defer p.logStdErrGroup.Done() scanner := bufio.NewScanner(p.stderr) for scanner.Scan() { - p.logger.Println("I!P", scanner.Text()) + p.diag.UDFLog(scanner.Text()) } } @@ -285,7 +284,7 @@ type UDFSocket struct { server *udf.Server socket Socket - logger *log.Logger + diag udf.Diagnostic timeout time.Duration abortCallback func() } @@ -300,7 +299,7 @@ type Socket interface { func NewUDFSocket( taskName, nodeName string, socket Socket, - l *log.Logger, + d udf.Diagnostic, timeout time.Duration, abortCallback func(), ) *UDFSocket { @@ -308,7 +307,7 @@ func NewUDFSocket( taskName: taskName, nodeName: nodeName, socket: socket, - logger: l, + diag: d, timeout: timeout, abortCallback: abortCallback, } @@ -328,7 +327,7 @@ func (s *UDFSocket) Open() error { s.nodeName, outBuf, in, - s.logger, + s.diag, s.timeout, s.abortCallback, func() { s.socket.Close() }, diff --git a/udf/server.go b/udf/server.go index 27a3a2123..d6da85ae9 100644 --- a/udf/server.go +++ b/udf/server.go @@ -4,17 +4,23 @@ import ( "errors" "fmt" "io" - "log" "sync" "time" "github.com/influxdata/kapacitor/edge" + "github.com/influxdata/kapacitor/keyvalue" "github.com/influxdata/kapacitor/models" "github.com/influxdata/kapacitor/udf/agent" ) var ErrServerStopped = errors.New("server already stopped") +type Diagnostic interface { + Error(msg string, err error, ctx ...keyvalue.T) + + UDFLog(msg string) +} + // Server provides an implementation for the core communication with UDFs. // The Server provides only a partial implementation of udf.Interface as // it is expected that setup and teardown will be necessary to create a Server. @@ -75,8 +81,8 @@ type Server struct { // Group for waiting on read/write goroutines ioGroup sync.WaitGroup - mu sync.Mutex - logger *log.Logger + mu sync.Mutex + diag Diagnostic responseBuf []byte @@ -94,7 +100,7 @@ func NewServer( taskID, nodeID string, in agent.ByteReadReader, out io.WriteCloser, - l *log.Logger, + d Diagnostic, timeout time.Duration, abortCallback func(), killCallback func(), @@ -104,7 +110,7 @@ func NewServer( nodeID: nodeID, in: in, out: out, - logger: l, + diag: d, requests: make(chan *agent.Request), keepalive: make(chan int64, 1), keepaliveTimeout: timeout, @@ -347,7 +353,7 @@ func (s *Server) doResponse(response *agent.Response, respC chan *agent.Response select { case respC <- response: default: - s.logger.Printf("E! received %T without requesting it", response.Message) + s.diag.Error("received message without requesting it", fmt.Errorf("did not expect %T message", response.Message)) } } @@ -398,7 +404,7 @@ func (s *Server) watchKeepalive() { default: // We failed to abort just kill it. if s.killCallback != nil { - s.logger.Println("E! process not responding! 
killing") + s.diag.Error("killing process", errors.New("process not responding")) s.killCallback() } } @@ -425,7 +431,7 @@ func (s *Server) watchKeepalive() { break } err = fmt.Errorf("keepalive timedout, last keepalive received was: %s", time.Unix(0, last)) - s.logger.Println("E!", err) + s.diag.Error("encountered error", err) return case <-s.stopping: return @@ -688,7 +694,7 @@ func (s *Server) handleResponse(response *agent.Response) error { case *agent.Response_Restore: s.doResponse(response, s.restoreResponse) case *agent.Response_Error: - s.logger.Println("E!", msg.Error.Error) + s.diag.Error("received error message", errors.New(msg.Error.Error)) return errors.New(msg.Error.Error) case *agent.Response_Begin: s.begin = msg.Begin diff --git a/udf/server_test.go b/udf/server_test.go index 38ddaa5b3..b88b8bfc6 100644 --- a/udf/server_test.go +++ b/udf/server_test.go @@ -2,23 +2,34 @@ package udf_test import ( "errors" - "log" - "os" + "io/ioutil" "reflect" "testing" "time" + "github.com/influxdata/kapacitor" "github.com/influxdata/kapacitor/edge" "github.com/influxdata/kapacitor/models" + "github.com/influxdata/kapacitor/services/diagnostic" "github.com/influxdata/kapacitor/udf" "github.com/influxdata/kapacitor/udf/agent" udf_test "github.com/influxdata/kapacitor/udf/test" ) +var diagService *diagnostic.Service + +var kapacitorDiag kapacitor.Diagnostic + +func init() { + diagService = diagnostic.NewService(diagnostic.NewConfig(), ioutil.Discard, ioutil.Discard) + diagService.Open() + kapacitorDiag = diagService.NewKapacitorHandler() +} + func TestUDF_StartStop(t *testing.T) { u := udf_test.NewIO() - l := log.New(os.Stderr, "[TestUDF_StartStop] ", log.LstdFlags) - s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), l, 0, nil, nil) + d := kapacitorDiag.WithNodeContext("TestUDF_StartStop") + s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), d, 0, nil, nil) s.Start() @@ -34,8 +45,8 @@ func TestUDF_StartStop(t *testing.T) { func TestUDF_StartInitStop(t *testing.T) { u := udf_test.NewIO() - l := log.New(os.Stderr, "[TestUDF_StartStop] ", log.LstdFlags) - s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), l, 0, nil, nil) + d := kapacitorDiag.WithNodeContext("TestUDF_StartStop") + s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), d, 0, nil, nil) go func() { req := <-u.Requests _, ok := req.Message.(*agent.Request_Init) @@ -70,8 +81,8 @@ func TestUDF_StartInitStop(t *testing.T) { func TestUDF_StartInitAbort(t *testing.T) { u := udf_test.NewIO() - l := log.New(os.Stderr, "[TestUDF_StartInfoAbort] ", log.LstdFlags) - s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), l, 0, nil, nil) + d := kapacitorDiag.WithNodeContext("TestUDF_StartInfoAbort") + s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), d, 0, nil, nil) s.Start() expErr := errors.New("explicit abort") go func() { @@ -91,8 +102,8 @@ func TestUDF_StartInitAbort(t *testing.T) { func TestUDF_StartInfoStop(t *testing.T) { u := udf_test.NewIO() - l := log.New(os.Stderr, "[TestUDF_StartInfoStop] ", log.LstdFlags) - s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), l, 0, nil, nil) + d := kapacitorDiag.WithNodeContext("TestUDF_StartInfoStop") + s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), d, 0, nil, nil) go func() { req := <-u.Requests _, ok := req.Message.(*agent.Request_Info) @@ -133,8 +144,8 @@ func TestUDF_StartInfoStop(t *testing.T) { func TestUDF_StartInfoAbort(t *testing.T) { u := udf_test.NewIO() - l := log.New(os.Stderr, "[TestUDF_StartInfoAbort] ", 
diff --git a/udf/server_test.go b/udf/server_test.go
index 38ddaa5b3..b88b8bfc6 100644
--- a/udf/server_test.go
+++ b/udf/server_test.go
@@ -2,23 +2,34 @@ package udf_test
 
 import (
 	"errors"
-	"log"
-	"os"
+	"io/ioutil"
 	"reflect"
 	"testing"
 	"time"
 
+	"github.com/influxdata/kapacitor"
 	"github.com/influxdata/kapacitor/edge"
 	"github.com/influxdata/kapacitor/models"
+	"github.com/influxdata/kapacitor/services/diagnostic"
 	"github.com/influxdata/kapacitor/udf"
 	"github.com/influxdata/kapacitor/udf/agent"
 	udf_test "github.com/influxdata/kapacitor/udf/test"
 )
 
+var diagService *diagnostic.Service
+
+var kapacitorDiag kapacitor.Diagnostic
+
+func init() {
+	diagService = diagnostic.NewService(diagnostic.NewConfig(), ioutil.Discard, ioutil.Discard)
+	diagService.Open()
+	kapacitorDiag = diagService.NewKapacitorHandler()
+}
+
 func TestUDF_StartStop(t *testing.T) {
 	u := udf_test.NewIO()
-	l := log.New(os.Stderr, "[TestUDF_StartStop] ", log.LstdFlags)
-	s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), l, 0, nil, nil)
+	d := kapacitorDiag.WithNodeContext("TestUDF_StartStop")
+	s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), d, 0, nil, nil)
 
 	s.Start()
 
@@ -34,8 +45,8 @@ func TestUDF_StartStop(t *testing.T) {
 
 func TestUDF_StartInitStop(t *testing.T) {
 	u := udf_test.NewIO()
-	l := log.New(os.Stderr, "[TestUDF_StartStop] ", log.LstdFlags)
-	s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), l, 0, nil, nil)
+	d := kapacitorDiag.WithNodeContext("TestUDF_StartStop")
+	s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), d, 0, nil, nil)
 	go func() {
 		req := <-u.Requests
 		_, ok := req.Message.(*agent.Request_Init)
@@ -70,8 +81,8 @@ func TestUDF_StartInitStop(t *testing.T) {
 
 func TestUDF_StartInitAbort(t *testing.T) {
 	u := udf_test.NewIO()
-	l := log.New(os.Stderr, "[TestUDF_StartInfoAbort] ", log.LstdFlags)
-	s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), l, 0, nil, nil)
+	d := kapacitorDiag.WithNodeContext("TestUDF_StartInfoAbort")
+	s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), d, 0, nil, nil)
 	s.Start()
 	expErr := errors.New("explicit abort")
 	go func() {
@@ -91,8 +102,8 @@ func TestUDF_StartInitAbort(t *testing.T) {
 
 func TestUDF_StartInfoStop(t *testing.T) {
 	u := udf_test.NewIO()
-	l := log.New(os.Stderr, "[TestUDF_StartInfoStop] ", log.LstdFlags)
-	s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), l, 0, nil, nil)
+	d := kapacitorDiag.WithNodeContext("TestUDF_StartInfoStop")
+	s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), d, 0, nil, nil)
 	go func() {
 		req := <-u.Requests
 		_, ok := req.Message.(*agent.Request_Info)
@@ -133,8 +144,8 @@ func TestUDF_StartInfoStop(t *testing.T) {
 
 func TestUDF_StartInfoAbort(t *testing.T) {
 	u := udf_test.NewIO()
-	l := log.New(os.Stderr, "[TestUDF_StartInfoAbort] ", log.LstdFlags)
-	s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), l, 0, nil, nil)
+	d := kapacitorDiag.WithNodeContext("TestUDF_StartInfoAbort")
+	s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), d, 0, nil, nil)
 	s.Start()
 	expErr := errors.New("explicit abort")
 	go func() {
@@ -155,8 +166,8 @@ func TestUDF_StartInfoAbort(t *testing.T) {
 func TestUDF_Keepalive(t *testing.T) {
 	t.Parallel()
 	u := udf_test.NewIO()
-	l := log.New(os.Stderr, "[TestUDF_Keepalive] ", log.LstdFlags)
-	s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), l, time.Millisecond*100, nil, nil)
+	d := kapacitorDiag.WithNodeContext("TestUDF_Keepalive")
+	s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), d, time.Millisecond*100, nil, nil)
 	s.Start()
 	s.Init(nil)
 	req := <-u.Requests
@@ -195,8 +206,8 @@ func TestUDF_MissedKeepalive(t *testing.T) {
 	}
 
 	u := udf_test.NewIO()
-	l := log.New(os.Stderr, "[TestUDF_MissedKeepalive] ", log.LstdFlags)
-	s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), l, time.Millisecond*100, aborted, nil)
+	d := kapacitorDiag.WithNodeContext("TestUDF_MissedKeepalive")
+	s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), d, time.Millisecond*100, aborted, nil)
 	s.Start()
 
 	// Since the keepalive is missed, the process should abort on its own.
@@ -229,8 +240,8 @@ func TestUDF_KillCallBack(t *testing.T) {
 	}
 
 	u := udf_test.NewIO()
-	l := log.New(os.Stderr, "[TestUDF_MissedKeepalive] ", log.LstdFlags)
-	s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), l, timeout, aborted, kill)
+	d := kapacitorDiag.WithNodeContext("TestUDF_MissedKeepalive")
+	s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), d, timeout, aborted, kill)
 	s.Start()
 
 	// Since the keepalive is missed, the process should abort on its own.
@@ -258,8 +269,8 @@ func TestUDF_MissedKeepaliveInit(t *testing.T) {
 	}
 
 	u := udf_test.NewIO()
-	l := log.New(os.Stderr, "[TestUDF_MissedKeepaliveInit] ", log.LstdFlags)
-	s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), l, time.Millisecond*100, aborted, nil)
+	d := kapacitorDiag.WithNodeContext("TestUDF_MissedKeepaliveInit")
+	s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), d, time.Millisecond*100, aborted, nil)
 	s.Start()
 	s.Init(nil)
 
@@ -286,8 +297,8 @@ func TestUDF_MissedKeepaliveInfo(t *testing.T) {
 	}
 
 	u := udf_test.NewIO()
-	l := log.New(os.Stderr, "[TestUDF_MissedKeepaliveInfo] ", log.LstdFlags)
-	s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), l, time.Millisecond*100, aborted, nil)
+	d := kapacitorDiag.WithNodeContext("TestUDF_MissedKeepaliveInfo")
+	s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), d, time.Millisecond*100, aborted, nil)
 	s.Start()
 	s.Info()
 
@@ -308,8 +319,8 @@ func TestUDF_MissedKeepaliveInfo(t *testing.T) {
 
 func TestUDF_SnapshotRestore(t *testing.T) {
 	u := udf_test.NewIO()
-	l := log.New(os.Stderr, "[TestUDF_SnapshotRestore] ", log.LstdFlags)
-	s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), l, 0, nil, nil)
+	d := kapacitorDiag.WithNodeContext("TestUDF_SnapshotRestore")
+	s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), d, 0, nil, nil)
 	go func() {
 		// Init
 		req := <-u.Requests
@@ -377,8 +388,8 @@ func TestUDF_SnapshotRestore(t *testing.T) {
 }
 
 func TestUDF_StartInitPointStop(t *testing.T) {
 	u := udf_test.NewIO()
-	l := log.New(os.Stderr, "[TestUDF_StartPointStop] ", log.LstdFlags)
-	s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), l, 0, nil, nil)
+	d := kapacitorDiag.WithNodeContext("TestUDF_StartPointStop")
+	s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), d, 0, nil, nil)
 	go func() {
 		req := <-u.Requests
 		_, ok := req.Message.(*agent.Request_Init)
@@ -439,8 +450,8 @@ func TestUDF_StartInitPointStop(t *testing.T) {
 }
 
 func TestUDF_StartInitBatchStop(t *testing.T) {
 	u := udf_test.NewIO()
-	l := log.New(os.Stderr, "[TestUDF_StartPointStop] ", log.LstdFlags)
-	s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), l, 0, nil, nil)
+	d := kapacitorDiag.WithNodeContext("TestUDF_StartPointStop")
+	s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), d, 0, nil, nil)
 	go func() {
 		req := <-u.Requests
 		_, ok := req.Message.(*agent.Request_Init)
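// NOTE (illustrative, not part of the patch): the init() above routes all
// diagnostic output to ioutil.Discard. A test that needed to assert on what
// was logged could pass an in-memory buffer instead, assuming NewService
// accepts plain io.Writers, as the ioutil.Discard arguments suggest; the node
// context name below is hypothetical.
//
//	var buf bytes.Buffer
//	ds := diagnostic.NewService(diagnostic.NewConfig(), &buf, &buf)
//	ds.Open()
//	d := ds.NewKapacitorHandler().WithNodeContext("TestUDF_Example")
//	s := udf.NewServer("testTask", "testNode", u.Out(), u.In(), d, 0, nil, nil)
//	// ... drive s as in the tests above, then inspect buf.String().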
diff --git a/udf/test/test_udf.go b/udf/test/test_udf.go
index f59460f53..8e6e65700 100644
--- a/udf/test/test_udf.go
+++ b/udf/test/test_udf.go
@@ -3,12 +3,24 @@ package udf_test
 import (
 	"bufio"
 	"io"
-	"log"
+	"io/ioutil"
 
+	"github.com/influxdata/kapacitor"
+	"github.com/influxdata/kapacitor/services/diagnostic"
 	"github.com/influxdata/kapacitor/udf"
 	"github.com/influxdata/kapacitor/udf/agent"
 )
 
+var diagService *diagnostic.Service
+
+var kapacitorDiag kapacitor.Diagnostic
+
+func init() {
+	diagService = diagnostic.NewService(diagnostic.NewConfig(), ioutil.Discard, ioutil.Discard)
+	diagService.Open()
+	kapacitorDiag = diagService.NewKapacitorHandler()
+}
+
 // IO implements a UDF process communication.
 // Connect up to UDF server via In/Out pipes.
 // Use Requests/Responses channels for reading
@@ -124,21 +136,21 @@ type UDF struct {
 	nodeID string
 	*udf.Server
 
-	uio    *IO
-	logger *log.Logger
+	uio  *IO
+	diag udf.Diagnostic
 }
 
-func New(taskID, nodeID string, uio *IO, l *log.Logger) *UDF {
+func New(taskID, nodeID string, uio *IO, d udf.Diagnostic) *UDF {
 	return &UDF{
 		taskID: taskID,
 		nodeID: nodeID,
 		uio:    uio,
-		logger: l,
+		diag:   d,
 	}
 }
 
 func (u *UDF) Open() error {
-	u.Server = udf.NewServer(u.taskID, u.nodeID, u.uio.Out(), u.uio.In(), u.logger, 0, nil, nil)
+	u.Server = udf.NewServer(u.taskID, u.nodeID, u.uio.Out(), u.uio.In(), u.diag, 0, nil, nil)
 	return u.Server.Start()
 }
diff --git a/udf_test.go b/udf_test.go
index 4bb8c5f41..dac7d8be9 100644
--- a/udf_test.go
+++ b/udf_test.go
@@ -2,10 +2,8 @@ package kapacitor_test
 
 import (
 	"bytes"
-	"fmt"
 	"io"
-	"log"
-	"os"
+	"io/ioutil"
 	"reflect"
 	"testing"
 	"time"
@@ -14,23 +12,34 @@ import (
 	"github.com/influxdata/kapacitor/command"
 	"github.com/influxdata/kapacitor/edge"
 	"github.com/influxdata/kapacitor/models"
+	"github.com/influxdata/kapacitor/services/diagnostic"
 	"github.com/influxdata/kapacitor/udf"
 	"github.com/influxdata/kapacitor/udf/agent"
 	udf_test "github.com/influxdata/kapacitor/udf/test"
 )
 
+var diagService *diagnostic.Service
+
+var kapacitorDiag kapacitor.Diagnostic
+
+func init() {
+	diagService = diagnostic.NewService(diagnostic.NewConfig(), ioutil.Discard, ioutil.Discard)
+	diagService.Open()
+	kapacitorDiag = diagService.NewKapacitorHandler()
+}
+
 func newUDFSocket(name string) (*kapacitor.UDFSocket, *udf_test.IO) {
 	uio := udf_test.NewIO()
-	l := log.New(os.Stderr, fmt.Sprintf("[%s] ", name), log.LstdFlags)
-	u := kapacitor.NewUDFSocket(name, "testNode", newTestSocket(uio), l, 0, nil)
+	d := kapacitorDiag.WithNodeContext(name)
+	u := kapacitor.NewUDFSocket(name, "testNode", newTestSocket(uio), d, 0, nil)
 	return u, uio
 }
 
 func newUDFProcess(name string) (*kapacitor.UDFProcess, *udf_test.IO) {
 	uio := udf_test.NewIO()
 	cmd := newTestCommander(uio)
-	l := log.New(os.Stderr, fmt.Sprintf("[%s] ", name), log.LstdFlags)
-	u := kapacitor.NewUDFProcess(name, "testNode", cmd, command.Spec{}, l, 0, nil)
+	d := kapacitorDiag.WithNodeContext(name)
+	u := kapacitor.NewUDFProcess(name, "testNode", cmd, command.Spec{}, d, 0, nil)
 	return u, uio
 }
diff --git a/union.go b/union.go
index 23650c75d..7ca5e75d9 100644
--- a/union.go
+++ b/union.go
@@ -1,7 +1,6 @@
 package kapacitor
 
 import (
-	"log"
 	"time"
 
 	"github.com/influxdata/kapacitor/edge"
@@ -27,10 +26,10 @@ type timeMessage interface {
 
 // Create a new UnionNode which combines all parent data streams into a single stream.
 // No transformation of any kind is performed.
-func newUnionNode(et *ExecutingTask, n *pipeline.UnionNode, l *log.Logger) (*UnionNode, error) {
+func newUnionNode(et *ExecutingTask, n *pipeline.UnionNode, d NodeDiagnostic) (*UnionNode, error) {
 	un := &UnionNode{
 		u:      n,
-		node:   node{Node: n, et: et, logger: l},
+		node:   node{Node: n, et: et, diag: d},
 		rename: n.Rename,
 	}
 	un.node.runF = un.runUnion
diff --git a/where.go b/where.go
index 634d0684d..ef3d87bdf 100644
--- a/where.go
+++ b/where.go
@@ -3,7 +3,6 @@ package kapacitor
 import (
 	"errors"
 	"fmt"
-	"log"
 
 	"github.com/influxdata/kapacitor/edge"
 	"github.com/influxdata/kapacitor/pipeline"
@@ -21,9 +20,9 @@ type WhereNode struct {
 }
 
 // Create a new WhereNode which filters down the batch or stream by a condition
-func newWhereNode(et *ExecutingTask, n *pipeline.WhereNode, l *log.Logger) (wn *WhereNode, err error) {
+func newWhereNode(et *ExecutingTask, n *pipeline.WhereNode, d NodeDiagnostic) (wn *WhereNode, err error) {
 	wn = &WhereNode{
-		node: node{Node: n, et: et, logger: l},
+		node: node{Node: n, et: et, diag: d},
 		w:    n,
 	}
 
@@ -91,8 +90,7 @@ func (g *whereGroup) Point(p edge.PointMessage) (edge.Message, error) {
 func (g *whereGroup) doWhere(p edge.FieldsTagsTimeGetterMessage) (edge.Message, error) {
 	pass, err := EvalPredicate(g.expr, g.n.scopePool, p)
 	if err != nil {
-		g.n.incrementErrorCount()
-		g.n.logger.Println("E! error while evaluating expression:", err)
+		g.n.diag.Error("error while evaluating expression", err)
 		return nil, nil
 	}
 	if pass {
diff --git a/window.go b/window.go
index 3031eaab2..3eca87bc7 100644
--- a/window.go
+++ b/window.go
@@ -3,7 +3,6 @@ package kapacitor
 import (
 	"errors"
 	"fmt"
-	"log"
 	"time"
 
 	"github.com/influxdata/kapacitor/edge"
@@ -17,13 +16,13 @@ type WindowNode struct {
 }
 
 // Create a new WindowNode, which windows data for a period of time and emits the window.
-func newWindowNode(et *ExecutingTask, n *pipeline.WindowNode, l *log.Logger) (*WindowNode, error) {
+func newWindowNode(et *ExecutingTask, n *pipeline.WindowNode, d NodeDiagnostic) (*WindowNode, error) {
 	if n.Period == 0 && n.PeriodCount == 0 {
 		return nil, errors.New("window node must have either a non zero period or non zero period count")
 	}
 	wn := &WindowNode{
 		w:    n,
-		node: node{Node: n, et: et, logger: l},
+		node: node{Node: n, et: et, diag: d},
 	}
 	wn.node.runF = wn.runWindow
 	return wn, nil
@@ -61,7 +60,7 @@ func (n *WindowNode) newWindow(group edge.GroupInfo, first edge.PointMeta) (edge
 			n.w.Every,
 			n.w.AlignFlag,
 			n.w.FillPeriodFlag,
-			n.logger,
+			n.diag,
 		), nil
 	case n.w.PeriodCount != 0:
 		return newWindowByCount(
@@ -70,7 +69,7 @@ func (n *WindowNode) newWindow(group edge.GroupInfo, first edge.PointMeta) (edge
 			int(n.w.PeriodCount),
 			int(n.w.EveryCount),
 			n.w.FillPeriodFlag,
-			n.logger,
+			n.diag,
 		), nil
 	default:
 		return nil, errors.New("unreachable code, window node should have a non-zero period or period count")
@@ -91,7 +90,7 @@ type windowByTime struct {
 	period time.Duration
 	every  time.Duration
 
-	logger *log.Logger
+	diag NodeDiagnostic
 }
 
 func newWindowByTime(
@@ -102,7 +101,7 @@ func newWindowByTime(
 	every time.Duration,
 	align,
 	fillPeriod bool,
-	logger *log.Logger,
+	d NodeDiagnostic,
 ) *windowByTime {
 	// Determine nextEmit time.
@@ -128,12 +127,12 @@ func newWindowByTime(
 		name:       name,
 		group:      group,
 		nextEmit:   nextEmit,
-		buf:        &windowTimeBuffer{logger: logger},
+		buf:        &windowTimeBuffer{diag: d},
 		align:      align,
 		fillPeriod: fillPeriod,
 		period:     period,
 		every:      every,
-		logger:     logger,
+		diag:       d,
 	}
 }
 
@@ -216,7 +215,7 @@ type windowTimeBuffer struct {
 	start  int
 	stop   int
 	size   int
-	logger *log.Logger
+	diag   NodeDiagnostic
 }
 
 // Insert a single point into the buffer.
@@ -339,7 +338,7 @@ type windowByCount struct {
 	size  int
 	count int
 
-	logger *log.Logger
+	diag NodeDiagnostic
}
 
 func newWindowByCount(
@@ -348,7 +347,8 @@ func newWindowByCount(
 	period,
 	every int,
 	fillPeriod bool,
-	logger *log.Logger) *windowByCount {
+	d NodeDiagnostic,
+) *windowByCount {
 	// Determine the first nextEmit index
 	nextEmit := every
 	if fillPeriod {
@@ -361,7 +361,7 @@ func newWindowByCount(
 		period:   period,
 		every:    every,
 		nextEmit: nextEmit,
-		logger:   logger,
+		diag:     d,
 	}
 }
 func (w *windowByCount) BeginBatch(edge.BeginBatchMessage) (edge.Message, error) {
diff --git a/window_test.go b/window_test.go
index 2f5caea89..6da1423a7 100644
--- a/window_test.go
+++ b/window_test.go
@@ -1,22 +1,34 @@
 package kapacitor
 
 import (
-	"log"
-	"os"
 	"testing"
 	"time"
 
+	"github.com/influxdata/kapacitor/alert"
 	"github.com/influxdata/kapacitor/edge"
+	"github.com/influxdata/kapacitor/keyvalue"
 	"github.com/influxdata/kapacitor/models"
 	"github.com/stretchr/testify/assert"
 )
 
-var logger = log.New(os.Stderr, "[window] ", log.LstdFlags|log.Lshortfile)
+// Mock node diagnostic
+type windowNodeDiagnostic struct{}
+
+func newWindowNodeDiagnostic() *windowNodeDiagnostic { return &windowNodeDiagnostic{} }
+
+func (d *windowNodeDiagnostic) Error(msg string, err error, ctx ...keyvalue.T) {}
+func (d *windowNodeDiagnostic) AlertTriggered(level alert.Level, id string, message string, rows *models.Row) {
+}
+func (d *windowNodeDiagnostic) SettingReplicas(new int, old int, id string) {}
+func (d *windowNodeDiagnostic) StartingBatchQuery(q string)                 {}
+func (d *windowNodeDiagnostic) LogBatchData(level, prefix string, batch edge.BufferedBatchMessage) {}
+func (d *windowNodeDiagnostic) LogPointData(level, prefix string, point edge.PointMessage)         {}
+func (d *windowNodeDiagnostic) UDFLog(s string)                                                    {}
 
 func TestWindowBufferByTime(t *testing.T) {
 	assert := assert.New(t)
 
-	buf := &windowTimeBuffer{logger: logger}
+	buf := &windowTimeBuffer{}
 
 	size := 100
 
@@ -131,7 +143,7 @@ func TestWindowBufferByCount(t *testing.T) {
 			tc.period,
 			tc.every,
 			tc.fillPeriod,
-			logger,
+			newWindowNodeDiagnostic(),
 		)
 
 		// fill buffer
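// NOTE (illustrative, not part of the patch): hand-written mocks like
// windowNodeDiagnostic above can silently drift as methods are added to the
// node diagnostic interface. A compile-time assertion in window_test.go would
// keep the mock in sync, assuming NodeDiagnostic is the interface the node
// constructors accept:
//
//	var _ NodeDiagnostic = (*windowNodeDiagnostic)(nil)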