Skip to content

Commit

Permalink
Log domain/workflow/run ids for non-deterministic error from RespondDecisionTaskFailed (cadence-workflow#3012)
Browse files (browse the repository at this point in the history)

* Log domain/workflow/run ids for non-deterministic error from RespondDecisionTaskFailed (cadence-workflow#2929)

* Add fixes based on Yichao's comments and clean up the code

* Some code refactoring.

Co-authored-by: Bowei Xu <[email protected]>
  • Loading branch information
luowenking1 and vancexu authored Jan 30, 2020
1 parent 64afbc6 commit e97f3e9
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 0 deletions.
2 changes: 2 additions & 0 deletions common/metrics/defs.go
Original file line number Diff line number Diff line change
Expand Up @@ -1442,6 +1442,7 @@ const (
CadenceErrBadBinaryCounter
CadenceErrClientVersionNotSupportedCounter
CadenceErrIncompleteHistoryCounter
CadenceErrNonDeterministicCounter
PersistenceRequests
PersistenceFailures
PersistenceLatency
Expand Down Expand Up @@ -1797,6 +1798,7 @@ var MetricDefs = map[ServiceIdx]map[int]metricDefinition{
CadenceErrBadBinaryCounter: {metricName: "cadence_errors_bad_binary", metricType: Counter},
CadenceErrClientVersionNotSupportedCounter: {metricName: "cadence_errors_client_version_not_supported", metricType: Counter},
CadenceErrIncompleteHistoryCounter: {metricName: "cadence_errors_incomplete_history", metricType: Counter},
CadenceErrNonDeterministicCounter: {metricName: "cadence_errors_nondeterministic", metricType: Counter},
PersistenceRequests: {metricName: "persistence_requests", metricType: Counter},
PersistenceFailures: {metricName: "persistence_errors", metricType: Counter},
PersistenceLatency: {metricName: "persistence_latency", metricType: Timer},
Expand Down
13 changes: 13 additions & 0 deletions service/history/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -550,6 +550,19 @@ func (h *Handler) RespondDecisionTaskFailed(
token.RunID,
token.ScheduleID))

if failedRequest != nil && failedRequest.GetCause() == gen.DecisionTaskFailedCauseUnhandledDecision {
h.GetLogger().Info("Non-Deterministic Error", tag.WorkflowDomainID(token.DomainID), tag.WorkflowID(token.WorkflowID), tag.WorkflowRunID(token.RunID))
domainName, err := h.GetDomainCache().GetDomainName(token.DomainID)
var domainTag metrics.Tag

if err == nil {
domainTag = metrics.DomainTag(domainName)
} else {
domainTag = metrics.DomainUnknownTag()
}

h.GetMetricsClient().Scope(scope, domainTag).IncCounter(metrics.CadenceErrNonDeterministicCounter)
}
err0 = validateTaskToken(token)
if err0 != nil {
return h.error(err0, scope, domainID, "")
Expand Down

0 comments on commit e97f3e9

Please sign in to comment.