Skip to content

Commit

Permalink
[Wf-Diagnostics] remove heartbeat rootcause for other irrelevant time…
Browse files: browse the repository at this point in the history
  • Loading branch information
sankari165 authored Aug 26, 2024
1 parent 630013c commit fbae51a
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 31 deletions.
34 changes: 20 additions & 14 deletions service/worker/diagnostics/invariants/timeout.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ func (t *timeout) RootCause(ctx context.Context, issues []InvariantCheckResult)
if err != nil {
return nil, err
}
result = append(result, heartbeatStatus)
result = append(result, heartbeatStatus...)

}
}
return result, nil
Expand Down Expand Up @@ -173,29 +174,34 @@ func (t *timeout) checkTasklist(ctx context.Context, issue InvariantCheckResult)

}

func checkHeartbeatStatus(issue InvariantCheckResult) (InvariantRootCauseResult, error) {
func checkHeartbeatStatus(issue InvariantCheckResult) ([]InvariantRootCauseResult, error) {
var metadata ActivityTimeoutMetadata
err := json.Unmarshal(issue.Metadata, &metadata)
if err != nil {
return InvariantRootCauseResult{}, err
return nil, err
}

if metadata.HeartBeatTimeout == 0 {
return InvariantRootCauseResult{
RootCause: RootCauseTypeHeartBeatingNotEnabled,
Metadata: []byte(metadata.TimeElapsed.String()),
if metadata.HeartBeatTimeout == 0 && activityStarted(metadata) {
return []InvariantRootCauseResult{
{
RootCause: RootCauseTypeHeartBeatingNotEnabled,
Metadata: []byte(metadata.TimeElapsed.String()),
},
}, nil
}

if metadata.HeartBeatTimeout > 0 && metadata.TimeoutType.String() == types.TimeoutTypeHeartbeat.String() {
return InvariantRootCauseResult{
RootCause: RootCauseTypeHeartBeatingEnabledMissingHeartbeat,
Metadata: []byte(metadata.TimeElapsed.String()),
return []InvariantRootCauseResult{
{
RootCause: RootCauseTypeHeartBeatingEnabledMissingHeartbeat,
Metadata: []byte(metadata.TimeElapsed.String()),
},
}, nil
}

return InvariantRootCauseResult{
RootCause: RootCauseTypeHeartBeatingEnabledActivityTimedOut,
Metadata: []byte(metadata.TimeElapsed.String()),
}, nil
return nil, nil
}

// activityStarted reports whether the timed-out activity's timeout type is
// anything other than schedule-to-start. The comparison is done on the string
// form of the timeout types.
//
// NOTE(review): TimeoutType looks like a pointer (the tests populate it via
// Ptr()); confirm that calling String() on it is safe when it is unset.
func activityStarted(metadata ActivityTimeoutMetadata) bool {
	timedOutAs := metadata.TimeoutType.String()
	return timedOutAs != types.TimeoutTypeScheduleToStart.String()
}
50 changes: 34 additions & 16 deletions service/worker/diagnostics/invariants/timeout_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ func Test__Check(t *testing.T) {
{
InvariantType: TimeoutTypeActivity.String(),
Reason: "SCHEDULE_TO_START",
Metadata: activityTimeoutDataInBytes(t),
Metadata: activityScheduleToStartTimeoutDataInBytes(t),
},
{
InvariantType: TimeoutTypeActivity.String(),
Expand Down Expand Up @@ -302,7 +302,7 @@ func wfTimeoutDataInBytes(t *testing.T) []byte {
return dataInBytes
}

func activityTimeoutData() ActivityTimeoutMetadata {
func activityScheduleToStartTimeoutData() ActivityTimeoutMetadata {
return ActivityTimeoutMetadata{
TimeoutType: types.TimeoutTypeScheduleToStart.Ptr(),
ConfiguredTimeout: 50 * time.Second,
Expand All @@ -315,15 +315,37 @@ func activityTimeoutData() ActivityTimeoutMetadata {
},
}
}
func activityTimeoutDataInBytes(t *testing.T) []byte {
data := activityTimeoutData()

// activityStartToCloseTimeoutData builds the metadata fixture for an activity
// that timed out with a start-to-close timeout: 50s configured, 50s elapsed,
// a zero heartbeat timeout, and the test tasklist. RetryPolicy and the
// tasklist Kind are left at their zero value (nil).
func activityStartToCloseTimeoutData() ActivityTimeoutMetadata {
	// Configured and elapsed durations are the same value in this fixture.
	const timeout = 50 * time.Second

	data := ActivityTimeoutMetadata{
		TimeoutType:       types.TimeoutTypeStartToClose.Ptr(),
		ConfiguredTimeout: timeout,
		TimeElapsed:       timeout,
		HeartBeatTimeout:  0, // kept explicit: the fixture has no heartbeat timeout set
	}
	data.Tasklist = &types.TaskList{Name: testTasklist}
	return data
}

// activityScheduleToStartTimeoutDataInBytes returns the JSON encoding of the
// schedule-to-start activity timeout fixture. The test is failed immediately
// if marshalling returns an error.
func activityScheduleToStartTimeoutDataInBytes(t *testing.T) []byte {
	// Mark this function as a helper so a failure is attributed to the
	// calling test case rather than to this line.
	t.Helper()

	dataInBytes, err := json.Marshal(activityScheduleToStartTimeoutData())
	require.NoError(t, err)
	return dataInBytes
}

// activityStartToCloseTimeoutDataInBytes returns the JSON encoding of the
// start-to-close activity timeout fixture. The test is failed immediately if
// marshalling returns an error.
func activityStartToCloseTimeoutDataInBytes(t *testing.T) []byte {
	// Mark this function as a helper so a failure is attributed to the
	// calling test case rather than to this line.
	t.Helper()

	dataInBytes, err := json.Marshal(activityStartToCloseTimeoutData())
	require.NoError(t, err)
	return dataInBytes
}

func activityHeartBeatTimeoutDataInBytes(t *testing.T) []byte {
actTimeoutData := activityTimeoutData()
actTimeoutData := activityStartToCloseTimeoutData()
actTimeoutData.TimeoutType = types.TimeoutTypeHeartbeat.Ptr()
actTimeoutData.HeartBeatTimeout = 50 * time.Second
actHeartBeatTimeoutDataInBytes, err := json.Marshal(actTimeoutData)
Expand All @@ -346,7 +368,7 @@ func childWfTimeoutDataInBytes(t *testing.T) []byte {
}

func Test__RootCause(t *testing.T) {
actTimeoutData := activityTimeoutData()
actStartToCloseTimeoutData := activityStartToCloseTimeoutData()
testCases := []struct {
name string
input []InvariantCheckResult
Expand Down Expand Up @@ -414,7 +436,7 @@ func Test__RootCause(t *testing.T) {
{
InvariantType: TimeoutTypeActivity.String(),
Reason: "START_TO_CLOSE",
Metadata: activityTimeoutDataInBytes(t),
Metadata: activityStartToCloseTimeoutDataInBytes(t),
},
},
clientExpects: func(client *frontend.MockClient) {
Expand All @@ -436,18 +458,18 @@ func Test__RootCause(t *testing.T) {
},
{
RootCause: RootCauseTypeHeartBeatingNotEnabled,
Metadata: []byte(actTimeoutData.TimeElapsed.String()),
Metadata: []byte(actStartToCloseTimeoutData.TimeElapsed.String()),
},
},
err: nil,
},
{
name: "activity timeout and heart beating not enabled",
name: "activity schedule to start timeout",
input: []InvariantCheckResult{
{
InvariantType: TimeoutTypeActivity.String(),
Reason: "START_TO_CLOSE",
Metadata: activityTimeoutDataInBytes(t),
Reason: "SCHEDULE_TO_START",
Metadata: activityScheduleToStartTimeoutDataInBytes(t),
},
},
clientExpects: func(client *frontend.MockClient) {
Expand All @@ -467,10 +489,6 @@ func Test__RootCause(t *testing.T) {
RootCause: RootCauseTypePollersStatus,
Metadata: taskListBacklogInBytes(testTaskListBacklog),
},
{
RootCause: RootCauseTypeHeartBeatingNotEnabled,
Metadata: []byte(actTimeoutData.TimeElapsed.String()),
},
},
err: nil,
},
Expand Down Expand Up @@ -502,7 +520,7 @@ func Test__RootCause(t *testing.T) {
},
{
RootCause: RootCauseTypeHeartBeatingEnabledMissingHeartbeat,
Metadata: []byte(actTimeoutData.TimeElapsed.String()),
Metadata: []byte(actStartToCloseTimeoutData.TimeElapsed.String()),
},
},
err: nil,
Expand Down
1 change: 0 additions & 1 deletion service/worker/diagnostics/invariants/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ const (
RootCauseTypePollersStatus RootCause = "There are pollers for the tasklist. Check backlog status"
RootCauseTypeHeartBeatingNotEnabled RootCause = "HeartBeating not enabled for activity"
RootCauseTypeHeartBeatingEnabledMissingHeartbeat RootCause = "HeartBeating enabled for activity but timed out due to missing heartbeat"
RootCauseTypeHeartBeatingEnabledActivityTimedOut RootCause = "HeartBeating enabled for activity but activity timed out due to other configured timeouts"
)

func (tt TimeoutType) String() string {
Expand Down

0 comments on commit fbae51a

Please sign in to comment.