Skip to content

Commit

Permalink
[Wf-Diagnostics] update diagnostics workflow to get rootcause for ide…
Browse files Browse the repository at this point in the history
…ntified issues (cadence-workflow#6244)
  • Loading branch information
sankari165 authored Aug 22, 2024
1 parent 07a62a7 commit 14ae23e
Show file tree
Hide file tree
Showing 5 changed files with 79 additions and 0 deletions.
15 changes: 15 additions & 0 deletions service/worker/diagnostics/activities.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,18 @@ func (w *dw) identifyTimeouts(ctx context.Context, info identifyTimeoutsInputPar
})
return timeoutInvariant.Check(ctx)
}

type rootCauseTimeoutsParams struct {
history *types.GetWorkflowExecutionHistoryResponse
domain string
issues []invariants.InvariantCheckResult
}

func (w *dw) rootCauseTimeouts(ctx context.Context, info rootCauseTimeoutsParams) ([]invariants.InvariantRootCauseResult, error) {
timeoutInvariant := invariants.NewTimeout(invariants.NewTimeoutParams{
WorkflowExecutionHistory: info.history,
ClientBean: w.clientBean,
Domain: info.domain,
})
return timeoutInvariant.RootCause(ctx, info.issues)
}
50 changes: 50 additions & 0 deletions service/worker/diagnostics/activities_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,12 +84,62 @@ func Test__identifyTimeouts(t *testing.T) {
require.Equal(t, expectedResult, result)
}

func Test__rootCauseTimeouts(t *testing.T) {
dwtest := testDiagnosticWorkflow(t)
workflowTimeoutData := invariants.ExecutionTimeoutMetadata{
ExecutionTime: 110 * time.Second,
ConfiguredTimeout: 110 * time.Second,
LastOngoingEvent: &types.HistoryEvent{
ID: 1,
Timestamp: common.Int64Ptr(testTimeStamp),
WorkflowExecutionStartedEventAttributes: &types.WorkflowExecutionStartedEventAttributes{
ExecutionStartToCloseTimeoutSeconds: common.Int32Ptr(workflowTimeoutSecond),
},
},
Tasklist: &types.TaskList{
Name: "testasklist",
Kind: nil,
},
}
workflowTimeoutDataInBytes, err := json.Marshal(workflowTimeoutData)
require.NoError(t, err)
issues := []invariants.InvariantCheckResult{
{
InvariantType: invariants.TimeoutTypeExecution.String(),
Reason: "START_TO_CLOSE",
Metadata: workflowTimeoutDataInBytes,
},
}
taskListBacklog := int64(10)
taskListBacklogInBytes, err := json.Marshal(taskListBacklog)
require.NoError(t, err)
expectedRootCause := []invariants.InvariantRootCauseResult{
{
RootCause: invariants.RootCauseTypePollersStatus,
Metadata: taskListBacklogInBytes,
},
}
result, err := dwtest.rootCauseTimeouts(context.Background(), rootCauseTimeoutsParams{history: testWorkflowExecutionHistoryResponse(), domain: "test-domain", issues: issues})
require.NoError(t, err)
require.Equal(t, expectedRootCause, result)
}

func testDiagnosticWorkflow(t *testing.T) *dw {
ctrl := gomock.NewController(t)
mockClientBean := client.NewMockBean(ctrl)
mockFrontendClient := frontend.NewMockClient(ctrl)
mockClientBean.EXPECT().GetFrontendClient().Return(mockFrontendClient).AnyTimes()
mockFrontendClient.EXPECT().GetWorkflowExecutionHistory(gomock.Any(), gomock.Any()).Return(testWorkflowExecutionHistoryResponse(), nil).AnyTimes()
mockFrontendClient.EXPECT().DescribeTaskList(gomock.Any(), gomock.Any()).Return(&types.DescribeTaskListResponse{
Pollers: []*types.PollerInfo{
{
Identity: "dca24-xy",
},
},
TaskListStatus: &types.TaskListStatus{
BacklogCountHint: int64(10),
},
}, nil).AnyTimes()
return &dw{
clientBean: mockClientBean,
}
Expand Down
1 change: 1 addition & 0 deletions service/worker/diagnostics/module.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ func (w *dw) Start() error {
newWorker.RegisterWorkflowWithOptions(w.DiagnosticsWorkflow, workflow.RegisterOptions{Name: diagnosticsWorkflow})
newWorker.RegisterActivityWithOptions(w.retrieveExecutionHistory, activity.RegisterOptions{Name: retrieveWfExecutionHistoryActivity})
newWorker.RegisterActivityWithOptions(w.identifyTimeouts, activity.RegisterOptions{Name: identifyTimeoutsActivity})
newWorker.RegisterActivityWithOptions(w.rootCauseTimeouts, activity.RegisterOptions{Name: rootCauseTimeoutsActivity})
w.worker = newWorker
return newWorker.Start()
}
Expand Down
11 changes: 11 additions & 0 deletions service/worker/diagnostics/workflow.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ const (

retrieveWfExecutionHistoryActivity = "retrieveWfExecutionHistory"
identifyTimeoutsActivity = "identifyTimeouts"
rootCauseTimeoutsActivity = "rootCauseTimeouts"
)

type DiagnosticsWorkflowInput struct {
Expand Down Expand Up @@ -74,5 +75,15 @@ func (w *dw) DiagnosticsWorkflow(ctx workflow.Context, params DiagnosticsWorkflo
return fmt.Errorf("IdentifyTimeouts: %w", err)
}

var rootCauseResult []invariants.InvariantRootCauseResult
err = workflow.ExecuteActivity(activityCtx, w.rootCauseTimeouts, rootCauseTimeoutsParams{
history: wfExecutionHistory,
domain: params.Domain,
issues: checkResult,
}).Get(ctx, &rootCauseResult)
if err != nil {
return fmt.Errorf("RootCauseTimeouts: %w", err)
}

return nil
}
2 changes: 2 additions & 0 deletions service/worker/diagnostics/workflow_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ func (s *diagnosticsWorkflowTestSuite) SetupTest() {
s.workflowEnv.RegisterWorkflowWithOptions(s.dw.DiagnosticsWorkflow, workflow.RegisterOptions{Name: diagnosticsWorkflow})
s.workflowEnv.RegisterActivityWithOptions(s.dw.retrieveExecutionHistory, activity.RegisterOptions{Name: retrieveWfExecutionHistoryActivity})
s.workflowEnv.RegisterActivityWithOptions(s.dw.identifyTimeouts, activity.RegisterOptions{Name: identifyTimeoutsActivity})
s.workflowEnv.RegisterActivityWithOptions(s.dw.rootCauseTimeouts, activity.RegisterOptions{Name: rootCauseTimeoutsActivity})
}

func (s *diagnosticsWorkflowTestSuite) TearDownTest() {
Expand All @@ -80,6 +81,7 @@ func (s *diagnosticsWorkflowTestSuite) TestWorkflow() {
}
s.workflowEnv.OnActivity(retrieveWfExecutionHistoryActivity, mock.Anything, mock.Anything).Return(nil, nil)
s.workflowEnv.OnActivity(identifyTimeoutsActivity, mock.Anything, mock.Anything).Return(nil, nil)
s.workflowEnv.OnActivity(rootCauseTimeoutsActivity, mock.Anything, mock.Anything).Return(nil, nil)
s.workflowEnv.ExecuteWorkflow(diagnosticsWorkflow, params)
s.True(s.workflowEnv.IsWorkflowCompleted())
}
Expand Down

0 comments on commit 14ae23e

Please sign in to comment.