Skip to content

Commit

Permalink
Fix race condition issue for history scavenger against archiver (cade…
Browse files Browse the repository at this point in the history
  • Loading branch information
longquanzheng authored Nov 7, 2019
1 parent e0ef19e commit 6bd2bf9
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 1 deletion.
4 changes: 4 additions & 0 deletions common/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,10 @@ const (
// CriticalLongPollTimeout is a threshold for the context timeout passed into long poll API,
// below which a warning will be logged
CriticalLongPollTimeout = time.Second * 20
// MaxWorkflowRetentionPeriondInDays is the maximum of workflow retention when registering domain.
// !!! Do NOT simply decrease this number, because it is being used by history scavenger to avoid race condition against history archival.
// Check more details in history scanner(scavenger)
MaxWorkflowRetentionPeriodInDays = 30
)

const (
Expand Down
5 changes: 5 additions & 0 deletions service/frontend/workflowHandler.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ var (
errNoPermission = &gen.BadRequestError{Message: "No permission to do this operation."}
errRequestIDNotSet = &gen.BadRequestError{Message: "RequestId is not set on request."}
errWorkflowTypeNotSet = &gen.BadRequestError{Message: "WorkflowType is not set on request."}
errInvalidRetention = &gen.BadRequestError{Message: "RetentionDays is invalid."}
errInvalidExecutionStartToCloseTimeoutSeconds = &gen.BadRequestError{Message: "A valid ExecutionStartToCloseTimeoutSeconds is not set on request."}
errInvalidTaskStartToCloseTimeoutSeconds = &gen.BadRequestError{Message: "A valid TaskStartToCloseTimeoutSeconds is not set on request."}
errClientVersionNotSet = &gen.BadRequestError{Message: "Client version is not set on request."}
Expand Down Expand Up @@ -255,6 +256,10 @@ func (wh *WorkflowHandler) RegisterDomain(ctx context.Context, registerRequest *
return errRequestNotSet
}

if registerRequest.GetWorkflowExecutionRetentionPeriodInDays() > common.MaxWorkflowRetentionPeriodInDays {
return errInvalidRetention
}

if err := checkPermission(wh.config, registerRequest.SecurityToken); err != nil {
return err
}
Expand Down
6 changes: 5 additions & 1 deletion service/worker/scanner/history/scavenger.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,11 @@ const (
rpsPerConcurrency = 50
pageSize = 1000
// only clean up history branches that older than this threshold
cleanUpThreshold = time.Hour * 24
// we double the MaxWorkflowRetentionPeriodInDays to avoid racing condition with history archival.
// Our history archiver delete mutable state, and then upload history to blob store and then delete history.
// This scanner will face racing condition with archiver because it relys on describe mutable state returning entityNotExist error.
// That's why we need to keep MaxWorkflowRetentionPeriodInDays stable and not decreasing all the time.
cleanUpThreshold = time.Hour * 24 * common.MaxWorkflowRetentionPeriodInDays * 2
)

// NewScavenger returns an instance of history scavenger daemon
Expand Down

0 comments on commit 6bd2bf9

Please sign in to comment.