Refactor replication resync to be an active process (minio#14266)
When resync is triggered, walk the bucket namespace and
resync objects that are unreplicated. This PR also adds
an API to report resync progress.
poornas authored Feb 10, 2022
1 parent a223086 commit ed3418c
Showing 12 changed files with 1,731 additions and 153 deletions.
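Before the per-file diffs, a minimal orientation sketch of the loop the commit message describes: walk the bucket namespace and re-queue whatever has not replicated yet. This is not MinIO code; objectInfo, the objects channel, and queueResync are hypothetical stand-ins for the listing and replication-queue machinery the PR actually uses.

package main

import (
	"context"
	"fmt"
	"time"
)

// objectInfo is a hypothetical, trimmed-down view of an object's replication state.
type objectInfo struct {
	Name              string
	ModTime           time.Time
	ReplicationStatus string // "", "PENDING", "FAILED", or "COMPLETED"
}

// resyncBucket drains a namespace walk and queues every object that was created
// before the reset cutoff and has not completed replication.
func resyncBucket(ctx context.Context, objects <-chan objectInfo, resetBeforeDate time.Time, queueResync func(objectInfo)) {
	for {
		select {
		case <-ctx.Done():
			return
		case oi, ok := <-objects:
			if !ok {
				return
			}
			if oi.ModTime.After(resetBeforeDate) || oi.ReplicationStatus == "COMPLETED" {
				continue
			}
			queueResync(oi)
		}
	}
}

func main() {
	objects := make(chan objectInfo, 2)
	objects <- objectInfo{Name: "a.txt", ModTime: time.Now().Add(-48 * time.Hour)}
	objects <- objectInfo{Name: "b.txt", ModTime: time.Now().Add(-48 * time.Hour), ReplicationStatus: "COMPLETED"}
	close(objects)

	resyncBucket(context.Background(), objects, time.Now().Add(-24*time.Hour), func(oi objectInfo) {
		fmt.Println("queue for resync:", oi.Name) // only a.txt is queued
	})
}
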
8 changes: 6 additions & 2 deletions cmd/api-router.go
@@ -343,6 +343,9 @@ func registerAPIRouter(router *mux.Router) {
// ListenNotification
router.Methods(http.MethodGet).HandlerFunc(
collectAPIStats("listennotification", maxClients(gz(httpTraceAll(api.ListenNotificationHandler))))).Queries("events", "{events:.*}")
// ResetBucketReplicationStatus - MinIO extension API
router.Methods(http.MethodGet).HandlerFunc(
collectAPIStats("resetbucketreplicationstatus", maxClients(gz(httpTraceAll(api.ResetBucketReplicationStatusHandler))))).Queries("replication-reset-status", "")

// Dummy Bucket Calls
// GetBucketACL -- this is a dummy call.
@@ -417,9 +420,10 @@ func registerAPIRouter(router *mux.Router) {
// PutBucketNotification
router.Methods(http.MethodPut).HandlerFunc(
collectAPIStats("putbucketnotification", maxClients(gz(httpTraceAll(api.PutBucketNotificationHandler))))).Queries("notification", "")
// ResetBucketReplicationState - MinIO extension API
// ResetBucketReplicationStart - MinIO extension API
router.Methods(http.MethodPut).HandlerFunc(
collectAPIStats("resetbucketreplicationstate", maxClients(gz(httpTraceAll(api.ResetBucketReplicationStateHandler))))).Queries("replication-reset", "")
collectAPIStats("resetbucketreplicationstart", maxClients(gz(httpTraceAll(api.ResetBucketReplicationStartHandler))))).Queries("replication-reset", "")

// PutBucket
router.Methods(http.MethodPut).HandlerFunc(
collectAPIStats("putbucket", maxClients(gz(httpTraceAll(api.PutBucketHandler)))))
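The two routes above are query-gated MinIO extension calls on the bucket endpoint: a PUT with ?replication-reset starts a resync (ResetBucketReplicationStartHandler) and a GET with ?replication-reset-status reports progress (ResetBucketReplicationStatusHandler). A hedged sketch of the request shapes a client would build follows; the host, bucket, and ARN are placeholders, and the required AWS Signature V4 signing step is omitted.

package main

import (
	"fmt"
	"net/http"
	"net/url"
)

func main() {
	endpoint := "https://minio.example.com"                // placeholder deployment
	bucket := "mybucket"                                   // placeholder bucket
	arn := "arn:minio:replication::some-uuid:targetbucket" // placeholder remote target ARN

	// Start a resync for one remote target.
	start, _ := http.NewRequest(http.MethodPut,
		fmt.Sprintf("%s/%s?replication-reset&arn=%s", endpoint, bucket, url.QueryEscape(arn)), nil)

	// Poll resync progress for the same target.
	status, _ := http.NewRequest(http.MethodGet,
		fmt.Sprintf("%s/%s?replication-reset-status&arn=%s", endpoint, bucket, url.QueryEscape(arn)), nil)

	// Both requests still need AWS Signature V4 authentication before sending.
	fmt.Println(start.Method, start.URL)
	fmt.Println(status.Method, status.URL)
}
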
94 changes: 82 additions & 12 deletions cmd/bucket-handlers.go
@@ -1301,7 +1301,7 @@ func (api objectAPIHandlers) DeleteBucketHandler(w http.ResponseWriter, r *http.
}

globalNotificationSys.DeleteBucketMetadata(ctx, bucket)

globalReplicationPool.deleteResyncMetadata(ctx, bucket)
// Call site replication hook.
if err := globalSiteReplicationSys.DeleteBucketHook(ctx, bucket, forceDelete); err != nil {
writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL)
@@ -1760,12 +1760,13 @@ func (api objectAPIHandlers) GetBucketReplicationMetricsHandler(w http.ResponseW
}
}

// ResetBucketReplicationStateHandler - starts a replication reset for all objects in a bucket which
// ResetBucketReplicationStartHandler - starts a replication reset for all objects in a bucket which
// qualify for replication and re-sync the object(s) to target, provided ExistingObjectReplication is
// enabled for the qualifying rule. This API is a MinIO only extension provided for situations where
// remote target is entirely lost, and previously replicated objects need to be re-synced.
func (api objectAPIHandlers) ResetBucketReplicationStateHandler(w http.ResponseWriter, r *http.Request) {
ctx := newContext(r, w, "ResetBucketReplicationState")
// remote target is entirely lost, and previously replicated objects need to be re-synced. If resync is
// already in progress, it returns an error.
func (api objectAPIHandlers) ResetBucketReplicationStartHandler(w http.ResponseWriter, r *http.Request) {
ctx := newContext(r, w, "ResetBucketReplicationStart")
defer logger.AuditLog(ctx, w, r, mustGetClaimsFromToken(r))

vars := mux.Vars(r)
@@ -1789,6 +1790,7 @@ func (api objectAPIHandlers) ResetBucketReplicationStateHandler(w http.ResponseW
}), r.URL)
}
}
resetBeforeDate := UTCNow().AddDate(0, 0, -1*int(days/24))

objectAPI := api.ObjectAPI()
if objectAPI == nil {
@@ -1825,7 +1827,7 @@ func (api objectAPIHandlers) ResetBucketReplicationStateHandler(w http.ResponseW
if len(tgtArns) == 0 {
writeErrorResponseJSON(ctx, w, errorCodes.ToAPIErrWithErr(ErrBadRequest, InvalidArgument{
Bucket: bucket,
Err: fmt.Errorf("Remote target ARN %s missing/not eligible for replication resync", arn),
Err: fmt.Errorf("Remote target ARN %s missing or ineligible for replication resync", arn),
}), r.URL)
return
}
@@ -1849,22 +1851,90 @@ func (api objectAPIHandlers) ResetBucketReplicationStateHandler(w http.ResponseW
default:
writeErrorResponseJSON(ctx, w, toAPIError(ctx, err), r.URL)
}
}
if err := startReplicationResync(ctx, bucket, arn, resetID, resetBeforeDate, objectAPI); err != nil {
writeErrorResponseJSON(ctx, w, errorCodes.ToAPIErrWithErr(ErrBadRequest, InvalidArgument{
Bucket: bucket,
Err: err,
}), r.URL)
return
}
targets, err := globalBucketTargetSys.ListBucketTargets(ctx, bucket)

data, err := json.Marshal(rinfo)
if err != nil {
writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL)
writeErrorResponseJSON(ctx, w, toAdminAPIErr(ctx, err), r.URL)
return
}
tgtBytes, err := json.Marshal(&targets)
if err != nil {
writeErrorResponseJSON(ctx, w, errorCodes.ToAPIErrWithErr(ErrAdminConfigBadJSON, err), r.URL)
// Write success response.
writeSuccessResponseJSON(w, data)
}
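Per the updated doc comment, a second reset request for a target whose resync is still running is rejected, surfacing as an error from the startReplicationResync call above. A hedged sketch of such a guard, written against the resync status types introduced later in this PR (cmd/bucket-replication-utils.go); it is illustrative, not the PR's implementation.

// Sketch only: assumes the resync status types defined in
// cmd/bucket-replication-utils.go and lives in the same package.
func checkResyncNotInProgress(brs BucketReplicationResyncStatus, arn string) error {
	if st, ok := brs.TargetsMap[arn]; ok && st.ResyncStatus == ResyncStarted {
		return fmt.Errorf("replication resync already in progress for %s (resync ID %s)", arn, st.ResyncID)
	}
	return nil
}
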

// ResetBucketReplicationStatusHandler - returns the status of replication reset.
// This API is a MinIO only extension
func (api objectAPIHandlers) ResetBucketReplicationStatusHandler(w http.ResponseWriter, r *http.Request) {
ctx := newContext(r, w, "ResetBucketReplicationStatus")
defer logger.AuditLog(ctx, w, r, mustGetClaimsFromToken(r))

vars := mux.Vars(r)
bucket := vars["bucket"]
arn := r.URL.Query().Get("arn")
var err error

objectAPI := api.ObjectAPI()
if objectAPI == nil {
writeErrorResponse(ctx, w, errorCodes.ToAPIErr(ErrServerNotInitialized), r.URL)
return
}

if s3Error := checkRequestAuthType(ctx, r, policy.ResetBucketReplicationStateAction, bucket, ""); s3Error != ErrNone {
writeErrorResponse(ctx, w, errorCodes.ToAPIErr(s3Error), r.URL)
return
}

// Check if bucket exists.
if _, err := objectAPI.GetBucketInfo(ctx, bucket); err != nil {
writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL)
return
}
if err = globalBucketMetadataSys.Update(ctx, bucket, bucketTargetsFile, tgtBytes); err != nil {

if _, err := globalBucketMetadataSys.GetReplicationConfig(ctx, bucket); err != nil {
writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL)
return
}

globalReplicationPool.resyncState.RLock()
brs, ok := globalReplicationPool.resyncState.statusMap[bucket]
if !ok {
brs, err = loadBucketResyncMetadata(ctx, bucket, objectAPI)
if err != nil {
writeErrorResponse(ctx, w, errorCodes.ToAPIErrWithErr(ErrBadRequest, InvalidArgument{
Bucket: bucket,
Err: fmt.Errorf("No replication resync status available for %s", arn),
}), r.URL)
}
return
}

var rinfo ResyncTargetsInfo
for tarn, st := range brs.TargetsMap {
if arn != "" && tarn != arn {
continue
}
rinfo.Targets = append(rinfo.Targets, ResyncTarget{
Arn: tarn,
ResetID: st.ResyncID,
StartTime: st.StartTime,
EndTime: st.EndTime,
ResyncStatus: st.ResyncStatus.String(),
ReplicatedSize: st.ReplicatedSize,
ReplicatedCount: st.ReplicatedCount,
FailedSize: st.FailedSize,
FailedCount: st.FailedCount,
Bucket: st.Bucket,
Object: st.Object,
})
}
globalReplicationPool.resyncState.RUnlock()
data, err := json.Marshal(rinfo)
if err != nil {
writeErrorResponseJSON(ctx, w, toAdminAPIErr(ctx, err), r.URL)
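ResetBucketReplicationStatusHandler marshals the ResyncTargetsInfo it assembles into the response body. A rough illustration of what one per-target entry looks like on the wire, using only the ResyncTarget fields and JSON tags visible in this diff; the values and the top-level "target" wrapper key are assumptions, not taken from this PR.

package main

import (
	"encoding/json"
	"fmt"
	"os"
	"time"
)

// resyncTarget mirrors the ResyncTarget fields added in cmd/bucket-replication-utils.go.
type resyncTarget struct {
	Arn             string    `json:"arn"`
	ResetID         string    `json:"resetid"`
	StartTime       time.Time `json:"startTime"`
	EndTime         time.Time `json:"endTime"`
	ResyncStatus    string    `json:"resyncStatus,omitempty"`
	ReplicatedSize  int64     `json:"completedReplicationSize"`
	FailedSize      int64     `json:"failedReplicationSize"`
	FailedCount     int64     `json:"failedReplicationCount"`
	ReplicatedCount int64     `json:"replicationCount"`
	Bucket          string    `json:"bucket,omitempty"`
	Object          string    `json:"object,omitempty"`
}

func main() {
	// Example values only; the real handler fills these from the bucket's resync state.
	body := map[string][]resyncTarget{
		"target": { // wrapper key is an assumption
			{
				Arn:             "arn:minio:replication::some-uuid:targetbucket",
				ResetID:         "example-reset-id",
				StartTime:       time.Now().Add(-10 * time.Minute),
				ResyncStatus:    "Ongoing",
				ReplicatedSize:  1 << 30,
				ReplicatedCount: 1024,
				Bucket:          "mybucket",
				Object:          "photos/2022/last-synced.jpg",
			},
		},
	}
	enc := json.NewEncoder(os.Stdout)
	enc.SetIndent("", "  ")
	if err := enc.Encode(body); err != nil {
		fmt.Fprintln(os.Stderr, err)
	}
}
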
1 change: 1 addition & 0 deletions cmd/bucket-metadata.go
@@ -375,6 +375,7 @@ func deleteBucketMetadata(ctx context.Context, obj objectDeleter, bucket string)
metadataFiles := []string{
dataUsageCacheName,
bucketMetadataFile,
path.Join(replicationDir, resyncFileName),
}
for _, metaFile := range metadataFiles {
configFile := path.Join(bucketMetaPrefix, bucket, metaFile)
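The extra entry above means deleting a bucket also removes its resync tracking file. A small sketch of where that file lives, assuming bucketMetaPrefix resolves to "buckets" (an assumption; replicationDir and resyncFileName come from the constants added in cmd/bucket-replication-utils.go below), all under MinIO's hidden .minio.sys prefix.

package main

import (
	"fmt"
	"path"
)

const (
	bucketMetaPrefix = "buckets" // assumed value of MinIO's existing constant
	replicationDir   = "replication"
	resyncFileName   = "resync.bin"
)

// resyncMetaPath mirrors the path.Join calls in deleteBucketMetadata above.
func resyncMetaPath(bucket string) string {
	return path.Join(bucketMetaPrefix, bucket, replicationDir, resyncFileName)
}

func main() {
	fmt.Println(resyncMetaPath("mybucket")) // buckets/mybucket/replication/resync.bin
}
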
100 changes: 98 additions & 2 deletions cmd/bucket-replication-utils.go
@@ -23,6 +23,7 @@ import (
"regexp"
"strconv"
"strings"
"sync"
"time"

"github.com/minio/minio/internal/bucket/replication"
@@ -576,8 +577,23 @@ type ResyncTargetsInfo struct {

// ResyncTarget is a struct representing the target reset ID, where the target is identified by its ARN
type ResyncTarget struct {
Arn string `json:"arn"`
ResetID string `json:"resetid"`
Arn string `json:"arn"`
ResetID string `json:"resetid"`
StartTime time.Time `json:"startTime"`
EndTime time.Time `json:"endTime"`
// Status of resync operation
ResyncStatus string `json:"resyncStatus,omitempty"`
// Completed size in bytes
ReplicatedSize int64 `json:"completedReplicationSize"`
// Failed size in bytes
FailedSize int64 `json:"failedReplicationSize"`
// Total number of failed operations
FailedCount int64 `json:"failedReplicationCount"`
// Total number of replicated operations
ReplicatedCount int64 `json:"replicationCount"`
// Last bucket/object replicated.
Bucket string `json:"bucket,omitempty"`
Object string `json:"object,omitempty"`
}

// VersionPurgeStatusType represents status of a versioned delete or permanent delete w.r.t bucket replication
@@ -603,3 +619,83 @@ func (v VersionPurgeStatusType) Empty() bool {
func (v VersionPurgeStatusType) Pending() bool {
return v == Pending || v == Failed
}

type replicationResyncState struct {
// map of bucket to their resync status
statusMap map[string]BucketReplicationResyncStatus
sync.RWMutex
}

const (
replicationDir = "replication"
resyncFileName = "resync.bin"
resyncMetaFormat = 1
resyncMetaVersionV1 = 1
resyncMetaVersion = resyncMetaVersionV1
)

// ResyncStatusType status of resync operation
type ResyncStatusType int

const (
// NoResync - no resync in progress
NoResync ResyncStatusType = iota
// ResyncStarted - resync in progress
ResyncStarted
// ResyncCompleted - resync finished
ResyncCompleted
// ResyncFailed - resync failed
ResyncFailed
)

func (rt ResyncStatusType) String() string {
switch rt {
case ResyncStarted:
return "Ongoing"
case ResyncCompleted:
return "Completed"
case ResyncFailed:
return "Failed"
default:
return ""
}
}

// TargetReplicationResyncStatus status of resync of bucket for a specific target
type TargetReplicationResyncStatus struct {
StartTime time.Time `json:"startTime" msg:"st"`
EndTime time.Time `json:"endTime" msg:"et"`
// Resync ID assigned to this reset
ResyncID string `json:"resyncID" msg:"id"`
// ResyncBeforeDate - resync all objects created prior to this date
ResyncBeforeDate time.Time `json:"resyncBeforeDate" msg:"rdt"`
// Status of resync operation
ResyncStatus ResyncStatusType `json:"resyncStatus" msg:"rst"`
// Failed size in bytes
FailedSize int64 `json:"failedReplicationSize" msg:"fs"`
// Total number of failed operations
FailedCount int64 `json:"failedReplicationCount" msg:"frc"`
// Completed size in bytes
ReplicatedSize int64 `json:"completedReplicationSize" msg:"rs"`
// Total number of replicated operations
ReplicatedCount int64 `json:"replicationCount" msg:"rrc"`
// Last bucket/object replicated.
Bucket string `json:"-" msg:"bkt"`
Object string `json:"-" msg:"obj"`
}

// BucketReplicationResyncStatus captures current replication resync status
type BucketReplicationResyncStatus struct {
Version int `json:"version" msg:"v"`
// map of remote arn to their resync status for a bucket
TargetsMap map[string]TargetReplicationResyncStatus `json:"resyncMap,omitempty" msg:"brs"`
ID int `json:"id" msg:"id"`
LastUpdate time.Time `json:"lastUpdate" msg:"lu"`
}

func newBucketResyncStatus(bucket string) BucketReplicationResyncStatus {
return BucketReplicationResyncStatus{
TargetsMap: make(map[string]TargetReplicationResyncStatus),
Version: resyncMetaVersion,
}
}
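
Putting the new types to work: a hedged sketch (illustrative only, not code from this PR) of how a resync run might record per-target progress. It assumes it sits alongside the definitions above in package cmd, uses MinIO's existing UTCNow helper, and omits the locking that replicationResyncState provides.

// Illustrative helpers, not part of this PR.
func markResyncStarted(brs *BucketReplicationResyncStatus, arn, resetID string, resetBeforeDate time.Time) {
	brs.TargetsMap[arn] = TargetReplicationResyncStatus{
		StartTime:        UTCNow(),
		ResyncID:         resetID,
		ResyncBeforeDate: resetBeforeDate,
		ResyncStatus:     ResyncStarted,
	}
	brs.LastUpdate = UTCNow()
}

func recordReplicated(brs *BucketReplicationResyncStatus, arn string, size int64) {
	st := brs.TargetsMap[arn]
	st.ReplicatedCount++
	st.ReplicatedSize += size
	brs.TargetsMap[arn] = st
	brs.LastUpdate = UTCNow()
}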