Merge pull request kubernetes#126107 from enj/enj/i/svm_not_found_err
svm: set UID and RV on SSA patch to cause conflict on logical create
k8s-ci-robot authored Jul 20, 2024
2 parents b293ca9 + 6a6771b commit 892acaa
Showing 11 changed files with 411 additions and 187 deletions.
4 changes: 4 additions & 0 deletions cmd/kube-controller-manager/app/storageversionmigrator.go
@@ -54,7 +54,11 @@ func startSVMController(
return nil, true, fmt.Errorf("storage version migrator requires garbage collector")
}

// svm controller can make a lot of requests during migration, keep it fast
config := controllerContext.ClientBuilder.ConfigOrDie(controllerName)
config.QPS *= 20
config.Burst *= 100

client := controllerContext.ClientBuilder.ClientOrDie(controllerName)
informer := controllerContext.InformerFactory.Storagemigration().V1alpha1().StorageVersionMigrations()

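The multipliers above scale whatever QPS and burst the controller manager's client config already carries. A minimal sketch of the effect on a rest.Config, assuming the common kube-controller-manager defaults of 20 QPS and a burst of 30 (an assumption for illustration, not stated in this diff):

package main

import (
	"fmt"

	"k8s.io/client-go/rest"
)

func main() {
	// Hypothetical starting values; in the controller they come from ClientBuilder.ConfigOrDie.
	config := &rest.Config{QPS: 20, Burst: 30}
	config.QPS *= 20    // 20 * 20  -> 400 sustained requests per second
	config.Burst *= 100 // 30 * 100 -> 3000 requests allowed in a burst
	fmt.Printf("QPS=%v Burst=%d\n", config.QPS, config.Burst) // QPS=400 Burst=3000
}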
2 changes: 1 addition & 1 deletion pkg/controller/storageversionmigrator/resourceversion.go
@@ -199,7 +199,7 @@ func (rv *ResourceVersionController) sync(ctx context.Context, key string) error
StorageVersionMigrations().
UpdateStatus(
ctx,
setStatusConditions(toBeProcessedSVM, svmv1alpha1.MigrationFailed, migrationFailedStatusReason),
setStatusConditions(toBeProcessedSVM, svmv1alpha1.MigrationFailed, migrationFailedStatusReason, "resource does not exist in discovery"),
metav1.UpdateOptions{},
)
if err != nil {
102 changes: 66 additions & 36 deletions pkg/controller/storageversionmigrator/storageversionmigrator.go
@@ -204,27 +204,35 @@ func (svmc *SVMController) sync(ctx context.Context, key string) error {
}
gvr := getGVRFromResource(toBeProcessedSVM)

resourceMonitor, err := svmc.dependencyGraphBuilder.GetMonitor(ctx, gvr)
// prevent unsynced monitor from blocking forever
// use a short timeout so that we can fail quickly and possibly handle other migrations while this monitor gets ready.
monCtx, monCtxCancel := context.WithTimeout(ctx, 10*time.Second)
defer monCtxCancel()
resourceMonitor, errMonitor := svmc.dependencyGraphBuilder.GetMonitor(monCtx, gvr)
if resourceMonitor != nil {
if err != nil {
if errMonitor != nil {
// a non-nil monitor indicates that the error is due to the resource not being synced
return fmt.Errorf("dependency graph is not synced, requeuing to attempt again")
return fmt.Errorf("dependency graph is not synced, requeuing to attempt again: %w", errMonitor)
}
} else {
logger.V(4).Error(errMonitor, "resource does not exist in GC", "gvr", gvr.String())

// our GC cache could be missing a recently created custom resource, so give it some time to catch up
// we resync discovery every 30 seconds so twice that should be sufficient
if toBeProcessedSVM.CreationTimestamp.Add(time.Minute).After(time.Now()) {
return fmt.Errorf("resource does not exist in GC, requeuing to attempt again: %w", errMonitor)
}

// we can't migrate a resource that doesn't exist in the GC
_, err = svmc.kubeClient.StoragemigrationV1alpha1().
_, errStatus := svmc.kubeClient.StoragemigrationV1alpha1().
StorageVersionMigrations().
UpdateStatus(
ctx,
setStatusConditions(toBeProcessedSVM, svmv1alpha1.MigrationFailed, migrationFailedStatusReason),
setStatusConditions(toBeProcessedSVM, svmv1alpha1.MigrationFailed, migrationFailedStatusReason, "resource not found"),
metav1.UpdateOptions{},
)
if err != nil {
return err
}
logger.V(4).Error(fmt.Errorf("error migrating the resource"), "resource does not exist in GC", "gvr", gvr.String())

return nil
return errStatus
}

gcListResourceVersion, err := convertResourceVersionToInt(resourceMonitor.Controller.LastSyncResourceVersion())
@@ -244,7 +252,7 @@ func (svmc *SVMController) sync(ctx context.Context, key string) error {
StorageVersionMigrations().
UpdateStatus(
ctx,
setStatusConditions(toBeProcessedSVM, svmv1alpha1.MigrationRunning, migrationRunningStatusReason),
setStatusConditions(toBeProcessedSVM, svmv1alpha1.MigrationRunning, migrationRunningStatusReason, ""),
metav1.UpdateOptions{},
)
if err != nil {
@@ -255,60 +263,72 @@ func (svmc *SVMController) sync(ctx context.Context, key string) error {
if err != nil {
return err
}
typeMeta := metav1.TypeMeta{}
typeMeta.APIVersion, typeMeta.Kind = gvk.ToAPIVersionAndKind()
data, err := json.Marshal(typeMeta)
if err != nil {
return err
}

// ToDo: implement a mechanism to resume migration from the last migrated resource in case of a failure
// process storage migration
for _, gvrKey := range resourceMonitor.Store.ListKeys() {
namespace, name, err := cache.SplitMetaNamespaceKey(gvrKey)
for _, obj := range resourceMonitor.Store.List() {
accessor, err := meta.Accessor(obj)
if err != nil {
return err
}

_, err = svmc.dynamicClient.Resource(gvr).
Namespace(namespace).
typeMeta := typeMetaUIDRV{}
typeMeta.APIVersion, typeMeta.Kind = gvk.ToAPIVersionAndKind()
// set UID so that when a resource gets deleted, we get an "uid mismatch"
// conflict error instead of trying to create it.
typeMeta.UID = accessor.GetUID()
// set RV so that when a resource gets updated or deleted+recreated, we get an "object has been modified"
// conflict error. we do not actually need to do anything special for the updated case because if RV
// was not set, it would just result in a no-op request. but for the deleted+recreated case, if RV is
// not set but UID is set, we would get an immutable field validation error. hence we must set both.
typeMeta.ResourceVersion = accessor.GetResourceVersion()
data, err := json.Marshal(typeMeta)
if err != nil {
return err
}

_, errPatch := svmc.dynamicClient.Resource(gvr).
Namespace(accessor.GetNamespace()).
Patch(ctx,
name,
accessor.GetName(),
types.ApplyPatchType,
data,
metav1.PatchOptions{
FieldManager: svmc.controllerName,
},
)
if err != nil {
// in case of NotFound or Conflict, we can stop processing migration for that resource
if apierrors.IsNotFound(err) || apierrors.IsConflict(err) {
continue
}

_, err = svmc.kubeClient.StoragemigrationV1alpha1().
// in case of conflict, we can stop processing migration for that resource because it has either been
// - updated, meaning that migration has already been performed
// - deleted, meaning that migration is not needed
// - deleted and recreated, meaning that migration has already been performed
if apierrors.IsConflict(errPatch) {
logger.V(6).Info("Resource ignored due to conflict", "namespace", accessor.GetNamespace(), "name", accessor.GetName(), "gvr", gvr.String(), "err", errPatch)
continue
}

if errPatch != nil {
logger.V(4).Error(errPatch, "Failed to migrate the resource", "namespace", accessor.GetNamespace(), "name", accessor.GetName(), "gvr", gvr.String(), "reason", apierrors.ReasonForError(errPatch))

_, errStatus := svmc.kubeClient.StoragemigrationV1alpha1().
StorageVersionMigrations().
UpdateStatus(
ctx,
setStatusConditions(toBeProcessedSVM, svmv1alpha1.MigrationFailed, migrationFailedStatusReason),
setStatusConditions(toBeProcessedSVM, svmv1alpha1.MigrationFailed, migrationFailedStatusReason, "migration encountered unhandled error"),
metav1.UpdateOptions{},
)
if err != nil {
return err
}
logger.V(4).Error(err, "Failed to migrate the resource", "name", gvrKey, "gvr", gvr.String(), "reason", apierrors.ReasonForError(err))

return nil
return errStatus
// Todo: add retry for scenarios where API server returns rate limiting error
}
logger.V(4).Info("Successfully migrated the resource", "name", gvrKey, "gvr", gvr.String())
logger.V(4).Info("Successfully migrated the resource", "namespace", accessor.GetNamespace(), "name", accessor.GetName(), "gvr", gvr.String())
}

_, err = svmc.kubeClient.StoragemigrationV1alpha1().
StorageVersionMigrations().
UpdateStatus(
ctx,
setStatusConditions(toBeProcessedSVM, svmv1alpha1.MigrationSucceeded, migrationSuccessStatusReason),
setStatusConditions(toBeProcessedSVM, svmv1alpha1.MigrationSucceeded, migrationSuccessStatusReason, ""),
metav1.UpdateOptions{},
)
if err != nil {
@@ -318,3 +338,13 @@ func (svmc *SVMController) sync(ctx context.Context, key string) error {
logger.V(4).Info("Finished syncing svm resource", "key", key, "gvr", gvr.String(), "elapsed", time.Since(startTime))
return nil
}

type typeMetaUIDRV struct {
metav1.TypeMeta `json:",inline"`
objectMetaUIDandRV `json:"metadata,omitempty"`
}

type objectMetaUIDandRV struct {
UID types.UID `json:"uid,omitempty"`
ResourceVersion string `json:"resourceVersion,omitempty"`
}
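Putting the helper types above to work: the apply patch the controller sends contains only the type metadata plus the live object's UID and resourceVersion, so an untouched object is simply rewritten at the current storage version, while anything that changed underneath fails with a conflict instead of a logical create. A minimal, self-contained sketch of the payload, using a hypothetical ConfigMap whose UID and resourceVersion are invented for illustration:

package main

import (
	"encoding/json"
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
)

// Local copies of the two helper types added by this commit, so the sketch compiles on its own.
type typeMetaUIDRV struct {
	metav1.TypeMeta    `json:",inline"`
	objectMetaUIDandRV `json:"metadata,omitempty"`
}

type objectMetaUIDandRV struct {
	UID             types.UID `json:"uid,omitempty"`
	ResourceVersion string    `json:"resourceVersion,omitempty"`
}

func main() {
	patch := typeMetaUIDRV{
		TypeMeta: metav1.TypeMeta{Kind: "ConfigMap", APIVersion: "v1"},
		objectMetaUIDandRV: objectMetaUIDandRV{
			UID:             types.UID("8c3f2a6e-0000-1111-2222-333344445555"), // hypothetical
			ResourceVersion: "12345",                                           // hypothetical
		},
	}
	data, err := json.Marshal(patch)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(data))
	// {"kind":"ConfigMap","apiVersion":"v1","metadata":{"uid":"8c3f2a6e-0000-1111-2222-333344445555","resourceVersion":"12345"}}
	//
	// Applying this with server-side apply behaves as the comments in the diff describe:
	//   - object unchanged since the list -> apply succeeds and rewrites it at the current storage version
	//   - object updated in the meantime  -> 409 conflict ("object has been modified")
	//   - object deleted                  -> apply is interpreted as a create and fails with a "uid mismatch" conflict
	//   - object deleted and recreated    -> 409 conflict ("object has been modified")
}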
3 changes: 2 additions & 1 deletion pkg/controller/storageversionmigrator/util.go
@@ -62,7 +62,7 @@ func indexOfCondition(svm *svmv1alpha1.StorageVersionMigration, conditionType sv
func setStatusConditions(
toBeUpdatedSVM *svmv1alpha1.StorageVersionMigration,
conditionType svmv1alpha1.MigrationConditionType,
reason string,
reason, message string,
) *svmv1alpha1.StorageVersionMigration {
if !IsConditionTrue(toBeUpdatedSVM, conditionType) {
if conditionType == svmv1alpha1.MigrationSucceeded || conditionType == svmv1alpha1.MigrationFailed {
@@ -77,6 +77,7 @@ func setStatusConditions(
Status: corev1.ConditionTrue,
LastUpdateTime: metav1.Now(),
Reason: reason,
Message: message,
})
}

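With the extra parameter added above, callers can attach a human-readable message alongside the machine-readable reason. A sketch of a call on the failure path earlier in this commit and the condition it records (values illustrative):

// Sketch only: the "resource not found" failure path shown earlier uses the new parameter like this.
svm = setStatusConditions(svm, svmv1alpha1.MigrationFailed, migrationFailedStatusReason, "resource not found")
// which appends (or leaves in place) a condition shaped like:
//   Type:           svmv1alpha1.MigrationFailed
//   Status:         corev1.ConditionTrue
//   LastUpdateTime: metav1.Now()
//   Reason:         migrationFailedStatusReason
//   Message:        "resource not found"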
@@ -486,7 +486,10 @@ func buildControllerRoles() ([]rbacv1.ClusterRole, []rbacv1.ClusterRoleBinding)
Name: saRolePrefix + "storage-version-migrator-controller",
},
Rules: []rbacv1.PolicyRule{
rbacv1helpers.NewRule("list", "patch").Groups("*").Resources("*").RuleOrDie(),
// need list to get current RV for any resource
// need patch for SSA of any resource
// need create because SSA of a deleted resource will be interpreted as a create request; these always fail with a conflict error because UID is set
rbacv1helpers.NewRule("list", "create", "patch").Groups("*").Resources("*").RuleOrDie(),
rbacv1helpers.NewRule("update").Groups(storageVersionMigrationGroup).Resources("storageversionmigrations/status").RuleOrDie(),
},
})
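The helper chain above expands to a single wildcard rule; a sketch of the equivalent rbacv1.PolicyRule literal for the system:controller:storage-version-migrator-controller role (constructed directly rather than via rbacv1helpers):

// Sketch only: list is needed to read current resourceVersions, patch for the SSA requests,
// and create because SSA of a deleted object is authorized as a create even though it always
// ends in a conflict once the UID is set.
rule := rbacv1.PolicyRule{
	Verbs:     []string{"list", "create", "patch"},
	APIGroups: []string{"*"},
	Resources: []string{"*"},
}
_ = rule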
@@ -17,6 +17,7 @@ limitations under the License.
package converter

import (
"bytes"
"fmt"
"io"
"net/http"
@@ -131,7 +132,7 @@ func serve(w http.ResponseWriter, r *http.Request, convert convertFunc) {
return
}

klog.V(2).Infof("handling request: %v", body)
klog.V(2).Infof("handling request: %s", string(body))
obj, gvk, err := serializer.Decode(body, nil, nil)
if err != nil {
msg := fmt.Sprintf("failed to deserialize body (%v) with error %v", string(body), err)
@@ -152,7 +153,6 @@ func serve(w http.ResponseWriter, r *http.Request, convert convertFunc) {
}
convertReview.Response = doConversionV1beta1(convertReview.Request, convert)
convertReview.Response.UID = convertReview.Request.UID
klog.V(2).Info(fmt.Sprintf("sending response: %v", convertReview.Response))

// reset the request, it is not needed in a response.
convertReview.Request = &v1beta1.ConversionRequest{}
@@ -167,7 +167,6 @@ func serve(w http.ResponseWriter, r *http.Request, convert convertFunc) {
}
convertReview.Response = doConversionV1(convertReview.Request, convert)
convertReview.Response.UID = convertReview.Request.UID
klog.V(2).Info(fmt.Sprintf("sending response: %v", convertReview.Response))

// reset the request, it is not needed in a response.
convertReview.Request = &v1.ConversionRequest{}
@@ -187,12 +186,14 @@ func serve(w http.ResponseWriter, r *http.Request, convert convertFunc) {
http.Error(w, msg, http.StatusBadRequest)
return
}
err = outSerializer.Encode(responseObj, w)
var buf bytes.Buffer
err = outSerializer.Encode(responseObj, io.MultiWriter(w, &buf))
if err != nil {
klog.Error(err)
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
klog.V(2).Infof("sending response: %s", buf.String())
}

// ServeExampleConvert servers endpoint for the example converter defined as convertExampleCRD function.
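The webhook change above tees the encoded response through io.MultiWriter so the exact bytes sent to the client can be logged after a successful encode. A standalone sketch of the same pattern, with os.Stdout standing in for the http.ResponseWriter used by serve:

package main

import (
	"bytes"
	"fmt"
	"io"
	"os"
)

func main() {
	var buf bytes.Buffer
	w := io.MultiWriter(os.Stdout, &buf) // every write goes to the client and to the buffer
	if _, err := fmt.Fprint(w, `{"response":"ok"}`); err != nil {
		panic(err)
	}
	// buf now holds exactly what was written to the client, ready for logging.
	fmt.Println("\nsending response:", buf.String())
}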