Fixing yet another race condition during cluster/kernel upgrade
The following scenario causes worker pods to get stuck during a cluster
upgrade:
1) a kernel module is loaded onto the node. The NMC contains both spec
   and status for the current kernel version
2) a cluster upgrade starts. As part of the upgrade the node becomes
   Unschedulable
3) the Module-NMC controller removes the Spec from the NMC, since the
   node is Unschedulable
4) the NMC controller creates an unload worker pod, since the timestamp
   of the node's Ready condition has not yet been updated to a new value
   (the node is still not ready)
5) once the node becomes ready, the unload worker pod is scheduled with
   the old kernel version and therefore fails repeatedly.

Solution:
We will skip handling of spec, statuses, garbage collection and
labelling for nodes that are not ready (unschedulable). We will only
collect the completed worker pods and update the NMC statuses
accordingly. Once the node becomes ready, a new reconciliation loop
will kick in.
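The guard added by this commit calls the node API's IsNodeSchedulable helper,
whose implementation is not part of this diff. The following is only a minimal
sketch of what such a check might look like, assuming it treats a node carrying
a NoSchedule taint (e.g. a node cordoned during the upgrade) as unschedulable;
the package layout and function name here are illustrative, not the project's
actual code:

// Illustrative sketch only; the repository's real implementation may differ.
// It captures the idea behind the fix: a cordoned node (NoSchedule taint) is
// treated as not schedulable, so the reconciler skips it until the upgrade
// finishes and the node becomes ready again.
package node

import v1 "k8s.io/api/core/v1"

func isNodeSchedulable(node *v1.Node) bool {
	for _, taint := range node.Spec.Taints {
		if taint.Effect == v1.TaintEffectNoSchedule {
			// Cordoned/upgrading nodes carry a NoSchedule taint.
			return false
		}
	}
	return true
}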
yevgeny-shnaidman authored and k8s-ci-robot committed Oct 6, 2024
1 parent 29ada57 commit 6d66a6c
Showing 2 changed files with 39 additions and 6 deletions.
15 changes: 11 additions & 4 deletions internal/controllers/nmc_reconciler.go
@@ -67,8 +67,9 @@ const (
 //+kubebuilder:rbac:groups="core",resources=serviceaccounts,verbs=get;list;watch
 
 type NMCReconciler struct {
-	client client.Client
-	helper nmcReconcilerHelper
+	client  client.Client
+	helper  nmcReconcilerHelper
+	nodeAPI node.Node
 }
 
 func NewNMCReconciler(
@@ -82,8 +83,9 @@ func NewNMCReconciler(
 	pm := newPodManager(client, workerImage, scheme, workerCfg)
 	helper := newNMCReconcilerHelper(client, pm, recorder, nodeAPI)
 	return &NMCReconciler{
-		client: client,
-		helper: helper,
+		client:  client,
+		helper:  helper,
+		nodeAPI: nodeAPI,
 	}
 }
 
@@ -126,6 +128,11 @@ func (r *NMCReconciler) Reconcile(ctx context.Context, req reconcile.Request) (r
 		return ctrl.Result{}, fmt.Errorf("could not get node %s: %v", nmcObj.Name, err)
 	}
 
+	// skipping handling NMC spec, labelling, events until node becomes ready
+	if !r.nodeAPI.IsNodeSchedulable(&node) {
+		return ctrl.Result{}, nil
+	}
+
 	errs := make([]error, 0, len(nmcObj.Spec.Modules)+len(nmcObj.Status.Modules))
 
 	for _, mod := range nmcObj.Spec.Modules {
30 changes: 28 additions & 2 deletions internal/controllers/nmc_reconciler_test.go
@@ -52,6 +52,7 @@ var _ = Describe("NodeModulesConfigReconciler_Reconcile", func() {
 	var (
 		kubeClient *testclient.MockClient
 		wh         *MocknmcReconcilerHelper
+		nm         *node.MockNode
 
 		r *NMCReconciler
 
@@ -64,9 +65,11 @@ var _ = Describe("NodeModulesConfigReconciler_Reconcile", func() {
 		ctrl := gomock.NewController(GinkgoT())
 		kubeClient = testclient.NewMockClient(ctrl)
 		wh = NewMocknmcReconcilerHelper(ctrl)
+		nm = node.NewMockNode(ctrl)
 		r = &NMCReconciler{
-			client: kubeClient,
-			helper: wh,
+			client:  kubeClient,
+			helper:  wh,
+			nodeAPI: nm,
 		}
 	})
 
@@ -125,6 +128,27 @@ var _ = Describe("NodeModulesConfigReconciler_Reconcile", func() {
 		Expect(err).To(HaveOccurred())
 	})
 
+	It("should not continue if node is not schedulable", func() {
+		nmc := &kmmv1beta1.NodeModulesConfig{
+			ObjectMeta: metav1.ObjectMeta{Name: nmcName},
+		}
+		node := v1.Node{}
+		gomock.InOrder(
+			kubeClient.
+				EXPECT().
+				Get(ctx, nmcNsn, &kmmv1beta1.NodeModulesConfig{}).
+				Do(func(_ context.Context, _ types.NamespacedName, kubeNmc ctrlclient.Object, _ ...ctrlclient.Options) {
+					*kubeNmc.(*kmmv1beta1.NodeModulesConfig) = *nmc
+				}),
+			wh.EXPECT().SyncStatus(ctx, nmc),
+			kubeClient.EXPECT().Get(ctx, types.NamespacedName{Name: nmc.Name}, &node).Return(nil),
+			nm.EXPECT().IsNodeSchedulable(&node).Return(false),
+		)
+
+		_, err := r.Reconcile(ctx, req)
+		Expect(err).To(BeNil())
+	})
+
 	It("should process spec entries and orphan statuses", func() {
 		const (
 			mod0Name = "mod0"
@@ -188,6 +212,7 @@ var _ = Describe("NodeModulesConfigReconciler_Reconcile", func() {
 				}),
 			wh.EXPECT().SyncStatus(ctx, nmc),
 			kubeClient.EXPECT().Get(ctx, types.NamespacedName{Name: nmc.Name}, &node).Return(nil),
+			nm.EXPECT().IsNodeSchedulable(&node).Return(true),
 			wh.EXPECT().ProcessModuleSpec(contextWithValueMatch, nmc, &spec0, &status0, &node),
 			wh.EXPECT().ProcessModuleSpec(contextWithValueMatch, nmc, &spec1, nil, &node),
 			wh.EXPECT().ProcessUnconfiguredModuleStatus(contextWithValueMatch, nmc, &status2, &node),
@@ -266,6 +291,7 @@ var _ = Describe("NodeModulesConfigReconciler_Reconcile", func() {
 				}),
 			wh.EXPECT().SyncStatus(ctx, nmc).Return(nil),
 			kubeClient.EXPECT().Get(ctx, types.NamespacedName{Name: nmc.Name}, &node).Return(nil),
+			nm.EXPECT().IsNodeSchedulable(&node).Return(true),
 			wh.EXPECT().ProcessModuleSpec(contextWithValueMatch, nmc, &spec0, &status0, &node).Return(fmt.Errorf(errorMeassge)),
 			wh.EXPECT().ProcessUnconfiguredModuleStatus(contextWithValueMatch, nmc, &status2, &node).Return(fmt.Errorf(errorMeassge)),
 			wh.EXPECT().GarbageCollectInUseLabels(ctx, nmc).Return(fmt.Errorf(errorMeassge)),
