
Merge pull request kubevirt#6918 from iholder-redhat/bug/AlertHostModelUnscheduable

[Bug fix]: Unscheduable host-model VMI alert is not triggered
kubevirt-bot authored Dec 20, 2021
2 parents 13d494f + b457f74 commit 2c9bcc7
Showing 4 changed files with 210 additions and 24 deletions.
64 changes: 42 additions & 22 deletions pkg/virt-controller/watch/migration.go
@@ -467,14 +467,6 @@ func (c *MigrationController) updateStatus(migration *virtv1.VirtualMachineInsta
} else {
migrationCopy.Status.Phase = virtv1.MigrationScheduled
}
-		} else if cpu := vmi.Spec.Domain.CPU; cpu != nil && cpu.Model == virtv1.CPUModeHostModel && isPodPending(pod) {
-			nodes := c.nodeInformer.GetStore().List()
-			for _, node := range nodes {
-				if !isNodeSuitableForHostModelMigration(node.(*k8sv1.Node), pod) {
-					c.recorder.Eventf(migration, k8sv1.EventTypeWarning, NoSuitableNodesForHostModelMigration,
-						"Migration cannot proceed since no node is suitable to run the required CPU model / required features.")
-				}
-			}
}
case virtv1.MigrationScheduled:
if vmi.Status.MigrationState != nil && vmi.Status.MigrationState.TargetNode != "" {
@@ -940,6 +932,7 @@ func (c *MigrationController) handlePendingPodTimeout(migration *virtv1.VirtualM
secondsSpentPending := timeSinceCreationSeconds(&pod.ObjectMeta)

if isPodPendingUnschedulable(pod) {
+		c.alertIfHostModelIsUnschedulable(vmi, pod)
if secondsSpentPending >= unschedulableTimeout {
return c.deleteTimedOutTargetPod(migration, vmi, pod, "unschedulable pod timeout period exceeded")
} else {
@@ -1409,6 +1402,42 @@ func (c *MigrationController) getNodeForVMI(vmi *virtv1.VirtualMachineInstance)
return node, nil
}

+func (c *MigrationController) alertIfHostModelIsUnschedulable(vmi *virtv1.VirtualMachineInstance, targetPod *k8sv1.Pod) {
+	fittingNodeFound := false
+
+	if cpu := vmi.Spec.Domain.CPU; cpu == nil || cpu.Model != virtv1.CPUModeHostModel {
+		return
+	}
+
+	requiredNodeLabels := map[string]string{}
+	for key, value := range targetPod.Spec.NodeSelector {
+		if strings.HasPrefix(key, virtv1.HostModelCPULabel) || strings.HasPrefix(key, virtv1.CPUFeatureLabel) {
+			requiredNodeLabels[key] = value
+		}
+	}
+
+	nodes := c.nodeInformer.GetStore().List()
+	for _, nodeInterface := range nodes {
+		node := nodeInterface.(*k8sv1.Node)
+
+		if node.Name == vmi.Status.NodeName {
+			continue // avoid checking the VMI's source node
+		}
+
+		if isNodeSuitableForHostModelMigration(node, requiredNodeLabels) {
+			log.Log.Object(vmi).Infof("Node %s is suitable to run vmi %s host model cpu mode (more nodes may fit as well)", node.Name, vmi.Name)
+			fittingNodeFound = true
+			break
+		}
+	}
+
+	if !fittingNodeFound {
+		warningMsg := fmt.Sprintf("Migration cannot proceed since no node is suitable to run the required CPU model / required features: %v", requiredNodeLabels)
+		c.recorder.Eventf(vmi, k8sv1.EventTypeWarning, NoSuitableNodesForHostModelMigration, warningMsg)
+		log.Log.Object(vmi).Warning(warningMsg)
+	}
+}
+
func prepareNodeSelectorForHostCpuModel(node *k8sv1.Node, pod *k8sv1.Pod) error {
var hostCpuModel, hostCpuModelLabelKey, hostModelLabelValue string

@@ -1436,21 +1465,12 @@ func prepareNodeSelectorForHostCpuModel(node *k8sv1.Node, pod *k8sv1.Pod) error
return nil
}

-func isNodeSuitableForHostModelMigration(node *k8sv1.Node, pod *k8sv1.Pod) bool {
-	nodeHasLabel := func(key, value string) bool {
-		if nodeValue, ok := node.Labels[key]; ok {
-			if value == nodeValue {
-				return true
-			}
-		}
-		return false
-	}
+func isNodeSuitableForHostModelMigration(node *k8sv1.Node, requiredNodeLabels map[string]string) bool {
+	for key, value := range requiredNodeLabels {
+		nodeValue, ok := node.Labels[key]


-	for key, value := range pod.Labels {
-		if strings.HasPrefix(key, virtv1.HostModelCPULabel) || strings.HasPrefix(key, virtv1.CPUFeatureLabel) {
-			if !nodeHasLabel(key, value) {
-				return false
-			}
+		if !ok || nodeValue != value {
+			return false
}
}

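To make the new contract in this file easier to follow: prepareNodeSelectorForHostCpuModel copies the source node's host-model and CPU-feature labels into the target pod's NodeSelector, and isNodeSuitableForHostModelMigration (also used by the new alert helper) checks a candidate node against exactly those labels. Below is a minimal, illustrative sketch of that check; the "fake-model" and "ssbd" label suffixes are made up, and the snippet assumes the package's existing k8sv1/metav1/virtv1 import aliases.

	// Labels a host-model target pod's NodeSelector might require (hypothetical values).
	requiredNodeLabels := map[string]string{
		virtv1.HostModelCPULabel + "fake-model": "true",
		virtv1.CPUFeatureLabel + "ssbd":         "true",
	}

	// A candidate target node advertising both labels.
	candidate := &k8sv1.Node{
		ObjectMeta: metav1.ObjectMeta{
			Name: "node-b",
			Labels: map[string]string{
				virtv1.HostModelCPULabel + "fake-model": "true",
				virtv1.CPUFeatureLabel + "ssbd":         "true",
			},
		},
	}

	// true: every required label is present on the node with a matching value.
	_ = isNodeSuitableForHostModelMigration(candidate, requiredNodeLabels)

	// false as soon as any required label is missing or differs,
	// which is what alertIfHostModelIsUnschedulable relies on.
	delete(candidate.Labels, virtv1.CPUFeatureLabel+"ssbd")
	_ = isNodeSuitableForHostModelMigration(candidate, requiredNodeLabels)
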
48 changes: 48 additions & 0 deletions pkg/virt-controller/watch/migration_test.go
@@ -1393,6 +1393,54 @@ var _ = Describe("Migration watcher", func() {

})

Context("Migration of host-model VMI", func() {

It("should trigger alert when no node supports host-model", func() {
const nodeName = "testNode"

By("Defining node (that does not support host model)")
node := newNode(nodeName)

By("Defining VMI")
vmi := newVirtualMachine("testvmi", virtv1.Running)
vmi.Status.NodeName = nodeName
vmi.Spec.Domain.CPU = &virtv1.CPU{Model: virtv1.CPUModeHostModel}

By("Defining migration")
migration := newMigration("testmigration", vmi.Name, virtv1.MigrationScheduling)
migration.Annotations[virtv1.MigrationUnschedulablePodTimeoutSecondsAnnotation] = "1"

By("Defining target pod")
targetPod := newTargetPodForVirtualMachine(vmi, migration, k8sv1.PodPending)
if targetPod.Spec.NodeSelector == nil {
targetPod.Spec.NodeSelector = make(map[string]string)
}
targetPod.Spec.NodeSelector[virtv1.HostModelCPULabel+"fake-model"] = "true"
if node.Labels == nil {
node.Labels = make(map[string]string)
}
node.Labels[virtv1.HostModelCPULabel+"other-fake-model"] = "true"
targetPod.CreationTimestamp = metav1.NewTime(now().Time.Add(time.Duration(-defaultUnschedulablePendingTimeoutSeconds) * time.Second))
targetPod.Status.Conditions = append(targetPod.Status.Conditions, k8sv1.PodCondition{
Type: k8sv1.PodScheduled,
Status: k8sv1.ConditionFalse,
Reason: k8sv1.PodReasonUnschedulable,
})

By("Adding objects to mocked cluster")
addNode(node)
addMigration(migration)
addVirtualMachineInstance(vmi)
podFeeder.Add(targetPod)

By("Running controller and setting expectations")
shouldExpectPodDeletion()
controller.Execute()
testutils.ExpectEvent(recorder, NoSuitableNodesForHostModelMigration)
testutils.ExpectEvent(recorder, SuccessfulDeletePodReason)
})

})
})

func newPDB(name string, vmi *virtv1.VirtualMachineInstance, pods int) *v1beta1.PodDisruptionBudget {
10 changes: 8 additions & 2 deletions tests/migration.go
@@ -49,16 +49,22 @@ func expectMigrationSuccessWithOffset(offset int, virtClient kubecli.KubevirtCli
}

func RunMigrationAndExpectCompletion(virtClient kubecli.KubevirtClient, migration *v1.VirtualMachineInstanceMigration, timeout int) string {
By("Starting a Migration")
migration = RunMigration(virtClient, migration, timeout)

return ExpectMigrationSuccess(virtClient, migration, timeout)
}

func RunMigration(virtClient kubecli.KubevirtClient, migration *v1.VirtualMachineInstanceMigration, timeout int) *v1.VirtualMachineInstanceMigration {
By("Starting a Migration")
var err error
var migrationCreated *v1.VirtualMachineInstanceMigration
Eventually(func() error {
migrationCreated, err = virtClient.VirtualMachineInstanceMigration(migration.Namespace).Create(migration, &metav1.CreateOptions{})
return err
}, timeout, 1*time.Second).Should(Succeed(), "migration creation should succeed")
migration = migrationCreated

-	return ExpectMigrationSuccess(virtClient, migration, timeout)
+	return migrationCreated
}

func ConfirmVMIPostMigration(virtClient kubecli.KubevirtClient, vmi *v1.VirtualMachineInstance, migrationUID string) *v1.VirtualMachineInstance {
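From a caller's perspective, the split keeps the old helper's behavior intact while exposing the creation step on its own. A short usage sketch (assuming a running vmi and virtClient, called as in the e2e tests):

	// Unchanged happy path: create the migration and wait for completion.
	migrationUID := tests.RunMigrationAndExpectCompletion(virtClient, tests.NewRandomMigration(vmi.Name, vmi.Namespace), tests.MigrationWaitTime)

	// New option: only create the migration, for cases where the target pod is
	// expected to stay Pending (as in the host-model alert test below).
	pendingMigration := tests.RunMigration(virtClient, tests.NewRandomMigration(vmi.Name, vmi.Namespace), tests.MigrationWaitTime)
	_, _ = migrationUID, pendingMigration
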
112 changes: 112 additions & 0 deletions tests/migration_test.go
@@ -29,6 +29,8 @@ import (
"strings"
"sync"

"kubevirt.io/kubevirt/pkg/virt-controller/watch"

virtconfig "kubevirt.io/kubevirt/pkg/virt-config"
virthandler "kubevirt.io/kubevirt/pkg/virt-handler"
"kubevirt.io/kubevirt/tests/framework/checks"
@@ -2300,6 +2302,99 @@ var _ = Describe("[Serial][rfe_id:393][crit:high][vendor:[email protected]][leve
Expect(isModelSupportedOnNode(newNode, hostModel)).To(BeTrue(), "original host model should be supported on new node")
expectFeatureToBeSupportedOnNode(newNode, requiredFeatures)
})

Context("[Serial]Should trigger event", func() {

var originalNodeLabels map[string]string
var originalNodeAnnotations map[string]string
var vmi *v1.VirtualMachineInstance
var node *k8sv1.Node

copyMap := func(originalMap map[string]string) map[string]string {
newMap := make(map[string]string, len(originalMap))

for key, value := range originalMap {
newMap[key] = value
}

return newMap
}

BeforeEach(func() {
By("Creating a VMI with default CPU mode")
vmi = cirrosVMIWithEvictionStrategy()
vmi.Spec.Domain.CPU = &v1.CPU{Model: v1.CPUModeHostModel}

By("Starting the VirtualMachineInstance")
vmi = runVMIAndExpectLaunch(vmi, 240)

By("Saving the original node's state")
node, err = virtClient.CoreV1().Nodes().Get(context.Background(), vmi.Status.NodeName, metav1.GetOptions{})
Expect(err).ToNot(HaveOccurred())

originalNodeLabels = copyMap(node.Labels)
originalNodeAnnotations = copyMap(node.Annotations)

node = disableNodeLabeller(node, virtClient)
})

AfterEach(func() {
By("Restore node to its original state")
node.Labels = originalNodeLabels
node.Annotations = originalNodeAnnotations
node, err = virtClient.CoreV1().Nodes().Update(context.Background(), node, metav1.UpdateOptions{})
Expect(err).ShouldNot(HaveOccurred())

Eventually(func() map[string]string {
node, err = virtClient.CoreV1().Nodes().Get(context.Background(), node.Name, metav1.GetOptions{})
Expect(err).ShouldNot(HaveOccurred())

return node.Labels
}, 10*time.Second, 1*time.Second).Should(Equal(originalNodeLabels), "Node should have fake host model")
Expect(node.Annotations).To(Equal(originalNodeAnnotations))
})

It("[test_id:7505]when no node is suited for host model", func() {
By("Changing node labels to support fake host model")
// Remove all supported host models
for key, _ := range node.Labels {
if strings.HasPrefix(key, v1.HostModelCPULabel) {
delete(node.Labels, key)
}
}
node.Labels[v1.HostModelCPULabel+"fake-model"] = "true"

node, err = virtClient.CoreV1().Nodes().Update(context.Background(), node, metav1.UpdateOptions{})
Expect(err).ShouldNot(HaveOccurred())

Eventually(func() bool {
node, err = virtClient.CoreV1().Nodes().Get(context.Background(), node.Name, metav1.GetOptions{})
Expect(err).ShouldNot(HaveOccurred())

labelValue, ok := node.Labels[v1.HostModelCPULabel+"fake-model"]
return ok && labelValue == "true"
}, 10*time.Second, 1*time.Second).Should(BeTrue(), "Node should have fake host model")

By("Starting the migration")
migration := tests.NewRandomMigration(vmi.Name, vmi.Namespace)
_ = tests.RunMigration(virtClient, migration, tests.MigrationWaitTime)

By("Expecting for an alert to be triggered")
Eventually(func() []k8sv1.Event {
events, err := virtClient.CoreV1().Events(vmi.Namespace).List(
context.Background(),
metav1.ListOptions{
FieldSelector: fmt.Sprintf("type=%s,reason=%s", k8sv1.EventTypeWarning, watch.NoSuitableNodesForHostModelMigration),
},
)
Expect(err).ToNot(HaveOccurred())

return events.Items
}, 30*time.Second, 1*time.Second).Should(HaveLen(1), "Exactly one alert is expected")
})

})

})

Context("with migration policies", func() {
@@ -3140,3 +3235,20 @@ func temporaryTLSConfig() *tls.Config {
},
}
}

func disableNodeLabeller(node *k8sv1.Node, virtClient kubecli.KubevirtClient) *k8sv1.Node {
var err error

node.Annotations[v1.LabellerSkipNodeAnnotation] = "true"

node, err = virtClient.CoreV1().Nodes().Update(context.Background(), node, metav1.UpdateOptions{})
Expect(err).ShouldNot(HaveOccurred())

Eventually(func() bool {
node, err = virtClient.CoreV1().Nodes().Get(context.Background(), node.Name, metav1.GetOptions{})
value, ok := node.Annotations[v1.LabellerSkipNodeAnnotation]
return ok && value == "true"
}, 30*time.Second, time.Second).Should(BeTrue())

return node
}
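
Note that disableNodeLabeller only sets the skip annotation and waits for it to stick; it does not undo anything, so callers are expected to snapshot and restore the node themselves, as the AfterEach above does. A condensed sketch of that contract (same virtClient/node variables, copyMap as defined in the test context above):

	// Snapshot the node before the test mutates it.
	originalLabels := copyMap(node.Labels)
	originalAnnotations := copyMap(node.Annotations)

	// Keep virt-handler's node-labeller from re-adding the real host-model labels
	// while the test swaps in a fake model.
	node = disableNodeLabeller(node, virtClient)

	// In AfterEach: write the snapshot back (labels and the skip annotation),
	// so the labeller resumes and later tests see the real host model again.
	node.Labels = originalLabels
	node.Annotations = originalAnnotations
	node, err = virtClient.CoreV1().Nodes().Update(context.Background(), node, metav1.UpdateOptions{})
	Expect(err).ShouldNot(HaveOccurred())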
