Skip to content

Commit

Permalink
virt-handler should move vmi to failed phase in case of phase1 critic…
Browse files Browse the repository at this point in the history
…al err

If phase1 reported an unrecoverable error (error of type
network.CriticalNetworkError), the vmi should be moved to failed status.

Signed-off-by: Alona Kaplan <[email protected]>
  • Loading branch information
AlonaKaplan committed Jun 23, 2020
1 parent b03f848 commit 0fb022f
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 9 deletions.
44 changes: 35 additions & 9 deletions pkg/virt-handler/vm.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,12 @@ type VirtualMachineController struct {
domainNotifyPipes map[string]string
}

// virtLauncherCriticalNetworkError is an error value that carries a single
// message string. Code elsewhere in this file type-asserts a sync error
// against *virtLauncherCriticalNetworkError to decide whether the VMI is
// moved to the Failed phase.
type virtLauncherCriticalNetworkError struct {
	// msg is the text returned verbatim by Error.
	msg string
}

// Error returns the stored message, which makes
// *virtLauncherCriticalNetworkError satisfy the error interface.
func (ce *virtLauncherCriticalNetworkError) Error() string {
	return ce.msg
}

func (d *VirtualMachineController) startDomainNotifyPipe(vmi *v1.VirtualMachineInstance) (net.Listener, error) {

res, err := d.podIsolationDetector.Detect(vmi)
Expand Down Expand Up @@ -337,12 +343,12 @@ func (d *VirtualMachineController) clearPodNetworkPhase1(vmi *v1.VirtualMachineI
// it results in killing/spawning a posix thread. Only do this if it
// is absolutely necessary. The cache informs us if this action has
// already taken place or not for a VMI
func (d *VirtualMachineController) setPodNetworkPhase1(vmi *v1.VirtualMachineInstance) error {
func (d *VirtualMachineController) setPodNetworkPhase1(vmi *v1.VirtualMachineInstance) (bool, error) {

// configure network
res, err := d.podIsolationDetector.Detect(vmi)
if err != nil {
return fmt.Errorf("failed to detect isolation for launcher pod: %v", err)
return false, fmt.Errorf("failed to detect isolation for launcher pod: %v", err)
}

pid := res.Pid()
Expand All @@ -354,20 +360,26 @@ func (d *VirtualMachineController) setPodNetworkPhase1(vmi *v1.VirtualMachineIns

if ok && cachedPid == pid {
// already completed phase1
return nil
return false, nil
}

err = res.DoNetNS(func() error { return network.SetupPodNetworkPhase1(vmi, pid) })
if err != nil {
return fmt.Errorf("failed to configure vmi network for migration target: %v", err)
_, critical := err.(*network.CriticalNetworkError)
if critical {
return true, err
} else {
return false, err
}

}

// cache that phase 1 has completed for this vmi.
d.phase1NetworkSetupCacheLock.Lock()
d.phase1NetworkSetupCache[vmi.UID] = pid
d.phase1NetworkSetupCacheLock.Unlock()

return nil
return false, nil
}

func domainMigrated(domain *api.Domain) bool {
Expand Down Expand Up @@ -697,6 +709,10 @@ func (d *VirtualMachineController) updateVMIStatus(vmi *v1.VirtualMachineInstanc
condManager.RemoveCondition(vmi, v1.VirtualMachineInstancePaused)
}

if _, ok := syncError.(*virtLauncherCriticalNetworkError); ok {
log.Log.Errorf("virt-launcher crashed. Updating VMI %s status to Failed", vmi.Name)
vmi.Status.Phase = v1.Failed
}
condManager.CheckFailure(vmi, syncError, "Synchronizing with the Domain failed.")

if !reflect.DeepEqual(oldStatus, vmi.Status) {
Expand Down Expand Up @@ -1763,9 +1779,14 @@ func (d *VirtualMachineController) processVmUpdate(origVMI *v1.VirtualMachineIns
}

// configure network inside virt-launcher compute container
err = d.setPodNetworkPhase1(vmi)
criticalNetworkError, err := d.setPodNetworkPhase1(vmi)
if err != nil {
return fmt.Errorf("failed to configure vmi network for migration target: %v", err)
if criticalNetworkError {
return &virtLauncherCriticalNetworkError{fmt.Sprintf("failed to configure vmi network for migration target: %v", err)}
} else {
return fmt.Errorf("failed to configure vmi network for migration target: %v", err)
}

}

if err := client.SyncMigrationTarget(vmi); err != nil {
Expand Down Expand Up @@ -1809,9 +1830,14 @@ func (d *VirtualMachineController) processVmUpdate(origVMI *v1.VirtualMachineIns
return err
}

err = d.setPodNetworkPhase1(vmi)
criticalNetworkError, err := d.setPodNetworkPhase1(vmi)
if err != nil {
return fmt.Errorf("failed to configure vmi network: %v", err)
if criticalNetworkError {
return &virtLauncherCriticalNetworkError{fmt.Sprintf("failed to configure vmi network: %v", err)}
} else {
return fmt.Errorf("failed to configure vmi network: %v", err)
}

}

// set runtime limits as needed
Expand Down
16 changes: 16 additions & 0 deletions pkg/virt-handler/vm_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -804,6 +804,22 @@ var _ = Describe("VirtualMachineInstance", func() {
controller.Execute()
})

// Verifies that a critical phase1 network failure moves the VMI to the Failed
// phase instead of only being reported as a failed synchronization.
It("should move VirtualMachineInstance to Failed if configuring the networks on the virt-launcher fails", func() {
	vmi := v1.NewMinimalVMI("testvmi")
	vmi.ObjectMeta.ResourceVersion = "1"
	vmi.Status.Phase = v1.Scheduled

	mockWatchdog.CreateFile(vmi)
	vmiFeeder.Add(vmi)

	// SetupPodNetworkPhase1 is a package-level hook; put the original back so
	// the stub does not leak into specs that run after this one.
	origSetupPhase1 := network.SetupPodNetworkPhase1
	defer func() { network.SetupPodNetworkPhase1 = origSetupPhase1 }()

	// setPodNetworkPhase1 only reports an error as critical when it is a
	// *network.CriticalNetworkError, and only then is the phase set to Failed.
	// A plain fmt.Errorf value would leave the phase untouched and this spec
	// could not pass. new() is used because the fields of the type are not
	// visible from this part of the code base.
	network.SetupPodNetworkPhase1 = func(vm *v1.VirtualMachineInstance, pid int) error {
		return new(network.CriticalNetworkError)
	}

	vmiInterface.EXPECT().Update(gomock.Any()).Do(func(updated *v1.VirtualMachineInstance) {
		Expect(updated.Status.Phase).To(Equal(v1.Failed))
	})
	controller.Execute()
})

It("should remove an error condition if a synchronization run succeeds", func() {
vmi := v1.NewMinimalVMI("testvmi")
vmi.UID = vmiTestUUID
Expand Down

0 comments on commit 0fb022f

Please sign in to comment.