Skip to content

Commit

Permalink
Fix issue with domain informer not always receiving final domain DELE…
Browse files Browse the repository at this point in the history
…TE from libvirt

Signed-off-by: David Vossel <[email protected]>
  • Loading branch information
davidvossel committed Apr 13, 2020
1 parent 8d8baaf commit 4f82b81
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 20 deletions.
9 changes: 5 additions & 4 deletions pkg/virt-handler/cache/cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ var _ = Describe("Domain informer", func() {

list = append(list, api.NewMinimalDomain("testvmi1"))

socketPath := filepath.Join(socketsDir, "1234", "default_testvmi1_sock")
socketPath := filepath.Join(socketsDir, "1234", cmdclient.StandardLauncherSocketFileName)
domainManager.EXPECT().ListAllDomains().Return(list, nil)

cmdserver.RunServer(socketPath, domainManager, stopChan, nil)
Expand All @@ -123,7 +123,7 @@ var _ = Describe("Domain informer", func() {
domain := api.NewMinimalDomain("test")
list = append(list, domain)

socketPath := filepath.Join(socketsDir, "1234", "default_test_sock")
socketPath := filepath.Join(socketsDir, "1234", cmdclient.StandardLauncherSocketFileName)
domainManager.EXPECT().ListAllDomains().Return(list, nil)

cmdserver.RunServer(socketPath, domainManager, stopChan, nil)
Expand All @@ -140,7 +140,7 @@ var _ = Describe("Domain informer", func() {
})

It("should detect expired watchdog file.", func() {
socketPath := filepath.Join(socketsDir, "1234", "default_test_sock")
socketPath := filepath.Join(socketsDir, "default_test_sock")
f, err := os.Create(socketPath)
Expect(err).ToNot(HaveOccurred())
f.Close()
Expand All @@ -149,6 +149,7 @@ var _ = Describe("Domain informer", func() {
backgroundWatcherStarted: false,
virtShareDir: shareDir,
watchdogTimeout: 1,
unresponsiveSockets: make(map[string]int64),
}

watchdogFile := watchdog.WatchdogFileFromNamespaceName(shareDir, "default", "test")
Expand Down Expand Up @@ -253,7 +254,7 @@ var _ = Describe("Domain informer", func() {
domain := api.NewMinimalDomain("test")
list = append(list, domain)

socketPath := filepath.Join(socketsDir, "1234", "default_test_sock")
socketPath := filepath.Join(socketsDir, "1234", cmdclient.StandardLauncherSocketFileName)
domainManager.EXPECT().ListAllDomains().Return(list, nil)

// This file doesn't have a unix sock server behind it
Expand Down
15 changes: 6 additions & 9 deletions pkg/virt-handler/cmd-client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (
"net/rpc"
"os"
"path/filepath"
"strings"
"syscall"
"time"

Expand Down Expand Up @@ -165,7 +166,11 @@ func ListAllSockets(baseDir string) ([]string, error) {
return nil, err
}
for _, dir := range directories {
if !dir.IsDir() {
if dir.IsDir() {
// append the socket file for each VMI directory even if
// that socket file has technically already been deleted.
socketFiles = append(socketFiles, filepath.Join(socketsDir, dir.Name(), StandardLauncherSocketFileName))
} else if !dir.IsDir() && strings.Contains(dir.Name(), "_sock") {
// legacy support.
// The old way of handling launcher sockets was to
// dump them all in the same directory. So if we encounter
Expand All @@ -174,14 +179,6 @@ func ListAllSockets(baseDir string) ([]string, error) {
socketFiles = append(socketFiles, filepath.Join(socketsDir, dir.Name()))
continue
}
files, err := ioutil.ReadDir(filepath.Join(socketsDir, dir.Name()))
if err != nil {
return nil, err
}

for _, file := range files {
socketFiles = append(socketFiles, filepath.Join(socketsDir, dir.Name(), file.Name()))
}
}
return socketFiles, nil
}
Expand Down
53 changes: 46 additions & 7 deletions pkg/virt-handler/vm.go
Original file line number Diff line number Diff line change
Expand Up @@ -1090,9 +1090,17 @@ func (d *VirtualMachineController) execute(key string) error {
}
// If we found an outdated domain which is also not alive anymore, clean up
if expired {
return d.processVmCleanup(oldVMI)
log.Log.Object(oldVMI).Infof("Detected stale vmi %s that still needs cleanup before new vmi %s with identical name/namespace can be processed", oldVMI.UID, vmi.UID)
err = d.processVmCleanup(oldVMI)
if err != nil {
return err
}
}
// if the command server is still responsive, we are not allowed to clean up

// Make sure we re-enqueue the key to ensure this new VMI is processed
// after the stale domain is removed
d.Queue.AddAfter(controller.VirtualMachineKey(vmi), time.Second*5)

return nil
}

Expand Down Expand Up @@ -1135,6 +1143,7 @@ func (d *VirtualMachineController) processVmCleanup(vmi *v1.VirtualMachineInstan

vmiId := string(vmi.UID)

log.Log.Object(vmi).Infof("Performing final local cleanup for vmi with uid %s", vmiId)
// If the VMI is using the old graceful shutdown trigger on
// a hostmount, make sure to clear that file still.
err := vmGracefulShutdownTriggerClear(d.virtShareDir, vmi)
Expand All @@ -1154,7 +1163,31 @@ func (d *VirtualMachineController) processVmCleanup(vmi *v1.VirtualMachineInstan
d.clearPodNetworkPhase1(vmi)

// Watch dog file and command client must be the last things removed here
return d.closeLauncherClient(vmi)
err = d.closeLauncherClient(vmi)
if err != nil {
return err
}

key, err := controller.KeyFunc(vmi)
if err != nil {
return err
}

domain, domainExists, err := d.getDomainFromCache(key)
if err != nil {
return err
}

// Remove the domain from cache in the event that we're performing
// a final cleanup and never received the "DELETE" event. This is
// possible if the VMI pod goes away before we receive the final domain
// "DELETE"
if domainExists {
log.Log.Object(domain).Infof("Removing domain from cache during final cleanup")
return d.domainInformer.GetStore().Delete(domain)
}

return nil
}

func (d *VirtualMachineController) closeLauncherClient(vmi *v1.VirtualMachineInstance) error {
Expand All @@ -1173,11 +1206,8 @@ func (d *VirtualMachineController) closeLauncherClient(vmi *v1.VirtualMachineIns
client, ok := d.launcherClients[sockFile]
if ok {
client.Close()
delete(d.launcherClients, sockFile)
}

os.RemoveAll(sockFile)

// With the new socket format, we need to clean up
// the entire directory the launcher socket is placed in
// this is because we now use a single directory per launcher
Expand All @@ -1187,7 +1217,15 @@ func (d *VirtualMachineController) closeLauncherClient(vmi *v1.VirtualMachineIns
// The new socket format can be detected with the StandardLauncherSocketFileName const
// The old format was dynamic, and looks like <uid>_sock
if cmdclient.SocketMonitoringEnabled(sockFile) {
os.RemoveAll(filepath.Dir(sockFile))
err := os.RemoveAll(filepath.Dir(sockFile))
if err != nil {
return err
}
} else {
err := os.RemoveAll(sockFile)
if err != nil {
return err
}
}

// for legacy support, ensure watchdog is removed when client is removed
Expand All @@ -1197,6 +1235,7 @@ func (d *VirtualMachineController) closeLauncherClient(vmi *v1.VirtualMachineIns
return err
}

delete(d.launcherClients, sockFile)
return nil
}

Expand Down

0 comments on commit 4f82b81

Please sign in to comment.