Skip to content

Commit

Permalink
keep original pod ip on a dummy interface
Browse files Browse the repository at this point in the history
When kubelet is restarted, it goes over all pods, peeks in their
respective network namespaces and tries to detect their IP in order
to update their Pod objects.

However, with the bridge binding mode, we move this IP address to VM
and the pod interface is left IP-less. That causes kubelet's IP check
to fail and results in pod removal.

With this patch, we use a dummy interface named after the pod interface
to keep the pod IP address. When kubelet performs the check, it sees
interface with expectected name (eth0) and reports its IP address on
Pod object.

Signed-off-by: Ryan Hallisey <[email protected]>

Fix dummy unit test

Add func test for vmi restart w/ bridge network

Signed-off-by: Ryan Hallisey <[email protected]>

Restart kubelet

Signed-off-by: Ryan Hallisey <[email protected]>

Check for the eth0 and eth0-nic interfaces
  • Loading branch information
phoracek authored and rthallisey committed Feb 4, 2021
1 parent b013947 commit 859e99c
Show file tree
Hide file tree
Showing 10 changed files with 210 additions and 13 deletions.
8 changes: 8 additions & 0 deletions pkg/util/sysctl/sysctl.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Originally copied from https://github.com/kubernetes/kubernetes/blob/d8695d06b71
package sysctl

import (
"fmt"
"io/ioutil"
"path"
"strconv"
Expand All @@ -34,6 +35,8 @@ type Interface interface {
GetSysctl(sysctl string) (int, error)
// SetSysctl modifies the specified sysctl flag to the new value
SetSysctl(sysctl string, newVal int) error
// GetIpv4ArpIgnoreAll returns the interface's path to set arp_ignore
GetIpv4ArpIgnoreAll() string
}

// New returns a new Interface for accessing sysctl
Expand All @@ -45,6 +48,11 @@ func New() Interface {
type procSysctl struct {
}

// GetIpv4ArpIgnoreAll returns a path to set arp_ignore on all interfaces
func (*procSysctl) GetIpv4ArpIgnoreAll() string {
return fmt.Sprintf("net/ipv4/conf/all/arp_ignore")
}

// GetSysctl returns the value for the specified sysctl setting
func (*procSysctl) GetSysctl(sysctl string) (int, error) {
data, err := ioutil.ReadFile(path.Join(sysctlBase, sysctl))
Expand Down
14 changes: 14 additions & 0 deletions pkg/virt-launcher/virtwrap/network/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,10 @@ type NetworkHandler interface {
RouteList(link netlink.Link, family int) ([]netlink.Route, error)
AddrDel(link netlink.Link, addr *netlink.Addr) error
AddrAdd(link netlink.Link, addr *netlink.Addr) error
AddrReplace(link netlink.Link, addr *netlink.Addr) error
LinkSetDown(link netlink.Link) error
LinkSetUp(link netlink.Link) error
LinkSetName(link netlink.Link, name string) error
LinkAdd(link netlink.Link) error
LinkSetLearningOff(link netlink.Link) error
ParseAddr(s string) (*netlink.Addr, error)
Expand All @@ -109,6 +111,7 @@ type NetworkHandler interface {
IsIpv6Enabled(interfaceName string) (bool, error)
IsIpv4Primary() (bool, error)
ConfigureIpv6Forwarding() error
ConfigureIpv4ArpIgnore() error
IptablesNewChain(proto iptables.Protocol, table, chain string) error
IptablesAppendRule(proto iptables.Protocol, table, chain string, rulespec ...string) error
NftablesNewChain(proto iptables.Protocol, table, chain string) error
Expand Down Expand Up @@ -140,6 +143,9 @@ func (h *NetworkUtilsHandler) AddrList(link netlink.Link, family int) ([]netlink
func (h *NetworkUtilsHandler) RouteList(link netlink.Link, family int) ([]netlink.Route, error) {
return netlink.RouteList(link, family)
}
func (h *NetworkUtilsHandler) AddrReplace(link netlink.Link, addr *netlink.Addr) error {
return netlink.AddrReplace(link, addr)
}
func (h *NetworkUtilsHandler) AddrDel(link netlink.Link, addr *netlink.Addr) error {
return netlink.AddrDel(link, addr)
}
Expand All @@ -149,6 +155,9 @@ func (h *NetworkUtilsHandler) LinkSetDown(link netlink.Link) error {
func (h *NetworkUtilsHandler) LinkSetUp(link netlink.Link) error {
return netlink.LinkSetUp(link)
}
func (h *NetworkUtilsHandler) LinkSetName(link netlink.Link, name string) error {
return netlink.LinkSetName(link, name)
}
func (h *NetworkUtilsHandler) LinkAdd(link netlink.Link) error {
return netlink.LinkAdd(link)
}
Expand Down Expand Up @@ -180,6 +189,11 @@ func (h *NetworkUtilsHandler) HasNatIptables(proto iptables.Protocol) bool {
return true
}

func (h *NetworkUtilsHandler) ConfigureIpv4ArpIgnore() error {
err := sysctl.New().SetSysctl(sysctl.New().GetIpv4ArpIgnoreAll(), 1)
return err
}

func (h *NetworkUtilsHandler) ConfigureIpv6Forwarding() error {
err := sysctl.New().SetSysctl(sysctl.NetIPv6Forwarding, 1)
return err
Expand Down
30 changes: 30 additions & 0 deletions pkg/virt-launcher/virtwrap/network/generated_mock_common.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,16 @@ func (_mr *_MockNetworkHandlerRecorder) AddrAdd(arg0, arg1 interface{}) *gomock.
return _mr.mock.ctrl.RecordCall(_mr.mock, "AddrAdd", arg0, arg1)
}

func (_m *MockNetworkHandler) AddrReplace(link netlink.Link, addr *netlink.Addr) error {
ret := _m.ctrl.Call(_m, "AddrReplace", link, addr)
ret0, _ := ret[0].(error)
return ret0
}

func (_mr *_MockNetworkHandlerRecorder) AddrReplace(arg0, arg1 interface{}) *gomock.Call {
return _mr.mock.ctrl.RecordCall(_mr.mock, "AddrReplace", arg0, arg1)
}

func (_m *MockNetworkHandler) LinkSetDown(link netlink.Link) error {
ret := _m.ctrl.Call(_m, "LinkSetDown", link)
ret0, _ := ret[0].(error)
Expand All @@ -107,6 +117,16 @@ func (_mr *_MockNetworkHandlerRecorder) LinkSetUp(arg0 interface{}) *gomock.Call
return _mr.mock.ctrl.RecordCall(_mr.mock, "LinkSetUp", arg0)
}

func (_m *MockNetworkHandler) LinkSetName(link netlink.Link, name string) error {
ret := _m.ctrl.Call(_m, "LinkSetName", link, name)
ret0, _ := ret[0].(error)
return ret0
}

func (_mr *_MockNetworkHandlerRecorder) LinkSetName(arg0, arg1 interface{}) *gomock.Call {
return _mr.mock.ctrl.RecordCall(_mr.mock, "LinkSetName", arg0, arg1)
}

func (_m *MockNetworkHandler) LinkAdd(link netlink.Link) error {
ret := _m.ctrl.Call(_m, "LinkAdd", link)
ret0, _ := ret[0].(error)
Expand Down Expand Up @@ -245,6 +265,16 @@ func (_mr *_MockNetworkHandlerRecorder) ConfigureIpv6Forwarding() *gomock.Call {
return _mr.mock.ctrl.RecordCall(_mr.mock, "ConfigureIpv6Forwarding")
}

func (_m *MockNetworkHandler) ConfigureIpv4ArpIgnore() error {
ret := _m.ctrl.Call(_m, "ConfigureIpv4ArpIgnore")
ret0, _ := ret[0].(error)
return ret0
}

func (_mr *_MockNetworkHandlerRecorder) ConfigureIpv4ArpIgnore() *gomock.Call {
return _mr.mock.ctrl.RecordCall(_mr.mock, "ConfigureIpv4ArpIgnore")
}

func (_m *MockNetworkHandler) IptablesNewChain(proto iptables.Protocol, table string, chain string) error {
ret := _m.ctrl.Call(_m, "IptablesNewChain", proto, table, chain)
ret0, _ := ret[0].(error)
Expand Down
79 changes: 69 additions & 10 deletions pkg/virt-launcher/virtwrap/network/podinterface.go
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,7 @@ type BridgeBindMechanism struct {
domain *api.Domain
podInterfaceName string
bridgeInterfaceName string
arpIgnore bool
}

func (b *BridgeBindMechanism) discoverPodNetworkInterface() error {
Expand Down Expand Up @@ -458,12 +459,22 @@ func (b *BridgeBindMechanism) preparePodNetworkInterfaces(queueNumber uint32, la
return err
}

if _, err := Handler.SetRandomMac(b.podInterfaceName); err != nil {
return err
if !b.vif.IPAMDisabled {
// Remove IP from POD interface
err := Handler.AddrDel(b.podNicLink, &b.vif.IP)

if err != nil {
log.Log.Reason(err).Errorf("failed to delete address for interface: %s", b.podInterfaceName)
return err
}

if err := b.switchPodInterfaceWithDummy(); err != nil {
log.Log.Reason(err).Error("failed to switch pod interface with a dummy")
return err
}
}

if err := Handler.LinkSetUp(b.podNicLink); err != nil {
log.Log.Reason(err).Errorf("failed to bring link up for interface: %s", b.podInterfaceName)
if _, err := Handler.SetRandomMac(b.podInterfaceName); err != nil {
return err
}

Expand All @@ -478,16 +489,18 @@ func (b *BridgeBindMechanism) preparePodNetworkInterfaces(queueNumber uint32, la
return err
}

if !b.vif.IPAMDisabled {
// Remove IP from POD interface
err := Handler.AddrDel(b.podNicLink, &b.vif.IP)

if err != nil {
log.Log.Reason(err).Errorf("failed to delete address for interface: %s", b.podInterfaceName)
if b.arpIgnore {
if err := Handler.ConfigureIpv4ArpIgnore(); err != nil {
log.Log.Reason(err).Errorf("failed to set arp_ignore=1 on interface %s", b.bridgeInterfaceName)
return err
}
}

if err := Handler.LinkSetUp(b.podNicLink); err != nil {
log.Log.Reason(err).Errorf("failed to bring link up for interface: %s", b.podInterfaceName)
return err
}

if err := Handler.LinkSetLearningOff(b.podNicLink); err != nil {
log.Log.Reason(err).Errorf("failed to disable mac learning for interface: %s", b.podInterfaceName)
return err
Expand Down Expand Up @@ -624,6 +637,52 @@ func (b *BridgeBindMechanism) createBridge() error {
return nil
}

func (b *BridgeBindMechanism) switchPodInterfaceWithDummy() error {
originalPodInterfaceName := b.podInterfaceName
newPodInterfaceName := fmt.Sprintf("%s-nic", originalPodInterfaceName)
dummy := &netlink.Dummy{LinkAttrs: netlink.LinkAttrs{Name: originalPodInterfaceName}}

if b.podNicLink.Type() != "dummy" {
// Set arp_ignore=1 on the bridge interface to avoid
// the interface being seen by Duplicate Address Detection (DAD).
// Without this, some VMs will lose their ip address after a few
// minutes.
b.arpIgnore = true

// Rename pod interface to free the original name for a new dummy interface
err := Handler.LinkSetName(b.podNicLink, newPodInterfaceName)
if err != nil {
log.Log.Reason(err).Errorf("failed to rename interface : %s", b.podInterfaceName)
return err
}

b.podInterfaceName = newPodInterfaceName
b.podNicLink, err = Handler.LinkByName(newPodInterfaceName)
if err != nil {
log.Log.Reason(err).Errorf("failed to get a link for interface: %s", b.podInterfaceName)
return err
}

// Create a dummy interface named after the original interface
err = Handler.LinkAdd(dummy)
if err != nil {
log.Log.Reason(err).Errorf("failed to create dummy interface : %s", newPodInterfaceName)
return err
}
}

// Replace original pod interface IP address to the dummy
// Since the dummy is not connected to anything, it should not affect networking
// Replace will add if ip doesn't exist or modify the ip
err := Handler.AddrReplace(dummy, &b.vif.IP)
if err != nil {
log.Log.Reason(err).Errorf("failed to replace original IP address to dummy interface: %s", newPodInterfaceName)
return err
}

return nil
}

type MasqueradeBindMechanism struct {
vmi *v1.VirtualMachineInstance
vif *VIF
Expand Down
7 changes: 7 additions & 0 deletions pkg/virt-launcher/virtwrap/network/podinterface_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,9 @@ var _ = Describe("Pod Network", func() {
var mockNetwork *MockNetworkHandler
var ctrl *gomock.Controller
var dummy *netlink.Dummy
var dummySwap *netlink.Dummy
var addrList []netlink.Addr
var newPodInterfaceName string
var routeList []netlink.Route
var routeAddr netlink.Route
var fakeMac net.HardwareAddr
Expand Down Expand Up @@ -90,7 +92,9 @@ var _ = Describe("Pod Network", func() {
testMac := "12:34:56:78:9A:BC"
updateTestMac := "AF:B3:1F:78:2A:CA"
mtu = 1410
dummySwap = &netlink.Dummy{LinkAttrs: netlink.LinkAttrs{Name: primaryPodInterfaceName}}
dummy = &netlink.Dummy{LinkAttrs: netlink.LinkAttrs{Index: 1, MTU: mtu}}
newPodInterfaceName = fmt.Sprintf("%s-nic", primaryPodInterfaceName)
address := &net.IPNet{IP: net.IPv4(10, 35, 0, 6), Mask: net.CIDRMask(24, 32)}
gw := net.IPv4(10, 35, 0, 1)
fakeMac, _ = net.ParseMAC(testMac)
Expand Down Expand Up @@ -179,7 +183,10 @@ var _ = Describe("Pod Network", func() {
TestPodInterfaceIPBinding := func(vm *v1.VirtualMachineInstance, domain *api.Domain) {

//For Bridge tests
mockNetwork.EXPECT().LinkSetName(dummySwap, newPodInterfaceName).Return(nil)
mockNetwork.EXPECT().LinkByName(primaryPodInterfaceName).Return(dummy, nil).Times(2)
mockNetwork.EXPECT().LinkByName(primaryPodInterfaceName).Return(dummySwap, nil)
mockNetwork.EXPECT().AddrReplace(dummySwap, &fakeAddr).Return(nil)
mockNetwork.EXPECT().AddrList(dummy, netlink.FAMILY_V4).Return(addrList, nil)
mockNetwork.EXPECT().AddrList(dummy, netlink.FAMILY_ALL).Return(addrList, nil)
mockNetwork.EXPECT().RouteList(dummy, netlink.FAMILY_V4).Return(routeList, nil)
Expand Down
10 changes: 10 additions & 0 deletions tests/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -1468,6 +1468,16 @@ func DeletePV(os string) {
}
}

func VerifyDummyNicForBridgeNetwork(vmi *v1.VirtualMachineInstance) {
output := RunCommandOnVmiPod(vmi, []string{"/bin/bash", "-c", "/usr/sbin/ip link show|grep DOWN|grep -c eth0"})
ExpectWithOffset(1, strings.TrimSpace(output)).To(Equal("1"))

output = RunCommandOnVmiPod(vmi, []string{"/bin/bash", "-c", "/usr/sbin/ip link show|grep UP|grep -c eth0-nic"})
ExpectWithOffset(1, strings.TrimSpace(output)).To(Equal("1"))

return
}

func RunVMI(vmi *v1.VirtualMachineInstance, timeout int) *v1.VirtualMachineInstance {
By("Starting a VirtualMachineInstance")
virtCli, err := kubecli.GetKubevirtClient()
Expand Down
1 change: 0 additions & 1 deletion tests/vmi_cloudinit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,6 @@ var _ = Describe("[rfe_id:151][crit:high][vendor:[email protected]][level:compon

By("applying the hostname from meta-data")
Expect(libnet.WithIPv6(console.LoginToCirros)(vmi)).To(Succeed())

Expect(console.SafeExpectBatch(vmi, []expect.Batcher{
&expect.BSnd{S: "hostname\n"},
&expect.BExp{R: dns.SanitizeHostname(vmi)},
Expand Down
70 changes: 70 additions & 0 deletions tests/vmi_lifecycle_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,76 @@ var _ = Describe("[rfe_id:273][crit:high][vendor:[email protected]][level:compon

tests.WaitForSuccessfulVMIStart(newVMI)
})

It("VMIs with Bridge Networking shouldn't fail after the kubelet restarts", func() {
bridgeVMI := vmi
// Remove the masquerade interface to use the default bridge one
bridgeVMI.Spec.Domain.Devices.Interfaces = nil
bridgeVMI.Spec.Networks = nil
v1.SetDefaults_NetworkInterface(bridgeVMI)
Expect(bridgeVMI.Spec.Domain.Devices.Interfaces).NotTo(BeEmpty())

By("starting a VMI with bridged network on a node")
bridgeVMI, err := virtClient.VirtualMachineInstance(tests.NamespaceTestDefault).Create(bridgeVMI)
Expect(err).To(BeNil(), "Should submit VMI successfully")

// Start a VirtualMachineInstance with bridged networking
nodeName := tests.WaitForSuccessfulVMIStart(bridgeVMI)

tests.VerifyDummyNicForBridgeNetwork(bridgeVMI)

By("restarting kubelet")
pod := renderPkillAllPod("kubelet")
pod.Spec.NodeName = nodeName
_, err = virtClient.CoreV1().Pods(tests.NamespaceTestDefault).Create(pod)
Expect(err).ToNot(HaveOccurred())

By("starting another VMI on the same node, to verify kubelet is running again")
newVMI := newCirrosVMI()
newVMI.Spec.NodeSelector = map[string]string{"kubernetes.io/hostname": nodeName}
Eventually(func() error {
newVMI, err = virtClient.VirtualMachineInstance(tests.NamespaceTestDefault).Create(newVMI)
Expect(err).To(BeNil())
return nil
}, 100, 10).Should(Succeed(), "Should be able to start a new VM")

By("checking if the VMI with bridged networking is still running, it will verify the CNI didn't cause the pod to be killed")
tests.WaitForSuccessfulVMIStart(bridgeVMI)
})

It("VMIs with Bridge Networking should work with Duplicate Address Detection (DAD)", func() {
bridgeVMI := tests.NewRandomVMIWithEphemeralDiskAndUserdata(cd.ContainerDiskFor(cd.ContainerDiskCirros), "#!/bin/bash\necho 'hello'\n")
// Remove the masquerade interface to use the default bridge one
bridgeVMI.Spec.Domain.Devices.Interfaces = nil
bridgeVMI.Spec.Networks = nil
v1.SetDefaults_NetworkInterface(bridgeVMI)
Expect(bridgeVMI.Spec.Domain.Devices.Interfaces).NotTo(BeEmpty())

By("starting a VMI with bridged network on a node")
bridgeVMI, err = virtClient.VirtualMachineInstance(vmi.Namespace).Create(bridgeVMI)
Expect(err).To(BeNil(), "Should submit VMI successfully")

// Start a VirtualMachineInstance with bridged networking
By("Waiting the VirtualMachineInstance start")
tests.WaitUntilVMIReady(bridgeVMI, console.LoginToCirros)
tests.VerifyDummyNicForBridgeNetwork(bridgeVMI)

// Update the VMI object so we get the IP address
bridgeVMI, err = virtClient.VirtualMachineInstance(bridgeVMI.Namespace).Get(bridgeVMI.Name, &metav1.GetOptions{})
Expect(err).ToNot(HaveOccurred())

vmIP := libnet.GetVmiPrimaryIpByFamily(bridgeVMI, k8sv1.IPv4Protocol)
dadCommand := fmt.Sprintf("sudo /usr/sbin/arping -D -I eth0 -c 2 %s | grep Received | cut -d ' ' -f 2\n", vmIP)

Expect(console.SafeExpectBatch(bridgeVMI, []expect.Batcher{
&expect.BSnd{S: "\n"},
&expect.BExp{R: console.PromptExpression},

&expect.BSnd{S: dadCommand},
&expect.BExp{R: "0"},
}, 600)).To(Succeed())
})

})

Context("[Serial]when virt-handler is not responsive", func() {
Expand Down
2 changes: 1 addition & 1 deletion tests/vmi_multus_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ var _ = Describe("[Serial]Multus", func() {
Expect(err).ToNot(HaveOccurred())

By("checking pod has only one interface")
// lo0, eth0, k6t-eth0, vnet0
// lo0, eth0-nic, k6t-eth0, vnet0
output := tests.RunCommandOnVmiPod(detachedVMI, []string{"/bin/bash", "-c", "/usr/sbin/ip link show|grep -c UP"})
ExpectWithOffset(1, strings.TrimSpace(output)).To(Equal("4"))
})
Expand Down
2 changes: 1 addition & 1 deletion tests/vmi_networking_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ var _ = Describe("[Serial][rfe_id:694][crit:medium][vendor:[email protected]][le
}

checkLearningState := func(vmi *v1.VirtualMachineInstance, expectedValue string) {
output := tests.RunCommandOnVmiPod(vmi, []string{"cat", "/sys/class/net/eth0/brport/learning"})
output := tests.RunCommandOnVmiPod(vmi, []string{"cat", "/sys/class/net/eth0-nic/brport/learning"})
ExpectWithOffset(1, strings.TrimSpace(output)).To(Equal(expectedValue))
}

Expand Down

0 comments on commit 859e99c

Please sign in to comment.