Skip to content

Commit

Permalink
Merge pull request kubevirt#10689 from akalenyu/fix-cgroup-v2-device-…
Browse files Browse the repository at this point in the history
…rules-state

cgroupsv2: reconstruct device allowlist/drop internal device allow list state
  • Loading branch information
kubevirt-bot authored Nov 10, 2023
2 parents 09e5531 + b9efd9a commit 9851a50
Show file tree
Hide file tree
Showing 12 changed files with 402 additions and 188 deletions.
1 change: 1 addition & 0 deletions pkg/virt-handler/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ go_test(
"//pkg/virt-config:go_default_library",
"//pkg/virt-controller/services:go_default_library",
"//pkg/virt-handler/cache:go_default_library",
"//pkg/virt-handler/cgroup:go_default_library",
"//pkg/virt-handler/cmd-client:go_default_library",
"//pkg/virt-handler/container-disk:go_default_library",
"//pkg/virt-handler/hotplug-disk:go_default_library",
Expand Down
2 changes: 2 additions & 0 deletions pkg/virt-handler/cgroup/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ go_library(
importpath = "kubevirt.io/kubevirt/pkg/virt-handler/cgroup",
visibility = ["//visibility:public"],
deps = [
"//pkg/safepath:go_default_library",
"//pkg/util:go_default_library",
"//pkg/virt-handler/isolation:go_default_library",
"//staging/src/kubevirt.io/api/core/v1:go_default_library",
Expand All @@ -23,6 +24,7 @@ go_library(
"//vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/configs:go_default_library",
"//vendor/github.com/opencontainers/runc/libcontainer/devices:go_default_library",
"//vendor/golang.org/x/sys/unix:go_default_library",
],
)

Expand Down
20 changes: 14 additions & 6 deletions pkg/virt-handler/cgroup/cgroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (

runc_cgroups "github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/devices"

v1 "kubevirt.io/api/core/v1"

Expand Down Expand Up @@ -89,9 +90,9 @@ func managerPath(taskPath string) string {
return retPath
}

// NewManagerFromPid initializes a new cgroup manager from VMI's pid.
// newManagerFromPid initializes a new cgroup manager from VMI's pid.
// The pid is expected to be the VMI's pid from the host's viewpoint.
func NewManagerFromPid(pid int) (manager Manager, err error) {
func newManagerFromPid(pid int, deviceRules []*devices.Rule) (manager Manager, err error) {
const isRootless = false
var version CgroupVersion

Expand All @@ -102,9 +103,11 @@ func NewManagerFromPid(pid int) (manager Manager, err error) {
}

config := &configs.Cgroup{
Path: HostCgroupBasePath,
Resources: &configs.Resources{},
Rootless: isRootless,
Path: HostCgroupBasePath,
Resources: &configs.Resources{
Devices: deviceRules,
},
Rootless: isRootless,
}

if runc_cgroups.IsCgroup2UnifiedMode() {
Expand Down Expand Up @@ -140,7 +143,12 @@ func NewManagerFromVM(vmi *v1.VirtualMachineInstance) (Manager, error) {
return nil, err
}

return NewManagerFromPid(isolationRes.Pid())
vmiDeviceRules, err := generateDeviceRulesForVMI(vmi, isolationRes)
if err != nil {
return nil, err
}

return newManagerFromPid(isolationRes.Pid(), vmiDeviceRules)
}

// GetGlobalCpuSetPath returns the CPU set of the main cgroup slice
Expand Down
7 changes: 4 additions & 3 deletions pkg/virt-handler/cgroup/cgroup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ var _ = Describe("cgroup manager", func() {
if version == V1 {
return newCustomizedV1Manager(mockRuncCgroupManager, false, execVirtChrootFunc, getCurrentlyDefinedRulesFunc)
} else {
return newCustomizedV2Manager(mockRuncCgroupManager, false, execVirtChrootFunc)
return newCustomizedV2Manager(mockRuncCgroupManager, false, nil, execVirtChrootFunc)
}
}

Expand Down Expand Up @@ -85,10 +85,11 @@ var _ = Describe("cgroup manager", func() {

Expect(rulesDefined).To(ContainElement(fakeRule), "defined rule is expected to exist")

for _, defaultRule := range GenerateDefaultDeviceRules() {
defaultDeviceRules := GenerateDefaultDeviceRules()
for _, defaultRule := range defaultDeviceRules {
Expect(rulesDefined).To(ContainElement(defaultRule), "default rules are expected to be defined")
}

Expect(rulesDefined).To(HaveLen(len(defaultDeviceRules) + 1))
},
Entry("for v1", V1),
Entry("for v2", V2),
Expand Down
29 changes: 18 additions & 11 deletions pkg/virt-handler/cgroup/cgroup_v2_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,11 @@ import (
"kubevirt.io/kubevirt/pkg/util"
)

var rulesPerPid = make(map[string][]*devices.Rule)

// v2Manager is the cgroup v2 manager. It embeds the runc cgroups manager and
// carries the state needed to apply resources through execVirtChroot.
// NOTE: it is constructed with a positional struct literal, so field order matters.
type v2Manager struct {
runc_cgroups.Manager
// dirPath is the cgroup directory, taken from the runc manager's GetPaths()[""].
dirPath string
// isRootless is forwarded unchanged to execVirtChroot.
isRootless bool
// deviceRules is the device rule set tracked by this manager: seeded at
// construction with the caller-supplied rules plus the default rules, and
// replaced by the merged (current + new) rules on every Set call.
deviceRules []*devices.Rule
// execVirtChroot is the function Set calls to actually apply the resources.
execVirtChroot execVirtChrootFunc
}

Expand All @@ -31,14 +30,20 @@ func newV2Manager(config *runc_configs.Cgroup, dirPath string) (Manager, error)
return nil, err
}

return newCustomizedV2Manager(runcManager, config.Rootless, execVirtChrootCgroups)
return newCustomizedV2Manager(runcManager, config.Rootless, config.Resources.Devices, execVirtChrootCgroups)
}

func newCustomizedV2Manager(runcManager runc_cgroups.Manager, isRootless bool, execVirtChroot execVirtChrootFunc) (Manager, error) {
func newCustomizedV2Manager(
runcManager runc_cgroups.Manager,
isRootless bool,
deviceRules []*devices.Rule,
execVirtChroot execVirtChrootFunc,
) (Manager, error) {
manager := v2Manager{
runcManager,
runcManager.GetPaths()[""],
isRootless,
append(deviceRules, GenerateDefaultDeviceRules()...),
execVirtChroot,
}

Expand All @@ -53,18 +58,20 @@ func (v *v2Manager) Set(r *runc_configs.Resources) error {
// We want to keep given resources untouched
resourcesToSet := *r

//Add default rules
resourcesToSet.Devices = append(resourcesToSet.Devices, GenerateDefaultDeviceRules()...)

rulesToSet, err := addCurrentRules(rulesPerPid[v.dirPath], resourcesToSet.Devices)
rulesToSet, err := addCurrentRules(v.deviceRules, resourcesToSet.Devices)
if err != nil {
return err
}
rulesPerPid[v.dirPath] = rulesToSet
v.deviceRules = rulesToSet
resourcesToSet.Devices = rulesToSet
for _, rule := range rulesToSet {
if rule == nil {
continue
}
log.Log.V(5).Infof("cgroupsv2 device allowlist: rule after appending current+new: type: %d permissions: %s allow: %t major: %d minor: %d", rule.Type, rule.Permissions, rule.Allow, rule.Major, rule.Minor)
}

err = v.execVirtChroot(&resourcesToSet, map[string]string{"": v.dirPath}, v.isRootless, v.GetCgroupVersion())
return err
return v.execVirtChroot(&resourcesToSet, map[string]string{"": v.dirPath}, v.isRootless, v.GetCgroupVersion())
}

func (v *v2Manager) GetCgroupVersion() CgroupVersion {
Expand Down
70 changes: 70 additions & 0 deletions pkg/virt-handler/cgroup/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,28 @@ import (
"bufio"
"encoding/base64"
"encoding/json"
"errors"
"fmt"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"syscall"

"github.com/opencontainers/runc/libcontainer/cgroups"
"golang.org/x/sys/unix"

"github.com/opencontainers/runc/libcontainer/devices"

runc_cgroups "github.com/opencontainers/runc/libcontainer/cgroups"
runc_configs "github.com/opencontainers/runc/libcontainer/configs"

v1 "kubevirt.io/api/core/v1"
"kubevirt.io/client-go/log"

"kubevirt.io/kubevirt/pkg/safepath"
"kubevirt.io/kubevirt/pkg/virt-handler/isolation"
)

type CgroupVersion string
Expand Down Expand Up @@ -84,6 +91,69 @@ func addCurrentRules(currentRules, newRules []*devices.Rule) ([]*devices.Rule, e
return newRules, nil
}

// generateDeviceRulesForVMI builds the allow list of known persistent block
// devices for a VMI. Hotplugged volumes are handled separately and are skipped
// here.
//
// Only PersistentVolumeClaim, DataVolume and Ephemeral volume sources are
// considered. For each of them the path <mountRoot>/dev/<volume name> is
// resolved; a volume whose path does not exist, or whose path is not a device
// file, contributes no rule.
//
// This will be maintained and extended as new devices likely have to end up on
// this list as well.
// For example - https://kubernetes.io/docs/concepts/scheduling-eviction/dynamic-resource-allocation/
//
// It returns the collected rules (nil when there are none), or an error when
// the mount root cannot be determined, a path cannot be resolved for a reason
// other than non-existence, or a device rule cannot be created. Returned
// errors wrap the underlying cause so callers can inspect it with errors.Is /
// errors.As.
func generateDeviceRulesForVMI(vmi *v1.VirtualMachineInstance, isolationRes isolation.IsolationResult) ([]*devices.Rule, error) {
	mountRoot, err := isolationRes.MountRoot()
	if err != nil {
		return nil, err
	}

	var vmiDeviceRules []*devices.Rule
	for _, volume := range vmi.Spec.Volumes {
		switch {
		case volume.VolumeSource.PersistentVolumeClaim != nil:
			if volume.VolumeSource.PersistentVolumeClaim.Hotpluggable {
				continue
			}
		case volume.VolumeSource.DataVolume != nil:
			if volume.VolumeSource.DataVolume.Hotpluggable {
				continue
			}
		case volume.VolumeSource.Ephemeral != nil:
			// Ephemeral volumes are always considered.
		default:
			// Any other volume source is not expected to expose a block device here.
			continue
		}

		path, err := safepath.JoinNoFollow(mountRoot, filepath.Join("dev", volume.Name))
		if err != nil {
			// No entry under dev/ for this volume: nothing to allow.
			if errors.Is(err, os.ErrNotExist) {
				continue
			}
			return nil, fmt.Errorf("failed to resolve path for volume %s: %w", volume.Name, err)
		}

		deviceRule, err := newAllowedDeviceRule(path)
		if err != nil {
			return nil, fmt.Errorf("failed to create device rule for %s: %w", path, err)
		}
		if deviceRule == nil {
			// The path exists but is not a device file.
			continue
		}
		log.Log.V(loggingVerbosity).Infof("device rule for volume %s: %v", volume.Name, deviceRule)
		vmiDeviceRules = append(vmiDeviceRules, deviceRule)
	}
	return vmiDeviceRules, nil
}

// newAllowedDeviceRule builds an "allow, rwm" device rule for the device file
// at devicePath.
//
// The path is stat'ed without following symlinks. If it is not a device file,
// (nil, nil) is returned. Otherwise the rule type is a character device when
// the mode has os.ModeCharDevice set and a block device if not, and the
// major/minor numbers are taken from the stat's Rdev.
//
// An error is returned when the stat fails or when the platform-specific stat
// data is not a *syscall.Stat_t.
func newAllowedDeviceRule(devicePath *safepath.Path) (*devices.Rule, error) {
	fileInfo, err := safepath.StatAtNoFollow(devicePath)
	if err != nil {
		return nil, err
	}
	if (fileInfo.Mode() & os.ModeDevice) == 0 {
		return nil, nil //not a device file
	}

	deviceType := devices.BlockDevice
	if (fileInfo.Mode() & os.ModeCharDevice) != 0 {
		deviceType = devices.CharDevice
	}

	// Sys() returns an untyped value; check the assertion instead of panicking
	// if the underlying data source is not what we expect.
	stat, ok := fileInfo.Sys().(*syscall.Stat_t)
	if !ok {
		return nil, fmt.Errorf("unexpected stat data type %T for device %v", fileInfo.Sys(), devicePath)
	}

	return &devices.Rule{
		Type:        deviceType,
		Major:       int64(unix.Major(stat.Rdev)),
		Minor:       int64(unix.Minor(stat.Rdev)),
		Permissions: "rwm",
		Allow:       true,
	}, nil
}

func GenerateDefaultDeviceRules() []*devices.Rule {
if len(defaultDeviceRules) > 0 {
// To avoid re-computing default device rules
Expand Down
34 changes: 18 additions & 16 deletions pkg/virt-handler/hotplug-disk/generated_mock_mount.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import (
gomock "github.com/golang/mock/gomock"
types "k8s.io/apimachinery/pkg/types"
v1 "kubevirt.io/api/core/v1"

cgroup "kubevirt.io/kubevirt/pkg/virt-handler/cgroup"
)

// Mock of VolumeMounter interface
Expand All @@ -30,44 +32,44 @@ func (_m *MockVolumeMounter) EXPECT() *_MockVolumeMounterRecorder {
return _m.recorder
}

func (_m *MockVolumeMounter) Mount(vmi *v1.VirtualMachineInstance) error {
ret := _m.ctrl.Call(_m, "Mount", vmi)
func (_m *MockVolumeMounter) Mount(vmi *v1.VirtualMachineInstance, cgroupManager cgroup.Manager) error {
ret := _m.ctrl.Call(_m, "Mount", vmi, cgroupManager)
ret0, _ := ret[0].(error)
return ret0
}

func (_mr *_MockVolumeMounterRecorder) Mount(arg0 interface{}) *gomock.Call {
return _mr.mock.ctrl.RecordCall(_mr.mock, "Mount", arg0)
func (_mr *_MockVolumeMounterRecorder) Mount(arg0, arg1 interface{}) *gomock.Call {
return _mr.mock.ctrl.RecordCall(_mr.mock, "Mount", arg0, arg1)
}

func (_m *MockVolumeMounter) MountFromPod(vmi *v1.VirtualMachineInstance, sourceUID types.UID) error {
ret := _m.ctrl.Call(_m, "MountFromPod", vmi, sourceUID)
func (_m *MockVolumeMounter) MountFromPod(vmi *v1.VirtualMachineInstance, sourceUID types.UID, cgroupManager cgroup.Manager) error {
ret := _m.ctrl.Call(_m, "MountFromPod", vmi, sourceUID, cgroupManager)
ret0, _ := ret[0].(error)
return ret0
}

func (_mr *_MockVolumeMounterRecorder) MountFromPod(arg0, arg1 interface{}) *gomock.Call {
return _mr.mock.ctrl.RecordCall(_mr.mock, "MountFromPod", arg0, arg1)
func (_mr *_MockVolumeMounterRecorder) MountFromPod(arg0, arg1, arg2 interface{}) *gomock.Call {
return _mr.mock.ctrl.RecordCall(_mr.mock, "MountFromPod", arg0, arg1, arg2)
}

func (_m *MockVolumeMounter) Unmount(vmi *v1.VirtualMachineInstance) error {
ret := _m.ctrl.Call(_m, "Unmount", vmi)
func (_m *MockVolumeMounter) Unmount(vmi *v1.VirtualMachineInstance, cgroupManager cgroup.Manager) error {
ret := _m.ctrl.Call(_m, "Unmount", vmi, cgroupManager)
ret0, _ := ret[0].(error)
return ret0
}

func (_mr *_MockVolumeMounterRecorder) Unmount(arg0 interface{}) *gomock.Call {
return _mr.mock.ctrl.RecordCall(_mr.mock, "Unmount", arg0)
func (_mr *_MockVolumeMounterRecorder) Unmount(arg0, arg1 interface{}) *gomock.Call {
return _mr.mock.ctrl.RecordCall(_mr.mock, "Unmount", arg0, arg1)
}

func (_m *MockVolumeMounter) UnmountAll(vmi *v1.VirtualMachineInstance) error {
ret := _m.ctrl.Call(_m, "UnmountAll", vmi)
func (_m *MockVolumeMounter) UnmountAll(vmi *v1.VirtualMachineInstance, cgroupManager cgroup.Manager) error {
ret := _m.ctrl.Call(_m, "UnmountAll", vmi, cgroupManager)
ret0, _ := ret[0].(error)
return ret0
}

func (_mr *_MockVolumeMounterRecorder) UnmountAll(arg0 interface{}) *gomock.Call {
return _mr.mock.ctrl.RecordCall(_mr.mock, "UnmountAll", arg0)
func (_mr *_MockVolumeMounterRecorder) UnmountAll(arg0, arg1 interface{}) *gomock.Call {
return _mr.mock.ctrl.RecordCall(_mr.mock, "UnmountAll", arg0, arg1)
}

func (_m *MockVolumeMounter) IsMounted(vmi *v1.VirtualMachineInstance, volume string, sourceUID types.UID) (bool, error) {
Expand Down
Loading

0 comments on commit 9851a50

Please sign in to comment.