Add VGPU

minjac · Mar 16, 2021 · 1619a9b · 1619a9b
1 parent e5e1bef
commit 1619a9b
Show file tree

Hide file tree

Showing 10 changed files with 146 additions and 17 deletions.
diff --git a/Makefile b/Makefile
@@ -20,7 +20,7 @@
 
 DOCKER   ?= docker
 ifeq ($(IMAGE),)
-REGISTRY ?= nvcr.io/nvidia
+REGISTRY ?= m7-ieg-pico-test01:5000/nvidia
 IMAGE := $(REGISTRY)/k8s-device-plugin
 endif
 VERSION  ?= v0.9.0

diff --git a/docker/Dockerfile.ubi8 b/docker/Dockerfile.ubi8
@@ -53,7 +53,9 @@ LABEL summary="NVIDIA device plugin for Kubernetes"
 LABEL description="See summary"
 
 COPY ./LICENSE ./licenses/LICENSE
+COPY --from=build /build/entrypoint.sh /entrypoint.sh
+COPY --from=build /build/vgpu /etc/vgpu
 
 COPY --from=build /build/nvidia-device-plugin /usr/bin/nvidia-device-plugin
 
-ENTRYPOINT ["nvidia-device-plugin"]
+ENTRYPOINT ["/entrypoint.sh"]
diff --git a/docker/Dockerfile.ubuntu20.04 b/docker/Dockerfile.ubuntu20.04
@@ -15,6 +15,7 @@
 ARG CUDA_IMAGE=cuda
 FROM nvcr.io/nvidia/${CUDA_IMAGE}:11.2.1-base-ubuntu20.04 as build
 
+RUN rm -rf /etc/apt/sources.list.d/cuda.list /etc/apt/sources.list.d/nvidia-ml.list
 RUN apt-get update && apt-get install -y --no-install-recommends \
         g++ \
         ca-certificates \
@@ -53,7 +54,9 @@ LABEL summary="NVIDIA device plugin for Kubernetes"
 LABEL description="See summary"
 
 COPY ./LICENSE ./licenses/LICENSE
+COPY --from=build /build/entrypoint.sh /entrypoint.sh
+COPY --from=build /build/vgpu /etc/vgpu
 
 COPY --from=build /build/nvidia-device-plugin /usr/bin/nvidia-device-plugin
 
-ENTRYPOINT ["nvidia-device-plugin"]
+ENTRYPOINT ["/entrypoint.sh"]
diff --git a/entrypoint.sh b/entrypoint.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+# Copy the vGPU files bundled in the image (/etc/vgpu) into /usr/local/vgpu,
+# then replace this shell with the device plugin.
+cp -f /etc/vgpu/* /usr/local/vgpu/
+# Quote "$@" so each argument is forwarded unchanged, without word splitting or globbing.
+exec nvidia-device-plugin "$@"
diff --git a/main.go b/main.go
@@ -34,6 +34,8 @@ var passDeviceSpecsFlag bool
 var deviceListStrategyFlag string
 var deviceIDStrategyFlag string
 var nvidiaDriverRootFlag string
+var deviceSplitCountFlag uint
+var deviceMemoryScalingFlag float64
 
 var version string // This should be set at build time to indicate the actual version
 
@@ -43,14 +45,15 @@ func main() {
 	c.Before = validateFlags
 	c.Action = start
 
+	migStrategyFlag = MigStrategyNone
 	c.Flags = []cli.Flag{
-		&cli.StringFlag{
-			Name:        "mig-strategy",
-			Value:       "none",
-			Usage:       "the desired strategy for exposing MIG devices on GPUs that support it:\n\t\t[none | single | mixed]",
-			Destination: &migStrategyFlag,
-			EnvVars:     []string{"MIG_STRATEGY"},
-		},
+		//&cli.StringFlag{
+		//	Name:        "mig-strategy",
+		//	Value:       "none",
+		//	Usage:       "the desired strategy for exposing MIG devices on GPUs that support it:\n\t\t[none | single | mixed]",
+		//	Destination: &migStrategyFlag,
+		//	EnvVars:     []string{"MIG_STRATEGY"},
+		//},
 		&cli.BoolFlag{
 			Name:        "fail-on-init-error",
 			Value:       true,
@@ -86,6 +89,20 @@ func main() {
 			Destination: &nvidiaDriverRootFlag,
 			EnvVars:     []string{"NVIDIA_DRIVER_ROOT"},
 		},
+		&cli.UintFlag{
+			Name:        "device-split-count",
+			Value:       2,
+			Usage:       "the number for NVIDIA device split)",
+			Destination: &deviceSplitCountFlag,
+			EnvVars:     []string{"DEVICE_SPLIT_COUNT"},
+		},
+		&cli.Float64Flag{
+			Name:        "device-memory-scaling",
+			Value:       1.0,
+			Usage:       "the ratio for NVIDIA device memory scaling)",
+			Destination: &deviceMemoryScalingFlag,
+			EnvVars:     []string{"DEVICE_MEMORY_SCALING"},
+		},
 	}
 
 	err := c.Run(os.Args)

diff --git a/nvidia-device-plugin.yml b/nvidia-device-plugin.yml
@@ -56,7 +56,12 @@ spec:
         volumeMounts:
           - name: device-plugin
             mountPath: /var/lib/kubelet/device-plugins
+          - name: vgpu-dir
+            mountPath: /usr/local/vgpu
       volumes:
         - name: device-plugin
           hostPath:
             path: /var/lib/kubelet/device-plugins
+        - name: vgpu-dir
+          hostPath:
+            path: /usr/local/vgpu
diff --git a/server.go b/server.go
@@ -62,6 +62,7 @@ type NvidiaDevicePlugin struct {
 	cachedDevices []*Device
 	health        chan *Device
 	stop          chan interface{}
+	vDevices      []*VDevice
 }
 
 // NewNvidiaDevicePlugin returns an initialized NvidiaDevicePlugin
@@ -84,13 +85,15 @@ func NewNvidiaDevicePlugin(resourceName string, resourceManager ResourceManager,
 
 func (m *NvidiaDevicePlugin) initialize() {
 	m.cachedDevices = m.Devices()
+	m.vDevices = Device2VDevice(m.cachedDevices)
 	m.server = grpc.NewServer([]grpc.ServerOption{}...)
 	m.health = make(chan *Device)
 	m.stop = make(chan interface{})
 }
 
 func (m *NvidiaDevicePlugin) cleanup() {
 	close(m.stop)
+	m.vDevices = nil
 	m.cachedDevices = nil
 	m.server = nil
 	m.health = nil
@@ -201,7 +204,8 @@ func (m *NvidiaDevicePlugin) Register() error {
 		Endpoint:     path.Base(m.socket),
 		ResourceName: m.resourceName,
 		Options: &pluginapi.DevicePluginOptions{
-			GetPreferredAllocationAvailable: (m.allocatePolicy != nil),
+			GetPreferredAllocationAvailable: false,
+			//GetPreferredAllocationAvailable: (m.allocatePolicy != nil),
 		},
 	}
 
@@ -240,6 +244,7 @@ func (m *NvidiaDevicePlugin) ListAndWatch(e *pluginapi.Empty, s pluginapi.Device
 // GetPreferredAllocation returns the preferred allocation from the set of devices specified in the request
 func (m *NvidiaDevicePlugin) GetPreferredAllocation(ctx context.Context, r *pluginapi.PreferredAllocationRequest) (*pluginapi.PreferredAllocationResponse, error) {
 	response := &pluginapi.PreferredAllocationResponse{}
+	// get device
 	for _, req := range r.ContainerRequests {
 		available, err := gpuallocator.NewDevicesFrom(req.AvailableDeviceIDs)
 		if err != nil {
@@ -271,15 +276,14 @@ func (m *NvidiaDevicePlugin) GetPreferredAllocation(ctx context.Context, r *plug
 func (m *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) {
 	responses := pluginapi.AllocateResponse{}
 	for _, req := range reqs.ContainerRequests {
-		for _, id := range req.DevicesIDs {
-			if !m.deviceExists(id) {
-				return nil, fmt.Errorf("invalid allocation request for '%s': unknown device: %s", m.resourceName, id)
-			}
+		vdevices, err := VDevicesByIDs(m.vDevices, req.DevicesIDs)
+		if err != nil {
+			return nil, err
 		}
 
 		response := pluginapi.ContainerAllocateResponse{}
 
-		uuids := req.DevicesIDs
+		uuids := UniqueDeviceIDs(vdevices)
 		deviceIDs := m.deviceIDsFromUUIDs(uuids)
 
 		if deviceListStrategyFlag == DeviceListStrategyEnvvar {
@@ -293,6 +297,18 @@ func (m *NvidiaDevicePlugin) Allocate(ctx context.Context, reqs *pluginapi.Alloc
 			response.Devices = m.apiDeviceSpecs(nvidiaDriverRootFlag, uuids)
 		}
 
+		var mapEnvs []string
+		for i, vd := range vdevices {
+			limitKey := fmt.Sprintf("CUDA_DEVICE_MEMORY_LIMIT_%v", i)
+			response.Envs[limitKey] = fmt.Sprintf("%vm", vd.memory)
+			mapEnvs = append(mapEnvs, fmt.Sprintf("%v:%v", i, vd.dev.ID))
+		}
+		response.Envs["NVIDIA_DEVICE_MAP"] = strings.Join(mapEnvs, " ")
+		response.Mounts = append(response.Mounts,
+			&pluginapi.Mount{ContainerPath: "/usr/local/vgpu/libvgpu.so",
+				HostPath: "/usr/local/vgpu/libvgpu.so", ReadOnly: true},
+			&pluginapi.Mount{ContainerPath: "/etc/ld.so.preload",
+				HostPath: "/usr/local/vgpu/ld.so.preload", ReadOnly: true})
 		responses.ContainerResponses = append(responses.ContainerResponses, &response)
 	}
 
@@ -349,7 +365,8 @@ func (m *NvidiaDevicePlugin) deviceIDsFromUUIDs(uuids []string) []string {
 
 func (m *NvidiaDevicePlugin) apiDevices() []*pluginapi.Device {
 	var pdevs []*pluginapi.Device
-	for _, d := range m.cachedDevices {
+	for _, d := range m.vDevices {
+		d.Health = d.dev.Health
 		pdevs = append(pdevs, &d.Device)
 	}
 	return pdevs

diff --git a/vdevice.go b/vdevice.go
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2019, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package main
+
+import (
+	"fmt"
+
+	"github.com/NVIDIA/gpu-monitoring-tools/bindings/go/nvml"
+	pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
+)
+
+// VDevice is a virtual device carved out of one physical device. The embedded
+// pluginapi.Device carries the virtual device's own ID.
+type VDevice struct {
+	pluginapi.Device
+	dev    *Device // physical device this virtual device was split from
+	memory uint64  // memory share of this virtual device; presumably MiB as reported by nvml - TODO confirm
+}
+
+// Device2VDevice splits every physical device into deviceSplitCountFlag virtual
+// devices. Each virtual device is named "<physical ID>-<index>" and receives an
+// equal share of the physical memory scaled by deviceMemoryScalingFlag.
+func Device2VDevice(devices []*Device) []*VDevice {
+	var vdevices []*VDevice
+	for _, d := range devices {
+		dev, err := nvml.NewDeviceByUUID(d.ID)
+		check(err)
+		// Memory is a pointer and may be unset; report that explicitly instead
+		// of crashing on a nil dereference. Skip the device if check returns.
+		if dev == nil || dev.Memory == nil {
+			check(fmt.Errorf("device %s: memory size not reported by NVML", d.ID))
+			continue
+		}
+		memory := uint64(float64(*dev.Memory) * deviceMemoryScalingFlag / float64(deviceSplitCountFlag))
+		for i := uint(0); i < deviceSplitCountFlag; i++ {
+			vd := &VDevice{Device: d.Device, dev: d, memory: memory}
+			vd.ID = fmt.Sprintf("%v-%v", d.ID, i)
+			vdevices = append(vdevices, vd)
+		}
+	}
+	return vdevices
+}
+
+// VDevicesByIDs returns the virtual devices matching ids, in the order of ids.
+// It returns an error naming the first ID that is not present in vdevices.
+func VDevicesByIDs(vdevices []*VDevice, ids []string) ([]*VDevice, error) {
+	byID := make(map[string]*VDevice, len(vdevices))
+	for _, vd := range vdevices {
+		byID[vd.ID] = vd
+	}
+	var vds []*VDevice
+	for _, id := range ids {
+		vd, ok := byID[id]
+		if !ok {
+			return nil, fmt.Errorf("unknown device: %s", id)
+		}
+		vds = append(vds, vd)
+	}
+	return vds, nil
+}
+
+// UniqueDeviceIDs returns the IDs of the physical devices backing vdevices,
+// without duplicates and in order of first appearance.
+func UniqueDeviceIDs(vdevices []*VDevice) []string {
+	seen := make(map[string]struct{}, len(vdevices))
+	var ids []string
+	for _, vd := range vdevices {
+		if _, ok := seen[vd.dev.ID]; ok {
+			continue
+		}
+		seen[vd.dev.ID] = struct{}{}
+		ids = append(ids, vd.dev.ID)
+	}
+	return ids
+}
+
+//func VDeviceHealth(vdevices []*VDevice, id string, health string) {
+//	for _, vd := range vdevices {
+//		if vd.dev.ID == id {
+//			vd.Health = health
+//		}
+//	}
+//}
diff --git a/vgpu/ld.so.preload b/vgpu/ld.so.preload
@@ -0,0 +1 @@
+/usr/local/vgpu/libvgpu.so
diff --git a/vgpu/libvgpu.so b/vgpu/libvgpu.so