Skip to content

Commit

Permalink
Merge pull request kubernetes#24836 from Clarifai/gpu-impl
Browse files Browse the repository at this point in the history
Automatic merge from submit-queue

WIP v0 NVIDIA GPU support

```release-note
* Alpha support for scheduling pods on machines with NVIDIA GPUs whose kubelets use the `--experimental-nvidia-gpus` flag, using the alpha.kubernetes.io/nvidia-gpu resource 
```

Implements part of kubernetes#24071 for  kubernetes#23587

I am not familiar with the scheduler enough to know what to do with the scores. Mostly punting for now.

Missing items from the implementation plan: limitranger, rkt support, kubectl
support and docs

cc @erictune @davidopp @dchen1107 @vishh @Hui-Zhi @gopinatht
  • Loading branch information
k8s-merge-robot committed May 12, 2016
2 parents 3f2fe8b + 362c763 commit 08440b5
Show file tree
Hide file tree
Showing 21 changed files with 845 additions and 653 deletions.
2 changes: 2 additions & 0 deletions cmd/kubelet/app/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ func NewKubeletServer() *KubeletServer {
MaxPerPodContainerCount: 2,
MaxOpenFiles: 1000000,
MaxPods: 110,
NvidiaGPUs: 0,
MinimumGCAge: unversioned.Duration{Duration: 1 * time.Minute},
NetworkPluginDir: "/usr/libexec/kubernetes/kubelet-plugins/net/exec/",
NetworkPluginName: "",
Expand Down Expand Up @@ -227,6 +228,7 @@ func (s *KubeletServer) AddFlags(fs *pflag.FlagSet) {
fs.BoolVar(&s.BabysitDaemons, "babysit-daemons", s.BabysitDaemons, "If true, the node has babysitter process monitoring docker and kubelet.")
fs.MarkDeprecated("babysit-daemons", "Will be removed in a future version.")
fs.Int32Var(&s.MaxPods, "max-pods", s.MaxPods, "Number of Pods that can run on this Kubelet.")
fs.Int32Var(&s.NvidiaGPUs, "experimental-nvidia-gpus", s.NvidiaGPUs, "Number of NVIDIA GPU devices on this node. Only 0 (default) and 1 are currently supported.")
fs.StringVar(&s.DockerExecHandlerName, "docker-exec-handler", s.DockerExecHandlerName, "Handler to use when executing a command in a container. Valid values are 'native' and 'nsenter'. Defaults to 'native'.")
fs.StringVar(&s.NonMasqueradeCIDR, "non-masquerade-cidr", s.NonMasqueradeCIDR, "Traffic to IPs outside this range will use IP masquerade.")
fs.StringVar(&s.PodCIDR, "pod-cidr", "", "The CIDR to use for pod IP addresses, only used in standalone mode. In cluster mode, this is obtained from the master.")
Expand Down
4 changes: 4 additions & 0 deletions cmd/kubelet/app/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,7 @@ func UnsecuredKubeletConfig(s *options.KubeletServer) (*KubeletConfig, error) {
MaxOpenFiles: s.MaxOpenFiles,
MaxPerPodContainerCount: int(s.MaxPerPodContainerCount),
MaxPods: int(s.MaxPods),
NvidiaGPUs: int(s.NvidiaGPUs),
MinimumGCAge: s.MinimumGCAge.Duration,
Mounter: mounter,
NetworkPluginName: s.NetworkPluginName,
Expand Down Expand Up @@ -558,6 +559,7 @@ func SimpleKubelet(client *clientset.Clientset,
MaxOpenFiles: 1024,
MaxPerPodContainerCount: 2,
MaxPods: maxPods,
NvidiaGPUs: 0,
MinimumGCAge: minimumGCAge,
Mounter: mount.New(),
NodeStatusUpdateFrequency: nodeStatusUpdateFrequency,
Expand Down Expand Up @@ -750,6 +752,7 @@ type KubeletConfig struct {
NodeLabels map[string]string
NodeStatusUpdateFrequency time.Duration
NonMasqueradeCIDR string
NvidiaGPUs int
OOMAdjuster *oom.OOMAdjuster
OSInterface kubecontainer.OSInterface
PodCIDR string
Expand Down Expand Up @@ -860,6 +863,7 @@ func CreateAndInitKubelet(kc *KubeletConfig) (k KubeletBootstrap, pc *config.Pod
kc.PodCIDR,
kc.ReconcileCIDR,
kc.MaxPods,
kc.NvidiaGPUs,
kc.DockerExecHandler,
kc.ResolverConfig,
kc.CPUCFSQuota,
Expand Down
1 change: 1 addition & 0 deletions docs/admin/kubelet.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ kubelet
--eviction-soft="": A set of eviction thresholds (e.g. memory.available<1.5Gi) that if met over a corresponding grace period would trigger a pod eviction.
--eviction-soft-grace-period="": A set of eviction grace periods (e.g. memory.available=1m30s) that correspond to how long a soft eviction threshold must hold before triggering a pod eviction.
--experimental-flannel-overlay[=false]: Experimental support for starting the kubelet with the default overlay network (flannel). Assumes flanneld is already running in client mode. [default=false]
--experimental-nvidia-gpus=0: Number of NVIDIA GPU devices on this node. Only 0 (default) and 1 are currently supported.
--file-check-frequency=20s: Duration between checking config files for new data
--google-json-key="": The Google Cloud Platform Service Account JSON Key to use for authentication.
--hairpin-mode="promiscuous-bridge": How should the kubelet setup hairpin NAT. This allows endpoints of a Service to loadbalance back to themselves if they should try to access their own Service. Valid values are "promiscuous-bridge", "hairpin-veth" and "none".
Expand Down
1 change: 1 addition & 0 deletions hack/verify-flags/known-flags.txt
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ executor-path
executor-suicide-timeout
experimental-flannel-overlay
experimental-keystone-url
experimental-nvidia-gpus
experimental-prefix
external-hostname
external-ip
Expand Down
7 changes: 7 additions & 0 deletions pkg/api/resource_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,13 @@ func (self *ResourceList) Pods() *resource.Quantity {
return &resource.Quantity{}
}

func (self *ResourceList) NvidiaGPU() *resource.Quantity {
if val, ok := (*self)[ResourceNvidiaGPU]; ok {
return &val
}
return &resource.Quantity{}
}

func GetContainerStatus(statuses []ContainerStatus, name string) (ContainerStatus, bool) {
for i := range statuses {
if statuses[i].Name == name {
Expand Down
7 changes: 7 additions & 0 deletions pkg/api/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -1922,13 +1922,20 @@ type NodeResources struct {
// ResourceName is the name identifying various resources in a ResourceList.
type ResourceName string

// Resource names must be not more than 63 characters, consisting of upper- or lower-case alphanumeric characters,
// with the -, _, and . characters allowed anywhere, except the first or last character.
// The default convention, matching that for annotations, is to use lower-case names, with dashes, rather than
// camel case, separating compound words.
// Fully-qualified resource typenames are constructed from a DNS-style subdomain, followed by a slash `/` and a name.
const (
// CPU, in cores. (500m = .5 cores)
ResourceCPU ResourceName = "cpu"
// Memory, in bytes. (500Gi = 500GiB = 500 * 1024 * 1024 * 1024)
ResourceMemory ResourceName = "memory"
// Volume size, in bytes (e,g. 5Gi = 5GiB = 5 * 1024 * 1024 * 1024)
ResourceStorage ResourceName = "storage"
// NVIDIA GPU, in devices. Alpha, might change: although fractional and allowing values >1, only one whole device per node is assigned.
ResourceNvidiaGPU ResourceName = "alpha.kubernetes.io/nvidia-gpu"
// Number of Pods that may be running on this Node: see ResourcePods
)

Expand Down
8 changes: 8 additions & 0 deletions pkg/api/v1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -2307,13 +2307,21 @@ type NodeAddress struct {
// ResourceName is the name identifying various resources in a ResourceList.
type ResourceName string

// Resource names must be not more than 63 characters, consisting of upper- or lower-case alphanumeric characters,
// with the -, _, and . characters allowed anywhere, except the first or last character.
// The default convention, matching that for annotations, is to use lower-case names, with dashes, rather than
// camel case, separating compound words.
// Fully-qualified resource typenames are constructed from a DNS-style subdomain, followed by a slash `/` and a name.
const (
// CPU, in cores. (500m = .5 cores)
ResourceCPU ResourceName = "cpu"
// Memory, in bytes. (500Gi = 500GiB = 500 * 1024 * 1024 * 1024)
ResourceMemory ResourceName = "memory"
// Volume size, in bytes (e,g. 5Gi = 5GiB = 5 * 1024 * 1024 * 1024)
ResourceStorage ResourceName = "storage"
// NVIDIA GPU, in devices. Alpha, might change: although fractional and allowing values >1, only one whole device per node is assigned.
ResourceNvidiaGPU ResourceName = "alpha.kubernetes.io/nvidia-gpu"
// Number of Pods that may be running on this Node: see ResourcePods
)

// ResourceList is a set of (resource name, quantity) pairs.
Expand Down
7 changes: 5 additions & 2 deletions pkg/api/validation/validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -2489,6 +2489,7 @@ func validateBasicResource(quantity resource.Quantity, fldPath *field.Path) fiel
func ValidateResourceRequirements(requirements *api.ResourceRequirements, fldPath *field.Path) field.ErrorList {
allErrs := field.ErrorList{}
limPath := fldPath.Child("limits")
reqPath := fldPath.Child("requests")
for resourceName, quantity := range requirements.Limits {
fldPath := limPath.Key(string(resourceName))
// Validate resource name.
Expand All @@ -2499,12 +2500,14 @@ func ValidateResourceRequirements(requirements *api.ResourceRequirements, fldPat
// Check that request <= limit.
requestQuantity, exists := requirements.Requests[resourceName]
if exists {
if quantity.Cmp(requestQuantity) < 0 {
// For GPUs, require that no request be set.
if resourceName == api.ResourceNvidiaGPU {
allErrs = append(allErrs, field.Invalid(reqPath, requestQuantity.String(), "cannot be set"))
} else if quantity.Cmp(requestQuantity) < 0 {
allErrs = append(allErrs, field.Invalid(fldPath, quantity.String(), "must be greater than or equal to request"))
}
}
}
reqPath := fldPath.Child("requests")
for resourceName, quantity := range requirements.Requests {
fldPath := reqPath.Key(string(resourceName))
// Validate resource name.
Expand Down
35 changes: 35 additions & 0 deletions pkg/api/validation/validation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1383,6 +1383,22 @@ func TestValidateContainers(t *testing.T) {
},
ImagePullPolicy: "IfNotPresent",
},
{
Name: "resources-test-with-gpu",
Image: "image",
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
},
Limits: api.ResourceList{
api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
api.ResourceName(api.ResourceNvidiaGPU): resource.MustParse("1"),
},
},
ImagePullPolicy: "IfNotPresent",
},
{
Name: "resources-request-limit-simple",
Image: "image",
Expand Down Expand Up @@ -1606,6 +1622,25 @@ func TestValidateContainers(t *testing.T) {
ImagePullPolicy: "IfNotPresent",
},
},
"Resource can only have GPU limit": {
{
Name: "resources-request-limit-edge",
Image: "image",
Resources: api.ResourceRequirements{
Requests: api.ResourceList{
api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
api.ResourceName(api.ResourceNvidiaGPU): resource.MustParse("1"),
},
Limits: api.ResourceList{
api.ResourceName(api.ResourceCPU): resource.MustParse("10"),
api.ResourceName(api.ResourceMemory): resource.MustParse("10G"),
api.ResourceName(api.ResourceNvidiaGPU): resource.MustParse("1"),
},
},
ImagePullPolicy: "IfNotPresent",
},
},
"Request limit simple invalid": {
{
Name: "abc-123",
Expand Down
1 change: 1 addition & 0 deletions pkg/apis/componentconfig/deep_copy_generated.go
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ func DeepCopy_componentconfig_KubeletConfiguration(in KubeletConfiguration, out
out.HairpinMode = in.HairpinMode
out.BabysitDaemons = in.BabysitDaemons
out.MaxPods = in.MaxPods
out.NvidiaGPUs = in.NvidiaGPUs
out.DockerExecHandlerName = in.DockerExecHandlerName
out.PodCIDR = in.PodCIDR
out.ResolverConfig = in.ResolverConfig
Expand Down
Loading

0 comments on commit 08440b5

Please sign in to comment.