Skip to content

Commit

Permalink
update: test
Browse files Browse the repository at this point in the history
  • Loading branch information
StarCoral authored and justin0u0 committed Jun 18, 2022
1 parent 60febe7 commit 680d2a7
Show file tree
Hide file tree
Showing 47 changed files with 5,894 additions and 933 deletions.
169 changes: 169 additions & 0 deletions deploy/default.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
# Identity used by the kubeshare-scheduler Pod (its serviceAccountName) and
# named as the subject of the kubeshare-scheduler ClusterRoleBinding.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: kubeshare-scheduler
  namespace: kube-system
---
# Cluster-wide permissions for the kubeshare-scheduler ServiceAccount.
# NOTE(review): four of the six rules grant every verb on every resource of
# their API groups; confirm the scheduler needs that breadth before deploying
# to a shared cluster.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kubeshare-scheduler
rules:
  # Core ("") API group: all resources, all verbs.
  - apiGroups: ['']
    resources: ['*']
    verbs: ['*']
  # apps and extensions API groups: all resources, all verbs.
  - apiGroups: [apps, extensions]
    resources: ['*']
    verbs: ['*']
  # Read-only access to PodDisruptionBudgets.
  - apiGroups: [policy]
    resources: [poddisruptionbudgets]
    verbs: [get, list, watch]
  # storage.k8s.io: all resources, all verbs.
  - apiGroups: [storage.k8s.io]
    resources: ['*']
    verbs: ['*']
  # Lease objects: create/get/list/update only (no watch, no delete).
  - apiGroups: [coordination.k8s.io]
    resources: [leases]
    verbs: [create, get, list, update]
  # events.k8s.io: all resources, all verbs.
  - apiGroups: [events.k8s.io]
    resources: ['*']
    verbs: ['*']
---
# Grants the kubeshare-scheduler ClusterRole to the kubeshare-scheduler
# ServiceAccount in the kube-system namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kubeshare-scheduler
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kubeshare-scheduler
subjects:
  - kind: ServiceAccount
    name: kubeshare-scheduler
    namespace: kube-system
---
# Holds the KubeSchedulerConfiguration file that the kubeshare-scheduler Pod
# mounts at /scheduler and loads via --config=/scheduler/scheduler-config.yaml.
#
# The embedded profile:
#   * disables leader election;
#   * registers a single profile named kubeshare-scheduler;
#   * enables the kubeshare-scheduler plugin at the queueSort, preFilter,
#     filter, reserve, unreserve and permit extension points (every other
#     queueSort plugin is disabled; the score section is commented out);
#   * passes `level` and `prometheusURL` as plugin arguments.
# NOTE(review): kubescheduler.config.k8s.io/v1alpha2 is an old config API
# version; confirm it matches the scheduler binary shipped in the image.
apiVersion: v1
kind: ConfigMap
metadata:
  name: scheduler-config
  namespace: kube-system
data:
  scheduler-config.yaml: |
    apiVersion: kubescheduler.config.k8s.io/v1alpha2
    kind: KubeSchedulerConfiguration
    leaderElection:
      leaderElect: false
    profiles:
    - schedulerName: kubeshare-scheduler
      plugins:
        queueSort:
          enabled:
          - name: kubeshare-scheduler
          disabled:
          - name: "*"
        preFilter:
          enabled:
          - name: kubeshare-scheduler
        filter:
          enabled:
          - name: kubeshare-scheduler
        # score:
        #   enabled:
        #   - name: kubeshare-scheduler
        #   disabled:
        #   - name: "*"
        reserve:
          enabled:
          - name: kubeshare-scheduler
        unreserve:
          enabled:
          - name: kubeshare-scheduler
        permit:
          enabled:
          - name: kubeshare-scheduler
      # optional plugin configs
      pluginConfig:
      - name: kubeshare-scheduler
        args:
          level: 3
          prometheusURL: "http://prometheus-k8s.monitoring:9090"
---

# Runs the kubeshare-scheduler binary as a single bare Pod in kube-system,
# pinned to a control-plane node.
#
# Node placement: the original manifest only recognised the legacy
# "node-role.kubernetes.io/master" label/taint. kubeadm-based clusters from
# v1.24 on no longer apply that label (and from v1.25 no longer apply that
# taint), which leaves a Pod with a *required* affinity on it unschedulable.
# Both the legacy and the "control-plane" names are accepted below;
# nodeSelectorTerms are ORed, so older clusters behave exactly as before.
apiVersion: v1
kind: Pod
metadata:
  name: kubeshare-scheduler
  namespace: kube-system
  annotations:
    # Deprecated marker that Kubernetes >= 1.16 ignores; priorityClassName
    # below is the supported mechanism. Kept for older clusters.
    scheduler.alpha.kubernetes.io/critical-pod: ""
  labels:
    app: kubeshare-scheduler
spec:
  serviceAccountName: kubeshare-scheduler
  priorityClassName: system-node-critical
  # NOTE(review): with "Never" a crashed scheduler container is not restarted,
  # and a bare Pod is not recreated by any controller — confirm this is
  # intended outside of testing.
  restartPolicy: Never
  tolerations:
    - key: CriticalAddonsOnly
      operator: Exists
    # Legacy control-plane taint.
    - key: node-role.kubernetes.io/master
      operator: Exists
      effect: NoSchedule
    # Current control-plane taint.
    - key: node-role.kubernetes.io/control-plane
      operator: Exists
      effect: NoSchedule
  affinity:
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        # Terms are ORed: a node carrying either label qualifies.
        nodeSelectorTerms:
          - matchExpressions:
              - key: node-role.kubernetes.io/master
                operator: Exists
          - matchExpressions:
              - key: node-role.kubernetes.io/control-plane
                operator: Exists
  volumes:
    # Scheduler configuration file from the scheduler-config ConfigMap.
    - name: scheduler-config
      configMap:
        name: scheduler-config
    # Host directories shared with the node under /kubeshare.
    - name: kubeshare-scheduler
      hostPath:
        path: /kubeshare/scheduler
    - name: kubeshare-log
      hostPath:
        path: /kubeshare/log
  containers:
    - name: kubeshare-scheduler
      # Trailing numbers from the original line are kept for reference;
      # presumably earlier image tags: 20211108, 20210706124950.
      image: riyazhu/kubeshare-scheduler:db
      imagePullPolicy: Always
      command:
        - kubeshare-scheduler
      args:
        # Path is inside the scheduler-config volume mounted at /scheduler.
        - --config=/scheduler/scheduler-config.yaml
      resources:
        requests:
          cpu: 50m
      volumeMounts:
        - name: scheduler-config
          mountPath: /scheduler
        - name: kubeshare-scheduler
          mountPath: /kubeshare/scheduler
        - name: kubeshare-log
          mountPath: /kubeshare/log
---

4 changes: 2 additions & 2 deletions pkg/scheduler/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ func (kss *KubeShareScheduler) addNode(obj interface{}) {
if kss.nodePodManagerPortBitmap[name] == nil {
kss.nodePodManagerPortBitmap[name] = bitmap.NewRRBitmap(512)
kss.nodePodManagerPortBitmap[name].Mask(0)

kss.getGPUByNode(name)
}

kss.getGPUByNode(name)

kss.cellMutex.Lock()
defer kss.cellMutex.Unlock()

Expand Down
6 changes: 3 additions & 3 deletions test/cifar10/cifar10.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@ metadata:
"sharedgpu/gpu_request": "1"
"sharedgpu/gpu_limit": "1.0"
"sharedgpu/priority": "100"
"sharedgpu/gpu_model": "NVIDIA-GeForce-GTX-1080"
# "sharedgpu/gpu_model": "NVIDIA-GeForce-GTX-1080"
spec:
schedulerName: kubeshare-scheduler
restartPolicy: Never
containers:
- name: pytorch
image: riyazhu/cifar10:test
command: ["sh", "-c", "sleep infinity"]
imagePullPolicy: Always #IfNotPresent
# command: ["sh", "-c", "sleep infinity"]
imagePullPolicy: IfNotPresent
volumeMounts:
- name: datasets
mountPath: "/datasets/"
Expand Down
24 changes: 24 additions & 0 deletions test/cifar10/cifar10_cupid.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Test Pod handed to kubeshare-scheduler. The sharedgpu/* labels carry the
# request (gpu_request 0.3, gpu_limit 1.0, priority 100) and pin the GPU model
# to NVIDIA-GeForce-RTX-2070; their exact semantics are defined by the
# scheduler, not by this manifest.
apiVersion: v1
kind: Pod
metadata:
  name: cifar-2070
  labels:
    sharedgpu/gpu_request: '0.3'
    sharedgpu/gpu_limit: '1.0'
    sharedgpu/priority: '100'
    sharedgpu/gpu_model: NVIDIA-GeForce-RTX-2070
spec:
  schedulerName: kubeshare-scheduler
  restartPolicy: Never
  volumes:
    # Dataset directory on the node; the path is specific to the test host.
    - name: datasets
      hostPath:
        path: /home/riya/experiment/datasets/
  containers:
    - name: pytorch
      image: riyazhu/cifar10:test
      # Uncomment to replace the image's own command with an idle shell.
      # command: ["sh", "-c", "sleep infinity"]
      imagePullPolicy: IfNotPresent
      volumeMounts:
        - name: datasets
          mountPath: /datasets/
Original file line number Diff line number Diff line change
@@ -1,20 +1,19 @@
# Test:
# gpu_request and gpu_limit correct
apiVersion: v1
kind: Pod
metadata:
name: cifar
name: cifar-1080
labels:
"sharedgpu/gpu_request": "1"
"sharedgpu/gpu_request": "0.3"
"sharedgpu/gpu_limit": "1.0"
"sharedgpu/priority": "100"
"sharedgpu/gpu_model": "NVIDIA-GeForce-GTX-1080"
spec:
restartPolicy: Never
schedulerName: kubeshare-scheduler
containers:
- name: pytorch
image: riyazhu/cifar10:test
command: ["sh", "-c", "sleep infinity"]
# command: ["sh", "-c", "sleep infinity"]
imagePullPolicy: IfNotPresent
volumeMounts:
- name: datasets
Expand Down
26 changes: 26 additions & 0 deletions test/cifar10/cifar10_ubuntu.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Test:
#   gpu_request and gpu_limit correct
#
# Pod handed to kubeshare-scheduler with gpu_request 0.3 / gpu_limit 1.0 and
# no GPU model constraint (the gpu_model label is commented out).
apiVersion: v1
kind: Pod
metadata:
  name: cifar10-2
  labels:
    sharedgpu/gpu_request: '0.3'
    sharedgpu/gpu_limit: '1.0'
    sharedgpu/priority: '100'
    # sharedgpu/gpu_model: "NVIDIA-GeForce-GTX-1080"
spec:
  restartPolicy: Never
  schedulerName: kubeshare-scheduler
  volumes:
    # Dataset directory on the node; the path is specific to the test host.
    - name: datasets
      hostPath:
        path: /home/riya/experiment/datasets/
  containers:
    - name: pytorch
      image: riyazhu/cifar10:test
      # Uncomment to replace the image's own command with an idle shell.
      # command: ["sh", "-c", "sleep infinity"]
      imagePullPolicy: IfNotPresent
      volumeMounts:
        - name: datasets
          mountPath: /datasets/
38 changes: 38 additions & 0 deletions test/cifar10/job.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Batch test: a Job that runs 15 cifar10 Pods, all 15 allowed in parallel,
# each scheduled by kubeshare-scheduler with gpu_request 1.0 / gpu_limit 1.0.
apiVersion: batch/v1
kind: Job
metadata:
  name: cifar10
  labels:
    app: cifar10
spec:
  parallelism: 15
  completions: 15
  # selector:
  #   matchLabels:
  #     app: cifar10
  template:
    metadata:
      name: cifar10
      labels:
        # app: cifar10
        sharedgpu/gpu_request: '1.0'
        sharedgpu/gpu_limit: '1.0'
        # sharedgpu/group_name: "dep"
        # sharedgpu/min_available: "2"
        # sharedgpu/priority: "100"
    spec:
      schedulerName: kubeshare-scheduler
      restartPolicy: Never
      volumes:
        # Dataset directory on the node; the path is specific to the test host.
        - name: datasets
          hostPath:
            path: /home/riya/experiment/datasets/
      containers:
        - name: pytorch
          image: riyazhu/cifar10:test
          # Uncomment to replace the image's own command with an idle shell.
          # command: ["sh", "-c", "sleep infinity"]
          imagePullPolicy: IfNotPresent
          volumeMounts:
            - name: datasets
              mountPath: /datasets/

43 changes: 43 additions & 0 deletions test/cifar10/job_default.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Batch test without kubeshare-scheduler: schedulerName is commented out, so
# the cluster's default scheduler places the Pods, and each container asks for
# one whole GPU through the nvidia.com/gpu resource.
# NOTE(review): this Job uses the same name (cifar10) as test/cifar10/job.yaml,
# so the two cannot exist in one namespace at the same time.
apiVersion: batch/v1
kind: Job
metadata:
  name: cifar10
  labels:
    app: cifar10
spec:
  parallelism: 15
  completions: 15
  # selector:
  #   matchLabels:
  #     app: cifar10
  template:
    metadata:
      name: cifar10
      labels:
        # app: cifar10
        sharedgpu/gpu_request: '0.5'
        sharedgpu/gpu_limit: '1.0'
        # sharedgpu/group_name: "dep"
        # sharedgpu/min_available: "2"
        sharedgpu/priority: '100'
    spec:
      # schedulerName: kubeshare-scheduler
      restartPolicy: Never
      volumes:
        # Dataset directory on the node; the path is specific to the test host.
        - name: datasets
          hostPath:
            path: /home/riya/experiment/datasets/
      containers:
        - name: pytorch
          image: riyazhu/cifar10:test
          # Uncomment to replace the image's own command with an idle shell.
          # command: ["sh", "-c", "sleep infinity"]
          imagePullPolicy: IfNotPresent
          volumeMounts:
            - name: datasets
              mountPath: /datasets/
          resources:
            requests:
              nvidia.com/gpu: 1
            limits:
              nvidia.com/gpu: 1

Loading

0 comments on commit 680d2a7

Please sign in to comment.