Skip to content

Commit

Permalink
doc: fix deploy tutorial (bytedance#611)
Browse files Browse the repository at this point in the history
  • Loading branch information
peng09 authored Feb 25, 2021
1 parent 18a9815 commit fe69db3
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 134 deletions.
60 changes: 55 additions & 5 deletions deploy/README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
# Fedlearner Operator
# Fedlearner

您可以在一个 K8S 集群中快速尝试 Fedlearner 的基础功能,部署 Fedlearner 会在您的 K8S 集群中安装以下组件:

- fedlearner-stack 包含了 Fedlearner 所依赖的基础设施,包括 NFS Server/RDS (mariadb) 等;
- fedlearner 包含了运行 Fedlearner 任务所需的组件,包括 Fedlearner Operator/Fedlearner APIServer 和 Fedlearner WebConsole。

请注意,Fedlearner Operator 目前仅适用于 K8S 1.14.8 - 1.16.9 版本 (推荐 1.16.9 的 K8S 版本),我们正在适配更新版本的 K8S。

## Prerequisite

Expand All @@ -14,14 +21,57 @@ apt install nfs-common

```sh
helm install fedlearner-stack ./deploy/charts/fedlearner-stack
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: pvc-fedlearner-default
spec:
accessModes:
- ReadWriteMany
resources:
requests:
storage: 10Gi
storageClassName: nfs
EOF
```

### 安装 Fedlearner Controller 和 CRD

```sh
kubectl create ns leader
helm install fedlearner ./deploy/charts/fedlearner --namespace leader
export OPERATOR_IMAGE=`git rev-parse HEAD | cut -c 1-7`
helm install fedlearner ./deploy/charts/fedlearner \
--set fedlearner-operator.ingress.enabled=false \
--set fedlearner-operator.image.tag=$OPERATOR_IMAGE \
--set fedlearner-operator.extraArgs.ingress-enabled-client-auth=false
kubectl create ns follower
helm install fedlearner ./deploy/charts/fedlearner --namespace follower
kubectl apply -f ./deploy/charts/manifests/
helm install fedlearner ./deploy/charts/fedlearner --namespace follower \
--set fedlearner-apiserver.enabled=false \
--set fedlearner-web-console.enabled=false \
--set fedlearner-operator.installCRD=false \
--set fedlearner-operator.image.tag=$OPERATOR_IMAGE \
--set fedlearner-operator.extraArgs.ingress-enabled-client-auth=false
```

### 运行测试任务

```sh
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Secret
metadata:
name: test-secret
namespace: default
stringData:
ca.crt: fake
---
apiVersion: v1
kind: Secret
metadata:
name: test-secret
namespace: follower
stringData:
ca.crt: fake
EOF
kubectl create -f ./deploy/examples/normal_leader.yaml -f ./deploy/examples/normal_follower.yaml
```
69 changes: 6 additions & 63 deletions deploy/examples/normal_follower.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,68 +5,6 @@ metadata:
namespace: follower
spec:
flReplicaSpecs:
Master:
pair: true
replicas: 2
template:
spec:
restartPolicy: Never # required
containers:
- env:
- name: APPLICATION_ID
value: normal
- name: HOST_IP
valueFrom:
fieldRef:
fieldPath: status.hostIP
image: nicolaka/netshoot
imagePullPolicy: IfNotPresent
restartPolicy: Never # required
name: tensorflow # default
ports:
- containerPort: 50051
name: flapp-port # default
resources:
limits:
cpu: 1000m
memory: 1Gi
requests:
cpu: 1000m
memory: 1Gi
command: ["/bin/sleep"]
args: ["3m"]
PS:
pair: false
replicas: 0
template:
spec:
restartPolicy: Never # required
containers:
- env:
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: HOST_IP
valueFrom:
fieldRef:
fieldPath: status.hostIP
image: nicolaka/netshoot
imagePullPolicy: IfNotPresent
restartPolicy: Never # required
name: tensorflow # default
ports:
- containerPort: 50051
name: flapp-port # default
resources:
limits:
cpu: 1000m
memory: 500Mi
requests:
cpu: 1000m
memory: 500Mi
command: ["/bin/sleep"]
args: ["3m"]
Worker:
pair: true
replicas: 2
Expand Down Expand Up @@ -103,8 +41,13 @@ spec:
memory: 500Mi
command: ["/bin/sleep"]
args: ["3m"]
ingressSpec:
extraHostSuffix: ".fedlearner-test.org"
secretName: test-secret
clientAuthSecretName: test-secret
ingressClassName: test-ingress
role: Follower
peerSpecs:
Leader:
peerURL: flapp-operator.leader.svc.cluster.local:8100
peerURL: fedlearner-operator.default.svc.cluster.local:8100

77 changes: 11 additions & 66 deletions deploy/examples/normal_leader.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,69 +2,9 @@ apiVersion: fedlearner.k8s.io/v1alpha1
kind: FLApp
metadata:
name: normal
namespace: leader
namespace: default
spec:
flReplicaSpecs:
Master:
pair: true
replicas: 2
template:
spec:
restartPolicy: Never # required
containers:
- env:
- name: APPLICATION_ID
value: normal
- name: HOST_IP
valueFrom:
fieldRef:
fieldPath: status.hostIP
image: nicolaka/netshoot
imagePullPolicy: IfNotPresent
name: tensorflow # default
ports:
- containerPort: 50051
name: flapp-port # default
resources:
limits:
cpu: 4000m
memory: 4Gi
requests:
cpu: 4000m
memory: 4Gi
command: ["/bin/sleep"]
args: ["3m"]
PS:
pair: false
replicas: 0
template:
spec:
restartPolicy: Never # required
containers:
- env:
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: HOST_IP
valueFrom:
fieldRef:
fieldPath: status.hostIP
image: nicolaka/netshoot
imagePullPolicy: IfNotPresent
name: tensorflow # default
ports:
- containerPort: 50051
name: flapp-port # default
resources:
limits:
cpu: 4000m
memory: 4Gi
requests:
cpu: 4000m
memory: 4Gi
command: ["/bin/sleep"]
args: ["3m"]
Worker:
pair: true
replicas: 2
Expand All @@ -87,14 +27,19 @@ spec:
name: flapp-port # default
resources:
limits:
cpu: 4000m
memory: 4Gi
cpu: 1000m
memory: 1Gi
requests:
cpu: 4000m
memory: 4Gi
cpu: 1000m
memory: 1Gi
command: ["/bin/sleep"]
args: ["3m"]
ingressSpec:
extraHostSuffix: ".fedlearner-test.org"
secretName: test-secret
clientAuthSecretName: test-secret
ingressClassName: test-ingress
role: Leader
peerSpecs:
Follower:
peerURL: flapp-operator.follower.svc.cluster.local:8100
peerURL: fedlearner-operator.follower.svc.cluster.local:8100

0 comments on commit fe69db3

Please sign in to comment.