forked from easzlab/kubeasz
-
Notifications
You must be signed in to change notification settings - Fork 0
/
clean_one_node.yml
255 lines (222 loc) · 7.97 KB
/
clean_one_node.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
# 警告:此脚本将清理单个node节点,使用请详细参阅 docs/op/clean_one_node.md
# 请三思后运行此脚本,特别的:如果有pod使用了本地存储类型,请自行判断重要性
# 使用:
# 1. 执行 ansible-playbook /etc/ansible/tools/clean_one_node.yml
# 2. 按照提示输入待删除节点
- hosts: deploy
  # Single play, run from the deploy host:
  #   1. sanity-check the requested node,
  #   2. drain it with kubectl,
  #   3. wipe k8s / container runtime / network / etcd / LB state on the node
  #      itself (the cleanup block is delegated to NODE_TO_DEL),
  #   4. delete the node object and remove it from the ansible hosts file.
  # bin_dir, ca_dir, base_dir, CONTAINER_RUNTIME and CLUSTER_NETWORK are not
  # defined in this file; they are expected to come from the inventory/vars.
  vars_prompt:
    # Asked twice (confirm) and echoed (not private): it is a host, not a secret.
    - name: "NODE_TO_DEL"
      prompt: "which node is about to be deleted?(e.g 192.168.1.1)"
      private: false
      confirm: true
  tasks:
    # An empty answer would leave delegate_to without a target and would make
    # the hosts-file regexp at the end of this play match every blank line and
    # every line that does not start with a digit, so stop before doing anything.
    - name: fail info0
      fail:
        msg: "NODE_TO_DEL is empty, nothing to delete!"
      when: "(NODE_TO_DEL | default('', true) | trim) == ''"

    - name: fail info1
      fail:
        msg: "you CAN NOT delete the last member of etcd cluster!"
      when: "groups['etcd']|length < 2 and NODE_TO_DEL in groups['etcd']"

    - name: fail info2
      fail:
        msg: "you CAN NOT delete the last member of kube-master!"
      when: "groups['kube-master']|length < 2 and NODE_TO_DEL in groups['kube-master']"

    # Best effort: the node may not be a kube-node at all, so errors are ignored.
    # NOTE(review): newer kubectl releases deprecate --delete-local-data in
    # favour of --delete-emptydir-data - confirm against the kubectl in bin_dir.
    - name: 执行kubectl drain(节点可能是kube-node节点)
      shell: "{{ bin_dir }}/kubectl drain {{ NODE_TO_DEL }} --ignore-daemonsets --delete-local-data"
      ignore_errors: true

    # Everything in this block runs once, on the node being removed
    # (see delegate_to / run_once at the end of the block).
    - block:
        # Clean up kube-node related services
        - name: stop and disable kube-node service
          service:
            name: "{{ item }}"
            state: stopped
            enabled: false
          with_items:
            - kubelet
            - kube-proxy
          ignore_errors: true

        # Unmount whatever is still mounted below /var/lib/kubelet so the
        # directory can be removed afterwards.
        - name: umount kubelet 挂载的目录
          shell: "mount | grep '/var/lib/kubelet'| awk '{print $3}'|xargs umount"
          args:
            warn: false
          ignore_errors: true

        - name: 清理目录和文件
          file:
            name: "{{ item }}"
            state: absent
          with_items:
            - "/var/lib/kubelet/"
            - "/var/lib/kube-proxy/"
            - "/etc/systemd/system/kubelet.service"
            - "/etc/systemd/system/kube-proxy.service"
            - "/opt/kube/kube-system/"

        # Clean up kube-master related services
        - name: stop and disable kube-master service
          service:
            name: "{{ item }}"
            state: stopped
            enabled: false
          with_items:
            - kube-apiserver
            - kube-controller-manager
            - kube-scheduler
          ignore_errors: true

        - name: 清理目录和文件
          file:
            name: "{{ item }}"
            state: absent
          with_items:
            - "/var/run/kubernetes"
            - "/etc/systemd/system/kube-apiserver.service"
            - "/etc/systemd/system/kube-controller-manager.service"
            - "/etc/systemd/system/kube-scheduler.service"

        # Clean up the container runtime and network related state.
        # Docker variant - only when CONTAINER_RUNTIME == 'docker'.
        - block:
            # Refuse to remove dockerd while a container named 'kubeasz' is
            # running on the node; "NOT FOUND" keeps the command's rc at 0.
            - name: 获取是否运行名为'kubeasz'的容器
              shell: 'docker ps|grep kubeasz || echo "NOT FOUND"'
              register: install_info

            - name: fail info3
              fail:
                msg: "you CAN NOT delete dockerd, because container 'kubeasz' is running!"
              when: "'kubeasz' in install_info.stdout"

            - name: stop and disable docker service
              service:
                name: docker
                state: stopped
                enabled: false
              ignore_errors: true

            - name: unmount docker filesystem-1
              mount:
                path: /var/run/docker/netns/default
                state: unmounted

            - name: unmount docker filesystem-2
              mount:
                path: /var/lib/docker/overlay
                state: unmounted

            - name: remove files and dirs
              file:
                name: "{{ item }}"
                state: absent
              with_items:
                - "/var/lib/docker/"
                - "/var/run/docker/"
                - "/etc/systemd/system/docker.service"
                - "/etc/systemd/system/docker.service.requires/"
                - "/etc/systemd/system/docker.service.d/"
                - "/etc/bash_completion.d/docker"
                - "/usr/bin/docker"
          when: CONTAINER_RUNTIME == 'docker'

        # Containerd variant - only when CONTAINER_RUNTIME == 'containerd'.
        - block:
            - name: stop and disable containerd service
              service:
                name: containerd
                state: stopped
                enabled: false
              ignore_errors: true

            - name: umount containerd filesystems
              shell: "mount | grep 'containerd/io.containerd'| awk '{print $3}'|xargs umount || exit 0"
              args:
                warn: false
              ignore_errors: true

            - name: remove files and dirs
              file:
                name: "{{ item }}"
                state: absent
              with_items:
                - "/etc/containerd/"
                - "/etc/crictl.yaml"
                - "/etc/systemd/system/containerd.service"
                - "/opt/containerd/"
                - "/var/lib/containerd/"
                - "/var/run/containerd/"
          when: CONTAINER_RUNTIME == 'containerd'

        # CNI / network plugin directories, removed regardless of the plugin
        # actually in use.
        - name: remove files and dirs2
          file:
            name: "{{ item }}"
            state: absent
          with_items:
            - "/etc/cni/"
            - "/run/flannel/"
            - "/etc/calico/"
            - "/var/run/calico/"
            - "/var/lib/calico/"
            - "/var/log/calico/"
            - "/etc/cilium/"
            - "/var/run/cilium/"
            - "/sys/fs/bpf/tc/"
            - "/var/lib/cni/"
            - "/var/lib/kube-router/"
            - "/opt/kube/kube-system/"
            - "/var/run/openvswitch/"
            - "/etc/origin/openvswitch/"
            - "/etc/openvswitch/"
            - "/var/log/openvswitch/"

        # Flush rules and delete user chains in the filter, nat, raw and
        # mangle tables. Not best-effort: a failure here stops the play.
        - name: cleanup iptables
          shell: >-
            iptables -F && iptables -X
            && iptables -F -t nat && iptables -X -t nat
            && iptables -F -t raw && iptables -X -t raw
            && iptables -F -t mangle && iptables -X -t mangle

        # Commands are chained with ';' so every interface is attempted even
        # when some of them do not exist on this node.
        - name: cleanup networks1
          shell: >-
            ip link del tunl0;
            ip link del flannel.1;
            ip link del cni0;
            ip link del mynet0;
            ip link del kube-bridge;
            ip link del dummy0;
            ip link del kube-ipvs0;
            ip link del cilium_net;
            ip link del cilium_vxlan;
            ip link del ovn0;
            ip link del ovs-system
          ignore_errors: true

        # Both 'networking' and 'network' are restarted; which unit exists
        # depends on the node, hence ignore_errors.
        - name: cleanup networks2
          shell: >-
            ip link del docker0;
            systemctl restart networking;
            systemctl restart network
          ignore_errors: true

        # Delete every route whose 'ip route' line mentions bird.
        - name: cleanup 'calico' routes
          shell: "for rt in `ip route|grep bird|sed 's/blackhole//'|awk '{print $1}'`;do ip route del $rt;done;"
          when: "CLUSTER_NETWORK == 'calico'"
          ignore_errors: true

        # Clean up etcd cluster related state
        - name: stop and disable etcd service
          service:
            name: etcd
            state: stopped
            enabled: false
          ignore_errors: true

        - name: 清理目录和文件
          file:
            name: "{{ item }}"
            state: absent
          with_items:
            - "/var/lib/etcd"
            - "/etc/etcd/"
            - "/backup/k8s"
            - "/etc/systemd/system/etcd.service"

        # Clean up load balancer related state
        - name: stop keepalived service
          service:
            name: keepalived
            state: stopped
            enabled: false
          ignore_errors: true

        - name: stop haproxy service
          service:
            name: haproxy
            state: stopped
            enabled: false
          ignore_errors: true

        - name: 清理LB 配置文件目录
          file:
            name: "{{ item }}"
            state: absent
          with_items:
            - "/etc/haproxy"
            - "/etc/keepalived"

        # Clean up everything else.
        # Under delegate_to, ansible_distribution describes the play host
        # (deploy), not the node being cleaned, so it cannot be used to pick
        # the service name. Try both names instead; the one that does not
        # exist on the node fails harmlessly.
        - name: stop and disable chrony/chronyd
          service:
            name: "{{ item }}"
            state: stopped
            enabled: false
          with_items:
            - chrony
            - chronyd
          ignore_errors: true
          tags: rm_ntp

        # Certificates, kubeconfig and docker configuration
        - name: 清理证书目录和文件
          file:
            name: "{{ item }}"
            state: absent
          with_items:
            - "/etc/kubernetes/"
            - "{{ ca_dir }}"
            - "/root/.kube/"
            - "/etc/docker/"

        # Drop the lines matching these patterns from ~/.bashrc.
        - name: 清理自动生成的PATH
          lineinfile:
            dest: "~/.bashrc"
            state: absent
            regexp: '{{ item }}'
          with_items:
            - 'kubeasz'
            - 'helm completion'
            - 'crictl completion'
            - 'kubectl completion'
            - 'HELM_TLS_ENABLE'
      delegate_to: "{{ NODE_TO_DEL }}"
      run_once: true

    # Run kubectl delete (the node may not be a kube-node, so errors are ignored)
    - name: 执行kubectl delete(节点可能是kube-node节点)
      shell: "{{ bin_dir }}/kubectl delete node {{ NODE_TO_DEL }}"
      ignore_errors: true

    # Remove the node's entry from the ansible hosts file.
    # NODE_TO_DEL is regex-escaped so that the dots of an IP address only match
    # literal dots; unescaped, deleting 192.168.1.1 would also remove the line
    # of e.g. 192.168.101.5. The trailing '$' / '[^0-9]' keeps 192.168.1.1 from
    # matching 192.168.1.10.
    - name: "rm {{ NODE_TO_DEL }} in ansible hosts"
      lineinfile:
        dest: "{{ base_dir }}/hosts"
        state: absent
        regexp: '^{{ NODE_TO_DEL | regex_escape }}($|[^0-9])'
      connection: local