环境信息:
kubernetes v1.22.2
一、备份前准备
1.确认 etcd 集群状态
kubectl get nodes -o wide # 检查节点状态
kubectl get pods -n kube-system | grep etcd # 查看 etcd Pod 位置(通常在 Master 节点)
2.准备备份工具
使用官方工具 etcdctl(需与 etcd 版本匹配):
# 查看 etcd 版本(在 Master 节点执行)
kubectl describe pod -n kube-system etcd-<master-node-name> | grep "Image:"
# 下载对应版本 etcdctl(示例:v3.5.0)
wget https://github.com/etcd-io/etcd/releases/download/v3.5.0/etcd-v3.5.0-linux-amd64.tar.gz
tar -xzvf etcd-v3.5.0-linux-amd64.tar.gz
sudo mv etcd-v3.5.0-linux-amd64/etcdctl /usr/local/bin/
etcdctl version # 确认版本
3.获取 etcd 证书路径
在 Master 节点上,证书通常位于 /etc/kubernetes/pki/etcd/:
ls /etc/kubernetes/pki/etcd/
# 输出:ca.crt server.crt server.key peer.crt peer.key
二、备份方案
方案1: 使用 etcdctl 手动快照备份(推荐)
# 在 Master 节点执行(将示例 IP 192.168.12.211 替换为实际的 Master 节点 IP,并确保 /backup 目录已存在)
ETCDCTL_API=3 etcdctl \
--endpoints=https://192.168.12.211:2379 \
--cacert=/etc/kubernetes/pki/etcd/ca.crt \
--cert=/etc/kubernetes/pki/etcd/server.crt \
--key=/etc/kubernetes/pki/etcd/server.key \
snapshot save /backup/etcd-snapshot-$(date +%Y%m%d%H%M%S).db
参数说明:
- 仅需对单个节点进行备份(snapshot save 只接受一个 endpoint)
- --endpoints:etcd 服务地址(通常为 Master 节点 IP + 2379 端口)
- --cacert:CA 证书路径
- --cert 和 --key:etcd 服务端证书和私钥
- snapshot save:保存快照到指定路径
方案2: 通过静态 Pod 直接备份(无需在宿主机安装 etcdctl,证书使用 Pod 内已挂载的文件)
如果 etcd 以静态 Pod 运行(Kubeadm 部署常见):
进入pod进行手动备份
# 进入 etcd Pod 容器
kubectl exec -it -n kube-system etcd-<master-node-name> -- sh
# 在容器内执行备份
etcdctl --endpoints=https://127.0.0.1:2379 \
--cacert=/etc/kubernetes/pki/etcd/ca.crt \
--cert=/etc/kubernetes/pki/etcd/server.crt \
--key=/etc/kubernetes/pki/etcd/server.key \
snapshot save /var/lib/etcd/snapshot.db
将手动备份的 db 文件拷贝到备份文件所在目录
1、使用 scp 拷贝备份文件(测试可用)
获取etcd节点ip
kubectl get pod -A -o wide |grep etcd
[root@master-01 ~]# kubectl get pod -A -o wide |grep etcd
kube-system etcd-master-01 1/1 Running 30 (53m ago) 23d 192.168.12.211 master-01 <none> <none>
kube-system etcd-master-02 1/1 Running 17 (53m ago) 23d 192.168.12.212 master-02 <none> <none>
kube-system etcd-master-03 1/1 Running 17 (53m ago) 23d 192.168.12.213 master-03 <none> <none>
scp root@192.168.12.212:/var/lib/etcd/snapshot.db /backup/
2、使用kubectl cp
kubectl cp -n kube-system etcd-<master-node-name>:/var/lib/etcd/snapshot.db /backup/snapshot.db
三、验证备份文件
# 检查快照状态
[root@master-01 backup]# etcdctl snapshot status /backup/snapshot.db --write-out=table
Deprecated: Use `etcdutl snapshot status` instead.
+----------+----------+------------+------------+
| HASH | REVISION | TOTAL KEYS | TOTAL SIZE |
+----------+----------+------------+------------+
| 8e9865be | 494341 | 1956 | 13 MB |
+----------+----------+------------+------------+
四、自动化备份(CronJob)
创建定时备份任务:
# etcd-backup-cronjob.yaml
apiVersion: batch/v1
kind: CronJob
metadata:
  name: etcd-backup
  namespace: kube-system
spec:
  schedule: "0 2 * * *" # 每天凌晨2点备份
  jobTemplate:
    spec:
      template:
        spec:
          hostNetwork: true
          # 证书目录和 /backup 都是 hostPath,Pod 必须调度到 Master 节点并容忍其污点
          # (v1.22 的标签/污点键为 node-role.kubernetes.io/master,较新版本为 control-plane)
          nodeSelector:
            node-role.kubernetes.io/master: ""
          tolerations:
          - key: node-role.kubernetes.io/master
            operator: Exists
            effect: NoSchedule
          containers:
          - name: etcd-backup
            image: k8s.gcr.io/etcd:3.5.0-0 # 与集群 etcd 版本一致(kubeadm v1.22 默认标签为 3.5.0-0,以 kubectl describe 查到的 Image 为准)
            command:
            - /bin/sh
            - -c
            - |
              etcdctl --endpoints=https://<master-ip>:2379 \
                --cacert=/etc/kubernetes/pki/etcd/ca.crt \
                --cert=/etc/kubernetes/pki/etcd/server.crt \
                --key=/etc/kubernetes/pki/etcd/server.key \
                snapshot save /backup/snapshot-$(date +%Y%m%d).db
            volumeMounts:
            - name: etcd-certs
              mountPath: /etc/kubernetes/pki/etcd
            - name: backup-dir
              mountPath: /backup
          volumes:
          - name: etcd-certs
            hostPath:
              path: /etc/kubernetes/pki/etcd
          - name: backup-dir
            hostPath:
              path: /backup
          restartPolicy: OnFailure
部署命令
kubectl apply -f etcd-backup-cronjob.yaml
五、灾难恢复(从备份还原)
还原操作会覆盖 etcd 数据,务必在测试环境验证!
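# 1. 停止所有 etcd 服务(所有 Master 节点),按部署方式二选一
# 1a. systemd(二进制)部署:
sudo systemctl stop etcd
# 1b. kubeadm 静态 Pod 部署(如本文环境):移走清单文件,kubelet 会自动停止 etcd Pod
sudo mv /etc/kubernetes/manifests/etcd.yaml /etc/kubernetes/etcd.yaml.bak
# 2. 还原快照
# 单节点集群在该 Master 节点执行一次即可;多节点集群需在每个 Master 节点上用同一份快照各执行一次:
# --name 与 --initial-advertise-peer-urls 填本节点的值(kubeadm 下 --name 默认为节点主机名),
# --initial-cluster 列出全部成员,各节点的 --initial-cluster-token 保持一致。
etcdctl snapshot restore /backup/snapshot.db \
--name etcd-0 \
--initial-cluster etcd-0=https://<master-ip>:2380 \
--initial-cluster-token etcd-cluster-1 \
--initial-advertise-peer-urls https://<master-ip>:2380 \
--data-dir /var/lib/etcd-restore
# 3. 替换原数据目录
sudo mv /var/lib/etcd /var/lib/etcd-old
sudo mv /var/lib/etcd-restore /var/lib/etcd
# 仅当 etcd 以独立的 etcd 用户运行(systemd 部署)时才需要;kubeadm 静态 Pod 默认以 root 运行,可跳过
sudo chown -R etcd:etcd /var/lib/etcd
# 4. 重启 etcd 服务,与第 1 步的方式对应
# 4a. systemd 部署:
sudo systemctl start etcd
# 4b. kubeadm 静态 Pod 部署:把清单文件放回,kubelet 会重新拉起 etcd Pod
sudo mv /etc/kubernetes/etcd.yaml.bak /etc/kubernetes/manifests/etcd.yaml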
# 5. 验证集群状态
kubectl get nodes