1. 启动minikube
# Start minikube on the Docker driver. The extra-config options bind the etcd
# metrics listener and the controller-manager / scheduler addresses to
# 0.0.0.0 (presumably so the Prometheus installed in the next step can reach
# them).
minikube start --driver=docker \
  --extra-config=etcd.listen-metrics-urls=http://0.0.0.0:2381 \
  --extra-config=controller-manager.bind-address=0.0.0.0 \
  --extra-config=scheduler.bind-address=0.0.0.0
2. 安装Prometheus + Grafana + kube-state-metrics + node-exporter
# Install the kube-prometheus-stack chart as a Helm release named "monitor".
# The release name is reused further down: the Grafana service name
# (monitor-grafana) and the PrometheusRule label (release: monitor) depend on it.
helm install monitor prometheus-community/kube-prometheus-stack
端口转发
# Forward local port 3000 to port 80 of the monitor-grafana service.
kubectl port-forward svc/monitor-grafana 3000:80
3. 在Grafana中可视化CPU使用率
4. 在Grafana中可视化内存使用率
5. 磁盘写入IOPS
6. 磁盘读取IOPS
7. 磁盘总IOPS
8. 默认情况下,数据滚动删除周期以及抓取频率
9. 修改数据滚动删除周期以及抓取频率
# Upgrade the release installed above (named "monitor") so that Prometheus
# keeps data for 7 days and scrapes targets every 15 seconds.
helm upgrade monitor prometheus-community/kube-prometheus-stack \
  --set prometheus.prometheusSpec.retention="7d" \
  --set prometheus.prometheusSpec.scrapeInterval="15s"
10. 部署自己的测试yaml
# highcpu-deploy.yaml: single-replica test Deployment running the
# highcpu-java-app image with a CPU request of 100m and a limit of 500m.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: highcpu-java
spec:
  replicas: 1
  selector:
    matchLabels:
      app: highcpu-java
  template:
    metadata:
      labels:
        app: highcpu-java
    spec:
      containers:
        - name: highcpu-java
          image: highcpu-java-app
          # Never pull from a registry: the image has to be present in the
          # node's local image store already.
          imagePullPolicy: Never
          resources:
            requests:
              cpu: "100m"
            limits:
              cpu: "500m"
# Apply the manifest file highcpu-deploy.yaml to the cluster.
kubectl apply -f highcpu-deploy.yaml
11. 创建告警规则
cpu-alert.yaml
# cpu-alert.yaml: PrometheusRule that fires a warning when the per-second rate
# of container_cpu_usage_seconds_total stays above 0.2 for one minute.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: cpu-alert
  # The other commands in these notes operate on the default namespace, and no
  # "monitoring" namespace is created anywhere in them, so the rule lives in
  # default as well.
  namespace: default
  labels:
    release: monitor  # must match the Prometheus ruleSelector
spec:
  groups:
    - name: pod-cpu-alerts
      rules:
        - alert: PodHighCPU
          expr: rate(container_cpu_usage_seconds_total[1m]) > 0.2
          for: 1m
          labels:
            severity: warning
          annotations:
            summary: "Pod CPU 使用率过高"
            description: "Pod {{ $labels.pod }} 在过去 1 分钟内 CPU 使用率超过 20%"
# Apply the manifest file cpu-alert.yaml to the cluster.
kubectl apply -f cpu-alert.yaml
✅ `release` 标签必须匹配 Prometheus 的 ruleSelector,可通过以下命令查看:
kubectl get prometheus monitor-kube-prometheus-st-prometheus -o yaml | grep -A5 ruleSelector
12. 配置告警推送
创建alertmanager.yaml
# alertmanager.yaml: Alertmanager configuration that routes every alert,
# grouped by alert name, to a single e-mail receiver via smtp.163.com.
global:
  smtp_smarthost: 'smtp.163.com:465'
  smtp_from: 'fancycranvo@163.com'
  smtp_auth_username: 'fancycranvo@163.com'
  # Do not keep the real SMTP authorization code in these notes or in version
  # control. Substitute it locally just before creating the Secret.
  smtp_auth_password: '<SMTP_AUTH_CODE>'
  # NOTE(review): port 465 is expected to use implicit TLS, so STARTTLS is
  # not required here; confirm against the Alertmanager version in use.
  smtp_require_tls: false
  smtp_hello: '163.com'
route:
  receiver: 'email-receiver'
  group_by: ['alertname']
  group_wait: 5s
  group_interval: 10s
  repeat_interval: 5m
receivers:
  - name: 'email-receiver'
    email_configs:
      - to: '1653037723@qq.com'
        # Also send a notification when the alert is resolved.
        send_resolved: true
# Store the local alertmanager.yaml file in a Secret named alertmanager-config
# (under the key "alertmanager.yaml") in the default namespace.
kubectl --namespace default create secret generic alertmanager-config \
  --from-file=alertmanager.yaml=alertmanager.yaml

# Point the Alertmanager resource at that Secret by setting spec.configSecret
# with a merge patch.
kubectl --namespace default patch alertmanager \
  monitor-kube-prometheus-st-alertmanager \
  --type merge \
  --patch '{"spec": {"configSecret": "alertmanager-config"}}'
注意:如果你想修改告警内容,直接修改 alertmanager.yaml 文件,然后用下面的命令更新 Secret:
# Render the Secret manifest locally (--dry-run=client sends nothing to the
# cluster) and pipe it into "kubectl apply", which creates the Secret or
# updates it if it already exists.
kubectl --namespace default create secret generic alertmanager-config \
  --from-file=alertmanager.yaml=alertmanager.yaml \
  --dry-run=client --output yaml |
  kubectl apply -f -