Reference: AKA ES Nginx Logs, https://grafana.com/grafana/dashboards/11190-es-nginx-logs/
The overall architecture is the same; there are a few differences:
The original document targets ELK 7.x, while my environment is ELK 8.x.
The field I extract the client's public IP from is different.
Some ES 8.x field names differ: for example, geoip.country_name became geoip.geo.country_name in 8.x, and several geoip.* fields gained an extra geoip.geo level.
The Grafana dashboard has some personal customizations.
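Concretely, where the 7.x pipeline wrote flat geoip fields, 8.x documents nest them one level deeper (illustrative values):

# 7.x document                          # 8.x document
geoip.country_name: "United States"     geoip.geo.country_name: "United States"
geoip.location: { lat, lon }            geoip.geo.location: { lat, lon }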
Host list

| Host | Role | Version |
| --- | --- | --- |
| web001 | Nginx, Filebeat | 8.12.2 |
| ops | Redis, Logstash | 8.12.2 |
| eslog001, eslog002, eslog003 | Elasticsearch | 8.12.2 |
Architecture diagram
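The flow, per the host list above: Nginx (web001) → Filebeat (web001) → Redis (ops) → Logstash (ops) → Elasticsearch (eslog001/002/003) → Grafana.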
Nginx: add a log format
# Add this JSON log format
log_format aka_logs '{"@timestamp":"$time_iso8601",'
                    '"host":"$hostname",'
                    '"server_ip":"$server_addr",'
                    '"client_ip":"$remote_addr",'
                    '"xff":"$http_x_forwarded_for",'
                    '"domain":"$host",'
                    '"url":"$uri",'
                    '"referer":"$http_referer",'
                    '"args":"$args",'
                    '"upstreamtime":"$upstream_response_time",'
                    '"responsetime":"$request_time",'
                    '"request_method":"$request_method",'
                    '"status":"$status",'
                    '"upstream_status":"$upstream_status",'
                    '"size":"$body_bytes_sent",'
                    '"request_length":"$request_length",'
                    '"protocol":"$server_protocol",'
                    '"upstreamhost":"$upstream_addr",'
                    '"file_dir":"$request_filename",'
                    '"http_user_agent":"$http_user_agent",'
                    '"platform":"$http_platform"'
                    '}';
# Modify the access_log to use it
access_log /data/logs/nginx/xxxx.xxx.com.access_$year-$month-$day.log aka_logs;
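Note that $year, $month and $day are not built-in Nginx variables; they must be defined elsewhere in the config, which the original does not show. A minimal sketch of one common approach, using a map on $time_iso8601 in the http block ($logdate and the single date string are illustrative, not the author's exact setup):

# Hypothetical sketch: derive a YYYY-MM-DD string from $time_iso8601
map $time_iso8601 $logdate {
    '~^(?<ymd>\d{4}-\d{2}-\d{2})' $ymd;
    default                       'nodate';
}

# With variables in the log path, Nginx re-opens the file per request;
# open_log_file_cache keeps the descriptors cached.
open_log_file_cache max=16 inactive=60s valid=60s;
access_log /data/logs/nginx/xxxx.xxx.com.access_$logdate.log aka_logs;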
Filebeat deployment
#=========================== Filebeat inputs =============================
filebeat.inputs:
# Collect Nginx logs
- type: log
  enabled: true
  paths:
    - /data/logs/nginx/*.log
  # Enable these when the log lines are JSON
  json.keys_under_root: true
  json.overwrite_keys: true
  json.add_error_key: true
  # exclude_lines: ['^.*Blackbox.*$', '^.*ELB-HealthChecker.*$', '^.*favicon.*$']

#-------------------------- Logstash output ------------------------------
# output.logstash:
#   hosts: ["LogstashIP:5044"]

#-------------------------- Redis output ------------------------------
output.redis:
  hosts: ["redisIP:6379"] # Redis host to ship logs to
  password: "your_redis_password"
  key: "nginx_logs"       # Redis list key the log entries are pushed onto
  db: 0
  timeout: 5
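In Filebeat 8.x the log input above still works but is deprecated in favor of filestream. An equivalent sketch (my assumption, not part of the original setup), with JSON decoding moved into an ndjson parser:

# Hypothetical filestream equivalent of the log input above
- type: filestream
  id: nginx-access
  enabled: true
  paths:
    - /data/logs/nginx/*.log
  parsers:
    - ndjson:
        target: ""          # decode JSON keys at the root of the event
        overwrite_keys: true
        add_error_key: true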
docker-compose.yml for Filebeat:

# version: '3.8'   # the version key is optional with Docker Compose v2
networks:
  filebeat-network:
    driver: bridge

services:
  filebeat:
    image: docker.elastic.co/beats/filebeat:8.12.2
    restart: always
    hostname: "{{.Node.Hostname}}-filebeat"
    volumes:
      - ./filebeat.yml:/usr/share/filebeat/filebeat.yml
      - /data/logs/nginx:/data/logs/nginx
    networks:
      - filebeat-network
    deploy:
      resources:
        limits:
          cpus: '1.0'
          memory: 1.0G
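Bring it up (Docker Compose v2 syntax) and watch for output errors:

docker compose up -d
docker compose logs -f filebeat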
Redis deployment
Create a redis_data directory for the data:
mkdir -p redis_data
redis.conf (note: Redis 7 still accepts the legacy slave-* and *-ziplist-* directive names below as aliases for replica-* and *-listpack-*):

# Bind addresses: listen on loopback and all interfaces
bind 127.0.0.1 0.0.0.0
# Disable protected mode so external clients may connect
protected-mode no
# Redis service port
port 6379
# TCP backlog queue length
tcp-backlog 128
# Client idle timeout; 0 means never time out
timeout 0
# TCP keepalive interval (seconds)
tcp-keepalive 300
# Do not daemonize (required when running under Docker)
daemonize no
# Not managed by a supervisor (systemd/upstart)
supervised no
# PID file for the Redis process
#pidfile /var/run/redis_6379.pid
# Log level
#loglevel notice
# Log file path
#logfile "/data/redis/logs/redis_master.log"
# Number of databases
databases 16
# Always show the Redis logo at startup
always-show-logo yes
# Persistence: RDB snapshot settings follow
# Snapshot if at least 1 key changed within 900 seconds
save 900 1
# Snapshot if at least 10 keys changed within 300 seconds
save 300 10
# Snapshot if at least 10000 keys changed within 60 seconds
save 60 10000
# Stop accepting writes if a background save fails
stop-writes-on-bgsave-error yes
# Compress RDB files
rdbcompression yes
# Checksum RDB files
rdbchecksum yes
# RDB file name
dbfilename dump.rdb
# Replicas may serve stale data while disconnected from the master
slave-serve-stale-data yes
# Replica read-only mode (disabled here)
slave-read-only no
# Do not use diskless replication
repl-diskless-sync no
# Diskless replication delay (seconds)
repl-diskless-sync-delay 5
# Keep TCP_NODELAY enabled on the replication link (lower latency)
repl-disable-tcp-nodelay no
# Replica priority used during failover
slave-priority 100
# Require a password for connections
requirepass oHMU15FrRv!GIPGA
# Maximum number of client connections
maxclients 10000
# Maximum memory usage
maxmemory 6G
# Disable lazy (asynchronous) freeing
lazyfree-lazy-eviction no
lazyfree-lazy-expire no
lazyfree-lazy-server-del no
slave-lazy-flush no
# AOF persistence disabled; RDB only
appendonly no
# AOF file name
appendfilename "appendonly.aof"
# AOF fsync policy: every second
appendfsync everysec
# Do not fsync during AOF rewrites
no-appendfsync-on-rewrite no
# Auto AOF rewrite growth percentage
auto-aof-rewrite-percentage 100
# Auto AOF rewrite minimum file size
auto-aof-rewrite-min-size 64mb
# Truncate an incomplete trailing command when loading the AOF
aof-load-truncated yes
# Do not use an RDB preamble in the AOF
aof-use-rdb-preamble no
# Lua script execution time limit (ms)
lua-time-limit 5000
# Slow log threshold (microseconds)
slowlog-log-slower-than 10000
# Slow log maximum length
slowlog-max-len 128
# Latency monitor threshold (0 = disabled)
latency-monitor-threshold 0
# Keyspace notification events (disabled)
notify-keyspace-events ""
# Data structure encoding thresholds
hash-max-ziplist-entries 512
hash-max-ziplist-value 64
list-max-ziplist-size -2
list-compress-depth 0
set-max-intset-entries 512
zset-max-ziplist-entries 128
zset-max-ziplist-value 64
hll-sparse-max-bytes 3000
# Enable incremental rehashing of the main hash tables
activerehashing yes
# Client output buffer limits
client-output-buffer-limit normal 0 0 0
client-output-buffer-limit slave 256mb 64mb 60
client-output-buffer-limit pubsub 32mb 8mb 60
# Frequency of background tasks (hz)
hz 10
# Incremental fsync while rewriting the AOF
aof-rewrite-incremental-fsync yes
docker-compose.yml for Redis:

version: '3.8'
networks:
  redis-network:
    driver: bridge

services:
  redis:
    image: redis:7.0.0
    restart: always
    volumes:
      - ./redis_data:/data
      - ./redis.conf:/etc/redis/redis.conf
    ports:
      - "6379:6379"
    # Point redis-server at the mounted config, otherwise it is ignored
    # (requirepass is already set inside redis.conf)
    command: redis-server /etc/redis/redis.conf
    networks:
      - redis-network
    deploy:
      resources:
        limits:
          cpus: '1.0'
          memory: 1.5G
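Once Nginx traffic is flowing, you can confirm that Filebeat is pushing events onto the Redis list (the password matches requirepass in redis.conf; expect the length to hover near 0 once Logstash starts draining the list):

docker compose exec redis redis-cli -a 'oHMU15FrRv!GIPGA' LLEN nginx_logs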
ES: create the index template and ILM policy
The data source and index template must be created in ES beforehand; see the earlier post 《Logstash 指定索引 输出至 ES》.
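As a rough sketch of what that post sets up (the policy name logstash-nginx-ilm and the 30-day retention are my assumptions; the geo_point mapping is needed so the geoip.location field built by Logstash below can drive the map panels):

# ILM policy: delete daily indices after 30 days (assumed retention)
curl -X PUT "http://ES001IP:9200/_ilm/policy/logstash-nginx-ilm" \
  -H 'Content-Type: application/json' -d'
{
  "policy": {
    "phases": {
      "delete": { "min_age": "30d", "actions": { "delete": {} } }
    }
  }
}'

# Index template matching the logstash-nginx-* indices written by Logstash
curl -X PUT "http://ES001IP:9200/_index_template/logstash-nginx" \
  -H 'Content-Type: application/json' -d'
{
  "index_patterns": ["logstash-nginx-*"],
  "template": {
    "settings": { "index.lifecycle.name": "logstash-nginx-ilm" },
    "mappings": {
      "properties": {
        "geoip": { "properties": { "location": { "type": "geo_point" } } }
      }
    }
  }
}'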
Logstash deployment
1. Download GeoLite2-City.mmdb:
1.1 Sign in at
https://www.maxmind.com/en/account/sign-in
1.2 Find GeoLite City under downloads
https://www.maxmind.com/en/accounts/1161290/geoip/downloads
[root@ops /data/ops/elk/logstash]$ pwd
/data/ops/elk/logstash
[root@ops /data/ops/elk/logstash]$ ls
docker-compose.yml GeoLite2-City.mmdb logstash.conf logstash.yml
logstash.conf:

input {
  redis {
    data_type => "list"
    key       => "nginx_logs"
    host      => "redisIP"
    port      => 6379
    password  => "your_redis_password"
    db        => 0
  }
}

filter {
  if [xff] {
    mutate {
      split     => { "[xff]" => "," }
      # XFF may hold several IPs (IPv4/IPv6); keep only the first column as the public IP
      add_field => { "client_public_ip" => "%{[xff][0]}" }
      gsub      => [
        "[client_ip]", "\s", ""
      ]
    }
    # Make sure client_public_ip is a string
    mutate {
      convert => { "client_public_ip" => "string" }
    }
  }
  geoip {
    target   => "geoip"
    source   => "client_public_ip"
    database => "/usr/share/logstash/GeoLite2-City.mmdb"
    # add_field => [ "[geoip][location]", "%{[geoip][geo][location][lon]}" ]
    # add_field => [ "[geoip][location]", "%{[geoip][geo][location][lat]}" ]
  }
  mutate {
    convert => [ "size", "integer" ]
    convert => [ "status", "integer" ]
    convert => [ "responsetime", "float" ]
    convert => [ "upstreamtime", "float" ]
    convert => [ "[geoip][geo][location][lat]", "float" ]
    convert => [ "[geoip][geo][location][lon]", "float" ]
    add_field => {
      "[geoip][location]" => "%{[geoip][geo][location][lat]},%{[geoip][geo][location][lon]}"
    }
    remove_field => [ "ecs", "agent", "host", "cloud", "@version", "input", "logs_type", "xff" ]
  }
  # Parse http_user_agent to derive the client OS and browser versions
  if [http_user_agent] {
    useragent {
      source => "http_user_agent"
      target => "ua"
      remove_field => [ "[ua][minor]", "[ua][major]", "[ua][build]", "[ua][patch]", "[ua][os_minor]", "[ua][os_major]", "[geoip][geo][country_iso_code]", "[geoip][geo][continent_code]", "[geoip][geo][region_name]", "[geoip][geo][postal_code]", "[geoip][geo][region_iso_code]" ]
    }
  }
}

# output {
#   stdout { codec => rubydebug }
# }

output {
  elasticsearch {
    hosts => ["ES001IP:9200", "ES002IP:9200", "ES003IP:9200"]
    index => "logstash-nginx-%{+YYYY.MM.dd}"
    template_name   => "logstash-nginx" # must match the template created above
    manage_template => false            # the template is managed manually
    action          => "create"         # the create action is required here
  }
}
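Before wiring it into the stack, you can sanity-check the pipeline syntax (a sketch, assuming Docker and the files listed above sit in the current directory):

docker run --rm \
  -v $PWD/logstash.conf:/usr/share/logstash/pipeline/logstash.conf \
  -v $PWD/GeoLite2-City.mmdb:/usr/share/logstash/GeoLite2-City.mmdb \
  docker.elastic.co/logstash/logstash:8.12.2 \
  logstash -f /usr/share/logstash/pipeline/logstash.conf --config.test_and_exit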
logstash.yml:

http.host: "0.0.0.0"
xpack.monitoring.elasticsearch.hosts: [ "ES001IP:9200", "ES002IP:9200", "ES003IP:9200" ]
docker-compose.yml for Logstash:

version: '3.8'
networks:
  logstash-network:
    driver: bridge

services:
  logstash:
    image: docker.elastic.co/logstash/logstash:8.12.2
    restart: always
    volumes:
      - ./logstash.conf:/usr/share/logstash/pipeline/logstash.conf
      - ./logstash.yml:/usr/share/logstash/config/logstash.yml
      - ./GeoLite2-City.mmdb:/usr/share/logstash/GeoLite2-City.mmdb
    ports:
      - "5044:5044"
    networks:
      - logstash-network
    deploy:
      resources:
        limits:
          cpus: '1.0'
          memory: 2.0G
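Once Logstash is consuming from Redis, confirm that the daily indices are being created and documents are arriving:

curl "http://ES001IP:9200/_cat/indices/logstash-nginx-*?v"
curl "http://ES001IP:9200/logstash-nginx-*/_count?pretty"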
Grafana: import and adjust the dashboard
You can use my adjusted dashboard directly:
Grafana dashboard adjusted for ES 8.x
Or start from dashboard 11190 and adjust it yourself; some fields need updating, because 8.x nests one level deeper than 7.x,
e.g. geoip.ip becomes geoip.geo.ip. Below is the adjusted JSON, with a few personal tweaks.
–
The heatmap panel took me quite a while to get right; the final configuration is as follows.
Result screenshots
Alert rules
[ALERTING] {{$labels.domain}} domain/URL monitoring alert triggered
▶ Alert type: {{$labels.alert_type}}
▶ URL: {{$labels.url}}
▶ Metric description: {{$labels.message}}
▶ Trigger time: {{$time | date: "YYYY-MM-DD HH:mm:ss"}}