ELK部署与使用详解

发布于:2025-07-15 ⋅ 阅读:(20) ⋅ 点赞:(0)

1 部署 ELK Stack 监控 Java 服务

下面是一个完整的 ELK(Elasticsearch, Logstash, Kibana)部署方案,用于监控和分析您的 Java 服务(plugin-manager)日志:

1.1 完整的 docker-compose.yml

version: '3.8'

services:
  # The Java service being monitored
  plugin-manager:
    image: openjdk:8-jre-alpine
    container_name: plugin-manager
    command: ["java", "-jar", "/app/jars/plugin-manager-1.0.0.jar"]
    volumes:
      - ./jars:/app/jars
    ports:
      - "8080:8080"
    environment:
      TZ: Asia/Shanghai
      JAVA_OPTS: "-Xmx512m -Dfile.encoding=UTF-8"
    restart: unless-stopped
    networks:
      - elk-network
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "3"
        tag: "plugin-manager"

  # Elasticsearch
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:8.9.0
    container_name: elasticsearch
    environment:
      - discovery.type=single-node
      - ES_JAVA_OPTS=-Xms1g -Xmx1g
      - bootstrap.memory_lock=true
      - xpack.security.enabled=false # simplified setup; enable security in production
    ulimits:
      memlock:
        soft: -1
        hard: -1
    volumes:
      - es-data:/usr/share/elasticsearch/data
    ports:
      - "9200:9200"
    networks:
      - elk-network
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9200"]
      interval: 30s
      timeout: 10s
      retries: 5

  # Logstash
  logstash:
    image: docker.elastic.co/logstash/logstash:8.9.0
    container_name: logstash
    volumes:
      - ./logstash-config:/usr/share/logstash/pipeline
    environment:
      LS_JAVA_OPTS: "-Xmx1g -Xms1g"
    depends_on:
      - elasticsearch
    networks:
      - elk-network
    ports:
      # Must match the beats input port declared in logstash.conf (5044).
      # Publishing 5000 was a bug: no pipeline input listens on that port.
      - "5044:5044"
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9600"]
      interval: 30s
      timeout: 10s
      retries: 5

  # Kibana
  kibana:
    image: docker.elastic.co/kibana/kibana:8.9.0
    container_name: kibana
    ports:
      - "5601:5601"
    environment:
      ELASTICSEARCH_HOSTS: "http://elasticsearch:9200"
    depends_on:
      - elasticsearch
    networks:
      - elk-network
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:5601/api/status"]
      interval: 30s
      timeout: 10s
      retries: 5

  # Filebeat log shipper
  filebeat:
    image: docker.elastic.co/beats/filebeat:8.9.0
    container_name: filebeat
    user: root
    volumes:
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - ./filebeat-config/filebeat.yml:/usr/share/filebeat/filebeat.yml
    depends_on:
      - elasticsearch
      - logstash
    networks:
      - elk-network
    healthcheck:
      test: ["CMD", "filebeat", "test", "output"]
      interval: 30s
      timeout: 10s
      retries: 5

networks:
  elk-network:
    name: elk-network
    driver: bridge

volumes:
  es-data:
    driver: local

1.2 配置文件

1.2.1 Logstash 管道配置 (logstash-config/logstash.conf)

input {
  # Receive events from Filebeat (output.logstash in filebeat.yml points here)
  beats {
    port => 5044
  }
}

filter {
  # Parse the Docker json-file log wrapper from the raw message
  json {
    source => "message"
    target => "docker"
  }
  
  # If the inner log line itself looks like JSON, parse it too
  # (assumes the Java app emits structured JSON logs — TODO confirm)
  if [docker][log] =~ "^\{.*\}$" {
    json {
      source => "[docker][log]"
      target => "log"
    }
  }
  
  # Extract the log level from the nested message
  # NOTE(review): if [log][message] is absent or does not start with a
  # level token, this grok tags the event with _grokparsefailure
  grok {
    match => { "[log][message]" => "%{LOGLEVEL:log_level} %{GREEDYDATA:log_message}" }
  }
  
  # Use the application's timestamp as the event time
  # NOTE(review): expects a top-level "timestamp" field — verify it exists
  # after the json filters above, otherwise @timestamp stays at ingest time
  date {
    match => [ "timestamp", "ISO8601" ]
    target => "@timestamp"
  }
  
  # Tag each event with the originating container name
  mutate {
    add_field => { "service" => "%{[docker][container][name]}" }
  }
}

output {
  # Daily index, matched by the "logs-*" Kibana index pattern
  elasticsearch {
    hosts => ["elasticsearch:9200"]
    index => "logs-%{+YYYY.MM.dd}"
  }
  
  # Debug only: echo processed events to the Logstash console
  stdout {
    codec => rubydebug
  }
}

1.2.2 Filebeat 配置 (filebeat-config/filebeat.yml)

# Collect logs ONLY from the plugin-manager container via autodiscover.
# The previous version also declared a blanket filebeat.inputs container
# input over /var/lib/docker/containers/*/*.log, which collected every
# container's logs AND duplicated each plugin-manager event.
filebeat.autodiscover:
  providers:
    - type: docker
      templates:
        - condition:
            contains:
              docker.container.name: "plugin-manager"
          config:
            - type: container
              paths:
                - "/var/lib/docker/containers/${data.docker.container.id}/*.log"
              processors:
                # Enrich events with container name/image/labels
                - add_docker_metadata: ~

# Ship to Logstash on the beats input port defined in logstash.conf
output.logstash:
  hosts: ["logstash:5044"]

logging.level: info
logging.to_files: true
logging.files:
  path: /var/log/filebeat
  name: filebeat
  keepfiles: 7

1.3 部署步骤

  1. 创建配置目录
mkdir -p {logstash-config,filebeat-config}
  2. 创建配置文件
# 创建 Logstash 配置
cat > logstash-config/logstash.conf <<EOF
[上面的 Logstash 配置内容]
EOF

# 创建 Filebeat 配置
cat > filebeat-config/filebeat.yml <<EOF
[上面的 Filebeat 配置内容]
EOF
  3. 调整目录权限
chmod 644 filebeat-config/filebeat.yml
chmod go-w filebeat-config/filebeat.yml
  4. 启动所有服务
docker-compose up -d

1.4 验证部署

1.4.1 检查服务状态

docker-compose ps

# 期望输出
Name                     Command                  State                     Ports                   
-------------------------------------------------------------------------------------------------
elasticsearch   /bin/tini -- /usr/local/bi ...   Up (healthy)   0.0.0.0:9200->9200/tcp, 9300/tcp
filebeat        /usr/local/bin/docker-entr ...   Up                                               
kibana          /bin/tini -- /usr/local/bi ...   Up (healthy)   0.0.0.0:5601->5601/tcp           
logstash        /usr/local/bin/docker-entr ...   Up (healthy)   0.0.0.0:5000->5000/tcp, 5044/tcp, 9600/tcp
plugin-manager  java -jar /app/jars/plugi ...   Up             0.0.0.0:8080->8080/tcp            

1.4.2 测试日志收集

  1. 生成一些应用日志:
curl http://localhost:8080/actuator/health
  2. 检查 Elasticsearch 索引:
curl -X GET "localhost:9200/_cat/indices?v"

# 期望看到类似输出
health status index            uuid                   pri rep docs.count docs.deleted store.size pri.store.size
yellow open   logs-2023.07.15  abcdefgh1234567890      1   1          5            0     25.5kb         25.5kb

1.5 Kibana 仪表板配置

  1. 访问 Kibana: http://localhost:5601

  2. 创建索引模式

    • 导航到 Management > Stack Management > Kibana > Index Patterns
    • 创建新索引模式:logs-*
    • 选择时间字段:@timestamp
  3. 查看日志

    • 导航到 Analytics > Discover
    • 选择 logs-* 索引模式
    • 查看和搜索日志
  4. 创建仪表板(可选)

    • 导航到 Analytics > Dashboard > Create dashboard
    • 添加可视化组件:
      • 日志级别分布(饼图)
      • 服务错误率(指标)
      • 日志时间序列(面积图)

1.6 高级功能

1.6.1 错误告警配置

在 Kibana 中设置错误告警:

  1. 导航到 Management > Stack Management > Rules and Connectors
  2. 创建新规则:
    • 条件:当日志包含 “ERROR” 时
    • 操作:发送邮件或 Slack 通知

1.6.2 JVM 指标监控

在 Java 应用中添加 Micrometer 依赖:

<dependency>
    <groupId>io.micrometer</groupId>
    <artifactId>micrometer-registry-prometheus</artifactId>
    <version>1.11.2</version>
</dependency>

配置 Metricbeat 抓取指标(注意:Filebeat 的 http_endpoint 输入是被动接收 HTTP 推送的监听器,不能主动抓取 /actuator/prometheus;定时拉取 Prometheus 端点应使用 Metricbeat 的 prometheus 模块):

metricbeat.modules:
- module: prometheus
  period: 10s
  hosts: ["plugin-manager:8080"]
  metrics_path: /actuator/prometheus
  fields:
    type: "metrics"

1.6.3 APM 集成(应用性能监控)

添加 Elastic APM:

# 在 docker-compose.yml 中添加 APM 服务
apm-server:
  image: docker.elastic.co/apm/apm-server:8.9.0
  ports:
    - "8200:8200"
  environment:
    - output.elasticsearch.hosts=["elasticsearch:9200"]
  networks:
    - elk-network

在 Java 应用中配置 APM:

# application.properties
elastic.apm.server_url=http://apm-server:8200
elastic.apm.service_name=plugin-manager
elastic.apm.application_packages=com.yourcompany

1.7 维护与优化

1.7.1 日志保留策略

在 Elasticsearch 中配置 ILM(索引生命周期管理):

# 创建生命周期策略
curl -X PUT "localhost:9200/_ilm/policy/logs-policy" -H 'Content-Type: application/json' -d'
{
  "policy": {
    "phases": {
      "hot": {
        "min_age": "0ms",
        "actions": {
          "rollover": {
            "max_size": "50gb",
            "max_age": "30d"
          }
        }
      },
      "delete": {
        "min_age": "60d",
        "actions": {
          "delete": {}
        }
      }
    }
  }
}'

1.7.2 性能调优

  1. Elasticsearch:

    environment:
      - ES_JAVA_OPTS=-Xms4g -Xmx4g
      - indices.query.bool.max_clause_count=10000
    
  2. Logstash:

    # 在 logstash.conf 中添加
    pipeline:
      workers: 4
      batch.size: 125
    
  3. Filebeat:

    queue:
      mem:
        events: 4096
        flush.min_events: 512
    

1.8 故障排除

1.8.1 常见问题解决

  1. 日志未显示在 Kibana 中

    # 检查 Filebeat 日志
    docker logs filebeat
    
    # 测试 Logstash 连接
    docker exec filebeat filebeat test output
    
  2. Elasticsearch 磁盘空间不足

    # 清理旧索引
    curl -X DELETE "localhost:9200/logs-2023.06.*"
    
    # 调整存储策略
    curl -X PUT "localhost:9200/_cluster/settings" -H 'Content-Type: application/json' -d'
    {
      "persistent": {
        "cluster.routing.allocation.disk.watermark.low": "85%",
        "cluster.routing.allocation.disk.watermark.high": "90%"
      }
    }'
    
  3. 高负载下的性能问题

    # 查看热点线程
    curl "localhost:9200/_nodes/hot_threads?threads=10"
    
    # 调整索引刷新间隔
    curl -X PUT "localhost:9200/logs-*/_settings" -H 'Content-Type: application/json' -d'
    {
      "index": {
        "refresh_interval": "30s"
      }
    }'
    

1.9 安全加固

  1. 启用 Elasticsearch 安全功能

    environment:
      - xpack.security.enabled=true
      - ELASTIC_PASSWORD=YourSecurePassword
    
  2. 配置 HTTPS

    # 生成证书
    docker run --rm -v $(pwd)/certs:/certs elasticsearch:8.9.0 \
      bin/elasticsearch-certutil ca --pass secret --out /certs/elastic-stack-ca.p12
    
    # 在配置中引用证书
    volumes:
      - ./certs:/usr/share/elasticsearch/config/certs
    environment:
      - xpack.security.http.ssl.enabled=true
      - xpack.security.http.ssl.keystore.path=certs/elastic-stack-ca.p12
    
  3. 网络隔离

    networks:
      elk-network:
        internal: true
    

此 ELK Stack 部署提供了完整的日志监控解决方案,从日志收集、处理到可视化分析。您可以根据实际需求调整资源配置和保留策略,对于生产环境,强烈建议启用安全功能并配置适当的访问控制。

2 elk使用

2.1 Docker 容器日志查看(基础)

2.1.1 查看单个容器日志

# 查看 plugin-manager 容器日志
docker logs plugin-manager

# 实时跟踪日志
docker logs -f --tail 100 plugin-manager

# 查看最近5分钟日志
docker logs --since 5m plugin-manager

2.1.2 查看所有容器日志

# 一键查看所有容器日志
docker-compose logs -f --tail=50

# 按服务名过滤
docker-compose logs -f plugin-manager

2.1.3 高级日志筛选

# 过滤包含 ERROR 的日志行
docker logs plugin-manager 2>&1 | grep ERROR

# 使用 jq 解析 JSON 日志
docker logs plugin-manager --tail 100 | jq .

2.2 ELK 日志查看(生产级方案)

2.2.1 Kibana 控制台访问

  1. 打开浏览器访问:http://<服务器IP>:5601

  2. 进入 Discover 页面

  3. 创建索引模式:logs-*(时间字段选择 @timestamp)

  4. 使用 KQL 查询:

    service : "plugin-manager" and log_level : "ERROR"
    

2.2.2 常用 Kibana 查询示例

查询目的 KQL 语句
特定服务 service : "plugin-manager"
错误日志 log_level : "ERROR"
时间范围 @timestamp >= now-15m
日志内容 message : "TimeoutException"
组合查询 service : "plugin-manager" and log_level : "WARN"

2.2.3 创建仪表板

  1. 进入 Dashboard → Create dashboard
  2. 添加可视化组件:
    • 日志级别分布(饼图)
    • 请求延迟百分位(直方图)
    • 异常趋势(时间序列)

2.3 命令行高级诊断

2.3.1 ELK 服务状态检查

# 检查 Elasticsearch 健康状态
curl -XGET 'http://localhost:9200/_cluster/health?pretty'

# 查看 Logstash 管道状态
curl -XGET 'http://localhost:9600/_node/stats/pipelines?pretty'

# Filebeat 内部指标
docker exec filebeat filebeat test output

2.3.2. 日志文件直查

# 查看 Filebeat 收集的原始日志
docker exec filebeat cat /var/log/filebeat/filebeat

# 检查 Logstash 处理日志
docker exec logstash tail -f /usr/share/logstash/logs/logstash-plain.log

# Elasticsearch 索引内容
curl -XGET 'http://localhost:9200/logs-2023.07.15/_search?q=service:plugin-manager&pretty'

2.4 容器日志持久化方案

2.4.1. Docker 日志驱动配置

# docker-compose.yml
services:
  plugin-manager:
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "5"
        tag: "plugin-manager"

2.4.2. ELK 日志收集路径

# 查看容器日志存储位置
docker inspect plugin-manager | grep LogPath

# 典型路径:
/var/lib/docker/containers/<container-id>/<container-id>-json.log

2.4.3. 日志轮转配置

# 创建 logrotate 配置
sudo tee /etc/logrotate.d/docker <<EOF
/var/lib/docker/containers/*/*.log {
  daily
  rotate 7
  size 100M
  compress
  delaycompress
  missingok
  copytruncate
}
EOF

2.5 故障排查专项命令

2.5.1. 容器启动失败

# 查看最后50行启动日志
docker logs --tail 50 plugin-manager

# 检查容器退出码
docker inspect plugin-manager | jq '.[0].State.ExitCode'

2.5.2. ELK 管道阻塞

# 检查 Logstash 队列
curl -s 'http://localhost:9600/_node/stats/pipelines?pretty' | jq '.pipelines.main.queue'

# Filebeat 积压情况
docker exec filebeat filebeat export monitoring | jq '.filebeat.events.active'

2.5.3. 日志丢失诊断

# 检查收集点
docker exec filebeat ls -lh /var/lib/docker/containers/*/*.log

# 验证 Logstash 接收
tcpdump -i any port 5044 -A | grep 'plugin-manager'

2.6 可视化增强方案

2.6.1. 安装 Kibana 插件

# 进入 Kibana 容器
docker exec -it kibana /bin/bash

# 安装日志增强插件
# 注意:LogTrail 插件已停止维护,最高仅提供 Kibana 7.x 版本,
# 无法安装到 Kibana 8.x;在 8.x 中请直接使用内置的 Logs / Discover 功能
bin/kibana-plugin install https://github.com/sivasamyk/logtrail/releases/download/v0.1.31/logtrail-7.10.0-0.1.31.zip

2.6.2. Logtrail 配置

// kibana.yml
logtrail:
  index_patterns: [
    { pattern: 'logs-*', default: true }
  ]
  search_bar: true

2.6.3. Grafana 集成

# docker-compose 添加
grafana:
  image: grafana/grafana
  ports:
    - "3000:3000"
  environment:
    GF_INSTALL_PLUGINS: grafana-clock-panel,grafana-simple-json-datasource

2.7 日志查看速查表

场景 命令
实时跟踪 docker-compose logs -f --tail=100
错误过滤 docker logs plugin-manager 2>&1 | grep -A 5 -B 5 ERROR
时间范围 docker logs --since "2023-07-15T00:00:00" --until "2023-07-16T00:00:00" plugin-manager
JSON 解析 docker logs plugin-manager | jq -R 'fromjson?'
ELK 健康检查 curl -s 'http://localhost:9200/_cat/indices?v'
日志文件位置 docker inspect plugin-manager | jq -r '.[0].LogPath'

注意事项

  1. 日志量控制:生产环境启用日志轮转,避免磁盘爆满
  2. 敏感信息:不要在日志中记录密码、密钥等敏感数据
  3. 权限管理:限制访问 Kibana 控制台,使用 RBAC 授权
  4. 性能影响:避免长期开启 DEBUG 级别日志

通过以上方法,您可以全面掌握从基础到高级的日志查看技术。对于生产环境,建议优先使用 Kibana 进行日志分析,它提供强大的搜索、过滤和可视化能力,同时保存历史记录便于回溯分析。

2.8 其他容器服务接入elk中

services:
  plugin-manager:
    image: plugin-manager:1.0
    container_name: plugin-manager-service
    restart: unless-stopped   # restart automatically on abnormal exit
    ports:
      - "8088:8088"          # host:container
    environment:
      - TZ=Asia/Shanghai     # timezone
      - JAVA_OPTS=-Xmx512m -Xms256m -Dfile.encoding=UTF-8  # JVM options
      - SPRING_PROFILES_ACTIVE=prod  # Spring profile
    volumes:
      - ./logs/:/app/logs   # log persistence (host bind mount)
      - ./config:/app/config:ro         # config directory (read-only)
      - ./jars:/app/jars  # hot-load directory for JARs
      - ./file:/app/file
    command: ["java", "-jar", "/app/jars/plugin-manager-1.0.0.jar"]
    networks:
      - elk-network
    deploy:   # resource limits (required for production)
      resources:
        limits:
          cpus: '1.0'
          memory: 768M
        reservations:
          memory: 512M
    # ELK log integration: json-file driver so Filebeat can pick the logs up
    logging:
      driver: "json-file"
      options:
        max-size: "10m"
        max-file: "5"
        tag: "plugin-manager"

# Join the pre-existing ELK network
networks:
  elk-network:
    name: elk-network  # bind to the actual network name
    external: true  # declared elsewhere (the ELK stack's compose file)

# NOTE: the original file also declared a named volume
# "plugin-manager-logs" that no service mounted; logs are already
# persisted via the ./logs bind mount above, so it was removed.


网站公告

今日签到

点亮在社区的每一天
去签到