Prometheus + Grafana + oracledb_exporter: Monitoring an Oracle Database

Published: 2022-12-06

Preface: The Oracle service was originally installed on Windows Server 2016. I spent several days trying to install oracledb_exporter (and Docker) directly on Windows without success, hitting one error after another, so in the end I simply ran the oracledb_exporter Docker container on the monitoring host and connected to the database remotely.

Official references:

GitHub: https://github.com/iamseth/oracledb_exporter (Prometheus Oracle database exporter)

https://hub.docker.com/r/iamseth/oracledb_exporter

Steps:

My directory layout:

[root@localhost oracledb_exporter]# pwd
/data/oracledb_exporter
[root@localhost oracledb_exporter]# ll
total 16
-rw-r--r--. 1 root root 1347 Sep 27 09:22 docker-compose.yml
-rw-r--r--. 1 root root 2510 Sep 27 09:11 metrics-normal.toml
-rw-r--r--. 1 root root 4170 Sep 27 09:10 metrics-static.toml

 

1. Create docker-compose.yml

services:
  # Collects the slow-SQL metrics: full table scans, logical reads, physical reads, etc.
  # These queries are expensive, so Prometheus scrapes this exporter every 10 minutes.
  oracledb_exporter_custom:
    image: iamseth/oracledb_exporter:latest
    container_name: oracledb_exporter_static
    hostname: oracledb_exporter_custom
    restart: on-failure
    ports:
      - "9161:9161"
    volumes:
      - "/data/oracledb_exporter/metrics-static.toml:/default-metrics.toml"
    command: --default.metrics="/default-metrics.toml" --log.level="debug"
    environment:
      - TZ=Asia/Shanghai
      - DATA_SOURCE_NAME=<user>/<password>@<ip>:<port>/<service_name>
      # Query timeout in seconds for oracledb_exporter's queries against Oracle.
      # The logical-read, physical-read and full-scan queries run long and time out
      # easily, so set this generously.
      - QUERY_TIMEOUT=180

  # Regular Oracle system-performance queries; Prometheus scrapes this exporter every 2 minutes.
  oracledb_exporter_default:
    image: iamseth/oracledb_exporter:latest
    container_name: oracledb_exporter_normal
    hostname: oracledb_exporter_default
    restart: on-failure
    ports:
      - "9162:9161"
    volumes:
      - "/data/oracledb_exporter/metrics-normal.toml:/default-metrics.toml"
    command: --default.metrics="/default-metrics.toml"
    environment:
      - TZ=Asia/Shanghai
      - DATA_SOURCE_NAME=<user>/<password>@<ip>:<port>/<service_name>
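DATA_SOURCE_NAME uses the user/password@host:port/service_name form shown in the placeholders above. A filled-in example (all values here are purely illustrative, substitute your own):

- DATA_SOURCE_NAME=monitor_user/monitor_pass@192.168.101.19:1521/ORCL

Note that the queries in the metrics files below read V$ and DBA_ views (V$SQL, DBA_SEGMENTS, DBA_HIST_*, etc.), so the account used for the connection needs SELECT privileges on them.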

2. Create metrics-normal.toml

[[metric]]
context = "sessions"
labels = [ "status", "type" ]
metricsdesc = { value= "Gauge metric with count of sessions by status and type." }
request = "SELECT status, type, COUNT(*) as value FROM v$session GROUP BY status, type"

[[metric]]
context = "resource"
labels = [ "resource_name" ]
metricsdesc = { current_utilization= "Generic counter metric from v$resource_limit view in Oracle (current value).", limit_value="Generic counter metric from v$resource_limit view in Oracle (UNLIMITED: -1)." }
request="SELECT resource_name,current_utilization,CASE WHEN TRIM(limit_value) LIKE 'UNLIMITED' THEN '-1' ELSE TRIM(limit_value) END as limit_value FROM v$resource_limit"

#[[metric]]
#context = "asm_diskgroup"
#labels = [ "name" ]
#metricsdesc = { total = "Total size of ASM disk group.", free = "Free space available on ASM disk group." }
#request = "SELECT name,total_mb*1024*1024 as total,free_mb*1024*1024 as free FROM v$asm_diskgroup_stat where exists (select 1 from v$datafile where name like '+%')"
#ignorezeroresult = true

[[metric]]
context = "activity"
metricsdesc = { value="Generic counter metric from v$sysstat view in Oracle." }
fieldtoappend = "name"
request = "SELECT name, value FROM v$sysstat WHERE name IN ('parse count (total)', 'execute count', 'user commits', 'user rollbacks')"

[[metric]]
context = "process"
metricsdesc = { count="Gauge metric with count of processes." }
request = "SELECT COUNT(*) as count FROM v$process"

[[metric]]
context = "wait_time"
metricsdesc = { value="Generic counter metric from v$waitclassmetric view in Oracle." }
fieldtoappend= "wait_class"
request = '''
SELECT
  n.wait_class as WAIT_CLASS,
  round(m.time_waited/m.INTSIZE_CSEC,3) as VALUE
FROM
  v$waitclassmetric  m, v$system_wait_class n
WHERE
  m.wait_class_id=n.wait_class_id AND n.wait_class != 'Idle'
'''

[[metric]]
context = "tablespace"
labels = [ "tablespace", "type" ]
metricsdesc = { bytes = "Generic counter metric of tablespaces bytes in Oracle.", max_bytes = "Generic counter metric of tablespaces max bytes in Oracle.", free = "Generic counter metric of tablespaces free bytes in Oracle." }
request = '''
SELECT
    dt.tablespace_name as tablespace,
    dt.contents as type,
    dt.block_size * dtum.used_space as bytes,
    dt.block_size * dtum.tablespace_size as max_bytes,
    dt.block_size * (dtum.tablespace_size - dtum.used_space) as free
FROM  dba_tablespace_usage_metrics dtum, dba_tablespaces dt
WHERE dtum.tablespace_name = dt.tablespace_name
ORDER by tablespace
'''
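Each [[metric]] entry is exposed as oracledb_<context>_<column name>, with the configured labels attached. Roughly what a scrape of this file yields (values and label combinations are illustrative):

oracledb_sessions_value{status="ACTIVE",type="USER"} 42
oracledb_resource_current_utilization{resource_name="processes"} 310
oracledb_tablespace_free{tablespace="USERS",type="PERMANENT"} 5.24288e+08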

3. Create metrics-static.toml

# Physical reads
[[metric]]
context = "physical_disk"
labels = ["sql_id", "sql_text"]
metricsdesc = { reads = "Total disk reads.", exec = "Executions.", reads_exec = "Disk reads per execution." }
request = """
SELECT distinct
       A.SQL_ID as sql_id,
       B.SQL_TEXT as sql_text,
       A.DISK_READS AS reads,
       A.EXECUTIONS AS exec,
       round(A.DISK_READS / A.EXECUTIONS) AS reads_exec
  FROM V$SQLAREA A, V$SQL B
 WHERE A.DISK_READS > 1000
   AND A.EXECUTIONS > 200
   AND A.sql_id = B.sql_id
   AND A.MODULE = 'JDBC Thin Client'
   AND A.DISK_READS / A.EXECUTIONS > 500
"""

# Logical reads
[[metric]]
context = "logical_read"
labels = ["sql_id", "sql_text"]
metricsdesc = { gets_exec = "Buffer gets per execution.", buffer_gets = "Total buffer gets.", exec = "Executions." }
request = """
SELECT *
  FROM (
        SELECT  distinct
                A.SQL_ID,
                B.SQL_TEXT as sql_text,
                ROUND(A.BUFFER_GETS / A.EXECUTIONS) AS gets_exec,
                A.BUFFER_GETS as buffer_gets,
                A.EXECUTIONS as exec
          FROM V$SQLAREA A, V$SQL B
         WHERE A.BUFFER_GETS > 1000
           AND A.sql_id = B.sql_id
           AND A.MODULE = 'JDBC Thin Client'
           AND A.BUFFER_GETS / A.EXECUTIONS > 2000
           AND A.EXECUTIONS > 200
         ORDER BY gets_exec DESC)
 WHERE ROWNUM <= 10
"""

#[[metric]]
#context = "librarycache"
#metricsdesc = { value="Generic counter metric from V$LIBRARYCACHE view in Oracle."  }
#fieldtoappend= "name"
#request = "SELECT to_char('librarycache_hit_rate') as name, round(SUM(PINHITS) / SUM(PINS), 3) AS VALUE FROM V$LIBRARYCACHE"

[[metric]]
context = "parse"
labels = ["name"]
metricsdesc = { value= "Soft and hard analysis." }
request = "SELECT NAME as name, VALUE as value FROM V$SYSSTAT WHERE NAME LIKE 'parse%'"

# Shared pool
[[metric]]
context = "shared_pool"
labels = ["pool", "name"]
metricsdesc = { bytes= "shared pool bytes." }
request = """
SELECT POOL as pool,
       Name as name,
       BYTES as bytes
  FROM V$SGASTAT A
 WHERE A.POOL = 'shared pool'
   AND (A.NAME = 'free memory' OR A.NAME = 'row cache')
"""

#[[metric]]
#context = "row_cache"
#metricsdesc = { gets= "gets.", getmisses= "getmisses.", hit_rate= "hit rate." }
#request = "SELECT SUM(GETS) as gets, SUM(GETMISSES) as getmisses, 100 * SUM(GETS - GETMISSES) / SUM(GETS) AS hit_rate FROM V$ROWCACHE"

# Full table scans
[[metric]]
context = "full_access"
labels = ["sql_id", "sql_text", "obj_owner", "ogj_name"]
metricsdesc = { size_m= "size m.", all_exec= "all executions.", max_exec= "max executions." }
request = """
SELECT *
  FROM (SELECT distinct C.SQL_ID AS sql_id,
               D.SQL_TEXT as sql_text,
               OBJECT_OWNER AS obj_owner,
               OBJECT_NAME AS obj_name,
               ROUND(B.BYTES / 1024 / 1024, 0) AS size_m,
               SUM(EXECUTIONS_DELTA) AS all_exec,
               MAX(EXECUTIONS_DELTA) AS max_exec,
               ROUND(SUM(EXECUTIONS_DELTA) * B.BYTES / 1024 / 1024 / 1024 / 800,0) AS all_io_large,
               ROUND(MAX(EXECUTIONS_DELTA) * B.BYTES / 1024 / 1024 / 1024 / 40,0) AS max_io_large
          FROM DBA_HIST_SQL_PLAN A, 
               (SELECT OWNER, SEGMENT_NAME, SUM(BYTES) BYTES FROM DBA_SEGMENTS GROUP BY OWNER, SEGMENT_NAME) B,
               DBA_HIST_SQLSTAT C,  V$SQL D
         WHERE A.OPERATION = 'TABLE ACCESS'
           AND A.OPTIONS = 'FULL'
           AND A.OBJECT_OWNER IN (SELECT USERNAME FROM DBA_USERS WHERE ACCOUNT_STATUS = 'OPEN' AND USERNAME NOT IN ('SYS', 'SYSMAN', 'SYSTEM', 'PATROL', 'MDSYS', 'ORDDATA', 'OLAPSYS', 'EXFSYS'))
           AND B.SEGMENT_NAME = A.OBJECT_NAME
           AND B.OWNER = A.OBJECT_OWNER
           AND B.BYTES > 1024 * 1024 * 10
           AND C.SQL_ID = A.SQL_ID
           AND C.PARSING_SCHEMA_NAME NOT IN ('SYS', 'SYSMAN', 'SYSTEM', 'PATROL', 'MDSYS', 'ORDDATA', 'OLAPSYS', 'EXFSYS')
           AND EXECUTIONS_DELTA > 100
           AND C.SNAP_ID > (SELECT MIN(SNAP_ID) FROM DBA_HIST_SNAPSHOT WHERE BEGIN_INTERVAL_TIME > SYSDATE - 7)
           AND A.SQL_ID = D.SQL_ID
         GROUP BY C.SQL_ID, D.SQL_TEXT, OBJECT_NAME, OBJECT_OWNER,B.BYTES
         ORDER BY ROUND(B.BYTES / 1024 / 1024, 0) DESC, OBJECT_NAME)
 WHERE ROWNUM <= 30
"""

4. Run docker-compose up -d in that directory

# Install docker-compose (if it is not installed yet) and make it executable
curl -L https://get.daocloud.io/docker/compose/releases/download/v2.4.1/docker-compose-`uname -s`-`uname -m` > /usr/local/bin/docker-compose
chmod +x /usr/local/bin/docker-compose
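After docker-compose up -d it is worth confirming that both containers are running and checking their logs for connection errors (the container names are the ones set in the compose file above):

docker-compose ps
docker logs --tail 50 oracledb_exporter_normal
docker logs --tail 50 oracledb_exporter_static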

5. Once the containers have started successfully, visit [ip]:9161 and [ip]:9162 to check that the exporters respond normally.
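A quick check from the shell also works; the exporter exposes an oracledb_up gauge that should read 1 when the database connection succeeds (the IP below is illustrative):

curl -s http://192.168.101.19:9161/metrics | grep oracledb_up
curl -s http://192.168.101.19:9162/metrics | grep oracledb_up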

6. Edit prometheus.yml and add the following configuration. Watch the indentation; a mistake here can make Prometheus fail to start.


  - job_name: 'oracledb'
    static_configs:
      - targets: ['192.168.101.19:9161','192.168.101.19:9162']
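The comments in docker-compose.yml assume different scrape periods for the two exporters (10 minutes for the heavy slow-SQL queries, 2 minutes for the regular ones). A single job only has one interval, so one way to express that is to split the targets into two jobs, sketched below; the scrape_timeout for the heavy job is raised to accommodate the long-running queries:

  - job_name: 'oracledb_static'
    scrape_interval: 10m
    scrape_timeout: 4m
    static_configs:
      - targets: ['192.168.101.19:9161']
  - job_name: 'oracledb_normal'
    scrape_interval: 2m
    static_configs:
      - targets: ['192.168.101.19:9162']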

7. Restart Prometheus. I run it in Docker, so docker restart <container> is enough.
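Alternatively, if Prometheus was started with --web.enable-lifecycle, the configuration can be reloaded without restarting the container:

curl -X POST http://localhost:9090/-/reload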

8. Open [ip]:9090 and check the status of the two targets you just added.
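On the Status > Targets page both endpoints should show UP. The same can be checked with a query such as this (the job names depend on how prometheus.yml was written):

up{job=~"oracledb.*"}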

 

9. Finally, add a Grafana dashboard. I found a Chinese-language one on the dashboard marketplace with ID 11121; import it to get the final result.

Most of the configuration above is adapted from the CSDN blog post "oracledb_exporter安装配置和避免踩坑小技巧" by ZacharyBacon.
