目录
一、JMX 简介
官网:Apache HBase ™ Reference Guide
JMX(Java Management Extensions,Java 管理扩展)提供了内置的工具,用于监视和管理 Java VM。要启用对远程系统的监视和管理,需要在启动 Java VM 时设置系统属性 com.sun.management.jmxremote.port(即用于接受 JMX RMI 连接的端口号)。
访问(除 JMX RMI 端口外,HBase 的指标也可以直接通过 RegionServer Web UI 端口(本例为 16030)的 /jmx HTTP 接口以 JSON 形式获取):
curl 'http://hdp-node2:16030/jmx?qry=Hadoop:service=HBase,name=RegionServer,sub=Server'
输出的指标如下:
{
"beans" : [ {
"name" : "Hadoop:service=HBase,name=RegionServer,sub=Server",
"modelerType" : "RegionServer,sub=Server",
"tag.zookeeperQuorum" : "hdp-node2:2181,hdp-node3:2181,hdp-node1:2181",
"tag.serverName" : "hdp-node2,16020,1738720067137",
"tag.clusterId" : "85aa06c7-b28c-41fd-aa17-a49376641751",
"tag.Context" : "regionserver",
"tag.Hostname" : "hdp-node2",
"regionCount" : 34,
"storeCount" : 51,
"hlogFileCount" : 1,
"hlogFileSize" : 0,
"storeFileCount" : 31,
"memStoreSize" : 0,
"storeFileSize" : 212110208,
"maxStoreFileAge" : 23910274739,
"minStoreFileAge" : 595075791,
"avgStoreFileAge" : 12083745007,
"numReferenceFiles" : 0,
"regionServerStartTime" : 1738720067137,
"averageRegionSize" : 6238535,
"storeFileIndexSize" : 529736,
"staticIndexSize" : 1348988,
"staticBloomSize" : 2438222,
"mutationsWithoutWALCount" : 0,
"mutationsWithoutWALSize" : 0,
"percentFilesLocal" : 100.0,
"percentFilesLocalSecondaryRegions" : 0.0,
"splitQueueLength" : 0,
"compactionQueueLength" : 0,
"smallCompactionQueueLength" : 0,
"largeCompactionQueueLength" : 0,
"flushQueueLength" : 0,
"blockCacheFreeSize" : 1716727624,
"blockCacheCount" : 0,
"blockCacheSize" : 1259320,
"blockCacheCountHitPercent" : 0.0,
"blockCacheExpressHitPercent" : 0.0,
"l1CacheHitCount" : 0,
"l1CacheMissCount" : 0,
"l1CacheHitRatio" : 0.0,
"l1CacheMissRatio" : 0.0,
"l2CacheHitCount" : 0,
"l2CacheMissCount" : 0,
"l2CacheHitRatio" : 0.0,
"l2CacheMissRatio" : 0.0,
"mobFileCacheCount" : 0,
"mobFileCacheHitPercent" : 0.0,
"totalRequestCount" : 2,
"totalRowActionRequestCount" : 0,
"readRequestCount" : 0,
"filteredReadRequestCount" : 0,
"writeRequestCount" : 0,
"rpcGetRequestCount" : 0,
"rpcScanRequestCount" : 0,
"rpcMultiRequestCount" : 0,
"rpcMutateRequestCount" : 0,
"checkMutateFailedCount" : 0,
"checkMutatePassedCount" : 0,
"blockCacheHitCount" : 0,
"blockCacheHitCountPrimary" : 0,
"blockCacheMissCount" : 0,
"blockCacheMissCountPrimary" : 0,
"blockCacheEvictionCount" : 0,
"blockCacheEvictionCountPrimary" : 0,
"blockCacheFailedInsertionCount" : 0,
"blockCacheDataMissCount" : 0,
"blockCacheLeafIndexMissCount" : 0,
"blockCacheBloomChunkMissCount" : 0,
"blockCacheMetaMissCount" : 0,
"blockCacheRootIndexMissCount" : 0,
"blockCacheIntermediateIndexMissCount" : 0,
"blockCacheFileInfoMissCount" : 0,
"blockCacheGeneralBloomMetaMissCount" : 0,
"blockCacheDeleteFamilyBloomMissCount" : 0,
"blockCacheTrailerMissCount" : 0,
"blockCacheDataHitCount" : 0,
"blockCacheLeafIndexHitCount" : 0,
"blockCacheBloomChunkHitCount" : 0,
"blockCacheMetaHitCount" : 0,
"blockCacheRootIndexHitCount" : 0,
"blockCacheIntermediateIndexHitCount" : 0,
"blockCacheFileInfoHitCount" : 0,
"blockCacheGeneralBloomMetaHitCount" : 0,
"blockCacheDeleteFamilyBloomHitCount" : 0,
"blockCacheTrailerHitCount" : 0,
"updatesBlockedTime" : 0,
"flushedCellsCount" : 0,
"compactedCellsCount" : 0,
"majorCompactedCellsCount" : 0,
"flushedCellsSize" : 0,
"compactedCellsSize" : 0,
"majorCompactedCellsSize" : 0,
"cellsCountCompactedFromMob" : 0,
"cellsCountCompactedToMob" : 0,
"cellsSizeCompactedFromMob" : 0,
"cellsSizeCompactedToMob" : 0,
"mobFlushCount" : 0,
"mobFlushedCellsCount" : 0,
"mobFlushedCellsSize" : 0,
"mobScanCellsCount" : 0,
"mobScanCellsSize" : 0,
"mobFileCacheAccessCount" : 0,
"mobFileCacheMissCount" : 0,
"mobFileCacheEvictedCount" : 0,
"hedgedReads" : 0,
"hedgedReadWins" : 0,
"blockedRequestCount" : 0,
"MajorCompactionTime_num_ops" : 2,
"MajorCompactionTime_min" : 0,
"MajorCompactionTime_max" : 0,
"MajorCompactionTime_mean" : 0,
"MajorCompactionTime_25th_percentile" : 0,
"MajorCompactionTime_median" : 0,
"MajorCompactionTime_75th_percentile" : 0,
"MajorCompactionTime_90th_percentile" : 0,
"MajorCompactionTime_95th_percentile" : 0,
"MajorCompactionTime_98th_percentile" : 0,
"MajorCompactionTime_99th_percentile" : 0,
"MajorCompactionTime_99.9th_percentile" : 0,
"MajorCompactionTime_TimeRangeCount_600000-inf" : 2,
"PauseTimeWithGc_num_ops" : 0,
"PauseTimeWithGc_min" : 0,
"PauseTimeWithGc_max" : 0,
"PauseTimeWithGc_mean" : 0,
"PauseTimeWithGc_25th_percentile" : 0,
"PauseTimeWithGc_median" : 0,
"PauseTimeWithGc_75th_percentile" : 0,
"PauseTimeWithGc_90th_percentile" : 0,
"PauseTimeWithGc_95th_percentile" : 0,
"PauseTimeWithGc_98th_percentile" : 0,
"PauseTimeWithGc_99th_percentile" : 0,
"PauseTimeWithGc_99.9th_percentile" : 0,
"compactedOutputBytes" : 8924,
"pauseWarnThresholdExceeded" : 0,
"ScanTime_num_ops" : 0,
"ScanTime_min" : 0,
"ScanTime_max" : 0,
"ScanTime_mean" : 0,
"ScanTime_25th_percentile" : 0,
"ScanTime_median" : 0,
"ScanTime_75th_percentile" : 0,
"ScanTime_90th_percentile" : 0,
"ScanTime_95th_percentile" : 0,
"ScanTime_98th_percentile" : 0,
"ScanTime_99th_percentile" : 0,
"ScanTime_99.9th_percentile" : 0,
"Increment_num_ops" : 0,
"Increment_min" : 0,
"Increment_max" : 0,
"Increment_mean" : 0,
"Increment_25th_percentile" : 0,
"Increment_median" : 0,
"Increment_75th_percentile" : 0,
"Increment_90th_percentile" : 0,
"Increment_95th_percentile" : 0,
"Increment_98th_percentile" : 0,
"Increment_99th_percentile" : 0,
"Increment_99.9th_percentile" : 0,
"Delete_num_ops" : 0,
"Delete_min" : 0,
"Delete_max" : 0,
"Delete_mean" : 0,
"Delete_25th_percentile" : 0,
"Delete_median" : 0,
"Delete_75th_percentile" : 0,
"Delete_90th_percentile" : 0,
"Delete_95th_percentile" : 0,
"Delete_98th_percentile" : 0,
"Delete_99th_percentile" : 0,
"Delete_99.9th_percentile" : 0,
"Put_num_ops" : 0,
"Put_min" : 0,
"Put_max" : 0,
"Put_mean" : 0,
"Put_25th_percentile" : 0,
"Put_median" : 0,
"Put_75th_percentile" : 0,
"Put_90th_percentile" : 0,
"Put_95th_percentile" : 0,
"Put_98th_percentile" : 0,
"Put_99th_percentile" : 0,
"Put_99.9th_percentile" : 0,
"DeleteBatch_num_ops" : 0,
"DeleteBatch_min" : 0,
"DeleteBatch_max" : 0,
"DeleteBatch_mean" : 0,
"DeleteBatch_25th_percentile" : 0,
"DeleteBatch_median" : 0,
"DeleteBatch_75th_percentile" : 0,
"DeleteBatch_90th_percentile" : 0,
"DeleteBatch_95th_percentile" : 0,
"DeleteBatch_98th_percentile" : 0,
"DeleteBatch_99th_percentile" : 0,
"DeleteBatch_99.9th_percentile" : 0,
"splitRequestCount" : 0,
"FlushMemstoreSize_num_ops" : 0,
"FlushMemstoreSize_min" : 0,
"FlushMemstoreSize_max" : 0,
"FlushMemstoreSize_mean" : 0,
"FlushMemstoreSize_25th_percentile" : 0,
"FlushMemstoreSize_median" : 0,
"FlushMemstoreSize_75th_percentile" : 0,
"FlushMemstoreSize_90th_percentile" : 0,
"FlushMemstoreSize_95th_percentile" : 0,
"FlushMemstoreSize_98th_percentile" : 0,
"FlushMemstoreSize_99th_percentile" : 0,
"FlushMemstoreSize_99.9th_percentile" : 0,
"CompactionInputFileCount_num_ops" : 2,
"CompactionInputFileCount_min" : 0,
"CompactionInputFileCount_max" : 0,
"CompactionInputFileCount_mean" : 0,
"CompactionInputFileCount_25th_percentile" : 0,
"CompactionInputFileCount_median" : 0,
"CompactionInputFileCount_75th_percentile" : 0,
"CompactionInputFileCount_90th_percentile" : 0,
"CompactionInputFileCount_95th_percentile" : 0,
"CompactionInputFileCount_98th_percentile" : 0,
"CompactionInputFileCount_99th_percentile" : 0,
"CompactionInputFileCount_99.9th_percentile" : 0,
"PutBatch_num_ops" : 0,
"PutBatch_min" : 0,
"PutBatch_max" : 0,
"PutBatch_mean" : 0,
"PutBatch_25th_percentile" : 0,
"PutBatch_median" : 0,
"PutBatch_75th_percentile" : 0,
"PutBatch_90th_percentile" : 0,
"PutBatch_95th_percentile" : 0,
"PutBatch_98th_percentile" : 0,
"PutBatch_99th_percentile" : 0,
"PutBatch_99.9th_percentile" : 0,
"CompactionTime_num_ops" : 2,
"CompactionTime_min" : 0,
"CompactionTime_max" : 0,
"CompactionTime_mean" : 0,
"CompactionTime_25th_percentile" : 0,
"CompactionTime_median" : 0,
"CompactionTime_75th_percentile" : 0,
"CompactionTime_90th_percentile" : 0,
"CompactionTime_95th_percentile" : 0,
"CompactionTime_98th_percentile" : 0,
"CompactionTime_99th_percentile" : 0,
"CompactionTime_99.9th_percentile" : 0,
"CompactionTime_TimeRangeCount_600000-inf" : 2,
"Get_num_ops" : 0,
"Get_min" : 0,
"Get_max" : 0,
"Get_mean" : 0,
"Get_25th_percentile" : 0,
"Get_median" : 0,
"Get_75th_percentile" : 0,
"Get_90th_percentile" : 0,
"Get_95th_percentile" : 0,
"Get_98th_percentile" : 0,
"Get_99th_percentile" : 0,
"Get_99.9th_percentile" : 0,
"MajorCompactionInputFileCount_num_ops" : 2,
"MajorCompactionInputFileCount_min" : 0,
"MajorCompactionInputFileCount_max" : 0,
"MajorCompactionInputFileCount_mean" : 0,
"MajorCompactionInputFileCount_25th_percentile" : 0,
"MajorCompactionInputFileCount_median" : 0,
"MajorCompactionInputFileCount_75th_percentile" : 0,
"MajorCompactionInputFileCount_90th_percentile" : 0,
"MajorCompactionInputFileCount_95th_percentile" : 0,
"MajorCompactionInputFileCount_98th_percentile" : 0,
"MajorCompactionInputFileCount_99th_percentile" : 0,
"MajorCompactionInputFileCount_99.9th_percentile" : 0,
"CheckAndPut_num_ops" : 0,
"CheckAndPut_min" : 0,
"CheckAndPut_max" : 0,
"CheckAndPut_mean" : 0,
"CheckAndPut_25th_percentile" : 0,
"CheckAndPut_median" : 0,
"CheckAndPut_75th_percentile" : 0,
"CheckAndPut_90th_percentile" : 0,
"CheckAndPut_95th_percentile" : 0,
"CheckAndPut_98th_percentile" : 0,
"CheckAndPut_99th_percentile" : 0,
"CheckAndPut_99.9th_percentile" : 0,
"SplitTime_num_ops" : 0,
"SplitTime_min" : 0,
"SplitTime_max" : 0,
"SplitTime_mean" : 0,
"SplitTime_25th_percentile" : 0,
"SplitTime_median" : 0,
"SplitTime_75th_percentile" : 0,
"SplitTime_90th_percentile" : 0,
"SplitTime_95th_percentile" : 0,
"SplitTime_98th_percentile" : 0,
"SplitTime_99th_percentile" : 0,
"SplitTime_99.9th_percentile" : 0,
"MajorCompactionOutputSize_num_ops" : 2,
"MajorCompactionOutputSize_min" : 0,
"MajorCompactionOutputSize_max" : 0,
"MajorCompactionOutputSize_mean" : 0,
"MajorCompactionOutputSize_25th_percentile" : 0,
"MajorCompactionOutputSize_median" : 0,
"MajorCompactionOutputSize_75th_percentile" : 0,
"MajorCompactionOutputSize_90th_percentile" : 0,
"MajorCompactionOutputSize_95th_percentile" : 0,
"MajorCompactionOutputSize_98th_percentile" : 0,
"MajorCompactionOutputSize_99th_percentile" : 0,
"MajorCompactionOutputSize_99.9th_percentile" : 0,
"MajorCompactionOutputSize_SizeRangeCount_100000000-inf" : 2,
"majorCompactedInputBytes" : 8924,
"slowAppendCount" : 0,
"flushedOutputBytes" : 0,
"CompactionOutputFileCount_num_ops" : 2,
"CompactionOutputFileCount_min" : 0,
"CompactionOutputFileCount_max" : 0,
"CompactionOutputFileCount_mean" : 0,
"CompactionOutputFileCount_25th_percentile" : 0,
"CompactionOutputFileCount_median" : 0,
"CompactionOutputFileCount_75th_percentile" : 0,
"CompactionOutputFileCount_90th_percentile" : 0,
"CompactionOutputFileCount_95th_percentile" : 0,
"CompactionOutputFileCount_98th_percentile" : 0,
"CompactionOutputFileCount_99th_percentile" : 0,
"CompactionOutputFileCount_99.9th_percentile" : 0,
"slowDeleteCount" : 0,
"Replay_num_ops" : 0,
"Replay_min" : 0,
"Replay_max" : 0,
"Replay_mean" : 0,
"Replay_25th_percentile" : 0,
"Replay_median" : 0,
"Replay_75th_percentile" : 0,
"Replay_90th_percentile" : 0,
"Replay_95th_percentile" : 0,
"Replay_98th_percentile" : 0,
"Replay_99th_percentile" : 0,
"Replay_99.9th_percentile" : 0,
"FlushTime_num_ops" : 0,
"FlushTime_min" : 0,
"FlushTime_max" : 0,
"FlushTime_mean" : 0,
"FlushTime_25th_percentile" : 0,
"FlushTime_median" : 0,
"FlushTime_75th_percentile" : 0,
"FlushTime_90th_percentile" : 0,
"FlushTime_95th_percentile" : 0,
"FlushTime_98th_percentile" : 0,
"FlushTime_99th_percentile" : 0,
"FlushTime_99.9th_percentile" : 0,
"MajorCompactionInputSize_num_ops" : 2,
"MajorCompactionInputSize_min" : 0,
"MajorCompactionInputSize_max" : 0,
"MajorCompactionInputSize_mean" : 0,
"MajorCompactionInputSize_25th_percentile" : 0,
"MajorCompactionInputSize_median" : 0,
"MajorCompactionInputSize_75th_percentile" : 0,
"MajorCompactionInputSize_90th_percentile" : 0,
"MajorCompactionInputSize_95th_percentile" : 0,
"MajorCompactionInputSize_98th_percentile" : 0,
"MajorCompactionInputSize_99th_percentile" : 0,
"MajorCompactionInputSize_99.9th_percentile" : 0,
"MajorCompactionInputSize_SizeRangeCount_100000000-inf" : 2,
"pauseInfoThresholdExceeded" : 0,
"splitSuccessCount" : 0,
"CheckAndDelete_num_ops" : 0,
"CheckAndDelete_min" : 0,
"CheckAndDelete_max" : 0,
"CheckAndDelete_mean" : 0,
"CheckAndDelete_25th_percentile" : 0,
"CheckAndDelete_median" : 0,
"CheckAndDelete_75th_percentile" : 0,
"CheckAndDelete_90th_percentile" : 0,
"CheckAndDelete_95th_percentile" : 0,
"CheckAndDelete_98th_percentile" : 0,
"CheckAndDelete_99th_percentile" : 0,
"CheckAndDelete_99.9th_percentile" : 0,
"CompactionInputSize_num_ops" : 2,
"CompactionInputSize_min" : 0,
"CompactionInputSize_max" : 0,
"CompactionInputSize_mean" : 0,
"CompactionInputSize_25th_percentile" : 0,
"CompactionInputSize_median" : 0,
"CompactionInputSize_75th_percentile" : 0,
"CompactionInputSize_90th_percentile" : 0,
"CompactionInputSize_95th_percentile" : 0,
"CompactionInputSize_98th_percentile" : 0,
"CompactionInputSize_99th_percentile" : 0,
"CompactionInputSize_99.9th_percentile" : 0,
"CompactionInputSize_SizeRangeCount_100000000-inf" : 2,
"MajorCompactionOutputFileCount_num_ops" : 2,
"MajorCompactionOutputFileCount_min" : 0,
"MajorCompactionOutputFileCount_max" : 0,
"MajorCompactionOutputFileCount_mean" : 0,
"MajorCompactionOutputFileCount_25th_percentile" : 0,
"MajorCompactionOutputFileCount_median" : 0,
"MajorCompactionOutputFileCount_75th_percentile" : 0,
"MajorCompactionOutputFileCount_90th_percentile" : 0,
"MajorCompactionOutputFileCount_95th_percentile" : 0,
"MajorCompactionOutputFileCount_98th_percentile" : 0,
"MajorCompactionOutputFileCount_99th_percentile" : 0,
"MajorCompactionOutputFileCount_99.9th_percentile" : 0,
"ScanSize_num_ops" : 0,
"ScanSize_min" : 0,
"ScanSize_max" : 0,
"ScanSize_mean" : 0,
"ScanSize_25th_percentile" : 0,
"ScanSize_median" : 0,
"ScanSize_75th_percentile" : 0,
"ScanSize_90th_percentile" : 0,
"ScanSize_95th_percentile" : 0,
"ScanSize_98th_percentile" : 0,
"ScanSize_99th_percentile" : 0,
"ScanSize_99.9th_percentile" : 0,
"slowGetCount" : 0,
"flushedMemstoreBytes" : 0,
"CompactionOutputSize_num_ops" : 2,
"CompactionOutputSize_min" : 0,
"CompactionOutputSize_max" : 0,
"CompactionOutputSize_mean" : 0,
"CompactionOutputSize_25th_percentile" : 0,
"CompactionOutputSize_median" : 0,
"CompactionOutputSize_75th_percentile" : 0,
"CompactionOutputSize_90th_percentile" : 0,
"CompactionOutputSize_95th_percentile" : 0,
"CompactionOutputSize_98th_percentile" : 0,
"CompactionOutputSize_99th_percentile" : 0,
"CompactionOutputSize_99.9th_percentile" : 0,
"CompactionOutputSize_SizeRangeCount_100000000-inf" : 2,
"majorCompactedOutputBytes" : 8924,
"PauseTimeWithoutGc_num_ops" : 0,
"PauseTimeWithoutGc_min" : 0,
"PauseTimeWithoutGc_max" : 0,
"PauseTimeWithoutGc_mean" : 0,
"PauseTimeWithoutGc_25th_percentile" : 0,
"PauseTimeWithoutGc_median" : 0,
"PauseTimeWithoutGc_75th_percentile" : 0,
"PauseTimeWithoutGc_90th_percentile" : 0,
"PauseTimeWithoutGc_95th_percentile" : 0,
"PauseTimeWithoutGc_98th_percentile" : 0,
"PauseTimeWithoutGc_99th_percentile" : 0,
"PauseTimeWithoutGc_99.9th_percentile" : 0,
"slowPutCount" : 0,
"slowIncrementCount" : 0,
"compactedInputBytes" : 8924,
"Append_num_ops" : 0,
"Append_min" : 0,
"Append_max" : 0,
"Append_mean" : 0,
"Append_25th_percentile" : 0,
"Append_median" : 0,
"Append_75th_percentile" : 0,
"Append_90th_percentile" : 0,
"Append_95th_percentile" : 0,
"Append_98th_percentile" : 0,
"Append_99th_percentile" : 0,
"Append_99.9th_percentile" : 0,
"FlushOutputSize_num_ops" : 0,
"FlushOutputSize_min" : 0,
"FlushOutputSize_max" : 0,
"FlushOutputSize_mean" : 0,
"FlushOutputSize_25th_percentile" : 0,
"FlushOutputSize_median" : 0,
"FlushOutputSize_75th_percentile" : 0,
"FlushOutputSize_90th_percentile" : 0,
"FlushOutputSize_95th_percentile" : 0,
"FlushOutputSize_98th_percentile" : 0,
"FlushOutputSize_99th_percentile" : 0,
"FlushOutputSize_99.9th_percentile" : 0,
"Bulkload_count" : 0,
"Bulkload_mean_rate" : 0.0,
"Bulkload_1min_rate" : 0.0,
"Bulkload_5min_rate" : 0.0,
"Bulkload_15min_rate" : 0.0,
"Bulkload_num_ops" : 0,
"Bulkload_min" : 0,
"Bulkload_max" : 0,
"Bulkload_mean" : 0,
"Bulkload_25th_percentile" : 0,
"Bulkload_median" : 0,
"Bulkload_75th_percentile" : 0,
"Bulkload_90th_percentile" : 0,
"Bulkload_95th_percentile" : 0,
"Bulkload_98th_percentile" : 0,
"Bulkload_99th_percentile" : 0,
"Bulkload_99.9th_percentile" : 0
} ]
}
以上监控信息主要是 HBase 中某个 RegionServer 的详细指标,具体包括 GC、scan、flush、block cache、compaction 等细粒度的监控项。
二、JMX监控信息钉钉告警实现
下面我们实现一个基于 RegionServer 运行时长的钉钉通知与重启告警脚本:
# -*- coding: utf-8 -*-
import time
import requests
import json
import schedule as schedule
"""
~~~~~~~~~~~~
author: kangll
date: 2025/02/11 11:50
desc: reid cluster HBase JMX 获取指标信息
-- curl 请求: curl http://hdp-node2:16030/jmx?qry=Hadoop:service=HBase,name=RegionServer,sub=Server
2小时发出一次正常的RS运行状态信息,RS运行时间小于10分钟且当前小时立即发出一次告警
"""
# Author metadata for this script.
__author__ = 'kanglilong <kangll@winnerinf.com>'
# HTTP headers sent with every DingTalk webhook POST (JSON body, UTF-8).
headers = {'Content-Type': 'application/json;charset=utf-8'}
# RegionServer hosts to poll. This is a set, so callers sort it to get a stable order.
hostArr = {"hdp-node1", "hdp-node2", "hdp-node3"}
# DingTalk robot webhook URL.
# NOTE(review): the access token is hard-coded in source — consider loading it
# from configuration or an environment variable instead.
dingding_url = "https://oapi.dingtalk.com/robot/send?access_token=ba7693ae5a1a5a4cda1358f35b19785a6d8a7659da92ba3685d6532994a6d82c"
# Hour in which the last "uptime below 10 minutes" alert was sent (None until the first alert).
last_less_than_10mins_alert_hour = None
def jmxGetHBaseStatus(regionserver_host, timeout=10):
    """
    Fetch a RegionServer's uptime from its HBase JMX HTTP endpoint.

    :param regionserver_host: host name of the RegionServer to query
    :param timeout: seconds to wait for the HTTP response (default 10)
    :return: the line "hostname: <host>, RegionServer uptime: <h/m/s>", or
             None when the request fails or the payload cannot be parsed
    """
    jmx_port = 16030
    # Query only the RegionServer "Server" bean, which carries regionServerStartTime
    jmx_url = f'http://{regionserver_host}:{jmx_port}/jmx?qry=Hadoop:service=HBase,name=RegionServer,sub=Server'
    try:
        # A timeout keeps one unresponsive host from blocking the whole polling loop
        response = requests.get(jmx_url, timeout=timeout)
        # Treat any non-2xx answer as a request failure
        response.raise_for_status()
        jmx_data = response.json()
        # Start time of the RegionServer, in epoch milliseconds
        region_server_start_time = jmx_data['beans'][0]['regionServerStartTime']
        current_time = int(time.time() * 1000)
        uptime = current_time - region_server_start_time
        uptime_hms = convert_milliseconds_to_hms(uptime)
        return f"hostname: {regionserver_host}, RegionServer uptime: {uptime_hms}"
    except requests.exceptions.RequestException as e:
        print(f'请求出错: {e}')
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # ValueError also covers json.JSONDecodeError; TypeError covers
        # payloads whose structure is not the expected dict/list nesting
        print(f'解析 JMX 数据出错: {e}')
    return None
# Per-host record of the "YYYY-mm-dd HH" slot in which a restart alert was last
# sent, so one host's alert never suppresses another host's alert.
_restart_alert_slot_by_host = {}


def jmxGetHBaseAlarmStatus(regionserver_host, timeout=10):
    """
    Send a DingTalk restart alert when a RegionServer has been up for less than 10 minutes.

    At most one alert is sent per host per calendar hour. The slot is recorded
    only after the alert was posted, so a failed post is retried on the next call.

    :param regionserver_host: host name of the RegionServer to query
    :param timeout: seconds to wait for the JMX HTTP response (default 10)
    :return: the alert text when an alert was sent; "" when nothing had to be
             sent (including a non-200 JMX response); None when the request
             or the parsing of the JMX payload failed
    """
    global last_less_than_10mins_alert_hour
    jmx_port = 16030
    # Query only the RegionServer "Server" bean, which carries regionServerStartTime
    jmx_url = f'http://{regionserver_host}:{jmx_port}/jmx?qry=Hadoop:service=HBase,name=RegionServer,sub=Server'
    # Beans with this many attributes or fewer are skipped.
    # NOTE(review): presumably guards against a partially populated bean — confirm the threshold.
    min_bean_fields = 400
    restart_window_ms = 10 * 60 * 1000
    try:
        text = ""
        now_time = time.localtime(time.time())
        formatted_time = time.strftime('%Y-%m-%d %H:%M:%S', now_time)
        # A timeout keeps one unresponsive host from blocking the polling loop
        response = requests.get(jmx_url, timeout=timeout)
        if response.status_code != 200:
            return text
        jmx_data = response.json()
        bean = jmx_data['beans'][0]
        if len(bean) <= min_bean_fields:
            return text
        # Uptime in milliseconds: now minus the reported start time
        uptime = int(time.time() * 1000) - bean['regionServerStartTime']
        if uptime >= restart_window_ms:
            return text
        # Date + hour, so a restart at the same hour on a later day still alerts
        alert_slot = time.strftime('%Y-%m-%d %H', now_time)
        if _restart_alert_slot_by_host.get(regionserver_host) == alert_slot:
            return text
        uptime_hms = convert_milliseconds_to_hms(uptime)
        text = "告警类型: reid 集群HBase 重启告警通知 \n" + "告警信息: \n" + f"hostname: {regionserver_host} ,RegionServer uptime: {uptime_hms} " + "\n告警时间:" + formatted_time
        msg(text, dingding_url)
        _restart_alert_slot_by_host[regionserver_host] = alert_slot
        # Keep the module-level marker up to date for any code that still reads it
        last_less_than_10mins_alert_hour = now_time.tm_hour
        return text
    except requests.exceptions.RequestException as e:
        print(f'请求出错: {e}')
    except (KeyError, IndexError, TypeError, ValueError) as e:
        # ValueError also covers json.JSONDecodeError
        print(f'解析 JMX 数据出错: {e}')
    return None
def convert_milliseconds_to_hms(milliseconds):
    """
    Format a duration given in milliseconds as hours, minutes and seconds.

    Negative durations (e.g. caused by clock skew between the monitoring host
    and the RegionServer) are clamped to zero, and fractional input is
    truncated, so the result always shows non-negative whole numbers.

    :param milliseconds: duration in milliseconds
    :return: string of the form "<h>小时 <m>分钟 <s>秒."
    """
    total_seconds = max(int(milliseconds), 0) // 1000
    hours, remainder = divmod(total_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours}小时 {minutes}分钟 {seconds}秒."
def getAllHostsHBase(alert_message=""):
    """
    Build and send the routine uptime report for every host in hostArr.

    Hosts whose status could not be fetched get an explicit failure line
    instead of the literal text "None". A failed DingTalk post is logged
    rather than raised, so the scheduler loop that runs this job keeps going.

    :param alert_message: optional text placed in front of the generated report
    :return: the complete report text that was handed to notify_msg
    """
    formatted_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    report_lines = []
    # hostArr is a set; sort it so the numbering is stable between runs
    for index, host in enumerate(sorted(hostArr), start=1):
        status_line = jmxGetHBaseStatus(host)
        if status_line is None:
            status_line = f"hostname: {host}, RegionServer uptime: 获取失败(JMX 无响应或数据异常)"
        report_lines.append("\t" + str(index) + "." + str(status_line) + "\n")
    alert_message += "告警类型: reid 集群HBase告警通知 \n" + "告警信息: \n"
    alert_message += "".join(report_lines)
    alert_message += "\n告警时间:" + formatted_time
    print(alert_message)
    try:
        notify_msg(alert_message, dingding_url)
    except requests.exceptions.RequestException as e:
        print(f'钉钉通知发送失败: {e}')
    return alert_message
def check_and_alert():
    """
    Poll every host in hostArr for a recent RegionServer restart and log the outcome.

    jmxGetHBaseAlarmStatus sends the alert itself; this function only prints
    one status line per host. A None result (JMX request or parsing failed)
    is reported as a fetch failure rather than as a healthy host.
    """
    formatted_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
    # hostArr is a set; sort it so hosts are checked in a stable order
    for host in sorted(hostArr):
        alarm_str = jmxGetHBaseAlarmStatus(host)
        print(alarm_str)
        if alarm_str is None:
            print("时间: ", formatted_time, "主机:", host, 'RegionServer 状态获取失败!')
        elif alarm_str != "":
            print("时间: ", formatted_time, "主机:", host, 'RegionServer 重启告警发出!')
        else:
            print("时间: ", formatted_time, "主机:", host, 'RegionServer 状 态 正 常!')
def msg(text, api_url, timeout=10):
    """
    Post a text alert to a DingTalk robot webhook (used for restart alerts).

    :param text: alert text
    :param api_url: DingTalk robot webhook URL
    :param timeout: seconds to wait for DingTalk before giving up (default 10)
    :return: None
    :raises requests.exceptions.RequestException: if the POST cannot be completed
    """
    json_text = {
        "msgtype": "text",
        "text": {
            "content": text
        }, "at": {
            "atMobiles": [""]
        }
    }
    # The timeout prevents a stalled connection from hanging the monitoring loop
    response = requests.post(api_url, data=json.dumps(json_text), headers=headers, timeout=timeout)
    # Surface rejected deliveries instead of silently discarding the response
    if response.status_code != 200:
        print(f'钉钉消息发送失败: HTTP {response.status_code}')
def notify_msg(text, api_url, timeout=10):
    """
    Post a text notification to a DingTalk robot webhook (used for the routine report).

    :param text: notification text
    :param api_url: DingTalk robot webhook URL
    :param timeout: seconds to wait for DingTalk before giving up (default 10)
    :return: None
    :raises requests.exceptions.RequestException: if the POST cannot be completed
    """
    json_text = {
        "msgtype": "text",
        "text": {
            "content": text
        }, "at": {
            "atMobiles": [""]
        }
    }
    # The timeout prevents a stalled connection from hanging the monitoring loop
    response = requests.post(api_url, data=json.dumps(json_text), headers=headers, timeout=timeout)
    # Surface rejected deliveries instead of silently discarding the response
    if response.status_code != 200:
        print(f'钉钉消息发送失败: HTTP {response.status_code}')
def correct_msg(text, api_url, timeout=10):
    """
    Post a "component is healthy" text message to a DingTalk robot webhook.

    The payload's atMobiles list holds only an empty string, so no specific
    phone number is listed for an @-mention.

    :param text: message text
    :param api_url: DingTalk robot webhook URL
    :param timeout: seconds to wait for DingTalk before giving up (default 10)
    :return: None
    :raises requests.exceptions.RequestException: if the POST cannot be completed
    """
    json_text = {
        "msgtype": "text",
        "text": {
            "content": text
        }, "at": {
            "atMobiles": [""]
        }
    }
    # The timeout prevents a stalled connection from hanging the monitoring loop
    response = requests.post(api_url, data=json.dumps(json_text), headers=headers, timeout=timeout)
    # Surface rejected deliveries instead of silently discarding the response
    if response.status_code != 200:
        print(f'钉钉消息发送失败: HTTP {response.status_code}')
if __name__ == '__main__':
    # Send the routine uptime report at minute :00 of every hour.
    # NOTE(review): the file's description text mentions a 2-hour interval — confirm which is intended.
    schedule.every().hour.at(":00").do(getAllHostsHBase)
    while True:
        try:
            # Restart check runs on every iteration; the hourly report only when due
            check_and_alert()
            schedule.run_pending()
        except Exception as e:
            # Top-level boundary: log and keep polling so one bad iteration
            # does not stop the monitor (Ctrl-C still exits)
            print(f'监控循环异常: {e!r}')
        time.sleep(10)
钉钉告警通知: