# 1. DeepSeek 版本
import numpy as np
from collections import defaultdict
def calculate_iou(box1, box2):
    """Compute the intersection-over-union (IoU) of two axis-aligned boxes.

    :param box1: first box as [x1, y1, x2, y2]
    :param box2: second box as [x1, y1, x2, y2]
    :return: intersection area divided by union area, or 0.0 when the union
        area is not positive
    """
    x1_min, y1_min, x1_max, y1_max = box1
    x2_min, y2_min, x2_max, y2_max = box2

    # Overlap extent along each axis, clamped at zero for disjoint boxes.
    inter_width = max(0, min(x1_max, x2_max) - max(x1_min, x2_min))
    inter_height = max(0, min(y1_max, y2_max) - max(y1_min, y2_min))
    inter_area = inter_width * inter_height

    box1_area = (x1_max - x1_min) * (y1_max - y1_min)
    box2_area = (x2_max - x2_min) * (y2_max - y2_min)
    union_area = box1_area + box2_area - inter_area

    # Two zero-area boxes would otherwise divide by zero.
    return inter_area / union_area if union_area > 0 else 0.0
def _match_class_predictions(gt_boxes, pred_boxes, iou_threshold):
    """Match one image's predictions of a single class against its ground truth.

    Predictions are visited in descending score order. Each one claims the
    still-unmatched ground-truth box with the highest IoU and is a true
    positive when that IoU reaches ``iou_threshold``; otherwise it is a
    false positive.

    :param gt_boxes: ground-truth boxes, each [x1, y1, x2, y2]
    :param pred_boxes: list of (box, score) pairs
    :param iou_threshold: minimum IoU for a match
    :return: list of (score, is_tp) pairs in score order, is_tp being 1 or 0
    """
    ranked = sorted(pred_boxes, key=lambda item: item[1], reverse=True)
    gt_matched = [False] * len(gt_boxes)
    records = []
    for pred_box, score in ranked:
        best_iou = 0.0
        best_gt_idx = -1
        for gt_idx, gt_box in enumerate(gt_boxes):
            if gt_matched[gt_idx]:
                continue
            iou = calculate_iou(pred_box, gt_box)
            if iou > best_iou:
                best_iou = iou
                best_gt_idx = gt_idx
        # Require a real candidate: with a threshold <= 0 the IoU test alone
        # would accept "no overlap at all" and index gt_matched[-1].
        is_tp = best_gt_idx != -1 and best_iou >= iou_threshold
        if is_tp:
            gt_matched[best_gt_idx] = True
        records.append((score, int(is_tp)))
    return records


def _interpolated_ap(records, gt_count):
    """Return the area under the monotone precision-recall envelope.

    :param records: (score, is_tp) pairs for one class over all images
    :param gt_count: number of ground-truth boxes of that class
    :return: average precision; 0.0 without predictions or ground truth
    """
    if not records or gt_count <= 0:
        return 0.0

    ranked = sorted(records, key=lambda rec: rec[0], reverse=True)
    precisions = []
    recalls = []
    cum_tp = 0
    for rank, (_, is_tp) in enumerate(ranked, start=1):
        cum_tp += is_tp
        precisions.append(cum_tp / rank)
        recalls.append(cum_tp / gt_count)

    # Make precision non-increasing from left to right.
    for i in range(len(precisions) - 2, -1, -1):
        precisions[i] = max(precisions[i], precisions[i + 1])

    # Integrate from recall 0 so the first recall step is included.
    ap = 0.0
    prev_recall = 0.0
    for recall, precision in zip(recalls, precisions):
        ap += (recall - prev_recall) * precision
        prev_recall = recall
    return ap


def evaluate_detection(gt_dict, pred_dict, class_list, iou_threshold=0.5,
                       *, negatives_per_image=100):
    """Evaluate object-detection predictions against ground truth.

    :param gt_dict: ground truth,
        {image_id: {'boxes': [[x1, y1, x2, y2], ...], 'labels': [label, ...]}}
    :param pred_dict: predictions,
        {image_id: {'boxes': [...], 'scores': [score, ...], 'labels': [label, ...]}}
    :param class_list: class ids to evaluate; duplicates are ignored and
        boxes whose label is not listed are skipped
    :param iou_threshold: minimum IoU for a prediction to match a ground-truth box
    :param negatives_per_image: assumed number of negative candidate regions
        per image, used only for the approximate FPR
    :return: {'mAP': float, 'per_class': {class_id: {'AP', 'Recall', 'FPR',
        'Precision', 'TP', 'FP', 'FN', 'gt_count', 'pred_count'}}}.
        mAP averages AP over every class in ``class_list``, including
        classes without ground truth (their AP is 0.0).
    """
    # De-duplicate while keeping order so a repeated id is not counted twice.
    classes = list(dict.fromkeys(class_list))

    results = {
        'mAP': 0.0,
        'per_class': {
            class_id: {
                'AP': 0.0,         # average precision
                'Recall': 0.0,     # detection rate
                'FPR': 0.0,        # approximate false-positive rate
                'Precision': 0.0,
                'TP': 0,
                'FP': 0,
                'FN': 0,
                'gt_count': 0,     # ground-truth boxes seen
                'pred_count': 0,   # predicted boxes seen
            }
            for class_id in classes
        },
    }
    # (score, is_tp) for every prediction of a class, gathered over all images.
    class_records = {class_id: [] for class_id in classes}

    # Insertion-ordered union keeps the result reproducible when scores tie.
    image_ids = list(dict.fromkeys([*gt_dict, *pred_dict]))

    # Step 1: match predictions to ground truth image by image.
    for img_id in image_ids:
        gt_ann = gt_dict.get(img_id, {'boxes': [], 'labels': []})
        pred_ann = pred_dict.get(img_id, {'boxes': [], 'scores': [], 'labels': []})

        gt_by_class = {class_id: [] for class_id in classes}
        for box, label in zip(gt_ann['boxes'], gt_ann['labels']):
            if label in gt_by_class:
                gt_by_class[label].append(box)

        pred_by_class = {class_id: [] for class_id in classes}
        for box, score, label in zip(pred_ann['boxes'], pred_ann['scores'],
                                     pred_ann['labels']):
            if label in pred_by_class:
                pred_by_class[label].append((box, score))

        for class_id in classes:
            stats = results['per_class'][class_id]
            gt_boxes = gt_by_class[class_id]
            pred_boxes = pred_by_class[class_id]
            stats['gt_count'] += len(gt_boxes)
            stats['pred_count'] += len(pred_boxes)

            # No predictions -> every ground-truth box is a miss; no ground
            # truth -> every prediction is a false positive. Both fall out of
            # the matcher without a special case.
            records = _match_class_predictions(gt_boxes, pred_boxes, iou_threshold)
            class_records[class_id].extend(records)

            matched = sum(is_tp for _, is_tp in records)
            stats['TP'] += matched
            stats['FP'] += len(records) - matched
            stats['FN'] += len(gt_boxes) - matched

    # Step 2: derive the per-class metrics.
    # Detection has no enumerable set of negatives, so FPR is approximated
    # with an assumed number of negative regions per image.
    negatives = len(image_ids) * negatives_per_image
    aps = []
    for class_id in classes:
        stats = results['per_class'][class_id]
        tp = stats['TP']
        fp = stats['FP']
        fn = stats['FN']

        stats['Recall'] = tp / (tp + fn) if (tp + fn) > 0 else 0.0
        stats['FPR'] = fp / (fp + negatives) if (fp + negatives) > 0 else 0.0
        stats['Precision'] = tp / (tp + fp) if (tp + fp) > 0 else 0.0
        stats['AP'] = _interpolated_ap(class_records[class_id], stats['gt_count'])
        aps.append(stats['AP'])

    results['mAP'] = sum(aps) / len(aps) if aps else 0.0
    return results
# Example usage
if __name__ == "__main__":
    class_list = [0, 1, 2]  # class ids to evaluate

    # Ground-truth annotations (mock data)
    gt_dict = {
        'img1': {'boxes': [[10, 10, 50, 50], [30, 30, 70, 70]], 'labels': [0, 1]},
        'img2': {'boxes': [], 'labels': []},  # image without any ground truth
        'img3': {'boxes': [[20, 20, 60, 60]], 'labels': [2]},
        # Only class 1 is annotated here; the other classes have no ground
        # truth in this image.
        'img4': {'boxes': [[15, 15, 55, 55]], 'labels': [1]},
    }

    # Predictions (mock data)
    pred_dict = {
        'img1': {
            'boxes': [[12, 12, 52, 52], [28, 28, 68, 68], [100, 100, 150, 150]],
            'scores': [0.9, 0.8, 0.7],
            'labels': [0, 1, 1],
        },
        'img2': {'boxes': [[40, 40, 80, 80]], 'scores': [0.85], 'labels': [0]},  # false detection
        'img3': {'boxes': [], 'scores': [], 'labels': []},  # no predictions
        'img4': {'boxes': [[15, 15, 55, 55]], 'scores': [0.75], 'labels': [1]},
    }

    results = evaluate_detection(gt_dict, pred_dict, class_list)

    print(f"mAP: {results['mAP']:.4f}")
    print("\nPer-class metrics:")
    for class_id, metrics in results['per_class'].items():
        print(f"Class {class_id}:")
        print(f" AP: {metrics['AP']:.4f}")
        print(f" Recall: {metrics['Recall']:.4f}")
        print(f" FPR: {metrics['FPR']:.4f}")
        print(f" Precision: {metrics['Precision']:.4f}")
        print(f" TP: {metrics['TP']}, FP: {metrics['FP']}, FN: {metrics['FN']}")
        print(f" GT Count: {metrics['gt_count']}, Pred Count: {metrics['pred_count']}")
# 2. 豆包版本
import numpy as np
from collections import defaultdict
def calculate_iou(box1, box2):
    """Compute the intersection-over-union (IoU) of two boxes.

    Both boxes are given as [x1, y1, x2, y2]. Returns a float; the result is
    0.0 when the union area is not positive (e.g. two zero-area boxes).
    """
    x1, y1, x2, y2 = box1
    x1_p, y1_p, x2_p, y2_p = box2

    # Intersection rectangle; width/height are clamped at zero when disjoint.
    inter_w = max(0, min(x2, x2_p) - max(x1, x1_p))
    inter_h = max(0, min(y2, y2_p) - max(y1, y1_p))
    inter_area = inter_w * inter_h

    area1 = (x2 - x1) * (y2 - y1)
    area2 = (x2_p - x1_p) * (y2_p - y1_p)
    union_area = area1 + area2 - inter_area

    # "<=" rather than "==": an inverted box can make the union negative,
    # which would otherwise yield -0.0.
    if union_area <= 0:
        return 0.0
    return inter_area / union_area
def _ranked_average_precision(records, gt_count):
    """Return the area under the monotone precision-recall envelope.

    :param records: (score, is_tp) pairs for one label over all images
    :param gt_count: number of ground-truth boxes carrying that label
    :return: average precision; 0.0 without predictions or ground truth
    """
    if not records or gt_count <= 0:
        return 0.0

    ranked = sorted(records, key=lambda rec: rec[0], reverse=True)
    precisions = []
    recalls = []
    hits = 0
    for rank, (_, is_tp) in enumerate(ranked, start=1):
        hits += is_tp
        precisions.append(hits / rank)
        recalls.append(hits / gt_count)

    # Make precision non-increasing from left to right.
    for i in range(len(precisions) - 2, -1, -1):
        precisions[i] = max(precisions[i], precisions[i + 1])

    ap = 0.0
    prev_recall = 0.0
    for recall, precision in zip(recalls, precisions):
        ap += (recall - prev_recall) * precision
        prev_recall = recall
    return ap


def evaluate_detection(true_boxes_list, pred_boxes_list, iou_threshold=0.5):
    """Evaluate detection results image by image.

    Args:
        true_boxes_list: one entry per image, each a list of ground-truth
            boxes ``{'box': [x1, y1, x2, y2], 'label': name}``.
        pred_boxes_list: one entry per image (same order), each a list of
            predictions ``{'box': [...], 'label': name, 'score': confidence}``.
        iou_threshold: minimum IoU for a prediction to match a ground-truth
            box of the same label.

    Returns:
        ``{'per_label': {label: stats}, 'mAP': float}``. Each ``stats`` holds
        ``tp``/``fp``/``fn`` counts, ``detection_rate`` (recall),
        ``false_detection_rate`` (fp / (tp + fp)), single-element
        ``precision`` and ``recall`` lists, and ``ap`` (area under the
        score-ranked precision-recall curve). ``mAP`` averages ``ap`` over
        every label seen in either input.

    Raises:
        ValueError: if the two lists do not describe the same number of
            images (``zip`` would otherwise silently drop the surplus).
    """
    if len(true_boxes_list) != len(pred_boxes_list):
        raise ValueError(
            "true_boxes_list and pred_boxes_list must have the same length "
            f"(got {len(true_boxes_list)} and {len(pred_boxes_list)})"
        )

    # Every label that occurs in the ground truth or in the predictions.
    all_labels = sorted(
        {box['label'] for boxes in true_boxes_list for box in boxes}
        | {box['label'] for boxes in pred_boxes_list for box in boxes}
    )

    results = {
        'per_label': {
            label: {'tp': 0, 'fp': 0, 'fn': 0, 'precision': [], 'recall': [], 'ap': 0}
            for label in all_labels
        },
        'mAP': 0,
    }
    # (score, is_tp) per prediction, needed to rank detections for AP.
    ranked_records = {label: [] for label in all_labels}

    for true_boxes, pred_boxes in zip(true_boxes_list, pred_boxes_list):
        matched_true = set()  # indices of ground-truth boxes already claimed

        # Higher-confidence predictions get first pick of the ground truth.
        for pred in sorted(pred_boxes, key=lambda p: p['score'], reverse=True):
            pred_label = pred['label']
            best_iou = 0
            best_true_idx = -1
            for true_idx, true in enumerate(true_boxes):
                if true_idx in matched_true or true['label'] != pred_label:
                    continue
                iou = calculate_iou(true['box'], pred['box'])
                if iou > best_iou and iou >= iou_threshold:
                    best_iou = iou
                    best_true_idx = true_idx

            is_tp = best_true_idx != -1
            if is_tp:
                results['per_label'][pred_label]['tp'] += 1
                matched_true.add(best_true_idx)
            else:
                results['per_label'][pred_label]['fp'] += 1
            ranked_records[pred_label].append((pred['score'], int(is_tp)))

        # Ground-truth boxes nobody claimed are misses.
        for true_idx, true in enumerate(true_boxes):
            if true_idx not in matched_true:
                results['per_label'][true['label']]['fn'] += 1

    aps = []
    for label in all_labels:
        stats = results['per_label'][label]
        tp = stats['tp']
        fp = stats['fp']
        fn = stats['fn']

        # 0.0 covers "no ground truth" and "no predictions" respectively.
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0

        stats['detection_rate'] = recall
        stats['false_detection_rate'] = fp / (tp + fp) if (tp + fp) > 0 else 0.0
        stats['precision'].append(precision)
        stats['recall'].append(recall)
        # tp + fn is the number of ground-truth boxes with this label.
        stats['ap'] = _ranked_average_precision(ranked_records[label], tp + fn)
        aps.append(stats['ap'])

    results['mAP'] = sum(aps) / len(aps) if aps else 0.0
    return results
def print_evaluation_results(results):
    """Print the per-label statistics and the overall mAP to stdout.

    :param results: dict with a 'per_label' mapping (each entry providing
        'detection_rate', 'false_detection_rate', 'precision', 'recall',
        'ap', 'tp', 'fp', 'fn') and an 'mAP' value
    """
    print("目标检测模型评估结果:")
    print("======================")

    for label, stats in results['per_label'].items():
        print(f"\n标签: {label}")
        print(f" 检出率: {stats['detection_rate']:.4f}")
        print(f" 误检率: {stats['false_detection_rate']:.4f}")
        # 'precision' / 'recall' are lists; show N/A when nothing was recorded.
        if stats['precision']:
            print(f" 精确率: {stats['precision'][0]:.4f}")
        else:
            print(" 精确率: N/A")
        if stats['recall']:
            print(f" 召回率: {stats['recall'][0]:.4f}")
        else:
            print(" 召回率: N/A")
        print(f" AP: {stats['ap']:.4f}")
        print(f" TP: {stats['tp']}, FP: {stats['fp']}, FN: {stats['fn']}")

    print("\n======================")
    print(f"mAP: {results['mAP']:.4f}")
# Example usage
if __name__ == "__main__":
    # Example 1: regular image with ground truth and predictions
    true_boxes1 = [
        {'box': [10, 10, 50, 50], 'label': 'car'},
        {'box': [60, 60, 100, 100], 'label': 'person'},
    ]
    pred_boxes1 = [
        {'box': [12, 12, 52, 52], 'label': 'car', 'score': 0.9},
        {'box': [65, 65, 105, 105], 'label': 'person', 'score': 0.85},
        {'box': [120, 120, 150, 150], 'label': 'bike', 'score': 0.7},  # false detection
    ]

    # Example 2: image without ground truth
    true_boxes2 = []
    pred_boxes2 = [
        {'box': [20, 20, 40, 40], 'label': 'car', 'score': 0.6},  # false detection
    ]

    # Example 3: image without predictions
    true_boxes3 = [
        {'box': [30, 30, 70, 70], 'label': 'car'},
    ]
    pred_boxes3 = []  # missed detection

    # Example 4: a predicted label that has no ground truth in the image
    true_boxes4 = [
        {'box': [80, 80, 120, 120], 'label': 'person'},
    ]
    pred_boxes4 = [
        # 'bike' has no ground-truth box in this image
        {'box': [100, 100, 140, 140], 'label': 'bike', 'score': 0.5},
    ]

    # One list entry per image, in the same order for both lists.
    true_boxes_list = [true_boxes1, true_boxes2, true_boxes3, true_boxes4]
    pred_boxes_list = [pred_boxes1, pred_boxes2, pred_boxes3, pred_boxes4]

    evaluation_results = evaluate_detection(true_boxes_list, pred_boxes_list, iou_threshold=0.5)
    print_evaluation_results(evaluation_results)
# 3. Baidu 版本
import numpy as np
from collections import defaultdict
class DetectionEvaluator:
    """Accumulate per-image detection matches and derive per-class metrics.

    Feed images one at a time through :meth:`evaluate_image`, then call
    :meth:`calculate_metrics` to obtain the detection rate, the
    false-detection ratio and the 11-point interpolated AP of every class,
    plus the overall mAP.
    """

    def __init__(self, iou_threshold=0.5):
        """:param iou_threshold: minimum IoU for a prediction to be a true positive."""
        self.iou_threshold = iou_threshold
        self.reset()

    def reset(self):
        """Discard everything accumulated so far."""
        self.gt_counts = defaultdict(int)    # class -> number of ground-truth boxes
        self.pred_counts = defaultdict(int)  # class -> number of predicted boxes
        self.tp = defaultdict(list)          # class -> 0/1 true-positive flag per prediction
        self.fp = defaultdict(list)          # class -> 0/1 false-positive flag per prediction
        self.scores = defaultdict(list)      # class -> confidence per prediction (parallel to tp/fp)

    def calculate_iou(self, box1, box2):
        """Return the IoU of two boxes given as [x1, y1, x2, y2]."""
        x1 = max(box1[0], box2[0])
        y1 = max(box1[1], box2[1])
        x2 = min(box1[2], box2[2])
        y2 = min(box1[3], box2[3])
        inter_area = max(0, x2 - x1) * max(0, y2 - y1)
        box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
        box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
        union_area = box1_area + box2_area - inter_area
        return inter_area / union_area if union_area > 0 else 0

    def evaluate_image(self, gt_boxes, pred_boxes):
        """Match one image's predictions to its ground truth and record the outcome.

        :param gt_boxes: dict {class_id: [[x1, y1, x2, y2], ...]}
        :param pred_boxes: dict {class_id: [[x1, y1, x2, y2, score], ...]}

        Either dict may be empty. Predictions of a class without ground
        truth in this image are recorded as false positives.
        """
        for class_id, boxes in gt_boxes.items():
            self.gt_counts[class_id] += len(boxes)

        for class_id, preds in pred_boxes.items():
            self.pred_counts[class_id] += len(preds)
            class_gts = gt_boxes.get(class_id, [])
            gt_matched = set()  # ground-truth indices already claimed

            # Higher-confidence predictions get first pick of the ground truth.
            for pred in sorted(preds, key=lambda p: p[4], reverse=True):
                max_iou = 0
                best_gt_idx = -1
                for gt_idx, gt_box in enumerate(class_gts):
                    if gt_idx in gt_matched:
                        continue
                    iou = self.calculate_iou(pred[:4], gt_box)
                    if iou > max_iou:
                        max_iou = iou
                        best_gt_idx = gt_idx

                # Require a real candidate: with a threshold <= 0 the IoU test
                # alone would count a prediction with no overlap as a hit.
                is_tp = best_gt_idx != -1 and max_iou >= self.iou_threshold
                if is_tp:
                    gt_matched.add(best_gt_idx)
                self.tp[class_id].append(int(is_tp))
                self.fp[class_id].append(int(not is_tp))
                self.scores[class_id].append(pred[4])

    def calculate_metrics(self):
        """Compute the per-class metrics and the mAP from the accumulated images.

        :return: {'per_class': {class_id: {...}}, 'mAP': float}. Per-class
            rates and AP are rounded to 3 decimals; mAP is the mean of the
            unrounded APs of the classes that have ground truth.
        """
        metrics = {}
        exact_aps = []
        # Insertion-ordered union: stable class order from run to run.
        class_ids = dict.fromkeys([*self.gt_counts, *self.pred_counts])
        for class_id in class_ids:
            gt_count = self.gt_counts.get(class_id, 0)
            pred_count = self.pred_counts.get(class_id, 0)
            tp = np.array(self.tp.get(class_id, []))
            fp = np.array(self.fp.get(class_id, []))
            scores = np.array(self.scores.get(class_id, []))

            # Flags were appended image by image; AP needs them ranked by
            # confidence over the whole dataset.
            order = np.argsort(-scores, kind="stable")
            tp = tp[order]
            fp = fp[order]

            tp_total = int(np.sum(tp))
            fp_total = int(np.sum(fp))
            # Detection rate = TP / (TP + FN) = TP / number of ground-truth boxes.
            detection_rate = tp_total / gt_count if gt_count > 0 else 0.0
            # False-detection ratio = FP / (TP + FP) = FP / number of predictions.
            false_alarm_rate = fp_total / pred_count if pred_count > 0 else 0.0
            ap = self.calculate_ap(tp, fp, gt_count)
            if gt_count > 0:
                exact_aps.append(ap)

            metrics[class_id] = {
                '检出率': round(detection_rate, 3),
                '误检率': round(false_alarm_rate, 3),
                'ap': round(ap, 3),
                'gt_count': gt_count,
                'pred_count': pred_count,
                'tp_count': tp_total,
                'fp_count': fp_total,
            }

        mAP = sum(exact_aps) / len(exact_aps) if exact_aps else 0.0
        return {
            'per_class': metrics,
            'mAP': mAP,
        }

    def calculate_ap(self, tp, fp, gt_count):
        """Return the 11-point interpolated AP of one class.

        :param tp: 0/1 true-positive flags, ordered by descending confidence
        :param fp: 0/1 false-positive flags, same order as ``tp``
        :param gt_count: number of ground-truth boxes of the class
        :return: mean over recall levels 0.0, 0.1, ..., 1.0 of the best
            precision reached at or beyond that recall; 0.0 without ground truth
        """
        if gt_count == 0:
            return 0.0
        tp_cumsum = np.cumsum(tp)
        fp_cumsum = np.cumsum(fp)
        recalls = tp_cumsum / gt_count
        # Counts of 0/1 flags are 0 or >= 1, so clamping at 1 only guards 0/0.
        precisions = tp_cumsum / np.maximum(tp_cumsum + fp_cumsum, 1)

        ap = 0.0
        for step in range(11):
            # step / 10 is the correctly rounded level (0.3, not
            # 0.30000000000000004), so a recall of exactly 3/10 still counts.
            reached = recalls >= step / 10
            if np.any(reached):
                ap += np.max(precisions[reached]) / 11
        return float(ap)
# Usage example
if __name__ == "__main__":
    import json

    evaluator = DetectionEvaluator(iou_threshold=0.5)

    # Example 1: regular image with ground truth and predictions
    gt_boxes = {
        'cat': [[10, 10, 50, 50], [100, 100, 150, 150]],  # two ground-truth boxes for 'cat'
        'dog': [[200, 200, 250, 250]],  # one ground-truth box for 'dog'
    }
    pred_boxes = {
        'cat': [[12, 12, 48, 48, 0.9], [105, 105, 155, 155, 0.8]],  # two 'cat' predictions
        'dog': [[210, 210, 240, 240, 0.7], [300, 300, 350, 350, 0.6]],  # two 'dog' predictions
    }
    evaluator.evaluate_image(gt_boxes, pred_boxes)

    # Example 2: image without any ground truth
    gt_boxes_empty = {}
    pred_boxes_normal = {
        'cat': [[10, 10, 50, 50, 0.9]],
    }
    evaluator.evaluate_image(gt_boxes_empty, pred_boxes_normal)

    # Example 3: image without any predictions
    gt_boxes_normal = {
        'cat': [[10, 10, 50, 50]],
    }
    pred_boxes_empty = {}
    evaluator.evaluate_image(gt_boxes_normal, pred_boxes_empty)

    # Example 4: predicted classes that have no ground truth in the image.
    # (An earlier variant of this example was assigned and then overwritten
    # without ever being evaluated; only the data actually used is kept.)
    gt_boxes_with_empty_class = {
        'dog': [[200, 200, 250, 250]],
    }
    pred_boxes_normal = {
        'cat': [[10, 10, 50, 50, 0.9]],  # no 'cat' ground truth here -> false positive
        'dog': [[210, 210, 240, 240, 0.7]],
        'bird': [[10, 33, 22, 50, 0.9], [110, 323, 222, 520, 0.3]],
    }
    evaluator.evaluate_image(gt_boxes_with_empty_class, pred_boxes_normal)

    # Final metrics over all four images
    metrics = evaluator.calculate_metrics()
    print("评估结果:", json.dumps(metrics, indent=4, ensure_ascii=False))
# 以上代码均可直接运行。如需根据本地 txt 预测结果和 xml 标注结果评估检出率、误检率和 mAP,按上述输入格式组织数据后调用相应函数即可。