本篇记录了如何使用 onnxruntime-gpu 和 OpenCV 对导出的 YOLOv5 ONNX 模型进行推理,支持:
- ✅ CUDA 加速
- ✅ 自动 Letterbox 缩放与填充
- ✅ 非极大值抑制(NMS)
- ✅ 输出坐标自动还原到原图尺寸
- ✅ 检测结果可视化(在图像上绘制检测框、类别编号与置信度)
📦 依赖安装
pip install onnxruntime-gpu opencv-python numpy
🧠 推理代码(支持 YOLOv5 ONNX)
import onnxruntime as ort
import numpy as np
import cv2
import time
class YOLOv5ONNX:
    """Run a YOLOv5 model exported to ONNX through onnxruntime.

    Pipeline: letterbox resize -> BGR to RGB, scale to [0, 1], NCHW layout ->
    ``session.run`` -> confidence filtering -> NMS -> map boxes back onto the
    original image.
    """

    def __init__(self, model_path, input_size=(640, 640), providers=None):
        """Create the inference session.

        Args:
            model_path: Path to the ``.onnx`` model file.
            input_size: Network input size as ``(width, height)``.
            providers: onnxruntime execution providers in priority order.
                ``None`` selects ``['CUDAExecutionProvider']``.
        """
        # A None sentinel avoids sharing one mutable default list across calls.
        if providers is None:
            providers = ['CUDAExecutionProvider']
        self.input_size = input_size  # (w, h)
        self.session = ort.InferenceSession(model_path, providers=providers)
        self.input_name = self.session.get_inputs()[0].name

    def letterbox(self, image, new_shape=(640, 640), color=(114, 114, 114)):
        """Resize ``image`` keeping its aspect ratio, then pad to ``new_shape``.

        Args:
            image: Image array whose first two dimensions are (height, width).
            new_shape: Target size as ``(width, height)``.
            color: Constant border fill value.

        Returns:
            ``(padded, r, dw, dh)``: the padded image, the resize factor, and
            the per-side horizontal / vertical padding (may be fractional).
        """
        shape = image.shape[:2]  # current shape [height, width]
        r = min(new_shape[1] / shape[0], new_shape[0] / shape[1])
        # Clamp to 1 px so extreme aspect ratios never request an empty resize.
        new_unpad = (
            max(1, int(round(shape[1] * r))),
            max(1, int(round(shape[0] * r))),
        )  # (w, h)
        # Total padding is split evenly between the two sides.
        dw = (new_shape[0] - new_unpad[0]) / 2
        dh = (new_shape[1] - new_unpad[1]) / 2
        resized = cv2.resize(image, new_unpad, interpolation=cv2.INTER_LINEAR)
        # The -/+0.1 nudge sends the odd pixel to the bottom/right side when
        # the total padding is odd, so the output is exactly new_shape.
        top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
        left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
        padded = cv2.copyMakeBorder(
            resized, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color
        )
        return padded, r, dw, dh

    def preprocess(self, img_bgr):
        """Turn a BGR image into a float32 ``(1, C, H, W)`` tensor in [0, 1].

        Returns:
            ``(tensor, r, dw, dh)`` where ``r``, ``dw`` and ``dh`` are the
            letterbox parameters needed to undo the transform later.
        """
        img, r, dw, dh = self.letterbox(img_bgr, self.input_size)
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_rgb = img_rgb.astype(np.float32) / 255.0
        img_rgb = np.transpose(img_rgb, (2, 0, 1))  # HWC to CHW
        img_rgb = np.expand_dims(img_rgb, axis=0)  # add batch dimension
        # The transpose yields a strided view; hand the runtime a packed buffer.
        img_rgb = np.ascontiguousarray(img_rgb)
        return img_rgb, r, dw, dh

    def scale_coords(self, box, r, dw, dh, orig_shape):
        """Map one ``(x1, y1, x2, y2)`` box from letterboxed to original pixels.

        Args:
            box: Box corners in the letterboxed image.
            r: Resize factor returned by ``letterbox``.
            dw: Horizontal per-side padding returned by ``letterbox``.
            dh: Vertical per-side padding returned by ``letterbox``.
            orig_shape: Original image shape as ``(height, width)``.

        Returns:
            ``(x1, y1, x2, y2)`` clipped to the original image bounds.
        """
        # Undo the padding first, then the resize.
        x1 = (box[0] - dw) / r
        y1 = (box[1] - dh) / r
        x2 = (box[2] - dw) / r
        y2 = (box[3] - dh) / r
        x1 = np.clip(x1, 0, orig_shape[1])
        y1 = np.clip(y1, 0, orig_shape[0])
        x2 = np.clip(x2, 0, orig_shape[1])
        y2 = np.clip(y2, 0, orig_shape[0])
        return x1, y1, x2, y2

    def nms(self, boxes, scores, iou_threshold):
        """Greedy non-maximum suppression.

        Args:
            boxes: ``(N, 4)`` array of ``(x1, y1, x2, y2)`` corners.
            scores: ``(N,)`` array of scores.
            iou_threshold: A box is dropped when its IoU with an already kept,
                higher-scoring box is greater than or equal to this value.

        Returns:
            List of kept row indices (plain ``int``), highest score first.
        """
        x1, y1, x2, y2 = boxes.T
        areas = (x2 - x1) * (y2 - y1)
        order = scores.argsort()[::-1]
        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(int(i))
            rest = order[1:]
            if rest.size == 0:
                break
            xx1 = np.maximum(x1[i], x1[rest])
            yy1 = np.maximum(y1[i], y1[rest])
            xx2 = np.minimum(x2[i], x2[rest])
            yy2 = np.minimum(y2[i], y2[rest])
            inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
            union = areas[i] + areas[rest] - inter
            # Degenerate (zero-area) pairs have an empty union; treat their
            # IoU as 0 instead of dividing by zero and producing NaN.
            iou = np.divide(inter, union, out=np.zeros_like(inter), where=union > 0)
            order = rest[iou < iou_threshold]
        return keep

    def postprocess(self, outputs, orig_shape, r, dw, dh, conf_thres=0.25, iou_thres=0.45):
        """Convert raw model output into a list of detections.

        Rows of ``outputs[0][0]`` are read as
        ``[cx, cy, w, h, objectness, class_0, class_1, ...]`` in letterboxed
        pixel coordinates (e.g. shape ``(1, 25200, 85)``).

        Args:
            outputs: Sequence returned by ``session.run``; only ``outputs[0]``
                is used.
            orig_shape: Original image shape as ``(height, width)``.
            r: Resize factor returned by ``letterbox``.
            dw: Horizontal per-side padding returned by ``letterbox``.
            dh: Vertical per-side padding returned by ``letterbox``.
            conf_thres: Minimum objectness AND minimum final score.
            iou_thres: IoU threshold passed to ``nms``.

        Returns:
            List of ``(x1, y1, x2, y2, score, class_id)`` tuples in original
            image pixels. NMS is applied across all classes together.
        """
        pred = np.squeeze(outputs[0], axis=0)  # (N, 5 + num_classes)

        # Cheap first pass on objectness to shrink the candidate set.
        pred = pred[pred[:, 4] > conf_thres]
        if len(pred) == 0:
            return []

        class_scores = pred[:, 5:]
        class_ids = np.argmax(class_scores, axis=1)
        class_conf = class_scores[np.arange(len(pred)), class_ids]
        final_scores = pred[:, 4] * class_conf

        # The reported score is objectness * class confidence, so it can fall
        # below the threshold even when objectness alone passed; filter again.
        confident = final_scores > conf_thres
        if not confident.any():
            return []
        pred = pred[confident]
        final_scores = final_scores[confident]
        class_ids = class_ids[confident]

        # Centre/size -> corner format.
        boxes = pred[:, :4]
        boxes_xyxy = np.zeros_like(boxes)
        boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2  # x1
        boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2  # y1
        boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2  # x2
        boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2  # y2

        keep = self.nms(boxes_xyxy, final_scores, iou_thres)
        boxes_xyxy = boxes_xyxy[keep]
        final_scores = final_scores[keep]
        class_ids = class_ids[keep]

        results = []
        for box, score, cls in zip(boxes_xyxy, final_scores, class_ids):
            x1, y1, x2, y2 = self.scale_coords(box, r, dw, dh, orig_shape)
            results.append((int(x1), int(y1), int(x2), int(y2), float(score), int(cls)))
        return results

    def infer(self, img_bgr):
        """Run the full pipeline on one BGR image.

        Args:
            img_bgr: Image array in BGR channel order.

        Returns:
            List of ``(x1, y1, x2, y2, score, class_id)`` tuples.

        Raises:
            ValueError: If ``img_bgr`` is ``None`` (e.g. a failed image read).
        """
        if img_bgr is None:
            raise ValueError("img_bgr is None; the image could not be loaded")
        input_tensor, r, dw, dh = self.preprocess(img_bgr)
        outputs = self.session.run(None, {self.input_name: input_tensor})
        return self.postprocess(outputs, img_bgr.shape[:2], r, dw, dh)
🔍 推理与可视化示例
if __name__ == "__main__":
    # Demo: detect objects in one image, draw the results, and show them.
    model_path = "weights/best.onnx"  # replace with the path to your ONNX model
    image_path = "data/images/bus.jpg"  # replace with your test image

    detector = YOLOv5ONNX(model_path)

    img = cv2.imread(image_path)
    # cv2.imread signals a missing or unreadable file by returning None
    # rather than raising, so fail here with a clear message.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # perf_counter is monotonic and high-resolution, unlike time.time().
    # The measured span is the whole infer() call.
    t0 = time.perf_counter()
    results = detector.infer(img)
    print(f"Inference time: {time.perf_counter() - t0:.3f}s")

    for (x1, y1, x2, y2, score, cls) in results:
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
        # Keep the label on-screen when the box touches the top edge.
        label_y = max(y1 - 10, 15)
        cv2.putText(img, f"{cls}:{score:.2f}", (x1, label_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (36, 255, 12), 1)

    cv2.imshow("YOLOv5 ONNX", img)
    cv2.waitKey(0)  # block until a key is pressed
    cv2.destroyAllWindows()
📝 总结
本脚本支持加载 YOLOv5 的 ONNX 模型,自动完成:
- 图像 letterbox 预处理
- 模型推理(支持 GPU 加速)
- 输出坐标还原
- 置信度过滤 + NMS 后处理
- 可视化结果绘制