yolov5_trt.h
#pragma once
#ifndef YOLOV5_TRT_H
#define YOLOV5_TRT_H
#include <opencv2/opencv.hpp>
#include <opencv2/core/cuda.hpp>
#include <string>
#include <vector>
struct Configuration
{
float confThreshold; // Confidence threshold
float nmsThreshold; // Non-maximum suppression threshold
float objThreshold; //Object Confidence threshold
std::string modelpath;
};
typedef struct BoxInfo
{
float x1;
float y1;
float x2;
float y2;
float score;
int label;
} BoxInfo;
class YOLOv5
{
public:
YOLOv5(Configuration config);
~YOLOv5();
void UnInit();
std::string detect(cv::cuda::GpuMat& frame);
private:
float confThreshold;
float nmsThreshold;
float objThreshold;
int inpWidth;
int inpHeight;
const int max_objects = 1000;
const int NUM_BOX_ELEMENT = 7;
std::vector<BoxInfo> newBbox;
std::string classes[2] = {"D", "x"};
const bool keep_ratio = true;
void normalize_(cv::cuda::GpuMat img);
void nms(std::vector<BoxInfo>& input_boxes);
cv::cuda::GpuMat resize_image(cv::cuda::GpuMat srcimg, int* newh, int* neww, int* top, int* left);
void loadTrt(const std::string strName);
int m_iInputIndex;
int m_iOutputIndex;
int m_iClassNums;
int m_iBoxNums;
cv::Size m_InputSize;
void* m_ArrayDevMemory[2]{ 0 };
float* output_device = nullptr;
float* output_host = nullptr;
int m_ArraySize[2]{ 0 };
std::vector<cv::cuda::GpuMat> m_InputWrappers;
};
#endif
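A minimal single-image usage sketch of the class declared above (main.cpp further down runs a threaded pipeline). The thresholds, engine path and image paths here are placeholders, not values from the project.

#include <opencv2/opencv.hpp>
#include <opencv2/core/cuda.hpp>
#include "yolov5_trt.h"

int main() {
    // Placeholder thresholds and engine path
    Configuration cfg{ 0.45f, 0.5f, 0.45f, "best.engine" };
    YOLOv5 model(cfg);

    cv::Mat img = cv::imread("test.jpg");
    cv::cuda::GpuMat gpuImg;
    gpuImg.upload(img);

    // detect() draws the boxes onto gpuImg and returns a warning string
    std::string warn = model.detect(gpuImg);

    cv::Mat result;
    gpuImg.download(result);
    cv::imwrite("result.jpg", result);
    return 0;
}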
yolov5_trt.cpp
#include <fstream>
#include <iostream>
#include <sys/stat.h>
#include <string>
#include <NvInfer.h>
#include <NvOnnxParser.h>
#include <cuda_runtime.h>
#include "yolov5_trt.h"
#include <opencv2/core/cuda.hpp>
#include <opencv2/core/cuda_stream_accessor.hpp>
#include <opencv2/cudaimgproc.hpp>
#include <opencv2/cudaarithm.hpp>
#include <opencv2/cudawarping.hpp>
#include <cuda_runtime_api.h>
#include "stdio.h"
extern "C"
{
#include "postprocess.cuh"
};
using namespace std;
using namespace cv;
using namespace nvinfer1;
nvinfer1::ICudaEngine* m_CudaEngine;
nvinfer1::IRuntime* m_CudaRuntime;
nvinfer1::IExecutionContext* m_CudaContext;
cudaStream_t m_CudaStream;
// Logger for TRT info/warning/errors, https://github.com/onnx/onnx-tensorrt/blob/main/onnx_trt_backend.cpp
class TRT_Logger : public nvinfer1::ILogger
{
nvinfer1::ILogger::Severity _verbosity;
std::ostream* _ostream;
public:
TRT_Logger(Severity verbosity = Severity::kWARNING, std::ostream& ostream = std::cout)
: _verbosity(verbosity)
, _ostream(&ostream)
{
}
void log(Severity severity, const char* msg) noexcept override
{
if (severity <= _verbosity)
{
time_t rawtime = std::time(0);
char buf[256];
strftime(&buf[0], 256, "%Y-%m-%d %H:%M:%S", std::gmtime(&rawtime));
const char* sevstr = (severity == Severity::kINTERNAL_ERROR ? "    BUG"
: severity == Severity::kERROR ? "  ERROR"
: severity == Severity::kWARNING ? "WARNING"
: severity == Severity::kINFO ? "   INFO"
: "UNKNOWN");
(*_ostream) << "[" << buf << " " << sevstr << "] " << msg << std::endl;
}
}
};
static bool ifFileExists(const char* FileName)
{
struct stat my_stat;
return (stat(FileName, &my_stat) == 0);
}
void YOLOv5::loadTrt(const std::string strName)
{
TRT_Logger gLogger;
m_CudaRuntime = createInferRuntime(gLogger);
std::ifstream fin(strName, std::ios::in | std::ios::binary);
std::string cached_engine = "";
while (fin.peek() != EOF)
{
std::stringstream buffer;
buffer << fin.rdbuf();
cached_engine.append(buffer.str());
}
fin.close();
m_CudaEngine = m_CudaRuntime->deserializeCudaEngine(cached_engine.data(), cached_engine.size(), nullptr);
m_CudaContext = m_CudaEngine->createExecutionContext();
m_CudaRuntime->destroy();
}
YOLOv5::YOLOv5(Configuration config)
{
confThreshold = config.confThreshold;
nmsThreshold = config.nmsThreshold;
objThreshold = config.objThreshold;
inpHeight = 640;
inpWidth = 640;
std::string model_path = config.modelpath;
std::string strTrtName = config.modelpath;
size_t sep_pos = model_path.find_last_of(".");
strTrtName = model_path.substr(0, sep_pos) + ".engine"; // ".trt"
if (ifFileExists(strTrtName.c_str()))
{
loadTrt(strTrtName);
}
else
{
//loadOnnx(config.modelpath);
}
m_iInputIndex = m_CudaEngine->getBindingIndex("images");
m_iOutputIndex = m_CudaEngine->getBindingIndex("output0");
Dims dims_i = m_CudaEngine->getBindingDimensions(m_iInputIndex);
int size1 = dims_i.d[0] * dims_i.d[1] * dims_i.d[2] * dims_i.d[3];
m_InputSize = cv::Size(dims_i.d[3], dims_i.d[2]);
Dims dims_o = m_CudaEngine->getBindingDimensions(m_iOutputIndex);
int size2 = dims_o.d[0] * dims_o.d[1] * dims_o.d[2];
m_iClassNums = dims_o.d[2] - 5; // [,,classes+5]
m_iBoxNums = dims_o.d[1]; // [b,num_pre_boxes,classes+5]
cudaMalloc(&m_ArrayDevMemory[m_iInputIndex], size1 * sizeof(float));
/*m_ArrayHostMemory[m_iInputIndex] = (float*)malloc(size1 * sizeof(float));*/
m_ArraySize[m_iInputIndex] = size1 * sizeof(float);
cudaMalloc(&m_ArrayDevMemory[m_iOutputIndex], size2 * sizeof(float));
//m_ArrayHostMemory[m_iOutputIndex] = (float*)malloc(size2 * sizeof(float));
m_ArraySize[m_iOutputIndex] = size2 * sizeof(float);
// Allocate one GPU buffer large enough for the decoded output; the first float is the box count
cudaMalloc(&output_device, sizeof(float) + max_objects * NUM_BOX_ELEMENT * sizeof(float));
// Allocate the matching host-side buffer
/*cudaMallocHost(&output_host, sizeof(float) + max_objects * NUM_BOX_ELEMENT * sizeof(float));*/
output_host = (float*)malloc(sizeof(float) + max_objects * NUM_BOX_ELEMENT * sizeof(float));
m_InputWrappers.emplace_back(dims_i.d[2], dims_i.d[3], CV_32FC1, (float*)m_ArrayDevMemory[m_iInputIndex]);
m_InputWrappers.emplace_back(dims_i.d[2], dims_i.d[3], CV_32FC1, (float*)m_ArrayDevMemory[m_iInputIndex] + dims_i.d[2] * dims_i.d[3]);
m_InputWrappers.emplace_back(dims_i.d[2], dims_i.d[3], CV_32FC1, (float*)m_ArrayDevMemory[m_iInputIndex] + 2 * dims_i.d[2] * dims_i.d[3]);
cudaStreamCreate(&m_CudaStream);
}
void YOLOv5::UnInit()
{
for (auto& p : m_ArrayDevMemory)
{
cudaFree(p);
p = nullptr;
}
/*for (auto& p : m_ArrayHostMemory)
{
free(p);
p = nullptr;
}*/
cudaStreamDestroy(m_CudaStream);
cudaFree(output_device); // free the GPU buffer holding the decoded output
free(output_host); // free the host-side output buffer
output_host = nullptr;
m_CudaContext->destroy();
m_CudaEngine->destroy();
}
YOLOv5::~YOLOv5()
{
UnInit();
}
cuda::GpuMat YOLOv5::resize_image(cuda::GpuMat srcimg, int* newh, int* neww, int* top, int* left)
{
int srch = srcimg.rows, srcw = srcimg.cols;
*newh = this->inpHeight;
*neww = this->inpWidth;
/*Mat cpusrcimg;
Mat cpudstimg;
srcimg.download(cpusrcimg);*/
cuda::GpuMat dstimg;
if (this->keep_ratio && srch != srcw) {
float hw_scale = (float)srch / srcw;
if (hw_scale > 1) {
*newh = this->inpHeight;
*neww = int(this->inpWidth / hw_scale);
cuda::resize(srcimg, dstimg, Size(*neww, *newh), 0, 0, INTER_AREA);
*left = int((this->inpWidth - *neww) * 0.5);
cuda::copyMakeBorder(dstimg, dstimg, 0, 0, *left, this->inpWidth - *neww - *left, BORDER_CONSTANT, Scalar(114, 114, 114));
}
else {
*newh = int(this->inpHeight * hw_scale);
*neww = this->inpWidth;
cuda::resize(srcimg, dstimg, Size(*neww, *newh), 0, 0, INTER_AREA);
*top = int((this->inpHeight - *newh) * 0.5);
cuda::copyMakeBorder(dstimg, dstimg, *top, this->inpHeight - *newh - *top, 0, 0, BORDER_CONSTANT, Scalar(114, 114, 114));
}
}
else {
cuda::resize(srcimg, dstimg, Size(*neww, *newh), 0, 0, INTER_AREA);
}
return dstimg;
}
string YOLOv5::detect(cuda::GpuMat& frame)
{
int newh = 0, neww = 0, padh = 0, padw = 0;
cuda::GpuMat dstimg = this->resize_image(frame, &newh, &neww, &padh, &padw);
/*cuda::Stream stream1;*/
cuda::cvtColor(dstimg, dstimg, cv::COLOR_BGR2RGB, 0);
cuda::GpuMat m_Normalized;
dstimg.convertTo(m_Normalized, CV_32FC3, 1 / 255.);
cuda::split(m_Normalized, m_InputWrappers);
//void * aa = m_InputWrappers1.data();
/*auto ret = cudaMemcpyAsync(m_ArrayDevMemory[m_iInputIndex], m_ArrayHostMemory[m_iInputIndex], m_ArraySize[m_iInputIndex], cudaMemcpyHostToDevice, m_CudaStream);*/
auto ret1 = m_CudaContext->enqueueV2(m_ArrayDevMemory, m_CudaStream, nullptr);
/*auto ret1 = m_CudaContext->executeV2(m_ArrayDevMemory);*/
/*auto ret = cudaMemcpyAsync(m_ArrayHostMemory[m_iOutputIndex], m_ArrayDevMemory[m_iOutputIndex], m_ArraySize[m_iOutputIndex], cudaMemcpyDeviceToHost, m_CudaStream);*/
/*ret = cudaStreamSynchronize(m_CudaStream);*/
// Raw network output on the GPU: m_iBoxNums rows of (m_iClassNums + 5) floats,
// laid out as cx, cy, width, height, objness, then one score per class
float* pdata = (float*)m_ArrayDevMemory[m_iOutputIndex];
std::vector<BoxInfo> generate_boxes;
float ratioh = (float)frame.rows / newh, ratiow = (float)frame.cols / neww;
cudaMemset(output_device, 0, sizeof(float) + max_objects * NUM_BOX_ELEMENT * sizeof(float));
memset(output_host, 0, sizeof(float) + max_objects * NUM_BOX_ELEMENT * sizeof(float));
decode_kernel_invoker(
pdata, m_iBoxNums, m_iClassNums, this->confThreshold,
this->nmsThreshold, nullptr, output_device, max_objects, NUM_BOX_ELEMENT, m_CudaStream, padh, padw, ratioh, ratiow);
cudaMemcpyAsync(output_host, output_device,
sizeof(float) + max_objects * NUM_BOX_ELEMENT * sizeof(float),
cudaMemcpyDeviceToHost, m_CudaStream);
cudaStreamSynchronize(m_CudaStream);
int num_boxes = min((int)output_host[0], max_objects);
for (int i = 0; i < num_boxes; i++)
{
float* ptr = output_host + 1 + NUM_BOX_ELEMENT * i;
int keep_flag = ptr[6]; // the last element is the keep flag
if (keep_flag) // True
{
generate_boxes.emplace_back(
BoxInfo{ ptr[0], ptr[1], ptr[2], ptr[3], ptr[4], (int)ptr[5] });
}
}
if (generate_boxes.size() < 2)
{
return "";
}
// Download once, draw all boxes on the CPU, then upload the annotated frame back
Mat showFrame;
frame.download(showFrame);
for (size_t i = 0; i < generate_boxes.size(); ++i)
{
int xmin = int(generate_boxes[i].x1);
int ymin = int(generate_boxes[i].y1);
rectangle(showFrame, Point(xmin, ymin), Point(int(generate_boxes[i].x2), int(generate_boxes[i].y2)), Scalar(0, 0, 255), 2);
std::string label = format("%.2f", generate_boxes[i].score);
label = this->classes[int(generate_boxes[i].label)] + ":" + label;
if (classes[int(generate_boxes[i].label)] == "x") {
// Assumed intent: report the box width in pixels as its length
int x_length = int(generate_boxes[i].x2 - generate_boxes[i].x1);
label = "length_" + to_string(x_length);
}
putText(showFrame, label, Point(xmin, ymin - 5), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 255, 0), 1);
}
frame.upload(showFrame);
return "warn"; // assumed: a non-empty string marks a frame with at least two detections
}
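The constructor above falls back to a commented-out loadOnnx() when no .engine file is found. Below is a minimal sketch of what that build step might look like, using the same TensorRT 7-era API style (createInferBuilder, buildEngineWithConfig, destroy()) as the rest of this file; the helper name buildEngineFromOnnx and the workspace size are assumptions, not the author's implementation.

// Hypothetical helper: parse an ONNX file and serialize a TensorRT engine.
static void buildEngineFromOnnx(const std::string& onnxPath, const std::string& enginePath)
{
    TRT_Logger logger;
    nvinfer1::IBuilder* builder = nvinfer1::createInferBuilder(logger);
    const auto explicitBatch =
        1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
    nvinfer1::INetworkDefinition* network = builder->createNetworkV2(explicitBatch);

    nvonnxparser::IParser* parser = nvonnxparser::createParser(*network, logger);
    parser->parseFromFile(onnxPath.c_str(),
                          static_cast<int>(nvinfer1::ILogger::Severity::kWARNING));

    nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();
    config->setMaxWorkspaceSize(1 << 28); // 256 MB, an arbitrary choice

    nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
    nvinfer1::IHostMemory* serialized = engine->serialize();

    // Write the serialized engine next to the ONNX model
    std::ofstream out(enginePath, std::ios::binary);
    out.write(reinterpret_cast<const char*>(serialized->data()), serialized->size());

    serialized->destroy();
    engine->destroy();
    config->destroy();
    parser->destroy();
    network->destroy();
    builder->destroy();
}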
postprocess.cuh
#pragma once
#include <iostream>
#include <cassert>
#include "yolov5_trt.h"
#include <vector>
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>
#include <device_launch_parameters.h>
#ifdef __cplusplus
#ifndef checkRuntime
#define checkRuntime(callstr)\
{\
cudaError_t error_code = callstr;\
if (error_code != cudaSuccess) {\
std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":" << __LINE__ << std::endl;\
assert(0);\
}\
}
#endif // checkRuntime
extern "C"
{
#endif
static __global__ void decode_kernel(
float* predict, int num_bboxes, int num_classes, float confidence_threshold,
float* invert_affine_matrix, float* parray, int max_objects, int NUM_BOX_ELEMENT, int padh, int padw, float ratioh, float ratiow);
static __global__ void fast_nms_kernel(float* bboxes, int max_objects, float threshold, int NUM_BOX_ELEMENT);
static __device__ float box_iou(
float aleft, float atop, float aright, float abottom,
float bleft, float btop, float bright, float bbottom);
/*std::vector<BoxInfo> gpu_decode(float* predict, int rows, int cols, cudaStream_t& stream, int padh, int padw, float ratioh, float ratiow,
float confidence_threshold = 0.45f, float nms_threshold = 0.45f);*/
void decode_kernel_invoker(
float* predict, int num_bboxes, int num_classes, float confidence_threshold,
float nms_threshold, float* invert_affine_matrix, float* parray, int max_objects, int NUM_BOX_ELEMENT, cudaStream_t stream, int padh, int padw, float ratioh, float ratiow);
#ifdef __cplusplus
}
#endif
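The checkRuntime macro above is defined but never used in this listing. A short hypothetical example of wrapping the CUDA runtime calls with it; the helper name is made up, and the buffer size mirrors the one allocated in the YOLOv5 constructor.

#include "postprocess.cuh"

// Hypothetical example: the same decoded-output allocation the constructor makes, but checked.
static void allocateDecodedOutput(float** output_device, int max_objects, int num_box_element)
{
    size_t bytes = sizeof(float) + max_objects * num_box_element * sizeof(float);
    checkRuntime(cudaMalloc(reinterpret_cast<void**>(output_device), bytes));
    checkRuntime(cudaMemset(*output_device, 0, bytes));
}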
postprocess.cu
#include <iostream>
#include <string>
#include "postprocess.cuh"
using namespace std;
void decode_kernel_invoker(
float* predict, int num_bboxes, int num_classes, float confidence_threshold,
float nms_threshold, float* invert_affine_matrix, float* parray, int max_objects, int NUM_BOX_ELEMENT, cudaStream_t stream, int padh, int padw, float ratioh, float ratiow) {
/*
Parameters:
predict: raw network output (the unfiltered prediction tensor)
num_bboxes: number of rows in the (n x (num_classes + 5)) tensor, i.e. number of candidate boxes
num_classes: number of classes
confidence_threshold: confidence threshold
nms_threshold: NMS IoU threshold
invert_affine_matrix: pointer to the inverse affine matrix (unused here)
parray: output array
max_objects: maximum number of boxes; set to 1000 here just to reserve enough memory
NUM_BOX_ELEMENT: elements per box: left, top, right, bottom, confidence, class, keepflag (7 in total)
stream: CUDA stream
*/
// Launch enough threads so that every candidate box gets its own thread,
// while keeping each block at no more than 512 threads.
int block = num_bboxes > 512 ? 512 : num_bboxes;
int grid = (num_bboxes + block - 1) / block;
/* The IDE may underline the kernel launch syntax; that is harmless. */
decode_kernel<<<grid, block, 0, stream>>>(
predict, num_bboxes, num_classes, confidence_threshold,
invert_affine_matrix, parray, max_objects, NUM_BOX_ELEMENT,
padh, padw, ratioh, ratiow);
//auto code1 = cudaPeekAtLastError();
//cout << cudaGetErrorString(code1) << endl;
// Synchronizing here would wait for the kernel to finish and surface any launch error:
//auto code2 = cudaDeviceSynchronize();
//cout << cudaGetErrorString(code2) << endl;
// For the NMS pass, launch enough threads so every decoded box is handled,
// again keeping each block at no more than 512 threads.
block = max_objects > 512 ? 512 : max_objects;
grid = (max_objects + block - 1) / block;
fast_nms_kernel<<<grid, block, 0, stream>>>(parray, max_objects, nms_threshold, NUM_BOX_ELEMENT);
}
static __global__ void decode_kernel(
float* predict, int num_bboxes, int num_classes, float confidence_threshold,
float* invert_affine_matrix, float* parray, int max_objects, int NUM_BOX_ELEMENT, int padh, int padw, float ratioh, float ratiow)
{
// One thread per candidate box; threads whose index exceeds the number of
// boxes return immediately, so each prediction is processed exactly once.
int position = blockDim.x * blockIdx.x + threadIdx.x;
if (position >= num_bboxes) {
return;
}
/*
predict is the base address of the n x (num_classes + 5) output tensor;
pitem points at one row: pitem[0]..pitem[3] are the box geometry, pitem[4] is objectness.
*/
/*printf("index: %d\n", index1);*/
float* pitem = predict + (num_classes + 5) * position;
/*printf("pitem: %f\n", *pitem);*/
float objectness = pitem[4];
if (objectness < confidence_threshold) {
return;
}
// From this element onward the row holds the per-class confidences
float* class_confidence = pitem + 5;
// Start with the confidence of class 0
float confidence = *class_confidence++;
// Loop over the remaining classes to find the best one
int label = 0;
for (int i = 1; i < num_classes; i++, ++class_confidence)
{
if (*class_confidence > confidence)
{
// A higher class confidence wins
confidence = *class_confidence;
label = i; // remember the best label
}
}
/*
The class confidence found above is only a conditional probability.
The final box confidence = objectness (probability of an object) x class_confidence
(conditional probability), using the largest class confidence.
*/
confidence *= objectness;
if (confidence < confidence_threshold) {
return;
}
/*
Recover the bounding box: read the center (cx, cy), width and height,
then map the corners back to original image coordinates (undo padding and scaling).
*/
float cx = *pitem++;
float cy = *pitem++;
float width = *pitem++;
float height = *pitem++;
float left = (cx - padw - 0.5 * width) * ratiow;
float top = (cy - padh - 0.5 * height) * ratioh;
float right = (cx - padw + 0.5 * width) * ratiow;
float bottom = (cy - padh + 0.5 * height) * ratioh;
/*affine_project(invert_affine_matrix, left, top, &left, &top);
affine_project(invert_affine_matrix, right, bottom, &right, &bottom);*/
/*
atomicAdd() in brief:
int atomicAdd(int* address, int val);
Adds val to the value at address and writes the result back. The operation is atomic,
so concurrent writes do not interfere with each other and the data stays correct.
Since every thread writes one bounding box to the output, an atomic counter is needed
so that each thread gets a unique output slot.
*/
/*
Output layout: [count, box1, box2, box3, ...]
Because GPU decoding is multi-threaded, count records how many boxes have been written;
a single-threaded CPU decoder would not need it, but on the GPU it guarantees that no
box is written twice or dropped.
atomicAdd -> count += 1, returning the old count.
Here parray is output_device, whose first element is the counter.
*/
int index = atomicAdd(parray, 1);
// If the slot index exceeds max_objects, this thread drops its box
if (index >= max_objects)
return;
// left, top, right, bottom, confidence, class, keepflag
float* pout_item = parray + 1 + index * NUM_BOX_ELEMENT;
*pout_item++ = left;
*pout_item++ = top;
*pout_item++ = right;
*pout_item++ = bottom;
*pout_item++ = confidence;
*pout_item++ = label;
*pout_item++ = 1; // 1 = keep, 0 = ignore
}
static __global__ void fast_nms_kernel(float* bboxes, int max_objects, float threshold, int NUM_BOX_ELEMENT)
{
/*
Parameters:
bboxes: flat array holding all decoded boxes;
max_objects: maximum number of output boxes; 1000 here, assuming a frame never exceeds that;
threshold: IoU threshold used to decide whether two boxes overlap;
NUM_BOX_ELEMENT: number of elements stored per box,
typically: left, top, right, bottom, confidence, class, keepflag
*/
// One thread per decoded box; threads beyond the decoded count have nothing to do
int position = blockDim.x * blockIdx.x + threadIdx.x;
int count = min((int)*bboxes, max_objects);
if (position >= count) {
return;
}
/*
A box is suppressed when another box of the same class overlaps it strongly and has a
higher confidence. In rare cases this can suppress a valid box; use CPU NMS when
measuring mAP, but this GPU NMS is fine for everyday inference.
Box layout: left, top, right, bottom, confidence, class, keepflag
*/
// Pointer to the box handled by this thread
float* pcurrent = bboxes + 1 + position * NUM_BOX_ELEMENT;
// Compare against every other decoded box
for (int i = 0; i < count; ++i) {
float* pitem = bboxes + 1 + i * NUM_BOX_ELEMENT;
// NMS only compares boxes of the same class
if (i == position || pcurrent[5] != pitem[5]) {
continue;
}
// If the other box has higher (or equal) confidence, it may suppress pcurrent
if (pitem[4] >= pcurrent[4]) {
// On an exact tie, keep the box with the smaller index
if (pitem[4] == pcurrent[4] && i < position) {
continue;
}
// Compute IoU from the first four elements
float iou = box_iou(
pcurrent[0], pcurrent[1], pcurrent[2], pcurrent[3],
pitem[0], pitem[1], pitem[2], pitem[3]);
if (iou > threshold) {
pcurrent[6] = 0; // pitem overlaps pcurrent beyond the threshold, so pcurrent is discarded
return;
}
}
}
}
static __device__ float box_iou(
float aleft, float atop, float aright, float abottom,
float bleft, float btop, float bright, float bbottom)
{
float cleft = max(aleft, bleft);
float ctop = max(atop, btop);
float cright = min(aright, bright);
float cbottom = min(abottom, bbottom);
float c_area = max(cright - cleft, 0.0f) * max(cbottom - ctop, 0.0f);
if (c_area == 0.0f)
return 0.0f;
float a_area = max(0.0f, aright - aleft) * max(0.0f, abottom - atop);
float b_area = max(0.0f, bright - bleft) * max(0.0f, bbottom - btop);
return c_area / (a_area + b_area - c_area);
}
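A small, hypothetical host-side check for decode_kernel_invoker, compiled with nvcc alongside postprocess.cu. It feeds one hand-made candidate row (one class, no letterbox padding, scale 1) through the decode pass and prints how many boxes survive; all values are made up for illustration.

#include <iostream>
#include <cuda_runtime.h>
#include "postprocess.cuh"

int main()
{
    const int num_classes = 1, num_bboxes = 1;
    const int num_box_element = 7, max_objects = 10;
    const int out_floats = 1 + max_objects * num_box_element;

    // One candidate row: cx, cy, w, h, objectness, class-0 score
    float h_pred[6] = { 320.f, 320.f, 100.f, 60.f, 0.9f, 0.8f };
    float h_out[out_floats] = { 0 };

    float *d_pred = nullptr, *d_out = nullptr;
    cudaMalloc(&d_pred, sizeof(h_pred));
    cudaMalloc(&d_out, out_floats * sizeof(float));
    cudaMemcpy(d_pred, h_pred, sizeof(h_pred), cudaMemcpyHostToDevice);
    cudaMemset(d_out, 0, out_floats * sizeof(float));

    // No padding, ratio 1:1, default stream
    decode_kernel_invoker(d_pred, num_bboxes, num_classes, 0.45f, 0.5f,
                          nullptr, d_out, max_objects, num_box_element,
                          0, 0, 0, 1.f, 1.f);

    cudaDeviceSynchronize();
    cudaMemcpy(h_out, d_out, out_floats * sizeof(float), cudaMemcpyDeviceToHost);
    std::cout << "decoded boxes: " << (int)h_out[0] << std::endl; // expect 1

    cudaFree(d_pred);
    cudaFree(d_out);
    return 0;
}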
main.cpp
#include <iostream>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/opencv.hpp>
#include <string>
#include <filesystem>
#include <opencv2/core/cuda.hpp>
#include <opencv2/core/cuda_stream_accessor.hpp>
#include <opencv2/cudaimgproc.hpp>
#include <opencv2/cudaarithm.hpp>
#include <opencv2/cudawarping.hpp>
#include <fstream>
#include <queue>
#include <thread>
#include <mutex>
#include <condition_variable>
#include <ctime>
namespace fs = std::filesystem;
#include <windows.h>
using namespace cv;
using namespace std;
#include "yolov5_trt.h"
std::condition_variable r_cond;
std::condition_variable w_cond;
queue<Mat> srcImageQ;
queue<cuda::GpuMat> wImageQ;
clock_t startTime, endTime;
bool stop = false;
bool dstop = false;
bool sstop = false;
int totalFrame = 0;
// Maximum queue (buffer) size
const int MAX_CACHEDATA_LENGTH = 50;
mutex m1;
mutex m2;
int nnum = 0;
Configuration yolo_nets = { 0.45, 0.5, 0.45,"C:/0WORK/project/FB/exp2/weights/best.engine" };
YOLOv5 yolo_model(yolo_nets);
void writeImageToFile(const std::string& filepath, const std::vector<uchar>& imgData, size_t length) {
// Open the output file
std::ofstream outputFile(filepath, std::ios::out | std::ios::binary);
// Make sure the file opened successfully
if (!outputFile.is_open()) {
std::cerr << "Failed to open file for writing." << std::endl;
return;
}
// Write the image data to the file
outputFile.write(reinterpret_cast<const char*>(imgData.data()), static_cast<int>(length));
// Close the file
outputFile.close();
// Check whether the write succeeded
/*if (!outputFile) {
std::cerr << "Failed to write image data to file." << std::endl;
return;
}*/
/*std::cout << "Image data successfully written to file: " << filepath << std::endl;*/
}
void readVideo(string dirString) {
VideoCapture cap(dirString);
if (!cap.isOpened()) {
std::cerr << "Error opening video file" << std::endl;
return;
}
//cv::namedWindow("YOLOv5 Object Detection", cv::WINDOW_NORMAL);
int totalFrames = cap.get(cv::CAP_PROP_FRAME_COUNT);
totalFrame = totalFrames;
/*cv::Mat frame;*/
/*int count = 0;*/
while (true) {
cv::Mat frame;
if (!cap.read(frame)) {
// Exit the loop when no more frames can be read
break;
}
int width = frame.cols;
int height = frame.rows;
int channels = frame.channels();
/*if ((count % 3) != 0) {
count += 1;
continue;}*/
unique_lock<mutex> rl(m1);
if (srcImageQ.size() > MAX_CACHEDATA_LENGTH) {
r_cond.wait(rl);
}
srcImageQ.push(frame);
}
//while (cap.read(frame)) {
// /*if ((count % 3) != 0) {
// count += 1;
// continue;*/
// m1.lock();
// srcImageQ.push(frame);
// m1.unlock();
// Sleep(0.5);
//}
stop = true;
}
void readImage(string searchPath) {
for (const auto& entry : fs::directory_iterator(searchPath)) {
fs::path dirPath = entry.path();
string dirString = dirPath.string();
int length = dirString.length();
if (dirString.substr(length - 4) != ".jpg") { continue; }
Mat srcImage = imread(dirString);
unique_lock<mutex> rl(m1);
if (srcImageQ.size() > MAX_CACHEDATA_LENGTH) {
r_cond.wait(rl);
}
/*m1.lock();*/
srcImageQ.push(srcImage);
/*m1.unlock();*/
/*cout << "read image queue " << srcImageQ.size() << endl;*/
/*Sleep(0.5);*/
}
stop = true;
}
void detImage() {
while (true) {
cout << "srcImageQ " << srcImageQ.size() << endl;
if (!srcImageQ.empty()) {
unique_lock<mutex> rl(m1);
Mat srcImage = srcImageQ.front();
srcImageQ.pop();
rl.unlock();
r_cond.notify_one();
cuda::GpuMat gpuFrame;
gpuFrame.upload(srcImage);
yolo_model.detect(gpuFrame);
unique_lock<mutex> r2(m2);
if (wImageQ.size() > MAX_CACHEDATA_LENGTH) {
w_cond.wait(r2);
}
wImageQ.push(gpuFrame);
}
if (srcImageQ.empty() && stop) {
dstop = true;
break;
}
}
}
void writeImage() {
while (true) {
if (!wImageQ.empty()) {
cuda::GpuMat image = wImageQ.front();
Mat wImage;
image.download(wImage);
/*m2.lock();*/
wImageQ.pop();
/*m2.unlock();*/
w_cond.notify_one();
// Alternative: encode the image into a memory buffer and write it with writeImageToFile
//std::vector<uchar> buffer;
//cv::imencode(".jpg", wImage, buffer);
//writeImageToFile("C:/0WORK/project/FB/mp4/warn/" + to_string(count) + ".jpg", buffer, buffer.size());
imwrite("C:/0WORK/project/FB/mp4/warn2/" + to_string(nnum) + ".jpg", wImage);
/*cout << "imwrite image:" << count << endl;*/
nnum++;
}
cout << "read image queue " << srcImageQ.size() << " write image queue " << wImageQ.size() << endl;
/*if (srcImageQ.size() > 2000) {
int a = 0;
}*/
if (wImageQ.empty() && dstop) {
break;
}
}
}
int main() {
/*string searchPath = "C:/0WORK/project/FB/fbData/JPEGImages";*/
string dirString = "C:/0WORK/project/FB/mp4/FB/";
for (const auto& entry : fs::directory_iterator(dirString)) {
fs::path dirPath = entry.path();
string dirString = dirPath.string();
startTime = clock();
thread read(readVideo, dirString);
read.detach();
endTime = clock();
cout << "detection infer time:" << double(endTime - startTime) / totalFrame << "ms" << endl;
cout << "totalFrame:" << totalFrame << endl;
}
thread detect(detImage);
thread write(writeImage);
detect.detach();
write.join();
}