main.cpp
#include <iostream>
#include "opencv2/opencv.hpp"
#include "openvino/openvino.hpp"
struct DetResult {
cv::Rect bbox;
float conf;
int lable;
DetResult() {}
DetResult(cv::Rect bbox, float conf, int lable) :bbox(bbox), conf(conf), lable(lable) {}
};
void draw_bbox(cv::Mat& img, std::vector<DetResult>& res) {
for (size_t j = 0; j < res.size(); j++) {
cv::rectangle(img, res[j].bbox, cv::Scalar(255, 0, 255), 2);
cv::putText(img, std::to_string(res[j].lable) + "-" + std::to_string(res[j].conf),
cv::Point(res[j].bbox.x, res[j].bbox.y - 1), cv::FONT_HERSHEY_PLAIN,
1.2, cv::Scalar(0, 0, 255), 2);
}
}
void pre_process(cv::Mat* img, int length, float* factor, std::vector<float>& data) {
cv::Mat mat;
int rh = img->rows;
int rw = img->cols;
int rc = img->channels();
cv::cvtColor(*img, mat, cv::COLOR_BGR2RGB);
int max_image_length = rw > rh ? rw : rh;
cv::Mat max_image = cv::Mat::zeros(max_image_length, max_image_length, CV_8UC3);
max_image = max_image * 255;
cv::Rect roi(0, 0, rw, rh);
mat.copyTo(cv::Mat(max_image, roi));
cv::Mat resize_img;
cv::resize(max_image, resize_img, cv::Size(length, length), 0.0f, 0.0f, cv::INTER_LINEAR);
*factor = (float)((float)max_image_length / (float)length);
resize_img.convertTo(resize_img, CV_32FC3, 1 / 255.0);
rh = resize_img.rows;
rw = resize_img.cols;
rc = resize_img.channels();
for (int i = 0; i < rc; ++i) {
cv::extractChannel(resize_img, cv::Mat(rh, rw, CV_32FC1, data.data() + i * rh * rw), i);
}
}
std::vector<DetResult> post_process(float* result, float factor, int outputLength) {
std::vector<cv::Rect> position_boxes;
std::vector <int> class_ids;
std::vector <float> confidences;
// Preprocessing output results
for (int i = 0; i < outputLength; i++)
{
for (int j = 4; j < 4 + 80; j++)
{
float source = result[outputLength * j + i];
int label = j - 4;
if (source > 0.2)
{
float maxSource = source;
float cx = result[outputLength * 0 + i];
float cy = result[outputLength * 1 + i];
float ow = result[outputLength * 2 + i];
float oh = result[outputLength * 3 + i];
int x = (int)((cx - 0.5 * ow) * factor);
int y = (int)((cy - 0.5 * oh) * factor);
int width = (int)(ow * factor);
int height = (int)(oh * factor);
cv::Rect box(x, y, width, height);
position_boxes.push_back(box);
class_ids.push_back(label);
confidences.push_back(maxSource);
}
}
}
std::vector<int> indices;
cv::dnn::NMSBoxes(position_boxes, confidences, 0.2f, 0.5f, indices);
std::vector<DetResult> re;
for (int i = 0; i < indices.size(); i++)
{
int index = indices[i];
DetResult det(position_boxes[index], confidences[index], class_ids[index]);
re.push_back(det);
}
return re;
}
void yolov8_infer() {
std::string video_path = "store-aisle-detection.mp4";
std::string model_path = "yolov8s.onnx";
ov::Core core;
auto model = core.read_model(model_path);
auto compiled_model = core.compile_model(model, "GPU");
ov::InferRequest request = compiled_model.create_infer_request();
cv::VideoCapture capture(video_path);
if (!capture.isOpened()) {
std::cerr << "ERROR: 视频无法打开" << std::endl;
return;
}
cv::VideoWriter wri("./yolov8_infer.mp4", cv::VideoWriter::fourcc('H', '2', '6', '4'), 30,
cv::Size(capture.get(cv::CAP_PROP_FRAME_WIDTH), capture.get(cv::CAP_PROP_FRAME_HEIGHT)), true);
float factor = 0;
float total_infs[3];
request.get_input_tensor().set_shape(std::vector<size_t>{1, 3, 640, 640});
std::vector<float> inputData(640 * 640 * 3);
std::chrono::time_point<std::chrono::steady_clock> t_beg;
std::chrono::time_point<std::chrono::steady_clock> t_end;
while (true)
{
cv::Mat frame;
if (!capture.read(frame)) {
break;
}
t_beg = std::chrono::high_resolution_clock::now();
pre_process(&frame, 640, &factor, inputData);
memcpy(request.get_input_tensor().data<float>(), inputData.data(), 640 * 640 * 3);
t_end = std::chrono::high_resolution_clock::now();
total_infs[0] = std::chrono::duration<float, std::milli>(t_end - t_beg).count();
t_beg = std::chrono::high_resolution_clock::now();
request.infer();
t_end = std::chrono::high_resolution_clock::now();
total_infs[1] = std::chrono::duration<float, std::milli>(t_end - t_beg).count();
t_beg = std::chrono::high_resolution_clock::now();
float* output_data = request.get_output_tensor().data<float>();
std::vector<float> d(output_data, output_data + 84 * 8400);
std::vector<DetResult> result = post_process(output_data, factor, 8400);
t_end = std::chrono::high_resolution_clock::now();
total_infs[2] = std::chrono::duration<float, std::milli>(t_end - t_beg).count();
draw_bbox(frame, result);
cv::putText(frame, "PreProcess: " + std::to_string(1000.0 / total_infs[0]) + "FPS " + std::to_string(total_infs[0]) + "ms",
cv::Point(20, 40), 1, 2, cv::Scalar(255, 0, 255), 2);
cv::putText(frame, "Inference: " + std::to_string(1000.0 / total_infs[1]) + "FPS " + std::to_string(total_infs[1]) + "ms",
cv::Point(20, 70), 1, 2, cv::Scalar(255, 0, 255), 2);
cv::putText(frame, "PostProcess: " + std::to_string(1000.0 / total_infs[2]) + "FPS " + std::to_string(total_infs[2]) + "ms",
cv::Point(20, 100), 1, 2, cv::Scalar(255, 0, 255), 2);
cv::putText(frame, "Total: " + std::to_string(1000.0 / (total_infs[0] + total_infs[1] + total_infs[2]))
+ "FPS " + std::to_string((total_infs[0] + total_infs[1] + total_infs[2])) + "ms",
cv::Point(20, 130), 1, 2, cv::Scalar(255, 0, 255), 2);
wri << frame;
imshow("读取视频", frame);
cv::waitKey(1);
}
wri.release();
cv::destroyAllWindows();
return;
}
void yolov8_async_infer() {
std::string video_path = "demo.mp4";
std::string model_path = "yolov8s.onnx";
ov::Core core;
auto model = core.read_model(model_path);
auto compiled_model = core.compile_model(model, "GPU");
std::vector<ov::InferRequest>requests = { compiled_model.create_infer_request(), compiled_model.create_infer_request() };
cv::VideoCapture capture(video_path);
if (!capture.isOpened()) {
std::cerr << "ERROR: 视频无法打开" << std::endl;
return;
}
cv::VideoWriter wri("./yolov8_async_infer.mp4", cv::VideoWriter::fourcc('H', '2', '6', '4'), 30,
cv::Size(capture.get(cv::CAP_PROP_FRAME_WIDTH), capture.get(cv::CAP_PROP_FRAME_HEIGHT)), true);
std::chrono::time_point<std::chrono::steady_clock> t_beg;
std::chrono::time_point<std::chrono::steady_clock> t_end;
float total_infs[3];
float factor = 0;
requests[0].get_input_tensor().set_shape(std::vector<size_t>{1, 3, 640, 640});
requests[1].get_input_tensor().set_shape(std::vector<size_t>{1, 3, 640, 640});
cv::Mat frame;
capture.read(frame);
std::vector<float> inputData(640 * 640 * 3);
pre_process(&frame, 640, &factor, inputData);
memcpy(requests[0].get_input_tensor().data<float>(), inputData.data(), 640 * 640 * 3);
requests[0].start_async();
while (true)
{
cv::Mat next_frame;
if (!capture.read(next_frame)) {
break;
}
t_beg = std::chrono::high_resolution_clock::now();
pre_process(&next_frame, 640, &factor, inputData);
memcpy(requests[1].get_input_tensor().data<float>(), inputData.data(), 640 * 640 * 3);
t_end = std::chrono::high_resolution_clock::now();
total_infs[0] = std::chrono::duration<float, std::milli>(t_end - t_beg).count();
//std::cout << "The preprocess time is: " << total_infs[0] << "ms" << std::endl;
t_beg = std::chrono::high_resolution_clock::now();
requests[1].start_async();
requests[0].wait();
t_end = std::chrono::high_resolution_clock::now();
total_infs[1] = std::chrono::duration<float, std::milli>(t_end - t_beg).count();
//std::cout << "The infer time is: " << total_infs[1] << "ms" << std::endl;
t_beg = std::chrono::high_resolution_clock::now();
float* output_data = requests[0].get_output_tensor().data<float>();
std::vector<DetResult> result = post_process(output_data, factor, 8400);
t_end = std::chrono::high_resolution_clock::now();
total_infs[2] = std::chrono::duration<float, std::milli>(t_end - t_beg).count();
//std::cout << "The postprocess time is: " << total_infs[2] << "ms" << std::endl;
draw_bbox(frame, result);
cv::putText(frame, "PreProcess: " + std::to_string(1000.0 / total_infs[0]) + "FPS " + std::to_string(total_infs[0]) + "ms",
cv::Point(20, 40), 1, 2, cv::Scalar(255, 0, 255), 2);
cv::putText(frame, "Inference: " + std::to_string(1000.0 / total_infs[1]) + "FPS " + std::to_string(total_infs[1]) + "ms",
cv::Point(20, 70), 1, 2, cv::Scalar(255, 0, 255), 2);
cv::putText(frame, "PostProcess: " + std::to_string(1000.0 / total_infs[2]) + "FPS " + std::to_string(total_infs[2]) + "ms",
cv::Point(20, 100), 1, 2, cv::Scalar(255, 0, 255), 2);
cv::putText(frame, "Total: " + std::to_string(1000.0 / (total_infs[0] + total_infs[1] + total_infs[2]))
+ "FPS " + std::to_string((total_infs[0] + total_infs[1] + total_infs[2])) + "ms",
cv::Point(20, 130), 1, 2, cv::Scalar(255, 0, 255), 2);
imshow("读取视频", frame);
wri << frame;
cv::waitKey(1); //延时30
frame = next_frame;
std::swap(requests[0], requests[1]);
}
wri.release(); //释放对象
cv::destroyAllWindows();
return;
}
int main()
{
yolov8_async_infer();
}
CMakeLists.txt
cmake_minimum_required(VERSION 3.18)
project(Yolo)
set("OpenCV_DIR" "E:\\Opencv\\opencv_vs\\build")
set("OpenVINO_DIR" "E:\\Openvino\\default\\runtime\\cmake")
set(OpenCV_INCLUDE_DIRS ${OpenCV_DIR}\\include)
set(OpenCV_LIB_DIRS ${OpenCV_DIR}\\x64\\vc16\\lib)
set(OpenCV_LIB_DEBUG ${OpenCV_DIR}\\x64\\vc16\\lib\\opencv_world480d.lib)
set(OpenCV_LIB_RELEASE ${OpenCV_DIR}\\x64\\vc16\\lib\\opencv_world480.lib)
include_directories(${OpenCV_INCLUDE_DIRS})
link_directories(${OpenCV_LIB_DIRS})
find_package(OpenCV QUIET)
add_executable(Yolo ${PROJECT_SOURCE_DIR}/main.cpp)
find_package(OpenVINO COMPONENTS Runtime)
include_directories(${OpenVINO_INCLUDE_DIRS})
target_link_libraries(${PROJECT_NAME} openvino::runtime)
target_link_libraries(yolo
$<$<CONFIG:Debug>:${OpenCV_LIB_DEBUG}>
$<$<CONFIG:Release>:${OpenCV_LIB_RELEASE}>
)