YOLOv8 Video Inference with OpenVINO

Published: 2025-07-09

main.cpp

#include <chrono>
#include <cstring>
#include <iostream>
#include <vector>
#include "opencv2/opencv.hpp"
#include "openvino/openvino.hpp"
struct DetResult {
    cv::Rect bbox;
    float conf;
    int label;
    DetResult() {}
    DetResult(cv::Rect bbox, float conf, int label) : bbox(bbox), conf(conf), label(label) {}
};
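// Draw each detection's bounding box and a "label-confidence" caption onto the frame.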
void draw_bbox(cv::Mat& img, std::vector<DetResult>& res) {
    for (size_t j = 0; j < res.size(); j++) {
        cv::rectangle(img, res[j].bbox, cv::Scalar(255, 0, 255), 2);
        cv::putText(img, std::to_string(res[j].label) + "-" + std::to_string(res[j].conf),
            cv::Point(res[j].bbox.x, res[j].bbox.y - 1), cv::FONT_HERSHEY_PLAIN,
            1.2, cv::Scalar(0, 0, 255), 2);
    }
}
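// Letterbox-style preprocessing: convert the BGR frame to RGB, pad it onto a square canvas,
// resize to length x length, scale pixels to [0, 1] and write the planar CHW result into `data`.
// `factor` records the scale needed to map detections back to the original image.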
void pre_process(cv::Mat* img, int length, float* factor, std::vector<float>& data) {
    cv::Mat mat;
    int rh = img->rows;
    int rw = img->cols;
    int rc = img->channels();
    cv::cvtColor(*img, mat, cv::COLOR_BGR2RGB);
    int max_image_length = rw > rh ? rw : rh;
    // Zero-filled (black) square canvas; the frame is copied into its top-left corner.
    cv::Mat max_image = cv::Mat::zeros(max_image_length, max_image_length, CV_8UC3);
    cv::Rect roi(0, 0, rw, rh);
    mat.copyTo(cv::Mat(max_image, roi));
    cv::Mat resize_img;
    cv::resize(max_image, resize_img, cv::Size(length, length), 0.0f, 0.0f, cv::INTER_LINEAR);
    *factor = (float)((float)max_image_length / (float)length);
    resize_img.convertTo(resize_img, CV_32FC3, 1 / 255.0);
    rh = resize_img.rows;
    rw = resize_img.cols;
    rc = resize_img.channels();
    for (int i = 0; i < rc; ++i) {
        cv::extractChannel(resize_img, cv::Mat(rh, rw, CV_32FC1, data.data() + i * rh * rw), i);
    }
}
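// Decode the raw YOLOv8 output. The buffer is laid out as [84 x outputLength] in row-major
// order: rows 0-3 hold cx, cy, w, h and rows 4-83 hold the 80 class scores for each of the
// outputLength candidates (8400 for a 640x640 input). Candidates above the 0.2 score
// threshold are rescaled to the original image with `factor` and then filtered with NMS.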
std::vector<DetResult> post_process(float* result, float factor, int outputLength) {
    std::vector<cv::Rect> position_boxes;
    std::vector <int> class_ids;
    std::vector <float> confidences;
    // Collect every candidate whose class score exceeds the confidence threshold (0.2)
    for (int i = 0; i < outputLength; i++)
    {
        for (int j = 4; j < 4 + 80; j++)
        {
            float source = result[outputLength * j + i];
            int label = j - 4;
            if (source > 0.2)
            {
                float maxSource = source;
                float cx = result[outputLength * 0 + i];
                float cy = result[outputLength * 1 + i];
                float ow = result[outputLength * 2 + i];
                float oh = result[outputLength * 3 + i];
                int x = (int)((cx - 0.5 * ow) * factor);
                int y = (int)((cy - 0.5 * oh) * factor);
                int width = (int)(ow * factor);
                int height = (int)(oh * factor);
                cv::Rect box(x, y, width, height);
                position_boxes.push_back(box);
                class_ids.push_back(label);
                confidences.push_back(maxSource);
            }
        }
    }
    std::vector<int> indices;
    cv::dnn::NMSBoxes(position_boxes, confidences, 0.2f, 0.5f, indices);
    std::vector<DetResult> re;
    for (size_t i = 0; i < indices.size(); i++)
    {
        int index = indices[i];
        DetResult det(position_boxes[index], confidences[index], class_ids[index]);
        re.push_back(det);
    }
    return re;
}
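// Synchronous pipeline: read a frame, preprocess it, run a blocking infer(), decode the
// detections, then draw the boxes and per-stage timings before writing and showing the frame.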
void yolov8_infer() {
    std::string video_path = "store-aisle-detection.mp4";
    std::string model_path = "yolov8s.onnx";
    ov::Core core;
    auto model = core.read_model(model_path);
    auto compiled_model = core.compile_model(model, "GPU");
    ov::InferRequest request = compiled_model.create_infer_request();
    cv::VideoCapture capture(video_path);
    if (!capture.isOpened()) {
        std::cerr << "ERROR: 视频无法打开" << std::endl;
        return;
    }
    cv::VideoWriter wri("./yolov8_infer.mp4", cv::VideoWriter::fourcc('H', '2', '6', '4'), 30,
        cv::Size(capture.get(cv::CAP_PROP_FRAME_WIDTH), capture.get(cv::CAP_PROP_FRAME_HEIGHT)), true);
    float factor = 0;
    float total_infs[3];
    request.get_input_tensor().set_shape(std::vector<size_t>{1, 3, 640, 640});
    std::vector<float> inputData(640 * 640 * 3);
    std::chrono::time_point<std::chrono::steady_clock> t_beg;
    std::chrono::time_point<std::chrono::steady_clock> t_end;
    while (true)
    {
        cv::Mat frame;
        if (!capture.read(frame)) {
            break;
        }
        t_beg = std::chrono::steady_clock::now();
        pre_process(&frame, 640, &factor, inputData);
        // Copy the full CHW float buffer into the input tensor (note the sizeof(float) factor).
        memcpy(request.get_input_tensor().data<float>(), inputData.data(), 640 * 640 * 3 * sizeof(float));
        t_end = std::chrono::steady_clock::now();
        total_infs[0] = std::chrono::duration<float, std::milli>(t_end - t_beg).count();
        t_beg = std::chrono::steady_clock::now();
        request.infer();
        t_end = std::chrono::steady_clock::now();
        total_infs[1] = std::chrono::duration<float, std::milli>(t_end - t_beg).count();
        t_beg = std::chrono::steady_clock::now();
        float* output_data = request.get_output_tensor().data<float>();
        std::vector<DetResult> result = post_process(output_data, factor, 8400);
        t_end = std::chrono::steady_clock::now();
        total_infs[2] = std::chrono::duration<float, std::milli>(t_end - t_beg).count();
        draw_bbox(frame, result);
        cv::putText(frame, "PreProcess: " + std::to_string(1000.0 / total_infs[0]) + "FPS  " + std::to_string(total_infs[0]) + "ms",
            cv::Point(20, 40), 1, 2, cv::Scalar(255, 0, 255), 2);
        cv::putText(frame, "Inference: " + std::to_string(1000.0 / total_infs[1]) + "FPS  " + std::to_string(total_infs[1]) + "ms",
            cv::Point(20, 70), 1, 2, cv::Scalar(255, 0, 255), 2);
        cv::putText(frame, "PostProcess: " + std::to_string(1000.0 / total_infs[2]) + "FPS  " + std::to_string(total_infs[2]) + "ms",
            cv::Point(20, 100), 1, 2, cv::Scalar(255, 0, 255), 2);
        cv::putText(frame, "Total: " + std::to_string(1000.0 / (total_infs[0] + total_infs[1] + total_infs[2]))
            + "FPS   " + std::to_string((total_infs[0] + total_infs[1] + total_infs[2])) + "ms",
            cv::Point(20, 130), 1, 2, cv::Scalar(255, 0, 255), 2);
        wri << frame;
        imshow("读取视频", frame);
        cv::waitKey(1);	
    }
    wri.release();  
    cv::destroyAllWindows();
    return;
}
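// Double-buffered asynchronous pipeline: while one infer request runs on the device,
// the CPU preprocesses the next frame and queues it on the second request; the two
// requests are swapped at the end of every iteration so the stages overlap.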
void yolov8_async_infer() {
    std::string video_path = "demo.mp4";
    std::string model_path = "yolov8s.onnx";
    ov::Core core;
    auto model = core.read_model(model_path);
    auto compiled_model = core.compile_model(model, "GPU");
    std::vector<ov::InferRequest>requests = { compiled_model.create_infer_request(), compiled_model.create_infer_request() };
    cv::VideoCapture capture(video_path);
    if (!capture.isOpened()) {
        std::cerr << "ERROR: 视频无法打开" << std::endl;
        return;
    }
    cv::VideoWriter wri("./yolov8_async_infer.mp4", cv::VideoWriter::fourcc('H', '2', '6', '4'), 30,
        cv::Size(capture.get(cv::CAP_PROP_FRAME_WIDTH), capture.get(cv::CAP_PROP_FRAME_HEIGHT)), true);
    std::chrono::time_point<std::chrono::steady_clock> t_beg;
    std::chrono::time_point<std::chrono::steady_clock> t_end;
    float total_infs[3];
    float factor = 0;
    requests[0].get_input_tensor().set_shape(std::vector<size_t>{1, 3, 640, 640});
    requests[1].get_input_tensor().set_shape(std::vector<size_t>{1, 3, 640, 640});
    cv::Mat frame;
    if (!capture.read(frame)) {
        return;
    }
    std::vector<float> inputData(640 * 640 * 3);
    // Prime the pipeline: preprocess the first frame and start its inference asynchronously.
    pre_process(&frame, 640, &factor, inputData);
    memcpy(requests[0].get_input_tensor().data<float>(), inputData.data(), 640 * 640 * 3 * sizeof(float));
    requests[0].start_async();
    while (true)
    {
        cv::Mat next_frame;
        if (!capture.read(next_frame)) {
            break;
        }
        t_beg = std::chrono::steady_clock::now();
        pre_process(&next_frame, 640, &factor, inputData);
        memcpy(requests[1].get_input_tensor().data<float>(), inputData.data(), 640 * 640 * 3 * sizeof(float));
        t_end = std::chrono::steady_clock::now();
        total_infs[0] = std::chrono::duration<float, std::milli>(t_end - t_beg).count();
        //std::cout << "The preprocess time is: " << total_infs[0] << "ms" << std::endl;
        t_beg = std::chrono::steady_clock::now();
        // Start inference for the next frame, then wait for the current frame's result.
        requests[1].start_async();
        requests[0].wait();
        t_end = std::chrono::steady_clock::now();
        total_infs[1] = std::chrono::duration<float, std::milli>(t_end - t_beg).count();
        //std::cout << "The infer time is: " << total_infs[1] << "ms" << std::endl;
        t_beg = std::chrono::steady_clock::now();
        float* output_data = requests[0].get_output_tensor().data<float>();
        std::vector<DetResult> result = post_process(output_data, factor, 8400);
        t_end = std::chrono::steady_clock::now();
        total_infs[2] = std::chrono::duration<float, std::milli>(t_end - t_beg).count();
        //std::cout << "The postprocess time is: " << total_infs[2] << "ms" << std::endl;
        draw_bbox(frame, result);
        cv::putText(frame, "PreProcess: " + std::to_string(1000.0 / total_infs[0]) + "FPS  " + std::to_string(total_infs[0]) + "ms",
            cv::Point(20, 40), 1, 2, cv::Scalar(255, 0, 255), 2);
        cv::putText(frame, "Inference: " + std::to_string(1000.0 / total_infs[1]) + "FPS  " + std::to_string(total_infs[1]) + "ms",
            cv::Point(20, 70), 1, 2, cv::Scalar(255, 0, 255), 2);
        cv::putText(frame, "PostProcess: " + std::to_string(1000.0 / total_infs[2]) + "FPS  " + std::to_string(total_infs[2]) + "ms",
            cv::Point(20, 100), 1, 2, cv::Scalar(255, 0, 255), 2);
        cv::putText(frame, "Total: " + std::to_string(1000.0 / (total_infs[0] + total_infs[1] + total_infs[2]))
            + "FPS   " + std::to_string((total_infs[0] + total_infs[1] + total_infs[2])) + "ms",
            cv::Point(20, 130), 1, 2, cv::Scalar(255, 0, 255), 2);
        imshow("读取视频", frame);
        wri << frame;
        cv::waitKey(1);	//延时30
        frame = next_frame;
        std::swap(requests[0], requests[1]);
    }
    wri.release();  // release the video writer
    cv::destroyAllWindows();
    return;
}
int main()
{
    yolov8_async_infer();
}
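
Both functions compile the model for "GPU", which assumes an Intel GPU with the OpenVINO GPU plugin available; without one, compile_model throws. A minimal sketch, assuming the standard OpenVINO 2.x API and a hypothetical compile_with_fallback helper (not part of the code above), that checks the devices the runtime actually reports and falls back to "CPU":

// Hypothetical helper (not in the original code): compile for GPU when the plugin
// reports one, otherwise fall back to CPU.
static ov::CompiledModel compile_with_fallback(ov::Core& core,
                                               const std::shared_ptr<ov::Model>& model) {
    for (const std::string& device : core.get_available_devices()) {
        if (device.find("GPU") != std::string::npos) {
            return core.compile_model(model, "GPU");
        }
    }
    return core.compile_model(model, "CPU");
}

In yolov8_infer and yolov8_async_infer this would replace core.compile_model(model, "GPU") with compile_with_fallback(core, model).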

CMakeLists.txt

cmake_minimum_required(VERSION 3.18)
project(Yolo)
set("OpenCV_DIR" "E:\\Opencv\\opencv_vs\\build")
set("OpenVINO_DIR" "E:\\Openvino\\default\\runtime\\cmake")
set(OpenCV_INCLUDE_DIRS ${OpenCV_DIR}\\include)
set(OpenCV_LIB_DIRS ${OpenCV_DIR}\\x64\\vc16\\lib) 
set(OpenCV_LIB_DEBUG ${OpenCV_DIR}\\x64\\vc16\\lib\\opencv_world480d.lib) 
set(OpenCV_LIB_RELEASE ${OpenCV_DIR}\\x64\\vc16\\lib\\opencv_world480.lib)  
include_directories(${OpenCV_INCLUDE_DIRS}) 
link_directories(${OpenCV_LIB_DIRS})  
find_package(OpenCV QUIET)		
add_executable(Yolo ${PROJECT_SOURCE_DIR}/main.cpp)   
find_package(OpenVINO COMPONENTS Runtime)
include_directories(${OpenVINO_INCLUDE_DIRS})
target_link_libraries(${PROJECT_NAME} openvino::runtime)
target_link_libraries(${PROJECT_NAME}
    $<$<CONFIG:Debug>:${OpenCV_LIB_DEBUG}>
    $<$<CONFIG:Release>:${OpenCV_LIB_RELEASE}>
)

