Onnxruntime推理Yolov8-seg

🕗 发布于 2024-12-12 14:37 YOLO 计算机视觉 opencv

python

import cv2
import math
import numpy as np
import onnxruntime as ort
def sigmoid(x):
    """Return the logistic function ``1 / (1 + e**-x)`` of *x*.

    Works on scalars and NumPy arrays alike; for arrays the function is
    applied element-wise and the result has the same shape as the input.
    """
    decay = np.exp(-x)
    return 1.0 / (1 + decay)
def readClassesNames(file_path):
    """Read a UTF-8 text file and return its lines as a list of names.

    Every line of the file yields one entry; leading and trailing
    whitespace (including the newline) is stripped from each entry.
    """
    with open(file_path, encoding='utf-8') as names_file:
        return [raw_line.strip() for raw_line in names_file]
def xywh2xyxy(x):
    """Convert boxes from (cx, cy, w, h) to (x1, y1, x2, y2).

    The last axis of *x* holds the four box values. The input is left
    untouched; a converted copy with the same shape and dtype is returned.
    """
    half_w = x[..., 2] / 2
    half_h = x[..., 3] / 2
    corners = np.copy(x)
    # Top-left corner.
    corners[..., 0] = x[..., 0] - half_w
    corners[..., 1] = x[..., 1] - half_h
    # Bottom-right corner.
    corners[..., 2] = x[..., 0] + half_w
    corners[..., 3] = x[..., 1] + half_h
    return corners
# ---------------------------------------------------------------------------
# YOLOv8-seg inference with ONNX Runtime: load labels/image/model, run the
# network, decode boxes + masks, and display the blended result.
# ---------------------------------------------------------------------------
classes_names = 'coco.names'
classes = readClassesNames(classes_names)
rng = np.random.default_rng(3)
# One colour per class: size the table by the number of labels, not by the
# length of the file-name string.
colors = rng.uniform(0, 255, size=(len(classes), 3))
image = cv2.imread('bus.jpg')
if image is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError("Could not read image 'bus.jpg'")
image_height, image_width = image.shape[:2]
model_path = 'yolov8n-seg.onnx'
start_time = cv2.getTickCount()
session = ort.InferenceSession(model_path, providers=['CPUExecutionProvider'])
conf_thresold = 0.45
iou_threshold = 0.25

# Model metadata: input/output tensor names and the input resolution.
model_inputs = session.get_inputs()
input_names = [node.name for node in model_inputs]
input_shape = model_inputs[0].shape
model_output = session.get_outputs()
output_names = [node.name for node in model_output]
input_height, input_width = input_shape[2:]

# Pre-processing: BGR -> RGB, resize to the model input, scale to [0, 1], HWC -> NCHW.
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
resized = cv2.resize(image_rgb, (input_width, input_height))
input_image = resized / 255.0
input_image = input_image.transpose(2, 0, 1)
input_tensor = input_image[np.newaxis, :, :, :].astype(np.float32)

outputs = session.run(output_names, {input_names[0]: input_tensor})
outputs_detect = outputs[0]
outputs_proto = outputs[1]

# Mask prototypes, flattened once so each detection needs a single matmul.
num_mask, mask_height, mask_width = outputs_proto.shape[1:]
prototypes = outputs_proto[0].reshape((num_mask, -1))
# Each prediction row is [cx, cy, w, h | class scores | mask coefficients].
mask_start = outputs_detect.shape[1] - num_mask

predictions = np.squeeze(outputs_detect).T
scores = np.max(predictions[:, 4:mask_start], axis=1)
keep = scores > conf_thresold
predictions = predictions[keep, :]
scores = scores[keep]
class_ids = np.argmax(predictions[:, 4:mask_start], axis=1)

# Rescale boxes from model-input pixels to image pixels and to prototype pixels.
boxes_ori = predictions[:, :4]
input_shape = np.array([input_width, input_height, input_width, input_height])
boxes = np.divide(boxes_ori, input_shape, dtype=np.float32)
boxes *= np.array([image_width, image_height, image_width, image_height])
boxes = boxes.astype(np.int32)
boxmasks = np.divide(boxes_ori, input_shape, dtype=np.float32)
boxmasks *= np.array([mask_width, mask_height, mask_width, mask_height])
maskconfs = predictions[:, mask_start:]

# cv2.dnn.NMSBoxes expects (left, top, width, height) rectangles, while
# `boxes` is centre-based (cx, cy, w, h); shift to the top-left corner first.
nms_boxes = boxes.copy()
nms_boxes[:, :2] -= nms_boxes[:, 2:] // 2
indices = cv2.dnn.NMSBoxes(nms_boxes.tolist(), scores.tolist(),
                           score_threshold=conf_thresold, nms_threshold=iou_threshold)
# Depending on the OpenCV version the result is (), a flat array or an Nx1 array.
indices = np.asarray(indices, dtype=np.int64).reshape(-1)

mask_img = image.copy()
for bbox, bboxmask, label, maskconf in zip(
        xywh2xyxy(boxes[indices]), xywh2xyxy(boxmasks[indices]),
        class_ids[indices], maskconfs[indices]):
    bbox = bbox.round().astype(np.int32).tolist()
    cls_id = int(label)
    cls = classes[cls_id]
    cv2.rectangle(image, tuple(bbox[:2]), tuple(bbox[2:]), (0,0,255), 2, 8)
    cv2.rectangle(image, (bbox[0], (bbox[1]-20)), (bbox[2], bbox[1]), (0,255,255), -1)
    cv2.putText(image, f'{cls}', (bbox[0], bbox[1] - 5),
                cv2.FONT_HERSHEY_PLAIN,2, [225, 0, 0], thickness=2)

    # Clip the box to the image so the slices below can neither wrap around
    # (negative index) nor be empty.
    x1 = min(max(bbox[0], 0), image_width)
    y1 = min(max(bbox[1], 0), image_height)
    x2 = min(max(bbox[2], 0), image_width)
    y2 = min(max(bbox[3], 0), image_height)
    if x2 <= x1 or y2 <= y1:
        continue

    # Same box in prototype coordinates, clipped and at least one pixel wide/high.
    scale_x1 = min(max(int(bboxmask[0]), 0), mask_width - 1)
    scale_y1 = min(max(int(bboxmask[1]), 0), mask_height - 1)
    scale_x2 = min(max(int(bboxmask[2]), scale_x1 + 1), mask_width)
    scale_y2 = min(max(int(bboxmask[3]), scale_y1 + 1), mask_height)

    # Instance mask = sigmoid(coefficients . prototypes), reshaped to (H, W).
    masks = sigmoid(maskconf @ prototypes).reshape((mask_height, mask_width))
    scale_crop_mask = masks[scale_y1:scale_y2, scale_x1:scale_x2]
    crop_mask = cv2.resize(scale_crop_mask, (x2 - x1, y2 - y1), interpolation=cv2.INTER_CUBIC)
    crop_mask = (crop_mask > 0.5).astype(np.float64)[:, :, np.newaxis]

    # Paint the class colour where the mask is set, keep the image elsewhere.
    crop_mask_img = mask_img[y1:y2, x1:x2]
    mask_img[y1:y2, x1:x2] = crop_mask_img * (1 - crop_mask) + crop_mask * colors[cls_id]

# Blend once, after every mask has been painted; blending inside the loop
# would repeatedly dilute the masks painted in earlier iterations.
mask_img = cv2.addWeighted(image, 0.3, mask_img, 0.7, 0)

end_time = cv2.getTickCount()
t = (end_time - start_time)/cv2.getTickFrequency()
fps = 1/t
print(f"EStimated FPS: {fps:.2f}")
cv2.putText(mask_img, 'FPS: {:.2f}'.format(fps), (20, 40), cv2.FONT_HERSHEY_PLAIN, 2, [225, 0, 0], 2, 8)
cv2.imshow("YOLOV8-SEG-ONNXRUNTIME", mask_img)
cv2.waitKey(0)

cpp

#include <fstream>
#include <opencv2/opencv.hpp>
#include <onnxruntime_cxx_api.h>
using namespace std;
using namespace cv;
using namespace Ort;
// Logistic sigmoid of `a`: 1 / (1 + e^-a). The division is evaluated in
// double precision and the result is narrowed back to float.
float sigmoid_function(float a) {
    return static_cast<float>(1. / (1. + exp(-a)));
}
// Reads class names from `filename`, one per line, skipping empty lines.
// If the file cannot be opened, an error is written to stderr and an empty
// vector is returned.
std::vector<std::string> readClassNames(const std::string& filename) {
    std::vector<std::string> names;
    std::ifstream input(filename);
    if (input.is_open()) {
        for (std::string entry; std::getline(input, entry);) {
            if (entry.empty()) {
                continue;
            }
            names.push_back(entry);
        }
        input.close();
    }
    else {
        std::cerr << "Error opening file: " << filename << std::endl;
    }
    return names;
}
int main(int argc, char** argv)
{
    string filename = "coco.names";
    vector<string> labels = readClassNames(filename);
    Mat image = imread("bus.jpg");
    int ih = image.rows;
    int iw = image.cols;
    string onnxpath = "yolov8n-seg.onnx";
    float conf_threshold = 0.25;
    float nms_threshold = 0.4;
    float score_threshold = 0.25;
    wstring modelPath = wstring(onnxpath.begin(), onnxpath.end());
    Env env = Env(ORT_LOGGING_LEVEL_ERROR, "yolov8n-seg");
    SessionOptions session_options;
    session_options.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
    Session session_(env, modelPath.c_str(), session_options);
    vector<string> input_node_names;
    vector<string> output_node_names;
    size_t numInputNodes = session_.GetInputCount();
    size_t numOutputNodes = session_.GetOutputCount();
    AllocatorWithDefaultOptions allocator;
    input_node_names.reserve(numInputNodes);
    int input_w = 0;
    int input_h = 0;
    for (int i = 0; i < numInputNodes; i++) {
        auto input_name = session_.GetInputNameAllocated(i, allocator);
        input_node_names.push_back(input_name.get());
        TypeInfo input_type_info = session_.GetInputTypeInfo(i);
        auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo();
        auto input_dims = input_tensor_info.GetShape();
        input_w = input_dims[3];
        input_h = input_dims[2];
        cout << "input format: NxCxHxW = " << input_dims[0] << "x" << input_dims[1] << "x" << input_dims[2] << "x" << input_dims[3] << endl;
    }
    for (int i = 0; i < numOutputNodes; i++) {
        auto out_name = session_.GetOutputNameAllocated(i, allocator);
        output_node_names.push_back(out_name.get());
    }
    int output_detect_h = 0;
    int output_detect_w = 0;
    TypeInfo output_detect_type_info = session_.GetOutputTypeInfo(0);
    auto output_detect_tensor_info = output_detect_type_info.GetTensorTypeAndShapeInfo();
    auto output_detect_dims = output_detect_tensor_info.GetShape();
    output_detect_h = output_detect_dims[1];
    output_detect_w = output_detect_dims[2];
    cout << "output detect format : HxW = " << output_detect_h << "x" << output_detect_w << endl;
    int output_proto_h = 0;
    int output_proto_w = 0;
    int output_proto_c = 0;
    TypeInfo output_proto_type_info = session_.GetOutputTypeInfo(1);
    auto output_proto_tensor_info = output_proto_type_info.GetTensorTypeAndShapeInfo();
    auto output_proto_dims = output_proto_tensor_info.GetShape();
    output_proto_h = output_proto_dims[2];
    output_proto_w = output_proto_dims[3];
    output_proto_c = output_proto_dims[1];
    cout << "output proto format : CxHxW = " << output_proto_c << "x" << output_proto_h << "x" << output_proto_w << endl;
    cout << "input: " << input_node_names[0] << " output detect: " << output_node_names[0] <<
        " output proto: " << output_node_names[1] << endl;
    int64 start = getTickCount();
    int w = image.cols;
    int h = image.rows;
    int _max = max(h, w);
    Mat image_ = Mat::zeros(Size(_max, _max), CV_8UC3);
    Rect roi(0, 0, w, h);
    image.copyTo(image_(roi));
    float x_factor = image_.cols / static_cast<float>(input_w);
    float y_factor = image_.rows / static_cast<float>(input_h);
    Mat blob = dnn::blobFromImage(image_, 1 / 255.0, Size(input_w, input_h), Scalar(0, 0, 0), true, false);
    size_t tpixels = input_h * input_w * 3;
    array<int64_t, 4> input_shape_info{ 1, 3, input_h, input_w };
    auto allocator_info = MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    Value input_tensor_ = Value::CreateTensor<float>(allocator_info, blob.ptr<float>(), tpixels, input_shape_info.data(), input_shape_info.size());
    const array<const char*, 1> inputNames = { input_node_names[0].c_str() };
    const array<const char*, 2> outNames = { output_node_names[0].c_str() , output_node_names[1].c_str() };
    vector<Value> ort_outputs;
    try {
        ort_outputs = session_.Run(RunOptions{ nullptr }, inputNames.data(), &input_tensor_, 1, outNames.data(), outNames.size());
    }
    catch (exception e) {
        cout << e.what() << endl;
    }
    auto data_detect_shape = ort_outputs[0].GetTensorTypeAndShapeInfo().GetShape();
    float* detect_pdata = ort_outputs[0].GetTensorMutableData<float>();
    Mat output_detect = Mat(Size((int)data_detect_shape[2], (int)data_detect_shape[1]), CV_32F, detect_pdata).t();
    auto data_proto_shape = ort_outputs[1].GetTensorTypeAndShapeInfo().GetShape();
    float* proto_pdata = ort_outputs[1].GetTensorMutableData<float>();
    Mat output_proto = Mat(Size((int)(data_proto_shape[2]) * (int)(data_proto_shape[3]),(int)data_proto_shape[1]), CV_32F, proto_pdata);
    vector<Rect> boxes;
    vector<int> classIds;
    vector<float> confidences;
    std::vector<Mat> mask_confs;
    for (int i = 0; i < output_detect.rows; i++) {
        Mat classes_scores = output_detect.row(i).colRange(4, data_detect_shape[1]-data_proto_shape[1]);
        Point classIdPoint;
        double score;
        minMaxLoc(classes_scores, 0, &score, 0, &classIdPoint);
        if (score > 0.25)
        {
            float cx = output_detect.at<float>(i, 0);
            float cy = output_detect.at<float>(i, 1);
            float ow = output_detect.at<float>(i, 2);
            float oh = output_detect.at<float>(i, 3);
            int x = static_cast<int>((cx - 0.5 * ow) * x_factor);
            int y = static_cast<int>((cy - 0.5 * oh) * y_factor);
            int width = static_cast<int>(ow * x_factor);
            int height = static_cast<int>(oh * y_factor);
            Rect box;
            box.x = x;
            box.y = y;
            box.width = width;
            box.height = height;
            boxes.push_back(box);
            classIds.push_back(classIdPoint.x);
            confidences.push_back(score);
            Mat mask_conf = output_detect.row(i).colRange(data_detect_shape[1] - data_proto_shape[1],data_detect_shape[1]);
            mask_confs.push_back(mask_conf);
        }
    }
    Mat rgb_mask = Mat::zeros(image.size(), image.type());
    Mat masked_img;
    RNG rng;
    vector<int> indexes;
    dnn::NMSBoxes(boxes, confidences, 0.25, 0.45, indexes);
    for (size_t i = 0; i < indexes.size(); i++) {
        int index = indexes[i];
        int idx = classIds[index];
        rectangle(image, boxes[index], Scalar(0, 0, 255), 2, 8);
        rectangle(image, Point(boxes[index].tl().x, boxes[index].tl().y - 20),
            Point(boxes[index].br().x, boxes[index].tl().y), Scalar(0, 255, 255), -1);
        putText(image, labels[idx], Point(boxes[index].tl().x, boxes[index].tl().y), FONT_HERSHEY_PLAIN, 2.0, Scalar(255, 0, 0), 2, 8);
        Mat m = mask_confs[i] * output_proto;
        for (int col = 0; col < m.cols; col++) {
            m.at<float>(0, col) = sigmoid_function(m.at<float>(0, col));
        }
        Mat m1 = m.reshape(1, 160); 
        int x1 = std::max(0, boxes[index].x);
        int y1 = std::max(0, boxes[index].y);
        int x2 = std::max(0, boxes[index].br().x);
        int y2 = std::max(0, boxes[index].br().y);
        int mx1 = int(x1 / x_factor * 0.25);
        int my1 = int(y1 / y_factor * 0.25);
        int mx2 = int(x2 / x_factor * 0.25);
        int my2 = int(y2 / y_factor * 0.25);
        Mat mask_roi = m1(Range(my1, my2), Range(mx1, mx2));
        Mat rm, det_mask;
        resize(mask_roi, rm, Size(x2 - x1, y2 - y1));
        for (int r = 0; r < rm.rows; r++) {
            for (int c = 0; c < rm.cols; c++) {
                float pv = rm.at<float>(r, c);
                if (pv > 0.5) {
                    rm.at<float>(r, c) = 1.0;
                }
                else {
                    rm.at<float>(r, c) = 0.0;
                }
            }
        }
        rm = rm * rng.uniform(0, 255);
        rm.convertTo(det_mask, CV_8UC1);
        if ((y1 + det_mask.rows) >= image.rows) {
            y2 = image.rows - 1;
        }
        if ((x1 + det_mask.cols) >= image.cols) {
            x2 = image.cols - 1;
        }
        Mat mask = Mat::zeros(Size(image.cols, image.rows), CV_8UC1);
        det_mask(Range(0, y2 - y1), Range(0, x2 - x1)).copyTo(mask(Range(y1, y2), Range(x1, x2)));
        add(rgb_mask, Scalar(rng.uniform(0, 255), rng.uniform(0, 255), rng.uniform(0, 255)), rgb_mask, mask);
        addWeighted(image, 0.5, rgb_mask, 0.5, 0, masked_img);
    }
    float t = (getTickCount() - start) / static_cast<float>(getTickFrequency());
    putText(masked_img, format("FPS: %.2f", 1 / t), Point(20, 40), FONT_HERSHEY_PLAIN, 2.0, Scalar(255, 0, 0), 2, 8);
    imshow("YOLOV8-SEG-ONNXRUNTIME", masked_img);
    waitKey(0);
    return 0;
}

CMakeLists.txt

cmake_minimum_required(VERSION 3.18)
project(Yolov8-seg)

# Machine-specific install locations; adjust to the local setup. Forward
# slashes are used because CMake treats backslashes as escape characters.
set(OpenCV_DIR "E:/Opencv/opencv_vs/build")
set(ONNXRUNTIME_DIR "E:/Onnxruntime/cpu/1.15")

# Hand-written OpenCV settings. NOTE(review): if find_package(OpenCV) below
# succeeds, its package config may redefine these variables.
set(OpenCV_INCLUDE_DIRS "${OpenCV_DIR}/include")
set(OpenCV_LIB_DIRS "${OpenCV_DIR}/x64/vc16/lib")
# Link the debug library only in Debug configurations and the release library
# otherwise, instead of linking both into every configuration.
set(OpenCV_LIBS debug "opencv_world480d.lib" optimized "opencv_world480.lib")

set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

include_directories(${OpenCV_INCLUDE_DIRS})
link_directories(${OpenCV_LIB_DIRS})
find_package(OpenCV QUIET)
link_libraries(${OpenCV_LIBS})

add_executable(Yolov8-seg main.cpp)
target_compile_features(Yolov8-seg PRIVATE cxx_std_14)

# Locate the ONNX Runtime import library inside the configured install
# directory and fail at configure time if it is missing.
find_library(ONNXRUNTIME_LIB NAMES onnxruntime PATHS "${ONNXRUNTIME_DIR}/lib" NO_DEFAULT_PATH)
if(NOT ONNXRUNTIME_LIB)
    message(FATAL_ERROR "onnxruntime library not found under ${ONNXRUNTIME_DIR}/lib")
endif()
target_include_directories(Yolov8-seg PRIVATE "${ONNXRUNTIME_DIR}/include")
target_link_libraries(Yolov8-seg "${ONNXRUNTIME_LIB}")

原文地址：https://blog.csdn.net/qq_49595983/article/details/144409084

免责声明：本站文章内容转载自网络资源，如本站内容侵犯了原著者的合法权益，可联系本站删除。更多内容请关注自学内容网（zxcms.com）！

上一篇：ruoyi-vue退出登录配置域名后无法退出登录问题的解决
下一篇：AI 名人堂：Ian J. Goodfellow 生成对抗网络之父

【流畅的Python】第三章：字典和集合——《Fluent Python》学习笔记
介绍了dict、defaultdict和OrderedDict实现的常用方法，如clear、copy、get、items、keys、pop、popitem、setdefault、update、valu
阅读更多2024-12-12
kafka相关问题
偏移量是消费者组维护的，而不是单个消费者。Kafka 为每个消费者组记录每个分区的偏移量，存储在主题中。每个消费者组的偏移量更新是独立的，消费者组之间的消费进度互不干扰。偏移量是由消费者控制和提交的，
阅读更多2024-12-12
鲲鹏麒麟安装Kafka-v1.1.1
因项目需要在鲲鹏麒麟服务器上安装Kafka v1.1.1，因此这里将安装配置过程记录下来。
阅读更多2024-12-12
springboot/ssm城市垃圾分类管理系统Java代码编写web项目社区垃圾分类
springboot/ssm城市垃圾分类管理系统Java代码编写web项目社区垃圾分类。基于springboot(可改ssm)+html+vue项目。代码+数据库保证完整可用，免费修改项目名以及数据库
阅读更多2024-12-12
unity打包流程整理-Windows/Mac/Linux平台
1.Platform（平台）功能: 选择目标构建的目标平台选项PC, Mac & Linux Standalone: 适用于 Windows、Mac 和 Linux 平台2.Target Pl
阅读更多2024-12-12
MySQL | 尚硅谷 | 第15章_存储过程与函数
存储过程的英文是 Stored Procedure。它的思想很简单，就是一组经过预先编译的 SQL 语句的封装。**执行过程：**存储过程预先存储在 MySQL 服务器上，需要执行的时候，客户端只
阅读更多2024-12-12
Model Context Protocol 精选资源列表
MCP是一种开放协议，通过标准化的服务器实现，使 AI 模型能够安全地与本地和远程资源进行交互。此列表重点关注可用于生产和实验性的 MCP 服务器，这些服务器通过文件访问、数据库连接、API 集成和其
阅读更多2024-12-12
【Unity】Amplify Shader Editor
Amplify Shader Editor，是一个功能强大的基于节点的着色器开发工具，允许开发者在 Unity 中轻松创建和管理复杂的 Shader。
阅读更多2024-12-12
从模型到视图：如何用 .NET Core MVC 构建完整 Web 应用
作用是项目用来链接服务的，我们可以右键它来添加链接服务，Propertieswwwroot依赖项：控制器文件，主要用来编写业务逻辑并做一些业务逻辑计算Models：模型文件，主要作为数据传输的一个存储
阅读更多2024-12-12
PCL点云库入门——PCL库中点云数据格式PCD和PLY及其输入输出（IO）
根据PCL点云库的IO模块信息，目前PCL库中支持PCD、PLY、OBJ、IFS和PNG等多种文件格式的读取与写入。然而，支持的文件类型相对有限。在本节中，我们将重点讲解和阐释PCL库中最为常用的和文
阅读更多2024-12-12

Onnxruntime推理Yolov8-seg

相关文章