Onnxruntime推理Yolov8-seg
python
import cv2
import math
import numpy as np
import onnxruntime as ort
def sigmoid(x):
return 1.0/(1+np.exp(-x))
def readClassesNames(file_path):
with open(file_path, encoding='utf-8') as f:
class_names = f.readlines()
class_names = [c.strip() for c in class_names]
return class_names
def xywh2xyxy(x):
y = np.copy(x)
y[..., 0] = x[..., 0] - x[..., 2] / 2
y[..., 1] = x[..., 1] - x[..., 3] / 2
y[..., 2] = x[..., 0] + x[..., 2] / 2
y[..., 3] = x[..., 1] + x[..., 3] / 2
return y
classes_names = 'coco.names'
classes = readClassesNames(classes_names)
rng = np.random.default_rng(3)
colors = rng.uniform(0, 255, size=(len(classes_names), 3))
image = cv2.imread('bus.jpg')
image_height, image_width = image.shape[:2]
model_path = 'yolov8n-seg.onnx'
start_time = cv2.getTickCount()
session = ort.InferenceSession(model_path, providers=['CPUExecutionProvider'])
conf_thresold = 0.45
iou_threshold = 0.25
model_inputs = session.get_inputs()
input_names = [model_inputs[i].name for i in range(len(model_inputs))]
input_shape = model_inputs[0].shape
model_output = session.get_outputs()
output_names = [model_output[i].name for i in range(len(model_output))]
input_height, input_width = input_shape[2:]
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
resized = cv2.resize(image_rgb, (input_width, input_height))
input_image = resized / 255.0
input_image = input_image.transpose(2,0,1)
input_tensor = input_image[np.newaxis, :, :, :].astype(np.float32)
outputs = session.run(output_names, {input_names[0]: input_tensor})
outputs_detect = outputs[0]
outputs_proto = outputs[1]
predictions = np.squeeze(outputs_detect).T
scores = np.max(predictions[:, 4:(outputs_detect.shape[1]-outputs_proto.shape[1])], axis=1)
predictions = predictions[scores > conf_thresold, :]
scores = scores[scores > conf_thresold]
class_ids = np.argmax(predictions[:, 4: (outputs_detect.shape[1]-outputs_proto.shape[1])], axis=1)
boxes_ori = predictions[:, :4]
input_shape = np.array([input_width, input_height, input_width, input_height])
boxes = np.divide(boxes_ori, input_shape, dtype=np.float32)
boxes *= np.array([image_width, image_height, image_width, image_height])
boxes = boxes.astype(np.int32)
boxmasks = np.divide(boxes_ori, input_shape, dtype=np.float32)
boxmasks *= np.array([outputs_proto.shape[3], outputs_proto.shape[2], outputs_proto.shape[3], outputs_proto.shape[2]])
maskconfs = predictions[:, (outputs_detect.shape[1]-outputs_proto.shape[1]):outputs_detect.shape[1]]
indices = cv2.dnn.NMSBoxes(boxes, scores, score_threshold=conf_thresold, nms_threshold=iou_threshold)
mask_img = image.copy()
for (bbox, bboxmask, score, label, maskconf) in zip(xywh2xyxy(boxes[indices]), xywh2xyxy(boxmasks[indices]),scores[indices], class_ids[indices], maskconfs[indices]):
bbox = bbox.round().astype(np.int32).tolist()
cls_id = int(label)
cls = classes[cls_id]
cv2.rectangle(image, tuple(bbox[:2]), tuple(bbox[2:]), (0,0,255), 2, 8)
cv2.rectangle(image, (bbox[0], (bbox[1]-20)), (bbox[2], bbox[1]), (0,255,255), -1)
cv2.putText(image, f'{cls}', (bbox[0], bbox[1] - 5),
cv2.FONT_HERSHEY_PLAIN,2, [225, 0, 0], thickness=2)
outputs_proto = np.squeeze(outputs_proto)
num_mask, mask_height, mask_width = outputs_proto.shape # CHW
masks = sigmoid(maskconf @ outputs_proto.reshape((num_mask, -1)))
masks = np.squeeze(masks.reshape((-1, mask_height, mask_width)))
mask_map = np.zeros((image_height, image_width))
scale_x1 = int(bboxmask[0])
scale_y1 = int(bboxmask[1])
scale_x2 = int(bboxmask[2])
scale_y2 = int(bboxmask[3])
x1 = int(bbox[0])
y1 = int(bbox[1])
x2 = int(bbox[2])
y2 = int(bbox[3])
scale_crop_mask = masks[scale_y1:scale_y2, scale_x1:scale_x2]
crop_mask = cv2.resize(scale_crop_mask,(x2 - x1, y2 - y1),interpolation=cv2.INTER_CUBIC)
crop_mask = (crop_mask > 0.5).astype(np.uint8)
mask_map[y1:y2, x1:x2] = crop_mask
crop_mask = mask_map[y1:y2, x1:x2, np.newaxis]
crop_mask_img = mask_img[y1:y2, x1:x2]
crop_mask_img = crop_mask_img * (1 - crop_mask) + crop_mask * colors[cls_id]
mask_img[y1:y2, x1:x2] = crop_mask_img
mask_img = cv2.addWeighted(image, 0.3, mask_img, 0.7, 0)
end_time = cv2.getTickCount()
t = (end_time - start_time)/cv2.getTickFrequency()
fps = 1/t
print(f"EStimated FPS: {fps:.2f}")
cv2.putText(mask_img, 'FPS: {:.2f}'.format(fps), (20, 40), cv2.FONT_HERSHEY_PLAIN, 2, [225, 0, 0], 2, 8);
cv2.imshow("YOLOV8-SEG-ONNXRUNTIME", mask_img)
cv2.waitKey(0)
cpp
#include <fstream>
#include <opencv2/opencv.hpp>
#include <onnxruntime_cxx_api.h>
using namespace std;
using namespace cv;
using namespace Ort;
float sigmoid_function(float a) {
float b = 1. / (1. + exp(-a));
return b;
}
vector<string> readClassNames(const string& filename) {
vector<string> classNames;
ifstream file(filename);
if (!file.is_open()) {
cerr << "Error opening file: " << filename << endl;
return classNames;
}
string line;
while (getline(file, line)) {
if (!line.empty()) {
classNames.push_back(line);
}
}
file.close();
return classNames;
}
int main(int argc, char** argv)
{
string filename = "coco.names";
vector<string> labels = readClassNames(filename);
Mat image = imread("bus.jpg");
int ih = image.rows;
int iw = image.cols;
string onnxpath = "yolov8n-seg.onnx";
float conf_threshold = 0.25;
float nms_threshold = 0.4;
float score_threshold = 0.25;
wstring modelPath = wstring(onnxpath.begin(), onnxpath.end());
Env env = Env(ORT_LOGGING_LEVEL_ERROR, "yolov8n-seg");
SessionOptions session_options;
session_options.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
Session session_(env, modelPath.c_str(), session_options);
vector<string> input_node_names;
vector<string> output_node_names;
size_t numInputNodes = session_.GetInputCount();
size_t numOutputNodes = session_.GetOutputCount();
AllocatorWithDefaultOptions allocator;
input_node_names.reserve(numInputNodes);
int input_w = 0;
int input_h = 0;
for (int i = 0; i < numInputNodes; i++) {
auto input_name = session_.GetInputNameAllocated(i, allocator);
input_node_names.push_back(input_name.get());
TypeInfo input_type_info = session_.GetInputTypeInfo(i);
auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo();
auto input_dims = input_tensor_info.GetShape();
input_w = input_dims[3];
input_h = input_dims[2];
cout << "input format: NxCxHxW = " << input_dims[0] << "x" << input_dims[1] << "x" << input_dims[2] << "x" << input_dims[3] << endl;
}
for (int i = 0; i < numOutputNodes; i++) {
auto out_name = session_.GetOutputNameAllocated(i, allocator);
output_node_names.push_back(out_name.get());
}
int output_detect_h = 0;
int output_detect_w = 0;
TypeInfo output_detect_type_info = session_.GetOutputTypeInfo(0);
auto output_detect_tensor_info = output_detect_type_info.GetTensorTypeAndShapeInfo();
auto output_detect_dims = output_detect_tensor_info.GetShape();
output_detect_h = output_detect_dims[1];
output_detect_w = output_detect_dims[2];
cout << "output detect format : HxW = " << output_detect_h << "x" << output_detect_w << endl;
int output_proto_h = 0;
int output_proto_w = 0;
int output_proto_c = 0;
TypeInfo output_proto_type_info = session_.GetOutputTypeInfo(1);
auto output_proto_tensor_info = output_proto_type_info.GetTensorTypeAndShapeInfo();
auto output_proto_dims = output_proto_tensor_info.GetShape();
output_proto_h = output_proto_dims[2];
output_proto_w = output_proto_dims[3];
output_proto_c = output_proto_dims[1];
cout << "output proto format : CxHxW = " << output_proto_c << "x" << output_proto_h << "x" << output_proto_w << endl;
cout << "input: " << input_node_names[0] << " output detect: " << output_node_names[0] <<
" output proto: " << output_node_names[1] << endl;
int64 start = getTickCount();
int w = image.cols;
int h = image.rows;
int _max = max(h, w);
Mat image_ = Mat::zeros(Size(_max, _max), CV_8UC3);
Rect roi(0, 0, w, h);
image.copyTo(image_(roi));
float x_factor = image_.cols / static_cast<float>(input_w);
float y_factor = image_.rows / static_cast<float>(input_h);
Mat blob = dnn::blobFromImage(image_, 1 / 255.0, Size(input_w, input_h), Scalar(0, 0, 0), true, false);
size_t tpixels = input_h * input_w * 3;
array<int64_t, 4> input_shape_info{ 1, 3, input_h, input_w };
auto allocator_info = MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
Value input_tensor_ = Value::CreateTensor<float>(allocator_info, blob.ptr<float>(), tpixels, input_shape_info.data(), input_shape_info.size());
const array<const char*, 1> inputNames = { input_node_names[0].c_str() };
const array<const char*, 2> outNames = { output_node_names[0].c_str() , output_node_names[1].c_str() };
vector<Value> ort_outputs;
try {
ort_outputs = session_.Run(RunOptions{ nullptr }, inputNames.data(), &input_tensor_, 1, outNames.data(), outNames.size());
}
catch (exception e) {
cout << e.what() << endl;
}
auto data_detect_shape = ort_outputs[0].GetTensorTypeAndShapeInfo().GetShape();
float* detect_pdata = ort_outputs[0].GetTensorMutableData<float>();
Mat output_detect = Mat(Size((int)data_detect_shape[2], (int)data_detect_shape[1]), CV_32F, detect_pdata).t();
auto data_proto_shape = ort_outputs[1].GetTensorTypeAndShapeInfo().GetShape();
float* proto_pdata = ort_outputs[1].GetTensorMutableData<float>();
Mat output_proto = Mat(Size((int)(data_proto_shape[2]) * (int)(data_proto_shape[3]),(int)data_proto_shape[1]), CV_32F, proto_pdata);
vector<Rect> boxes;
vector<int> classIds;
vector<float> confidences;
std::vector<Mat> mask_confs;
for (int i = 0; i < output_detect.rows; i++) {
Mat classes_scores = output_detect.row(i).colRange(4, data_detect_shape[1]-data_proto_shape[1]);
Point classIdPoint;
double score;
minMaxLoc(classes_scores, 0, &score, 0, &classIdPoint);
if (score > 0.25)
{
float cx = output_detect.at<float>(i, 0);
float cy = output_detect.at<float>(i, 1);
float ow = output_detect.at<float>(i, 2);
float oh = output_detect.at<float>(i, 3);
int x = static_cast<int>((cx - 0.5 * ow) * x_factor);
int y = static_cast<int>((cy - 0.5 * oh) * y_factor);
int width = static_cast<int>(ow * x_factor);
int height = static_cast<int>(oh * y_factor);
Rect box;
box.x = x;
box.y = y;
box.width = width;
box.height = height;
boxes.push_back(box);
classIds.push_back(classIdPoint.x);
confidences.push_back(score);
Mat mask_conf = output_detect.row(i).colRange(data_detect_shape[1] - data_proto_shape[1],data_detect_shape[1]);
mask_confs.push_back(mask_conf);
}
}
Mat rgb_mask = Mat::zeros(image.size(), image.type());
Mat masked_img;
RNG rng;
vector<int> indexes;
dnn::NMSBoxes(boxes, confidences, 0.25, 0.45, indexes);
for (size_t i = 0; i < indexes.size(); i++) {
int index = indexes[i];
int idx = classIds[index];
rectangle(image, boxes[index], Scalar(0, 0, 255), 2, 8);
rectangle(image, Point(boxes[index].tl().x, boxes[index].tl().y - 20),
Point(boxes[index].br().x, boxes[index].tl().y), Scalar(0, 255, 255), -1);
putText(image, labels[idx], Point(boxes[index].tl().x, boxes[index].tl().y), FONT_HERSHEY_PLAIN, 2.0, Scalar(255, 0, 0), 2, 8);
Mat m = mask_confs[i] * output_proto;
for (int col = 0; col < m.cols; col++) {
m.at<float>(0, col) = sigmoid_function(m.at<float>(0, col));
}
Mat m1 = m.reshape(1, 160);
int x1 = std::max(0, boxes[index].x);
int y1 = std::max(0, boxes[index].y);
int x2 = std::max(0, boxes[index].br().x);
int y2 = std::max(0, boxes[index].br().y);
int mx1 = int(x1 / x_factor * 0.25);
int my1 = int(y1 / y_factor * 0.25);
int mx2 = int(x2 / x_factor * 0.25);
int my2 = int(y2 / y_factor * 0.25);
Mat mask_roi = m1(Range(my1, my2), Range(mx1, mx2));
Mat rm, det_mask;
resize(mask_roi, rm, Size(x2 - x1, y2 - y1));
for (int r = 0; r < rm.rows; r++) {
for (int c = 0; c < rm.cols; c++) {
float pv = rm.at<float>(r, c);
if (pv > 0.5) {
rm.at<float>(r, c) = 1.0;
}
else {
rm.at<float>(r, c) = 0.0;
}
}
}
rm = rm * rng.uniform(0, 255);
rm.convertTo(det_mask, CV_8UC1);
if ((y1 + det_mask.rows) >= image.rows) {
y2 = image.rows - 1;
}
if ((x1 + det_mask.cols) >= image.cols) {
x2 = image.cols - 1;
}
Mat mask = Mat::zeros(Size(image.cols, image.rows), CV_8UC1);
det_mask(Range(0, y2 - y1), Range(0, x2 - x1)).copyTo(mask(Range(y1, y2), Range(x1, x2)));
add(rgb_mask, Scalar(rng.uniform(0, 255), rng.uniform(0, 255), rng.uniform(0, 255)), rgb_mask, mask);
addWeighted(image, 0.5, rgb_mask, 0.5, 0, masked_img);
}
float t = (getTickCount() - start) / static_cast<float>(getTickFrequency());
putText(masked_img, format("FPS: %.2f", 1 / t), Point(20, 40), FONT_HERSHEY_PLAIN, 2.0, Scalar(255, 0, 0), 2, 8);
imshow("YOLOV8-SEG-ONNXRUNTIME", masked_img);
waitKey(0);
return 0;
}
CMakeLists.txt
cmake_minimum_required(VERSION 3.18)
project(Yolov8-seg)
set("OpenCV_DIR" "E:\\Opencv\\opencv_vs\\build")
set("ONNXRUNTIME_DIR" "E:\\Onnxruntime\\cpu\\1.15")
set(OpenCV_INCLUDE_DIRS ${OpenCV_DIR}\\include)
set(OpenCV_LIB_DIRS ${OpenCV_DIR}\\x64\\vc16\\lib)
set(OpenCV_LIBS "opencv_world480d.lib" "opencv_world480.lib")
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
include_directories(${OpenCV_INCLUDE_DIRS})
link_directories(${OpenCV_LIB_DIRS})
find_package(OpenCV QUIET)
link_libraries(${OpenCV_LIBS})
add_executable(Yolov8-seg main.cpp)
target_compile_features(Yolov8-seg PRIVATE cxx_std_14)
find_library(PATH ${ONNXRUNTIME_DIR})
target_include_directories(Yolov8-seg PRIVATE "${ONNXRUNTIME_DIR}/include")
target_link_libraries(Yolov8-seg "${ONNXRUNTIME_DIR}/lib/onnxruntime.lib")
原文地址:https://blog.csdn.net/qq_49595983/article/details/144409084
免责声明:本站文章内容转载自网络资源,如本站内容侵犯了原著者的合法权益,可联系本站删除。更多内容请关注自学内容网(zxcms.com)!