onnx-web + yolov8n: running inference on a video stream
Following up on my previous article, "Running YOLOv8-nano inference with onnxruntime-web", a few readers asked whether it could be hooked up to a live video stream to recognize objects in real time.
First, capture the camera stream with getUserMedia

The getUserMedia API gives access to the device's camera and microphone. You can use it to obtain a video stream and display it in a <video> element on the page.
Notes:

- Browser support: getUserMedia is supported by most modern browsers, but may be missing in older ones; check caniuse.com for details (a support-check sketch follows this list).
- HTTPS: camera access generally requires a secure context. If you run into trouble during development, make sure your dev server uses HTTPS, or use localhost, which browsers treat as secure.
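As a minimal sketch of that support check (the resolution constraints below are illustrative values, not from the original code):

// Feature-detect getUserMedia before calling it; older browsers may lack it.
async function openCamera() {
  if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
    throw new Error("getUserMedia is not supported in this browser");
  }
  // Example constraints: prefer 1280x720, video only (values are arbitrary).
  return navigator.mediaDevices.getUserMedia({
    video: { width: { ideal: 1280 }, height: { ideal: 720 } },
    audio: false,
  });
}

If the project is built on Create React App (the later code reads process.env.PUBLIC_URL, which suggests it is), starting the dev server with HTTPS=true npm start is one way to satisfy the secure-context requirement. With that covered, here is the full component: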
import React, { useState, useRef, useEffect } from "react";

const App = () => {
  const [hasVideo, setHasVideo] = useState(true); // whether the stream was acquired successfully
  const videoRef = useRef(null); // ref to the <video> element

  useEffect(() => {
    // async helper that requests the camera stream
    const getVideoStream = async () => {
      try {
        const stream = await navigator.mediaDevices.getUserMedia({
          video: true, // request video only, no audio
        });
        // on success, attach the stream to the <video> element
        if (videoRef.current) {
          videoRef.current.srcObject = stream;
        }
        setHasVideo(true);
      } catch (err) {
        console.error("Error accessing the camera: ", err);
        setHasVideo(false);
      }
    };
    getVideoStream();

    // cleanup: stop the stream when the component unmounts
    return () => {
      if (videoRef.current && videoRef.current.srcObject) {
        const tracks = videoRef.current.srcObject.getTracks();
        tracks.forEach((track) => track.stop()); // stop every track
      }
    };
  }, []);

  // Keep the <video> element mounted unconditionally: if it were rendered only
  // after hasVideo became true, videoRef.current would still be null when the
  // stream arrived and srcObject would never be set.
  return (
    <div className="video-container">
      <video ref={videoRef} autoPlay playsInline className="video-stream" />
      {!hasVideo && <p>Unable to access the camera.</p>}
    </div>
  );
};

export default App;
The result looks like the following. Note, however, that the bounding boxes we need to draw can't live on the <video> element itself, so we redraw each video frame onto a <canvas>; that also makes it easy to draw rectangles at the detected positions later.
Add the frame-grabbing and model-inference code
import React, { useEffect, useRef, useState } from "react";
import "./style/Camera.css";
import cv from "@techstark/opencv-js";
import { download } from "./utils/download";
import { InferenceSession, Tensor } from "onnxruntime-web";
import { detectImage } from "./utils/detect";

const CameraToCanvas = () => {
  const videoRef = useRef(null);
  const canvasRef = useRef(null);
  const imageRef = useRef(null);
  // Keep the session in a ref: the requestAnimationFrame loop below is created
  // once inside useEffect, so a useState value would be captured as a stale
  // closure (always null). That is the problem the commented-out
  // window.session workaround in the first draft was trying to avoid.
  const sessionRef = useRef(null);
  const [loading, setLoading] = useState({ text: "Loading OpenCV.js", progress: null });

  // Configs
  const modelName = "yolov8n.onnx";
  const modelInputShape = [1, 3, 640, 640];
  const topk = 100;
  const iouThreshold = 0.45;
  const scoreThreshold = 0.25;

  useEffect(() => {
    let rafId = null;

    cv["onRuntimeInitialized"] = async () => {
      const baseModelURL = `${process.env.PUBLIC_URL}/model`;

      // create the YOLOv8 session
      const url = `${baseModelURL}/${modelName}`;
      console.log(`url:${url}`);
      const arrBufNet = await download(url, ["Loading YOLOv8", setLoading]);
      const yolov8 = await InferenceSession.create(arrBufNet);

      // create the NMS session
      const arrBufNMS = await download(`${baseModelURL}/nms-yolov8.onnx`, ["Loading NMS model", setLoading]);
      const nms = await InferenceSession.create(arrBufNMS);

      // warm up the main model with an all-zeros tensor
      setLoading({ text: "Warming up model...", progress: null });
      const tensor = new Tensor(
        "float32",
        new Float32Array(modelInputShape.reduce((a, b) => a * b)),
        modelInputShape
      );
      await yolov8.run({ images: tensor });

      sessionRef.current = { net: yolov8, nms: nms };
      setLoading(null);
    };

    // acquire the camera stream
    async function startCamera() {
      try {
        const stream = await navigator.mediaDevices.getUserMedia({ video: true });
        if (videoRef.current) {
          videoRef.current.srcObject = stream;
          videoRef.current.play();
        }
      } catch (error) {
        console.error("Error accessing the camera:", error);
      }
    }
    startCamera();

    const drawToCanvas = () => {
      const video = videoRef.current;
      const canvas = canvasRef.current;
      // wait until the video has decoded at least one frame
      if (video && canvas && video.readyState >= 2) {
        const context = canvas.getContext("2d");
        canvas.width = video.videoWidth;
        canvas.height = video.videoHeight;
        context.drawImage(video, 0, 0, canvas.width, canvas.height);
        // copy the frame into the hidden <img>, which detectImage reads
        imageRef.current.src = canvas.toDataURL();
        if (sessionRef.current) {
          detectImage(imageRef.current, canvas, sessionRef.current, topk, iouThreshold, scoreThreshold, modelInputShape);
        }
      }
      rafId = requestAnimationFrame(drawToCanvas);
    };
    rafId = requestAnimationFrame(drawToCanvas);

    // cleanup: cancel the draw loop and stop the camera stream
    return () => {
      cancelAnimationFrame(rafId);
      if (videoRef.current && videoRef.current.srcObject) {
        videoRef.current.srcObject.getTracks().forEach((track) => track.stop());
      }
    };
  }, []);

  return (
    <div className="video-container">
      <video ref={videoRef} style={{ display: "none" }} />
      <canvas ref={canvasRef} />
      <img ref={imageRef} src="#" alt="" style={{ display: "none" }} />
    </div>
  );
};

export default CameraToCanvas;
The detection method that runs the model
import { Tensor } from "onnxruntime-web";
// `preprocessing` (letterbox + blob conversion with OpenCV.js) and
// `renderBoxes` (canvas drawing) are helpers living alongside this file;
// sketches of both are given after this listing.

export const detectImage = async (
  image,
  canvas,
  session,
  topk,
  iouThreshold,
  scoreThreshold,
  inputShape
) => {
  const [modelWidth, modelHeight] = inputShape.slice(2);
  const [input, xRatio, yRatio] = preprocessing(image, modelWidth, modelHeight);

  const tensor = new Tensor("float32", input.data32F, inputShape); // to ort.Tensor
  const config = new Tensor(
    "float32",
    new Float32Array([
      topk, // top-k boxes per class
      iouThreshold, // IoU threshold
      scoreThreshold, // score threshold
    ])
  ); // NMS config tensor

  const { output0 } = await session.net.run({ images: tensor }); // run the session and get the output layer
  const { selected } = await session.nms.run({ detection: output0, config: config }); // perform NMS and filter boxes

  const boxes = [];
  // loop over the surviving detections
  for (let idx = 0; idx < selected.dims[1]; idx++) {
    const data = selected.data.slice(idx * selected.dims[2], (idx + 1) * selected.dims[2]); // get one row
    const box = data.slice(0, 4); // [cx, cy, w, h]
    const scores = data.slice(4); // per-class probability scores
    const score = Math.max(...scores); // maximum probability
    const label = scores.indexOf(score); // class id of the maximum probability

    const [x, y, w, h] = [
      (box[0] - 0.5 * box[2]) * xRatio, // upscale left
      (box[1] - 0.5 * box[3]) * yRatio, // upscale top
      box[2] * xRatio, // upscale width
      box[3] * yRatio, // upscale height
    ]; // map the box back to source-image coordinates

    boxes.push({
      label: label,
      probability: score,
      bounding: [x, y, w, h],
    }); // collect boxes to draw later
  }

  if (boxes.length > 0) {
    renderBoxes(canvas, boxes); // draw the boxes
  }
  input.delete(); // free the OpenCV Mat
};
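The post doesn't show preprocessing or renderBoxes. Below is a rough sketch of what they could look like, assuming the pad-to-square convention implied by the xRatio/yRatio math above; treat it as an illustration rather than the author's exact code.

import cv from "@techstark/opencv-js";

// Pad the source image to a square, then let blobFromImage normalize and
// resize it to the model input; return the blob plus the ratios needed to
// map boxes back to source coordinates.
const preprocessing = (source, modelWidth, modelHeight) => {
  const mat = cv.imread(source); // RGBA from the <img>/<canvas>
  const matC3 = new cv.Mat(mat.rows, mat.cols, cv.CV_8UC3);
  cv.cvtColor(mat, matC3, cv.COLOR_RGBA2BGR); // drop the alpha channel

  // pad right/bottom so the image becomes maxSize x maxSize
  const maxSize = Math.max(matC3.rows, matC3.cols);
  const xPad = maxSize - matC3.cols;
  const yPad = maxSize - matC3.rows;
  const xRatio = maxSize / matC3.cols;
  const yRatio = maxSize / matC3.rows;
  const matPad = new cv.Mat();
  cv.copyMakeBorder(matC3, matPad, 0, yPad, 0, xPad, cv.BORDER_CONSTANT);

  // scale to [0,1], resize to modelWidth x modelHeight, swap BGR -> RGB
  const input = cv.blobFromImage(
    matPad,
    1 / 255.0,
    new cv.Size(modelWidth, modelHeight),
    new cv.Scalar(0, 0, 0),
    true, // swapRB
    false // no crop
  );

  mat.delete();
  matC3.delete();
  matPad.delete();
  return [input, xRatio, yRatio];
};

And a minimal renderBoxes that draws on top of the frame already on the canvas (no clearing, or the frame would be lost):

const renderBoxes = (canvas, boxes) => {
  const ctx = canvas.getContext("2d");
  ctx.lineWidth = 2;
  ctx.strokeStyle = "#00FF00";
  ctx.fillStyle = "#00FF00";
  ctx.font = "14px sans-serif";
  boxes.forEach(({ label, probability, bounding }) => {
    const [x, y, w, h] = bounding;
    ctx.strokeRect(x, y, w, h);
    ctx.fillText(`${label}: ${(probability * 100).toFixed(1)}%`, x, Math.max(y - 4, 12));
  });
};

Depending on how the canvas is sized relative to the model input, the coordinates may also need rescaling from model space to canvas pixels; the sketch draws them as-is.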
Testing the result: processing the video stream is still choppy, so I made do with a mostly static scene for the test.
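The post doesn't dig into the stutter, but one common mitigation is to stop queueing a detection on every animation frame and instead skip frames while the previous detection is still running. A sketch of that pattern (assuming the job is an async function, like detectImage above):

// Run an async job at most once at a time from a requestAnimationFrame loop;
// frames that arrive while the job is still in flight are simply skipped.
function startThrottledLoop(job) {
  let busy = false;
  const tick = () => {
    if (!busy) {
      busy = true;
      job().finally(() => {
        busy = false;
      });
    }
    requestAnimationFrame(tick);
  };
  requestAnimationFrame(tick);
}

In the component above, drawToCanvas could keep drawing every frame while only the detectImage call goes through such a gate, so the preview stays smooth even when inference is slow.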
Original post: https://blog.csdn.net/u011564831/article/details/143572801