自学内容网 自学内容网

Unity对接科大讯飞实时语音转写WebAPI(Windows平台)(二)

上一篇中,使用的是Unity的Microphone类,遗留的问题是:yield return new WaitForSecondsRealtime(0.04f)导致消息发送得很慢,语音识别不及时。

上一篇链接:Unity对接科大讯飞实时语音转写WebAPI(Windows平台)_unity webgl对接讯飞实时语音听写-CSDN博客

本篇将使用NAudio.Wave.WaveIn类完成麦克风语音数据的获取,属性BufferMilliseconds可以控制数据时间间隔;发送依然使用WebSocketSharp.WebSocket。直接贴代码

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using System;
using WebSocketSharp;
using System.Text;
using System.Security.Cryptography;
using LitJson;
using Newtonsoft.Json;

using NAudio.Wave;

/// <summary>
/// Streams microphone audio captured with NAudio's <see cref="WaveIn"/> to the
/// iFlytek real-time transcription WebSocket endpoint and reports recognized
/// text through the callback registered in <see cref="InitSpeechHelper"/>.
/// </summary>
/// <remarks>
/// Audio buffers and server messages arrive on callbacks that are not guaranteed
/// to run on the Unity main thread, so both are staged in lock-protected queues
/// and drained from <see cref="Update"/>.
/// </remarks>
public class SpeechHelper2 : MonoBehaviour
{
    private WaveIn audioClip;
    WebSocket speechWebSocket;
    private System.Action<string> resultCallback;
    private System.Action errorCallback;

    // Shared staging queues. Every access goes through lock(queue) because the
    // producers (audio / socket callbacks) and the consumer (Update) may run on
    // different threads and Queue<T> is not thread-safe.
    private static readonly Queue<string> messageQueue = new Queue<string>();
    private static readonly Queue<byte[]> requestQueue = new Queue<byte[]>();

    /// <summary>Registers the callbacks used to report results and errors.</summary>
    /// <param name="textCallback">Invoked with the text of every final recognition result.</param>
    /// <param name="_errorCallback">Invoked when the server answers with a non-zero code.</param>
    public void InitSpeechHelper(System.Action<string> textCallback, Action _errorCallback)
    {
        resultCallback = textCallback;
        errorCallback = _errorCallback;
    }

    /// <summary>Queues a copy of the freshly recorded audio for sending.</summary>
    private void AudioClip_DataAvailable(object sender, WaveInEventArgs e)
    {
        if (e.BytesRecorded <= 0)
        {
            return;
        }
        // NAudio reuses e.Buffer for later callbacks and only the first
        // BytesRecorded bytes are valid, so the payload must be copied.
        byte[] chunk = new byte[e.BytesRecorded];
        Array.Copy(e.Buffer, chunk, e.BytesRecorded);
        lock (requestQueue)
        {
            requestQueue.Enqueue(chunk);
        }
    }

    /// <summary>
    /// Starts microphone capture (16 kHz, 16 bit, mono, 80 ms buffers) and opens
    /// the recognition WebSocket.
    /// </summary>
    /// <returns>
    /// <c>true</c> when recording started and the connection attempt was launched;
    /// <c>false</c> when a previous session is still connecting/open, no input
    /// device exists, or the socket could not be created.
    /// </returns>
    public bool StartSpeech()
    {
        if (speechWebSocket != null &&
            (speechWebSocket.ReadyState == WebSocketState.Open ||
             speechWebSocket.ReadyState == WebSocketState.Connecting))
        {
            MessageMng.Ins.ShowFloatTip("请等待上次识别结束");
            return false;
        }
        if (WaveIn.DeviceCount <= 0)
        {
            MessageMng.Ins.ShowTipMsg("找不到麦克风!");
            return false;
        }

        // Drop anything left over from a previous session.
        ReleaseRecorder();
        lock (messageQueue)
        {
            messageQueue.Clear();
        }
        lock (requestQueue)
        {
            requestQueue.Clear();
        }

        audioClip = new WaveIn();
        audioClip.BufferMilliseconds = 80;
        audioClip.WaveFormat = new WaveFormat(16000, 16, 1);
        audioClip.DataAvailable += AudioClip_DataAvailable;
        audioClip.StartRecording();

        if (!ConnectSpeechWebSocket())
        {
            // Without a socket there is nobody to consume the audio.
            ReleaseRecorder();
            return false;
        }
        return true;
    }

    /// <summary>
    /// Stops recording, sends any audio still queued and then the end-of-stream marker.
    /// </summary>
    public void StopSpeech()
    {
        Debug.Log("识别结束,停止录音");
        ReleaseRecorder();

        // Flush audio that Update has not sent yet so it precedes the end marker.
        byte[] pending;
        while (TryDequeue(requestQueue, out pending))
        {
            SendData(pending);
        }
        SendEndMsg(null);
    }

    /// <summary>
    /// Stops and disposes the recorder if one exists. Safe to call repeatedly.
    /// </summary>
    private void ReleaseRecorder()
    {
        WaveIn recorder = audioClip;
        if (recorder == null)
        {
            return;
        }
        audioClip = null;
        try
        {
            recorder.StopRecording();
        }
        catch (Exception ex)
        {
            Debug.LogError(ex.Message);
        }
        recorder.DataAvailable -= AudioClip_DataAvailable;
        try
        {
            recorder.Dispose();
        }
        catch (Exception ex)
        {
            Debug.LogError(ex.Message);
        }
    }

    /// <summary>Creates the WebSocket, wires its events and starts connecting.</summary>
    /// <returns><c>false</c> when the socket could not be created.</returns>
    bool ConnectSpeechWebSocket()
    {
        try
        {
            speechWebSocket = new WebSocket(GetWebSocketUrl());
        }
        catch (Exception ex)
        {
            UnityEngine.Debug.LogError(ex.Message);
            speechWebSocket = null;
            return false;
        }
        // Subscribe to every event before connecting so that a failed
        // connection still reports its close.
        speechWebSocket.OnOpen += OnWebSocketOpen;
        speechWebSocket.OnClose += OnWebSocketClose;
        speechWebSocket.OnMessage += OnInitMessage;
        speechWebSocket.OnError += OnError;
        speechWebSocket.ConnectAsync();
        return true;
    }

    void OnWebSocketOpen(object sender, EventArgs e)
    {
        Debug.Log("OnOpen");
    }

    void OnWebSocketClose(object sender, CloseEventArgs e)
    {
        Debug.Log("OnWebSocketClose");
    }

    /// <summary>Stages a server message for processing in <see cref="Update"/>.</summary>
    void OnInitMessage(object sender, MessageEventArgs e)
    {
        UnityEngine.Debug.Log("qqqqqqqqqqqqqWebSocket数据返回:" + e.Data);
        lock (messageQueue)
        {
            messageQueue.Enqueue(e.Data);
        }
    }

    /// <summary>
    /// Handles one server message: reports an error for a non-zero code,
    /// otherwise forwards the text of final results to the result callback.
    /// </summary>
    private void MainThreadOnMessage(string message)
    {
        try
        {
            XFResponse response = JsonConvert.DeserializeObject<XFResponse>(message);
            if (response == null)
            {
                return;
            }
            if (0 != response.code)
            {
                errorCallback?.Invoke();
                MessageMng.Ins.ShowFloatTip("连接出错,请重试!");
                return;
            }
            // Constant-first comparison tolerates a missing "action" field.
            if ("result".Equals(response.action))
            {
                var result = ParseXunfeiRecognitionResult(response.data);
                if (result.IsFinal)
                {
                    resultCallback?.Invoke(result.Text);
                }
            }
        }
        catch (Exception ex)
        {
            Debug.LogError(ex.Message);
        }
    }

    void OnError(object sender, WebSocketSharp.ErrorEventArgs e)
    {
        UnityEngine.Debug.Log("WebSoclet:发生错误:" + e.Message);
    }

    /// <summary>
    /// Extracts the recognized text from the "data" JSON of a result message by
    /// concatenating every <c>cn.st.rt[].ws[].cw[].w</c> entry.
    /// </summary>
    /// <param name="dataJson">JSON text of the message's data field.</param>
    /// <returns>
    /// The collected text; <c>IsFinal</c> is true unless <c>cn.st.ed</c> is "0".
    /// On a parse failure the error is logged and whatever text was collected
    /// so far is returned.
    /// </returns>
    public SpeechRecognitionResult ParseXunfeiRecognitionResult(string dataJson)
    {
        StringBuilder builder = new StringBuilder();
        SpeechRecognitionResult res = new SpeechRecognitionResult();
        try
        {
            JsonData data = JsonMapper.ToObject(dataJson);
            JsonData st = data["cn"]["st"];
            res.IsFinal = !st["ed"].ToString().Equals("0");
            foreach (JsonData rtObject in st["rt"])
            {
                foreach (JsonData wsObject in rtObject["ws"])
                {
                    foreach (JsonData cwObject in wsObject["cw"])
                    {
                        builder.Append(cwObject["w"].ToString());
                    }
                }
            }
        }
        catch (Exception ex)
        {
            Debug.LogError(ex.Message);
        }
        res.Text = builder.ToString();
        res.type = ASRResultType.TEXT;
        return res;
    }

    /// <summary>
    /// Sends one audio chunk if the socket is open; otherwise the chunk is dropped.
    /// </summary>
    void SendData(byte[] voiceData)
    {
        WebSocket socket = speechWebSocket;
        // Null check first: recording starts before the socket is created.
        // ReadyState is used instead of IsAlive because IsAlive performs a
        // ping round-trip, which would stall the caller for every chunk.
        if (socket == null || voiceData == null || socket.ReadyState != WebSocketState.Open)
        {
            return;
        }
        try
        {
            socket.SendAsync(voiceData, null);
        }
        catch (Exception ex)
        {
            Debug.LogError(ex.Message);
        }
    }

    /// <summary>
    /// Sends the end-of-stream marker as a binary frame.
    /// </summary>
    /// <param name="callback">
    /// Invoked after the send attempt completes; not invoked when the socket is
    /// missing or not open.
    /// </param>
    void SendEndMsg(System.Action callback)
    {
        string endMsg = "{\"end\": true}";
        byte[] data = Encoding.UTF8.GetBytes(endMsg);
        WebSocket socket = speechWebSocket;
        if (socket == null || socket.ReadyState != WebSocketState.Open)
        {
            return;
        }
        try
        {
            socket.SendAsync(data, success =>
            {
                if (success)
                {
                    UnityEngine.Debug.Log("WebSoclet:发送END成功:" + data.Length);
                }
                else
                {
                    UnityEngine.Debug.Log("WebSoclet:发送END失败:");
                }
                callback?.Invoke();
            });
        }
        catch (Exception ex)
        {
            Debug.LogError(ex.Message);
        }
    }

    /// <summary>
    /// Builds the connection URL: signa = Base64(HmacSHA1(MD5(appid + ts), secret)).
    /// </summary>
    private string GetWebSocketUrl()
    {
        // NOTE(review): "appid" / "appsecret" look like placeholders — replace
        // with the real application credentials.
        string appid = "appid";
        string ts = GetCurrentUnixTimestampMillis().ToString();
        string baseString = appid + ts;
        string md5 = GetMD5Hash(baseString);
        UnityEngine.Debug.Log("baseString:" + baseString + ",md5:" + md5);
        string signa = CalculateHmacSha1(md5, "appsecret");
        // Base64 may contain '+', '/' and '=', which must be percent-encoded
        // inside a query string or the signature is corrupted in transit.
        string url = string.Format("ws://rtasr.xfyun.cn/v1/ws?appid={0}&ts={1}&signa={2}",
            appid, ts, Uri.EscapeDataString(signa));
        UnityEngine.Debug.Log(url);
        return url;
    }

    /// <summary>
    /// Returns the current Unix time in whole seconds (despite the method name).
    /// </summary>
    private long GetCurrentUnixTimestampMillis()
    {
        // Computed purely in UTC so time zone and daylight-saving offsets
        // cannot skew the value.
        DateTime epoch = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc);
        return (long)(DateTime.UtcNow - epoch).TotalSeconds;
    }

    /// <summary>Returns the lowercase hexadecimal MD5 digest of the UTF-8 bytes of <paramref name="input"/>.</summary>
    public string GetMD5Hash(string input)
    {
        using (MD5 md5Hasher = MD5.Create())
        {
            byte[] digest = md5Hasher.ComputeHash(Encoding.UTF8.GetBytes(input));
            StringBuilder sBuilder = new StringBuilder(digest.Length * 2);
            foreach (byte b in digest)
            {
                sBuilder.Append(b.ToString("x2"));
            }
            return sBuilder.ToString();
        }
    }

    /// <summary>Returns the Base64-encoded HMAC-SHA1 of <paramref name="data"/> keyed with <paramref name="key"/> (both UTF-8).</summary>
    public string CalculateHmacSha1(string data, string key)
    {
        using (HMACSHA1 hmac = new HMACSHA1(Encoding.UTF8.GetBytes(key)))
        {
            byte[] hashBytes = hmac.ComputeHash(Encoding.UTF8.GetBytes(data));
            return Convert.ToBase64String(hashBytes);
        }
    }

    /// <summary>Removes the next item from <paramref name="queue"/> under its lock.</summary>
    /// <returns><c>false</c> when the queue is empty.</returns>
    private static bool TryDequeue<T>(Queue<T> queue, out T item)
    {
        lock (queue)
        {
            if (queue.Count > 0)
            {
                item = queue.Dequeue();
                return true;
            }
        }
        item = default(T);
        return false;
    }

    /// <summary>
    /// Drains both queues every frame so neither falls behind when the frame
    /// time exceeds the audio buffer duration.
    /// </summary>
    private void Update()
    {
        string message;
        while (TryDequeue(messageQueue, out message))
        {
            MainThreadOnMessage(message);
        }
        byte[] chunk;
        while (TryDequeue(requestQueue, out chunk))
        {
            SendData(chunk);
        }
    }

    private void OnDestroy()
    {
        // Release the input device if the component is destroyed mid-recording.
        ReleaseRecorder();
    }

    private void OnApplicationQuit()
    {
        ReleaseRecorder();
    }
}

BufferMilliseconds我这里设置为80,按16kHz、16位、单声道计算,每次回调的数据约为2560字节(16000×2×0.08),大于1280字节(即40毫秒的数据量),讯飞同样可以完成识别。


原文地址:https://blog.csdn.net/cheng219101/article/details/139168325

免责声明:本站文章内容转载自网络资源,如本站内容侵犯了原著者的合法权益,可联系本站删除。更多内容请关注自学内容网(zxcms.com)!