This feature works on both PC and mobile.
Note: this article follows the official 实时语音转写 API 文档 (iFlytek Real-time Speech Transcription API documentation).
Preparation
1. Unity 2021 or later.
2. Register an account on the 科大讯飞开放平台 (iFlytek Open Platform).
3. Create a new application on the platform.
4. Click "实时语音转写" (Real-time Speech Transcription) in the left sidebar.
5. Claim the free quota and note down the APPID and APIKey; these two values are used to sign the WebSocket handshake (see the sketch after this list).
6. Import the two required DLLs into Unity; the resource has been uploaded and is available for download. (The script below uses the WebSocketSharp, LitJson, and Newtonsoft.Json namespaces, so make sure those libraries are present in the project.)
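Before the full script, it helps to see how the APPID and APIKey from step 5 turn into the handshake signature the RTASR endpoint expects. The sketch below follows the same recipe the SpeechHelper script uses later (MD5 of appid + ts, then HMAC-SHA1 keyed with the APIKey, then Base64); YOUR_APPID and YOUR_APIKEY are placeholders, and the RtasrSignatureSketch class is illustrative only.

using System;
using System.Security.Cryptography;
using System.Text;

public static class RtasrSignatureSketch
{
    // Builds the RTASR handshake URL from the APPID and APIKey noted in step 5.
    public static string BuildHandshakeUrl(string appId, string apiKey)
    {
        // ts: current Unix timestamp in seconds.
        string ts = DateTimeOffset.UtcNow.ToUnixTimeSeconds().ToString();

        // Step 1: hex-encoded MD5 of appid + ts.
        string md5Hex;
        using (var md5 = MD5.Create())
        {
            byte[] hash = md5.ComputeHash(Encoding.UTF8.GetBytes(appId + ts));
            var sb = new StringBuilder();
            foreach (byte b in hash) sb.Append(b.ToString("x2"));
            md5Hex = sb.ToString();
        }

        // Step 2: HMAC-SHA1 of that hex string, keyed with the APIKey, then Base64.
        string signa;
        using (var hmac = new HMACSHA1(Encoding.UTF8.GetBytes(apiKey)))
        {
            signa = Convert.ToBase64String(hmac.ComputeHash(Encoding.UTF8.GetBytes(md5Hex)));
        }

        // Base64 can contain '+', '/' and '=', so URL-encode it before appending.
        return string.Format("ws://rtasr.xfyun.cn/v1/ws?appid={0}&ts={1}&signa={2}",
            appId, ts, Uri.EscapeDataString(signa));
    }
}

The SpeechHelper script below builds the same URL in its GetWebSocketUrl method.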
Writing the code (following the API documentation above)
Create a new script named SpeechHelper.cs:
using System;
using System.Collections;
using System.Collections.Generic;
using System.Security.Cryptography;
using System.Text;
using LitJson;
using Newtonsoft.Json;
using UnityEngine;
using UnityEngine.UI;
using WebSocketSharp;

public class SpeechHelper : MonoBehaviour
{
    private int last_length = -1;
    private float[] volumeData = new float[9999];
    private short[] intData = new short[9999];
    bool isRunning = true;
    public event Action<string> 语音识别完成事件; // recognition-finished callback event (optional, for external subscribers)
    [HideInInspector]
    public AudioClip RecordedClip;
    private string micphoneName = string.Empty;
    WebSocketSharp.WebSocket speechWebSocket;
    private System.Action<string> resultCallback;
    public Text contentText;
    private bool isEnd = false;
    public Button startButton;

    private void Start()
    {
        InitSpeechHelper(SpeechToText);
    }

    public void InitSpeechHelper(System.Action<string> textCallback)
    {
        resultCallback = textCallback;
    }

    private void SpeechToText(string s)
    {
        contentText.text += s;
        isRunning = true;
    }

    // Start speech-to-text
    public void StartSpeech()
    {
        startButton.interactable = false;
        if (speechWebSocket != null && speechWebSocket.ReadyState == WebSocketSharp.WebSocketState.Open)
        {
            Debug.LogWarning("Failed to start recognition: wait for the previous connection to close");
            return;
        }
        if (Microphone.devices.Length <= 0)
        {
            Debug.LogWarning("No microphone found");
            return;
        }
        messageQueue.Clear();
        micphoneName = Microphone.devices[0];
        Debug.Log("micphoneName:" + micphoneName);
        isRunning = true;
        isEnd = false;
        try
        {
            // Record up to 60 s of 16 kHz mono audio, which is the format the RTASR service expects.
            RecordedClip = Microphone.Start(micphoneName, false, 60, 16000);
            ConnectSpeechWebSocket();
        }
        catch (Exception ex)
        {
            Debug.LogError(ex.Message);
        }
    }

    // adb forward tcp:34999 localabstract:Unity-com.example.iflyteka
    // Stop speech-to-text
    public void StopSpeech()
    {
        if (!isEnd)
        {
            SendEndMsg(null);
            Microphone.End(micphoneName);
            isEnd = true;
            startButton.interactable = true;
        }
        Debug.Log("Recognition finished, recording stopped");
    }

    void ConnectSpeechWebSocket()
    {
        try
        {
            speechWebSocket = new WebSocketSharp.WebSocket(GetWebSocketUrl());
        }
        catch (Exception ex)
        {
            UnityEngine.Debug.LogError(ex.Message);
            return;
        }
        speechWebSocket.OnOpen += (sender, e) =>
        {
            Debug.Log("OnOpen");
            speechWebSocket.OnClose += OnWebSocketClose;
        };
        speechWebSocket.OnMessage += OnInitMessage;
        speechWebSocket.OnError += OnError;
        speechWebSocket.ConnectAsync();
        StartCoroutine(SendVoiceData());
    }

    void OnWebSocketClose(object sender, CloseEventArgs e)
    {
        Debug.Log("OnWebSocketClose");
    }

    // Messages arrive on the WebSocket thread; they are queued here and handled on the main thread in Update().
    private static Queue<string> messageQueue = new Queue<string>();

    void OnInitMessage(object sender, WebSocketSharp.MessageEventArgs e)
    {
        UnityEngine.Debug.Log("WebSocket message received: " + e.Data);
        messageQueue.Enqueue(e.Data);
    }

    private void MainThreadOnMessage(string message)
    {
        try
        {
            XFResponse response = JsonConvert.DeserializeObject<XFResponse>(message);
            Debug.Log("response.code: " + response.code + ", response.data: " + response.data);
            if (0 != response.code)
            {
                return;
            }
            if (response.action.Equals("result"))
            {
                var result = ParseXunfeiRecognitionResult(response.data);
                Debug.Log("result:" + result);
                if (result.IsFinal)
                {
                    Debug.Log("Final text: " + result.Text);
                    resultCallback?.Invoke(result.Text);
                }
                else
                {
                    Debug.Log("Partial text: " + result.Text);
                }
            }
        }
        catch (Exception ex)
        {
            Debug.LogError(ex.Message);
        }
    }

    void OnError(object sender, ErrorEventArgs e)
    {
        UnityEngine.Debug.Log("WebSocket error: " + e.Message);
    }

    // Walks the cn/st/rt/ws/cw structure returned by the RTASR service and concatenates the recognized words.
    public SpeechRecognitionResult ParseXunfeiRecognitionResult(string dataJson)
    {
        StringBuilder builder = new StringBuilder();
        SpeechRecognitionResult res = new SpeechRecognitionResult();
        try
        {
            JsonData data = JsonMapper.ToObject(dataJson);
            JsonData cn = data["cn"];
            JsonData st = cn["st"];
            // "ed" == "0" means an intermediate (partial) result; anything else is a final result.
            if (st["ed"].ToString().Equals("0"))
            {
                res.IsFinal = false;
            }
            else
            {
                res.IsFinal = true;
            }
            JsonData rtArry = st["rt"];
            foreach (JsonData rtObject in rtArry)
            {
                JsonData wsArr = rtObject["ws"];
                foreach (JsonData wsObject in wsArr)
                {
                    JsonData cwArr = wsObject["cw"];
                    foreach (JsonData cwObject in cwArr)
                    {
                        builder.Append(cwObject["w"].ToString());
                    }
                }
            }
        }
        catch (Exception ex)
        {
            Debug.LogError(ex.Message);
        }
        res.Text = builder.ToString();
        return res;
    }

    void SendData(byte[] voiceData)
    {
        Debug.Log("SendData:" + voiceData.Length + ",time:" + Time.realtimeSinceStartup);
        if (speechWebSocket.ReadyState != WebSocketSharp.WebSocketState.Open)
        {
            return;
        }
        try
        {
            if (speechWebSocket != null && speechWebSocket.IsAlive)
            {
                speechWebSocket.SendAsync(voiceData, success =>
                {
                    if (success)
                    {
                        UnityEngine.Debug.Log("WebSocket: send succeeded: " + voiceData.Length);
                    }
                    else
                    {
                        UnityEngine.Debug.Log("WebSocket: send failed");
                    }
                });
            }
        }
        catch { }
    }

    // Tells the service that no more audio will follow, so it can return the last result and close.
    void SendEndMsg(System.Action callback)
    {
        string endMsg = "{\"end\": true}";
        byte[] data = Encoding.UTF8.GetBytes(endMsg);
        try
        {
            if (speechWebSocket != null && speechWebSocket.IsAlive)
            {
                speechWebSocket.SendAsync(data, success =>
                {
                    if (success)
                    {
                        UnityEngine.Debug.Log("WebSocket: END message sent: " + data.Length);
                    }
                    else
                    {
                        UnityEngine.Debug.Log("WebSocket: failed to send END message");
                    }
                    callback?.Invoke();
                });
            }
        }
        catch { }
    }

    IEnumerator SendVoiceData()
    {
        yield return new WaitUntil(() => (speechWebSocket.ReadyState == WebSocketSharp.WebSocketState.Open));
        yield return new WaitWhile(() => Microphone.GetPosition(micphoneName) <= 0);
        float t = 0;
        int position = Microphone.GetPosition(micphoneName);
        const float waitTime = 0.04f; // send audio every 40 ms
        int lastPosition = 0;
        const int Maxlength = 640;    // maximum number of samples per send
        Debug.Log("position:" + position + ",samples:" + RecordedClip.samples);
        while (position < RecordedClip.samples && speechWebSocket.ReadyState == WebSocketSharp.WebSocketState.Open)
        {
            t += waitTime;
            yield return new WaitForSecondsRealtime(waitTime);
            if (Microphone.IsRecording(micphoneName)) position = Microphone.GetPosition(micphoneName);
            // Debug.Log("Recording time: " + t + ", position=" + position + ", lastPosition=" + lastPosition);
            if (position <= lastPosition)
            {
                Debug.LogWarning("Audio stream finished, forcing stop");
                break;
            }
            int length = position - lastPosition > Maxlength ? Maxlength : position - lastPosition;
            byte[] date = GetClipData(lastPosition, length, RecordedClip);
            // SendData(date); // sending is handled per frame in Update() via GetVoiveData(), so this call stays disabled
            lastPosition = lastPosition + length;
        }
        yield return new WaitForSecondsRealtime(waitTime);
        if (!isEnd)
        {
            SendEndMsg(null);
            Microphone.End(micphoneName);
            isEnd = true;
            startButton.interactable = true;
        }
    }

    // Converts a slice of the recorded clip to 16-bit little-endian PCM bytes.
    public byte[] GetClipData(int star, int length, AudioClip recordedClip)
    {
        float[] soundata = new float[length];
        recordedClip.GetData(soundata, star);
        int rescaleFactor = 32767;
        byte[] outData = new byte[soundata.Length * 2];
        for (int i = 0; i < soundata.Length; i++)
        {
            short temshort = (short)(soundata[i] * rescaleFactor);
            byte[] temdata = BitConverter.GetBytes(temshort);
            outData[i * 2] = temdata[0];
            outData[i * 2 + 1] = temdata[1];
        }
        return outData;
    }

    // Builds the RTASR handshake URL: signa = Base64(HmacSHA1(MD5(appid + ts), APIKey)).
    private string GetWebSocketUrl()
    {
        string appid = "YOUR_APPID"; // replace with the APPID from the console
        string ts = GetCurrentUnixTimestampMillis().ToString();
        string baseString = appid + ts;
        string md5 = GetMD5Hash(baseString);
        UnityEngine.Debug.Log("baseString:" + baseString + ",md5:" + md5);
        string sha1 = CalculateHmacSha1(md5, "YOUR_APIKEY"); // replace with the APIKey from the console
        // The Base64 signature may contain '+', '/' or '=', so URL-encode it before putting it in the query string.
        string signa = Uri.EscapeDataString(sha1);
        string url = string.Format("ws://rtasr.xfyun.cn/v1/ws?appid={0}&ts={1}&signa={2}", appid, ts, signa);
        UnityEngine.Debug.Log(url);
        return url;
    }

    // NOTE: despite the name, this returns a second-level Unix timestamp, which is what the signature expects.
    private long GetCurrentUnixTimestampMillis()
    {
        DateTime unixStartTime = new DateTime(1970, 1, 1).ToLocalTime();
        DateTime now = DateTime.Now;
        TimeSpan timeSpan = now - unixStartTime;
        long timestamp = (long)timeSpan.TotalSeconds;
        return timestamp;
    }

    public string GetMD5Hash(string input)
    {
        MD5 md5Hasher = MD5.Create();
        byte[] data = md5Hasher.ComputeHash(Encoding.Default.GetBytes(input));
        StringBuilder sBuilder = new StringBuilder();
        for (int i = 0; i < data.Length; i++)
        {
            sBuilder.Append(data[i].ToString("x2"));
        }
        return sBuilder.ToString();
    }

    public string CalculateHmacSha1(string data, string key)
    {
        HMACSHA1 hmac = new HMACSHA1(Encoding.UTF8.GetBytes(key));
        byte[] hashBytes = hmac.ComputeHash(Encoding.UTF8.GetBytes(data));
        return Convert.ToBase64String(hashBytes);
    }

    private void Update()
    {
        // Stream the newest microphone samples every frame and dispatch queued WebSocket messages on the main thread.
        if (isRunning)
        {
            byte[] voiceData = GetVoiveData();
            if (voiceData != null)
            {
                SendData(voiceData);
            }
        }
        if (messageQueue.Count > 0)
        {
            MainThreadOnMessage(messageQueue.Dequeue());
        }
    }

    // Reads the samples recorded since the last call and converts them to 16-bit PCM bytes.
    private byte[] GetVoiveData()
    {
        if (RecordedClip == null)
        {
            return null;
        }
        int new_length = Microphone.GetPosition(null);
        if (new_length == last_length)
        {
            if (Microphone.devices.Length == 0)
            {
                isRunning = false;
            }
            return null;
        }
        int length = new_length - last_length;
        int offset = last_length + 1;
        last_length = new_length;
        if (offset < 0)
        {
            return null;
        }
        if (length < 0)
        {
            // The write position wrapped around the clip; stitch the tail and head together.
            float[] temp = new float[RecordedClip.samples];
            RecordedClip.GetData(temp, 0);
            int lengthTail = RecordedClip.samples - offset;
            int lengthHead = new_length + 1;
            try
            {
                Array.Copy(temp, offset, volumeData, 0, lengthTail);
                Array.Copy(temp, 0, volumeData, lengthTail + 1, lengthHead);
                length = lengthTail + lengthHead;
            }
            catch (Exception)
            {
                return null;
            }
        }
        else
        {
            if (length > volumeData.Length)
            {
                volumeData = new float[length];
                intData = new short[length];
            }
            RecordedClip.GetData(volumeData, offset);
        }
        byte[] bytesData = new byte[length * 2];
        int rescaleFactor = 32767; // to convert float to Int16
        for (int i = 0; i < length; i++)
        {
            intData[i] = (short)(volumeData[i] * rescaleFactor);
            byte[] byteArr = BitConverter.GetBytes(intData[i]);
            byteArr.CopyTo(bytesData, i * 2);
        }
        return bytesData;
    }
}

[Serializable]
public struct XFResponse
{
    public string action;
    public int code;
    public string data;
    public string desc;
    public string sid;
}

[Serializable]
public struct SpeechRecognitionResult
{
    public string Text;
    public bool IsFinal;
}
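If another script should consume the recognized text instead of (or in addition to) the contentText label, a minimal consumer sketch is shown below. The SpeechConsumer class and its field are illustrative only; note that calling InitSpeechHelper replaces the default SpeechToText callback that SpeechHelper registers in its own Start().

using UnityEngine;

// Hypothetical consumer: receives the final recognition text via the callback registered with InitSpeechHelper.
public class SpeechConsumer : MonoBehaviour
{
    public SpeechHelper speechHelper; // drag the object that carries SpeechHelper here

    private void Start()
    {
        // Replaces the default callback set up by SpeechHelper.Start().
        speechHelper.InitSpeechHelper(OnFinalText);
    }

    private void OnFinalText(string text)
    {
        Debug.Log("Final recognition result: " + text);
    }
}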
Scene setup
Attach the script to the Canvas. On the Content object, add a Text component and a Content Size Fitter component, and create two buttons, Start and Stop.
Drag the objects you just created onto the corresponding fields of the SpeechHelper component (the Text into contentText, the Start button into startButton).
In the OnClick event of each button, drag in the Canvas object (which carries SpeechHelper); choose the StartSpeech method for the Start button and the StopSpeech method for the Stop button. The same wiring can also be done from code, as shown below.
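A minimal sketch of wiring the buttons from code instead of the inspector's OnClick list; the SpeechButtonBinder class and its field names are illustrative only.

using UnityEngine;
using UnityEngine.UI;

// Hypothetical helper: binds the Start/Stop buttons to SpeechHelper at runtime.
public class SpeechButtonBinder : MonoBehaviour
{
    public SpeechHelper speechHelper; // drag the Canvas that carries SpeechHelper here
    public Button startButton;
    public Button stopButton;

    private void Awake()
    {
        startButton.onClick.AddListener(speechHelper.StartSpeech);
        stopButton.onClick.AddListener(speechHelper.StopSpeech);
    }
}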
After that, you can build and test!
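One mobile-specific note: on Android 6.0 and later the microphone permission must also be granted at runtime, otherwise Microphone.Start may fail or record silence. A minimal sketch using Unity's built-in Permission API; the MicPermissionRequester name is illustrative only.

using UnityEngine;
#if UNITY_ANDROID
using UnityEngine.Android;
#endif

// Hypothetical helper: requests the Android microphone permission before recording starts.
public class MicPermissionRequester : MonoBehaviour
{
    private void Start()
    {
#if UNITY_ANDROID
        if (!Permission.HasUserAuthorizedPermission(Permission.Microphone))
        {
            Permission.RequestUserPermission(Permission.Microphone);
        }
#endif
    }
}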
Test results
This article draws on the work of another author (original link not preserved). Copyright belongs to the original author, 育婴房扛把子.