This feature works on both PC and mobile.
Note: this article follows the official 实时语音转写 API 文档 (iFlytek Real-time Speech Transcription API documentation).
Preparation
1. Unity 2021 or later.
2. Register an account on the 科大讯飞开放平台 (iFlytek Open Platform).
3. Create a new application on the platform.
4. Click "实时语音转写" (Real-time Speech Transcription) in the left sidebar.
5. Claim the free quota and note down the APPID and APIKey; these two values are used to sign the WebSocket handshake (see the sketch after this list).
6. Import the two required DLLs into Unity; the resource has been uploaded and is available for download. (The script below uses the WebSocketSharp, LitJson, and Newtonsoft.Json namespaces, so make sure those libraries are present in the project.)
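Before the full script, it helps to see how the APPID and APIKey from step 5 turn into the handshake signature the RTASR endpoint expects. The sketch below follows the same recipe the SpeechHelper script uses later (MD5 of appid + ts, then HMAC-SHA1 keyed with the APIKey, then Base64); YOUR_APPID and YOUR_APIKEY are placeholders, and the RtasrSignatureSketch class is illustrative only.

using System;
using System.Security.Cryptography;
using System.Text;

public static class RtasrSignatureSketch
{
    // Builds the RTASR handshake URL from the APPID and APIKey noted in step 5.
    public static string BuildHandshakeUrl(string appId, string apiKey)
    {
        // ts: current Unix timestamp in seconds.
        string ts = DateTimeOffset.UtcNow.ToUnixTimeSeconds().ToString();

        // Step 1: hex-encoded MD5 of appid + ts.
        string md5Hex;
        using (var md5 = MD5.Create())
        {
            byte[] hash = md5.ComputeHash(Encoding.UTF8.GetBytes(appId + ts));
            var sb = new StringBuilder();
            foreach (byte b in hash) sb.Append(b.ToString("x2"));
            md5Hex = sb.ToString();
        }

        // Step 2: HMAC-SHA1 of that hex string, keyed with the APIKey, then Base64.
        string signa;
        using (var hmac = new HMACSHA1(Encoding.UTF8.GetBytes(apiKey)))
        {
            signa = Convert.ToBase64String(hmac.ComputeHash(Encoding.UTF8.GetBytes(md5Hex)));
        }

        // Base64 can contain '+', '/' and '=', so URL-encode it before appending.
        return string.Format("ws://rtasr.xfyun.cn/v1/ws?appid={0}&ts={1}&signa={2}",
            appId, ts, Uri.EscapeDataString(signa));
    }
}

The SpeechHelper script below builds the same URL in its GetWebSocketUrl method.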
Writing the code (following the API documentation above)
Create a new script named SpeechHelper.cs:
using System;
using System.Collections;
using System.Collections.Generic;
using System.Security.Cryptography;
using System.Text;
using LitJson;
using Newtonsoft.Json;
using UnityEngine;
using UnityEngine.UI;
using WebSocketSharp;

public class SpeechHelper : MonoBehaviour
{
    private int last_length = -1;
    private float[] volumeData = new float[9999];
    private short[] intData = new short[9999];
    bool isRunning = true;
    public event Action<string> 语音识别完成事件; // recognition-finished callback event (optional, for external subscribers)
    [HideInInspector]
    public AudioClip RecordedClip;
    private string micphoneName = string.Empty;
    WebSocketSharp.WebSocket speechWebSocket;
    private System.Action<string> resultCallback;
    public Text contentText;
    private bool isEnd = false;
    public Button startButton;

    private void Start()
    {
        InitSpeechHelper(SpeechToText);
    }

    public void InitSpeechHelper(System.Action<string> textCallback)
    {
        resultCallback = textCallback;
    }

    private void SpeechToText(string s)
    {
        contentText.text += s;
        isRunning = true;
    }

    // Start speech-to-text
    public void StartSpeech()
    {
        startButton.interactable = false;
        if (speechWebSocket != null && speechWebSocket.ReadyState == WebSocketSharp.WebSocketState.Open)
        {
            Debug.LogWarning("Failed to start recognition: wait for the previous connection to close");
            return;
        }
        if (Microphone.devices.Length <= 0)
        {
            Debug.LogWarning("No microphone found");
            return;
        }
        messageQueue.Clear();
        micphoneName = Microphone.devices[0];
        Debug.Log("micphoneName:" + micphoneName);
        isRunning = true;
        isEnd = false;
        try
        {
            // Record up to 60 s of 16 kHz mono audio, which is the format the RTASR service expects.
            RecordedClip = Microphone.Start(micphoneName, false, 60, 16000);
            ConnectSpeechWebSocket();
        }
        catch (Exception ex)
        {
            Debug.LogError(ex.Message);
        }
    }

    // adb forward tcp:34999 localabstract:Unity-com.example.iflyteka
    // Stop speech-to-text
    public void StopSpeech()
    {
        if (!isEnd)
        {
            SendEndMsg(null);
            Microphone.End(micphoneName);
            isEnd = true;
            startButton.interactable = true;
        }
        Debug.Log("Recognition finished, recording stopped");
    }

    void ConnectSpeechWebSocket()
    {
        try
        {
            speechWebSocket = new WebSocketSharp.WebSocket(GetWebSocketUrl());
        }
        catch (Exception ex)
        {
            UnityEngine.Debug.LogError(ex.Message);
            return;
        }
        speechWebSocket.OnOpen += (sender, e) =>
        {
            Debug.Log("OnOpen");
            speechWebSocket.OnClose += OnWebSocketClose;
        };
        speechWebSocket.OnMessage += OnInitMessage;
        speechWebSocket.OnError += OnError;
        speechWebSocket.ConnectAsync();
        StartCoroutine(SendVoiceData());
    }

    void OnWebSocketClose(object sender, CloseEventArgs e)
    {
        Debug.Log("OnWebSocketClose");
    }

    // Messages arrive on the WebSocket thread; they are queued here and handled on the main thread in Update().
    private static Queue<string> messageQueue = new Queue<string>();

    void OnInitMessage(object sender, WebSocketSharp.MessageEventArgs e)
    {
        UnityEngine.Debug.Log("WebSocket message received: " + e.Data);
        messageQueue.Enqueue(e.Data);
    }

    private void MainThreadOnMessage(string message)
    {
        try
        {
            XFResponse response = JsonConvert.DeserializeObject<XFResponse>(message);
            Debug.Log("response.code: " + response.code + ", response.data: " + response.data);
            if (0 != response.code)
            {
                return;
            }
            if (response.action.Equals("result"))
            {
                var result = ParseXunfeiRecognitionResult(response.data);
                Debug.Log("result:" + result);
                if (result.IsFinal)
                {
                    Debug.Log("Final text: " + result.Text);
                    resultCallback?.Invoke(result.Text);
                }
                else
                {
                    Debug.Log("Partial text: " + result.Text);
                }
            }
        }
        catch (Exception ex)
        {
            Debug.LogError(ex.Message);
        }
    }

    void OnError(object sender, ErrorEventArgs e)
    {
        UnityEngine.Debug.Log("WebSocket error: " + e.Message);
    }

    // Walks the cn/st/rt/ws/cw structure returned by the RTASR service and concatenates the recognized words.
    public SpeechRecognitionResult ParseXunfeiRecognitionResult(string dataJson)
    {
        StringBuilder builder = new StringBuilder();
        SpeechRecognitionResult res = new SpeechRecognitionResult();
        try
        {
            JsonData data = JsonMapper.ToObject(dataJson);
            JsonData cn = data["cn"];
            JsonData st = cn["st"];
            // "ed" == "0" means an intermediate (partial) result; anything else is a final result.
            if (st["ed"].ToString().Equals("0"))
            {
                res.IsFinal = false;
            }
            else
            {
                res.IsFinal = true;
            }
            JsonData rtArry = st["rt"];
            foreach (JsonData rtObject in rtArry)
            {
                JsonData wsArr = rtObject["ws"];
                foreach (JsonData wsObject in wsArr)
                {
                    JsonData cwArr = wsObject["cw"];
                    foreach (JsonData cwObject in cwArr)
                    {
                        builder.Append(cwObject["w"].ToString());
                    }
                }
            }
        }
        catch (Exception ex)
        {
            Debug.LogError(ex.Message);
        }
        res.Text = builder.ToString();
        return res;
    }

    void SendData(byte[] voiceData)
    {
        Debug.Log("SendData:" + voiceData.Length + ",time:" + Time.realtimeSinceStartup);
        if (speechWebSocket.ReadyState != WebSocketSharp.WebSocketState.Open)
        {
            return;
        }
        try
        {
            if (speechWebSocket != null && speechWebSocket.IsAlive)
            {
                speechWebSocket.SendAsync(voiceData, success =>
                {
                    if (success)
                    {
                        UnityEngine.Debug.Log("WebSocket: send succeeded: " + voiceData.Length);
                    }
                    else
                    {
                        UnityEngine.Debug.Log("WebSocket: send failed");
                    }
                });
            }
        }
        catch { }
    }

    // Tells the service that no more audio will follow, so it can return the last result and close.
    void SendEndMsg(System.Action callback)
    {
        string endMsg = "{\"end\": true}";
        byte[] data = Encoding.UTF8.GetBytes(endMsg);
        try
        {
            if (speechWebSocket != null && speechWebSocket.IsAlive)
            {
                speechWebSocket.SendAsync(data, success =>
                {
                    if (success)
                    {
                        UnityEngine.Debug.Log("WebSocket: END message sent: " + data.Length);
                    }
                    else
                    {
                        UnityEngine.Debug.Log("WebSocket: failed to send END message");
                    }
                    callback?.Invoke();
                });
            }
        }
        catch { }
    }

    IEnumerator SendVoiceData()
    {
        yield return new WaitUntil(() => (speechWebSocket.ReadyState == WebSocketSharp.WebSocketState.Open));
        yield return new WaitWhile(() => Microphone.GetPosition(micphoneName) <= 0);
        float t = 0;
        int position = Microphone.GetPosition(micphoneName);
        const float waitTime = 0.04f; // send audio every 40 ms
        int lastPosition = 0;
        const int Maxlength = 640;    // maximum number of samples per send
        Debug.Log("position:" + position + ",samples:" + RecordedClip.samples);
        while (position < RecordedClip.samples && speechWebSocket.ReadyState == WebSocketSharp.WebSocketState.Open)
        {
            t += waitTime;
            yield return new WaitForSecondsRealtime(waitTime);
            if (Microphone.IsRecording(micphoneName)) position = Microphone.GetPosition(micphoneName);
            // Debug.Log("Recording time: " + t + ", position=" + position + ", lastPosition=" + lastPosition);
            if (position <= lastPosition)
            {
                Debug.LogWarning("Audio stream finished, forcing stop");
                break;
            }
            int length = position - lastPosition > Maxlength ? Maxlength : position - lastPosition;
            byte[] date = GetClipData(lastPosition, length, RecordedClip);
            // SendData(date); // sending is handled per frame in Update() via GetVoiveData(), so this call stays disabled
            lastPosition = lastPosition + length;
        }
        yield return new WaitForSecondsRealtime(waitTime);
        if (!isEnd)
        {
            SendEndMsg(null);
            Microphone.End(micphoneName);
            isEnd = true;
            startButton.interactable = true;
        }
    }

    // Converts a slice of the recorded clip to 16-bit little-endian PCM bytes.
    public byte[] GetClipData(int star, int length, AudioClip recordedClip)
    {
        float[] soundata = new float[length];
        recordedClip.GetData(soundata, star);
        int rescaleFactor = 32767;
        byte[] outData = new byte[soundata.Length * 2];
        for (int i = 0; i < soundata.Length; i++)
        {
            short temshort = (short)(soundata[i] * rescaleFactor);
            byte[] temdata = BitConverter.GetBytes(temshort);
            outData[i * 2] = temdata[0];
            outData[i * 2 + 1] = temdata[1];
        }
        return outData;
    }

    // Builds the RTASR handshake URL: signa = Base64(HmacSHA1(MD5(appid + ts), APIKey)).
    private string GetWebSocketUrl()
    {
        string appid = "YOUR_APPID"; // replace with the APPID from the console
        string ts = GetCurrentUnixTimestampMillis().ToString();
        string baseString = appid + ts;
        string md5 = GetMD5Hash(baseString);
        UnityEngine.Debug.Log("baseString:" + baseString + ",md5:" + md5);
        string sha1 = CalculateHmacSha1(md5, "YOUR_APIKEY"); // replace with the APIKey from the console
        // The Base64 signature may contain '+', '/' or '=', so URL-encode it before putting it in the query string.
        string signa = Uri.EscapeDataString(sha1);
        string url = string.Format("ws://rtasr.xfyun.cn/v1/ws?appid={0}&ts={1}&signa={2}", appid, ts, signa);
        UnityEngine.Debug.Log(url);
        return url;
    }

    // NOTE: despite the name, this returns a second-level Unix timestamp, which is what the signature expects.
    private long GetCurrentUnixTimestampMillis()
    {
        DateTime unixStartTime = new DateTime(1970, 1, 1).ToLocalTime();
        DateTime now = DateTime.Now;
        TimeSpan timeSpan = now - unixStartTime;
        long timestamp = (long)timeSpan.TotalSeconds;
        return timestamp;
    }

    public string GetMD5Hash(string input)
    {
        MD5 md5Hasher = MD5.Create();
        byte[] data = md5Hasher.ComputeHash(Encoding.Default.GetBytes(input));
        StringBuilder sBuilder = new StringBuilder();
        for (int i = 0; i < data.Length; i++)
        {
            sBuilder.Append(data[i].ToString("x2"));
        }
        return sBuilder.ToString();
    }

    public string CalculateHmacSha1(string data, string key)
    {
        HMACSHA1 hmac = new HMACSHA1(Encoding.UTF8.GetBytes(key));
        byte[] hashBytes = hmac.ComputeHash(Encoding.UTF8.GetBytes(data));
        return Convert.ToBase64String(hashBytes);
    }

    private void Update()
    {
        // Stream the newest microphone samples every frame and dispatch queued WebSocket messages on the main thread.
        if (isRunning)
        {
            byte[] voiceData = GetVoiveData();
            if (voiceData != null)
            {
                SendData(voiceData);
            }
        }
        if (messageQueue.Count > 0)
        {
            MainThreadOnMessage(messageQueue.Dequeue());
        }
    }

    // Reads the samples recorded since the last call and converts them to 16-bit PCM bytes.
    private byte[] GetVoiveData()
    {
        if (RecordedClip == null)
        {
            return null;
        }
        int new_length = Microphone.GetPosition(null);
        if (new_length == last_length)
        {
            if (Microphone.devices.Length == 0)
            {
                isRunning = false;
            }
            return null;
        }
        int length = new_length - last_length;
        int offset = last_length + 1;
        last_length = new_length;
        if (offset < 0)
        {
            return null;
        }
        if (length < 0)
        {
            // The write position wrapped around the clip; stitch the tail and head together.
            float[] temp = new float[RecordedClip.samples];
            RecordedClip.GetData(temp, 0);
            int lengthTail = RecordedClip.samples - offset;
            int lengthHead = new_length + 1;
            try
            {
                Array.Copy(temp, offset, volumeData, 0, lengthTail);
                Array.Copy(temp, 0, volumeData, lengthTail + 1, lengthHead);
                length = lengthTail + lengthHead;
            }
            catch (Exception)
            {
                return null;
            }
        }
        else
        {
            if (length > volumeData.Length)
            {
                volumeData = new float[length];
                intData = new short[length];
            }
            RecordedClip.GetData(volumeData, offset);
        }
        byte[] bytesData = new byte[length * 2];
        int rescaleFactor = 32767; // to convert float to Int16
        for (int i = 0; i < length; i++)
        {
            intData[i] = (short)(volumeData[i] * rescaleFactor);
            byte[] byteArr = BitConverter.GetBytes(intData[i]);
            byteArr.CopyTo(bytesData, i * 2);
        }
        return bytesData;
    }
}

[Serializable]
public struct XFResponse
{
    public string action;
    public int code;
    public string data;
    public string desc;
    public string sid;
}

[Serializable]
public struct SpeechRecognitionResult
{
    public string Text;
    public bool IsFinal;
}
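If another script should consume the recognized text instead of (or in addition to) the contentText label, a minimal consumer sketch is shown below. The SpeechConsumer class and its field are illustrative only; note that calling InitSpeechHelper replaces the default SpeechToText callback that SpeechHelper registers in its own Start().

using UnityEngine;

// Hypothetical consumer: receives the final recognition text via the callback registered with InitSpeechHelper.
public class SpeechConsumer : MonoBehaviour
{
    public SpeechHelper speechHelper; // drag the object that carries SpeechHelper here

    private void Start()
    {
        // Replaces the default callback set up by SpeechHelper.Start().
        speechHelper.InitSpeechHelper(OnFinalText);
    }

    private void OnFinalText(string text)
    {
        Debug.Log("Final recognition result: " + text);
    }
}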
Scene setup
Attach the script to the Canvas. On the Content object, add a Text component and a Content Size Fitter component, and create two buttons, Start and Stop.
Drag the objects you just created onto the corresponding fields of the SpeechHelper component (the Text into contentText, the Start button into startButton).
In the OnClick event of each button, drag in the Canvas object (which carries SpeechHelper); choose the StartSpeech method for the Start button and the StopSpeech method for the Stop button. The same wiring can also be done from code, as shown below.
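A minimal sketch of wiring the buttons from code instead of the inspector's OnClick list; the SpeechButtonBinder class and its field names are illustrative only.

using UnityEngine;
using UnityEngine.UI;

// Hypothetical helper: binds the Start/Stop buttons to SpeechHelper at runtime.
public class SpeechButtonBinder : MonoBehaviour
{
    public SpeechHelper speechHelper; // drag the Canvas that carries SpeechHelper here
    public Button startButton;
    public Button stopButton;

    private void Awake()
    {
        startButton.onClick.AddListener(speechHelper.StartSpeech);
        stopButton.onClick.AddListener(speechHelper.StopSpeech);
    }
}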
After that, you can build and test!
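One mobile-specific note: on Android 6.0 and later the microphone permission must also be granted at runtime, otherwise Microphone.Start may fail or record silence. A minimal sketch using Unity's built-in Permission API; the MicPermissionRequester name is illustrative only.

using UnityEngine;
#if UNITY_ANDROID
using UnityEngine.Android;
#endif

// Hypothetical helper: requests the Android microphone permission before recording starts.
public class MicPermissionRequester : MonoBehaviour
{
    private void Start()
    {
#if UNITY_ANDROID
        if (!Permission.HasUserAuthorizedPermission(Permission.Microphone))
        {
            Permission.RequestUserPermission(Permission.Microphone);
        }
#endif
    }
}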
Test results
This article draws on the work of another author (original link not preserved). Copyright belongs to the original author, 育婴房扛把子.