0


讯飞星火认知大模型智能语音交互调用

  1. 随着国内外大模型热度的兴起,依托于大模型的智能化,传统的人机交互已经不能满足人们交互的需求。而结合语音和大模型的交互,摆脱了传统互联网获取知识时的文字限制,用语音也可以轻松获取想要的知识和思路。

一、大模型智能语音交互调用实现思路

唤醒的持久运行--->合成能力加持(唤醒成功后语音答复:主人 我在)--->调用在线或离线听写能力(建议用讯飞在线效果好)--->识别用户说的语音成文字后发给大模型--->建议调用讯飞星火认知大模型--->获取大模型答案后调用语音合成(合成在线离线均可)进行答案输出。

这样就顺利实现了用纯语音与大模型进行交互!

难点:唤醒+听写同时读取麦克风音频的节奏控制

持续语音交互调用大模型效果图:

二、离线环境常量定义

  1. package com.day.config;
  2. import com.sun.jna.ptr.IntByReference;
  3. import javax.sound.sampled.*;
  4. import java.io.ByteArrayOutputStream;
  5. import java.io.FileInputStream;
  6. public class Constants {
  7. // 构造16K 16BIT 单声道音频
  8. public static final String APPID = "5e11538f"; // APPID
  9. public static final String WORK_DIR = "src/main/resources";
  10. // 1、唤醒相关 ssb_param,一定注意IVW_SSB_PARAMS的fo|xxx资源的路径,xxx取值是指WORK_DIR目录下/msc/xxx xxx是以后的路径开始拼接的!!!!!!!!!!!
  11. public static final AudioFormat IVW_ASR_AUDIO_FORMAT = new AudioFormat(16000F, 16, 1, true, false);
  12. public static final String IVW_DLL_PATH = "src/main/resources/ivw_msc_x64.dll"; // windows动态库路径
  13. public static final String IVW_LOGIN_PARAMS = "appid = " + APPID + ", work_dir = " + WORK_DIR;
  14. public static final String IVW_SSB_PARAMS = "ivw_threshold=0:1500,sst=wakeup,ivw_shot_word=1,ivw_res_path =fo|res/ivw/wakeupresource.jet";
  15. public static IntByReference IVW_ERROR_CODE = new IntByReference(-100);
  16. public static Integer IVW_FRAME_SIZE = 6400; // 一定要每200ms写10帧,否则会出现唤醒一段时间后无法唤醒的问题,一帧的大小为640B,其他大小可能导致无法唤醒。
  17. public static Integer IVW_AUDIO_STATUS = 1;
  18. public static DataLine.Info IVW_ASR_DATA_LINE_INFO = new DataLine.Info(TargetDataLine.class, IVW_ASR_AUDIO_FORMAT);
  19. public static TargetDataLine IVW_ASR_TARGET_DATA_LINE; // 录音
  20. static {
  21. try {
  22. IVW_ASR_TARGET_DATA_LINE = (TargetDataLine) AudioSystem.getLine(IVW_ASR_DATA_LINE_INFO);
  23. } catch (LineUnavailableException e) {
  24. e.printStackTrace();
  25. }
  26. }
  27. // 2、合成相关
  28. public static final AudioFormat TTS_AUDIO_FORMAT = new AudioFormat(16000F, 16, 1, true, false);
  29. public static final String TTS_DLL_PATH = "src/main/resources/tts_msc_x64.dll"; // windows动态库路径
  30. public static final String TTS_LOGIN_PARAMS = "appid = " + APPID + ", work_dir = " + WORK_DIR;
  31. public static final String TTS_SESSION_BEGIN_PARAMS = "engine_type = local, voice_name = xiaoyuan, text_encoding = UTF8," + " tts_res_path = fo|res/tts/xiaoyuan.jet;fo|res/tts/common.jet, sample_rate = 16000, speed = 50, volume = 50, pitch = 50, rdn = 2";
  32. public static IntByReference TTS_ERROR_CODE = new IntByReference(-100);
  33. public static IntByReference TTS_AUDIO_LEN = new IntByReference(-100);
  34. public static IntByReference TTS_SYNTH_STATUS = new IntByReference(-100);
  35. public static String TTS_TEXT; // 合成文本
  36. public static Integer TTS_TOTAL_AUDIO_LENGTH; // 合成音频长度
  37. public static ByteArrayOutputStream TTS_BYTE_ARRAY_OUTPUT_STREAM; // 合成音频流
  38. public static DataLine.Info TTS_DATA_LINE_INFO = new DataLine.Info(SourceDataLine.class, TTS_AUDIO_FORMAT, AudioSystem.NOT_SPECIFIED);
  39. public static SourceDataLine TTS_SOURCE_DATA_LINE; // 播放
  40. static {
  41. try {
  42. TTS_SOURCE_DATA_LINE = (SourceDataLine) AudioSystem.getLine(Constants.TTS_DATA_LINE_INFO);
  43. } catch (LineUnavailableException e) {
  44. e.printStackTrace();
  45. }
  46. }
  47. public static final String YELLOW_BACKGROUND = "\u001B[43m"; // ANSI code for yellow background
  48. public static final String RESET = "\u001B[0m"; // ANSI code to reset to default
  49. }

三、唤醒+合成代码

  1. package com.day;
  2. import com.day.config.Constants;
  3. import com.day.service.IvwService;
  4. import com.day.service.TtsService;
  5. import com.day.service.imp.IvwCallback;
  6. import com.sun.jna.Pointer;
  7. import javax.sound.sampled.*;
  8. import java.io.ByteArrayInputStream;
  9. import java.io.ByteArrayOutputStream;
  10. import java.io.File;
  11. // 主函数入口
  12. public class AIMain {
  13. public static boolean ttsFlag = false;
  14. public static boolean ivwFlag = false;
  15. public static byte[] audioDataByteArray;
  16. public static int len;
  17. public static void main(String[] args) throws Exception {
  18. // 调用流程:唤醒--->
  19. // System.out.println(Constants.yellowBackground + "呼叫大飞" + Constants.reset);
  20. // 以线程的方式启动唤醒
  21. MyThread myThread = new MyThread();
  22. myThread.start();
  23. }
  24. static class MyThread extends Thread {
  25. public void run() {
  26. startIvw();
  27. }
  28. }
  29. // 1、唤醒调用
  30. public static void startIvw() {
  31. Integer ret = IvwService.INSTANCE.MSPLogin(null, null, Constants.IVW_LOGIN_PARAMS); // 登录
  32. if (ret != 0) {
  33. System.out.println("唤醒登录失败...:" + ret);
  34. }
  35. String sessionId = IvwService.INSTANCE.QIVWSessionBegin(null, Constants.IVW_SSB_PARAMS, Constants.IVW_ERROR_CODE); // 开启会话
  36. if (Constants.IVW_ERROR_CODE.getValue() != 0) {
  37. System.out.println("开启唤醒会话失败...:" + Constants.IVW_ERROR_CODE.getValue());
  38. }
  39. ret = IvwService.INSTANCE.QIVWRegisterNotify(sessionId, new IvwCallback(), null); // 注册唤醒回调函数
  40. if (ret != 0) {
  41. System.out.println("注册唤醒回调函数失败...:" + ret);
  42. }
  43. try {
  44. while (true) {
  45. // System.err.println("唤醒监听中");
  46. Constants.IVW_ASR_TARGET_DATA_LINE.open(Constants.IVW_ASR_AUDIO_FORMAT);
  47. Constants.IVW_ASR_TARGET_DATA_LINE.start();
  48. audioDataByteArray = new byte[Constants.IVW_FRAME_SIZE];
  49. len = new AudioInputStream(Constants.IVW_ASR_TARGET_DATA_LINE).read(audioDataByteArray);
  50. if (len == -1) { // 调用麦克风时候,这段将不会被执行...
  51. Constants.IVW_AUDIO_STATUS = 4;
  52. ret = IvwService.INSTANCE.QIVWAudioWrite(sessionId, "".getBytes(), 0, Constants.IVW_AUDIO_STATUS);
  53. System.out.println("最后一帧返回的错误码:" + ret + ",即将执行退出...");
  54. break; //文件读完,跳出循环
  55. } else {
  56. // 反复调用QIVWAudioWrite写音频方法,直到音频写完为止!!!!!!!!!!!!
  57. ret = IvwService.INSTANCE.QIVWAudioWrite(sessionId, audioDataByteArray, len, Constants.IVW_AUDIO_STATUS);
  58. // System.out.println("写入音频中");
  59. }
  60. Constants.IVW_AUDIO_STATUS = 2; // 中间帧
  61. if (ret != 0) {
  62. System.err.println("唤醒音频写入失败...:" + ret);
  63. }
  64. Thread.sleep(200); // 模拟人说话时间间隙,10帧的音频200ms写入一次
  65. if (ivwFlag) {
  66. IvwService.INSTANCE.QIVWSessionEnd(sessionId, "");
  67. IvwService.INSTANCE.MSPLogout();
  68. Constants.IVW_ASR_TARGET_DATA_LINE.stop();
  69. Constants.IVW_ASR_TARGET_DATA_LINE.close();
  70. ivwFlag = false;
  71. break;
  72. }
  73. // System.err.println("唤醒监听中");
  74. }
  75. startIvw();
  76. } catch (Exception e) {
  77. e.printStackTrace();
  78. }
  79. }
  80. // 2、合成调用
  81. public static void startTts(String ttsText) {
  82. if (!AIMain.ttsFlag) {
  83. ttsFlag = true;
  84. Constants.TTS_TEXT = ttsText;
  85. Constants.TTS_TOTAL_AUDIO_LENGTH = 0;
  86. Integer ret = TtsService.INSTANCE.MSPLogin(null, null, Constants.TTS_LOGIN_PARAMS); // 登录
  87. if (ret != 0) {
  88. System.out.println("合成登录失败...:" + ret);
  89. }
  90. String session_id = TtsService.INSTANCE.QTTSSessionBegin(Constants.TTS_SESSION_BEGIN_PARAMS, Constants.TTS_ERROR_CODE); // 开启合成会话
  91. if (Constants.TTS_ERROR_CODE.getValue() != 0) {
  92. System.out.println("合成开启会话失败...:" + Constants.TTS_ERROR_CODE.getValue());
  93. }
  94. ret = TtsService.INSTANCE.QTTSTextPut(session_id, Constants.TTS_TEXT, Constants.TTS_TEXT.getBytes().length, null); // 正式合成
  95. if (ret != 0) {
  96. System.out.println("合成音频失败...:" + ret);
  97. }
  98. try { //实时播放
  99. Constants.TTS_SOURCE_DATA_LINE.open(Constants.TTS_AUDIO_FORMAT);
  100. Constants.TTS_SOURCE_DATA_LINE.start();
  101. } catch (Exception e) {
  102. e.printStackTrace();
  103. }
  104. while (true) {
  105. Pointer audioPointer = TtsService.INSTANCE.QTTSAudioGet(session_id, Constants.TTS_AUDIO_LEN, Constants.TTS_SYNTH_STATUS, Constants.TTS_ERROR_CODE); // 获取音频
  106. byte[] audioDataByteArray = null;
  107. if (audioPointer != null) {
  108. audioDataByteArray = audioPointer.getByteArray(0, Constants.TTS_AUDIO_LEN.getValue());
  109. }
  110. if (Constants.TTS_ERROR_CODE.getValue() != 0) {
  111. System.out.println("合成获取音频失败...+:" + Constants.TTS_ERROR_CODE);
  112. break;
  113. }
  114. if (audioDataByteArray != null) {
  115. try {
  116. Constants.TTS_SOURCE_DATA_LINE.write(audioDataByteArray, 0, Constants.TTS_AUDIO_LEN.getValue()); //实时写音频流
  117. } catch (Exception e) {
  118. e.printStackTrace();
  119. }
  120. Constants.TTS_TOTAL_AUDIO_LENGTH = Constants.TTS_TOTAL_AUDIO_LENGTH + Constants.TTS_AUDIO_LEN.getValue(); //计算总音频长度,用来生成音频文件
  121. }
  122. if (Constants.TTS_SYNTH_STATUS.getValue() == 2) {
  123. // 说明音频已经取完,退出本次循环
  124. try {
  125. // Constants.TTS_SOURCE_DATA_LINE.drain();
  126. // Constants.TTS_SOURCE_DATA_LINE.close();
  127. } catch (Exception e) {
  128. e.printStackTrace();
  129. }
  130. break;
  131. }
  132. }
  133. ret = TtsService.INSTANCE.QTTSSessionEnd(session_id, "正常退出"); //结束会话
  134. if (ret != 0) {
  135. System.out.println("合成结束会话失败...:" + ret);
  136. }
  137. ret = TtsService.INSTANCE.MSPLogout(); // 退出
  138. if (ret != 0) {
  139. System.out.println("合成退出失败...:" + ret);
  140. }
  141. } else {
  142. Constants.TTS_SOURCE_DATA_LINE.stop();
  143. Constants.TTS_SOURCE_DATA_LINE.close();
  144. }
  145. AIMain.ttsFlag = false;
  146. }
  147. }

唤醒+合成库加载

  1. package com.day.service;
  2. import com.day.config.Constants;
  3. import com.day.service.imp.IvwCallback;
  4. import com.sun.jna.Library;
  5. import com.sun.jna.Native;
  6. import com.sun.jna.ptr.IntByReference;
  7. public interface IvwService extends Library {
  8. /**
  9. * 重点:
  10. * 1.char * 对应 String
  11. * 2.int * 对应 IntByReference
  12. * 3.void * 对应 Pointer或byte[]
  13. * 4.int 对应 int
  14. * 5.无参 对应 无参
  15. * 6.回调函数 对应 根据文档自定义回调函数,实现接口Callback
  16. */
  17. //加载dll动态库并实例化,从而使用其内部的方法
  18. IvwService INSTANCE = Native.loadLibrary(Constants.IVW_DLL_PATH, IvwService.class);
  19. //定义登录方法 MSPLogin(const char *usr, const char *pwd, const char *params)
  20. public Integer MSPLogin(String usr, String pwd, String params);
  21. //定义开始方法 QIVWSessionbegin(const char *grammarList, const char *params, int *errorCode)
  22. public String QIVWSessionBegin(String grammarList, String params, IntByReference errorCode);
  23. //定义写音频方法 QIVWAudioWrite(const char *sessionID, const void *audioData, unsigned int audioLen, int audioStatus)
  24. public Integer QIVWAudioWrite(String sessionID, byte[] audioData, int audioLen, int audioStatus);
  25. //定义结束方法 QIVWSessionEnd(const char *sessionID, const char *hints)
  26. public Integer QIVWSessionEnd(String sessionID, String hints);
  27. //定义获取结果方法 QIVWRegisterNotify(const char *sessionID, ivw_ntf_handler msgProcCb, void *userData)
  28. public Integer QIVWRegisterNotify(String sessionID, IvwCallback ivwCallback, byte[] userData);
  29. //定义退出方法 唤醒一般不用退出
  30. public Integer MSPLogout();
  31. }
  1. package com.day.service;
  2. import com.day.config.Constants;
  3. import com.sun.jna.Library;
  4. import com.sun.jna.Native;
  5. import com.sun.jna.Pointer;
  6. import com.sun.jna.ptr.IntByReference;
  7. public interface TtsService extends Library {
  8. /**
  9. * 重点:
  10. * 1.char * 对应 String
  11. * 2.int * 对应 IntByReference
  12. * 3.void * 对应 byte[]/Pointer,回调函数里此类型需用String来对应。
  13. * 4.int 对应 int
  14. * 5.无参 对应 void
  15. * 6.回调函数 对应 根据文档自定义回调函数,实现接口Callback,离线语音合成无回调
  16. */
  17. //加载dll动态库并实例化,从而使用其内部的方法
  18. TtsService INSTANCE = Native.loadLibrary(Constants.TTS_DLL_PATH, TtsService.class);
  19. //定义登录方法
  20. public Integer MSPLogin(String usr, String pwd, String params);
  21. //开始一次普通离线语音合成
  22. public String QTTSSessionBegin(String params, IntByReference errorCode);
  23. //写入需要合成的文本
  24. public Integer QTTSTextPut(String sessionID, String textString, int textLen, String params);
  25. //获取离线合成的音频
  26. public Pointer QTTSAudioGet(String sessionID, IntByReference audioLen, IntByReference synthStatus, IntByReference errorCode);
  27. //结束本次普通离线语音合成
  28. public Integer QTTSSessionEnd(String sessionID, String hints);
  29. //定义退出方法
  30. public Integer MSPLogout();
  31. }

四、唤醒回调

  1. package com.day.service.imp;
  2. import com.day.AIMain;
  3. import com.day.ability.IatMic;
  4. import com.day.config.Constants;
  5. import com.sun.jna.Callback;
  6. import javax.sound.sampled.AudioFileFormat;
  7. import javax.sound.sampled.AudioInputStream;
  8. import javax.sound.sampled.AudioSystem;
  9. import java.io.ByteArrayInputStream;
  10. import java.io.ByteArrayOutputStream;
  11. import java.io.File;
  12. public class IvwCallback implements Callback {
  13. public int cb_ivw_msg_proc(String sessionID, int msg, int param1, int param2, String info, String userData) throws Exception {
  14. System.out.println("机器人大飞:主人,您请说~");
  15. AIMain.startTts("主人,您请说~");
  16. // 先录音后调用听写
  17. IatMic.iatWork();
  18. return 0;
  19. }
  20. }

五、听写代码(重点是和唤醒共用一个麦克风音频流)

  1. package com.day.ability;
  2. import com.day.AIMain;
  3. import com.day.config.Constants;
  4. import com.day.service.IvwService;
  5. import com.google.gson.Gson;
  6. import com.google.gson.JsonObject;
  7. import okhttp3.*;
  8. import javax.crypto.Mac;
  9. import javax.crypto.spec.SecretKeySpec;
  10. import javax.sound.sampled.AudioInputStream;
  11. import java.io.IOException;
  12. import java.net.URL;
  13. import java.nio.charset.Charset;
  14. import java.text.SimpleDateFormat;
  15. import java.util.*;
  16. // 麦克风传流听写
  17. public class IatMic extends WebSocketListener {
  18. private static final String hostUrl = "https://iat-api.xfyun.cn/v2/iat"; //中英文,http url 不支持解析 ws/wss schema
  19. // private static final String hostUrl = "https://iat-niche-api.xfyun.cn/v2/iat";//小语种
  20. private static final String appid = ""; //在控制台-我的应用获取
  21. private static final String apiSecret = ""; //在控制台-我的应用-语音听写(流式版)获取
  22. private static final String apiKey = ""; //在控制台-我的应用-语音听写(流式版)获取
  23. //private static final String file = "./zMusic/pcm/科大讯飞.pcm"; // 中文
  24. public static final int StatusFirstFrame = 0;
  25. public static final int StatusContinueFrame = 1;
  26. public static final int StatusLastFrame = 2;
  27. public static final Gson json = new Gson();
  28. Decoder decoder = new Decoder();
  29. // 开始时间
  30. private static Date dateBegin = new Date();
  31. // 结束时间
  32. private static Date dateEnd = new Date();
  33. private static final SimpleDateFormat sdf = new SimpleDateFormat("yyy-MM-dd HH:mm:ss.SSS");
  34. static int status = 0; // 音频的状态
  35. public static boolean IAT_FLAG = true;
  36. public static String fileName = "";
  37. public static void main(String[] args) throws Exception {
  38. iatWork();
  39. }
  40. static class MyThread extends Thread {
  41. public void run() {
  42. /* // 录制用户说话
  43. ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
  44. long startTime1 = System.currentTimeMillis();
  45. long endTime1 = startTime1 + 500; // 10 seconds
  46. while (System.currentTimeMillis() < endTime1) {
  47. System.out.print("");
  48. }
  49. // Step 4: Start recording
  50. byte[] buffer = new byte[Constants.IVW_ASR_TARGET_DATA_LINE.getBufferSize() / 5]; // Adjust buffer size as needed
  51. int bytesRead;
  52. long startTime = System.currentTimeMillis();
  53. long endTime = startTime + 4000; // 10 seconds
  54. // Step 5: Loop until recording time reaches 10 seconds
  55. while (System.currentTimeMillis() < endTime) {
  56. bytesRead = Constants.IVW_ASR_TARGET_DATA_LINE.read(buffer, 0, buffer.length);
  57. if (bytesRead > 0) {
  58. outputStream.write(buffer, 0, bytesRead);
  59. }
  60. }
  61. byte[] audioBytes = outputStream.toByteArray();
  62. // Step 9: Write byte array to audio file or other destination using AudioSystem.write method
  63. // Example: Save audioBytes to a WAV file
  64. try {
  65. File audioFile = new File("src/main/resources/1.wav");
  66. AudioInputStream audioInputStream = new AudioInputStream(new ByteArrayInputStream(audioBytes), Constants.IVW_ASR_AUDIO_FORMAT, audioBytes.length / Constants.IVW_ASR_AUDIO_FORMAT.getFrameSize());
  67. AudioSystem.write(audioInputStream, AudioFileFormat.Type.WAVE, audioFile);
  68. } catch (IOException e) {
  69. e.printStackTrace();
  70. }
  71. fileName = "src/main/resources/1.wav";*/
  72. // 需要初始化的参数都在这里添加
  73. IatMic.IAT_FLAG = true;
  74. status = 0;
  75. // 结束初始化
  76. IatMic iatMic = new IatMic();
  77. // 构建鉴权url
  78. String authUrl = null;
  79. try {
  80. authUrl = getAuthUrl(hostUrl, apiKey, apiSecret);
  81. } catch (Exception e) {
  82. throw new RuntimeException(e);
  83. }
  84. OkHttpClient client = new OkHttpClient.Builder().build();
  85. //将url中的 schema http://和https://分别替换为ws:// 和 wss://
  86. String url = authUrl.toString().replace("http://", "ws://").replace("https://", "wss://");
  87. // System.err.println(url);
  88. Request request = new Request.Builder().url(url).build();
  89. WebSocket webSocket = client.newWebSocket(request, iatMic);
  90. }
  91. }
  92. public static void iatWork() throws Exception {
  93. // 用线程方式启动,不影响唤醒,里面不要执行任何长时间的代码
  94. MyThread myThread = new MyThread();
  95. myThread.start();
  96. }
  97. @Override
  98. public void onOpen(WebSocket webSocket, Response response) {
  99. // System.out.println("建立连接成功");
  100. System.out.println(Constants.YELLOW_BACKGROUND + "机器人正在听,您请说:" + Constants.RESET);
  101. super.onOpen(webSocket, response);
  102. new Thread(() -> {
  103. //连接成功,开始发送数据
  104. // int interval = 200;
  105. try {
  106. Constants.IVW_ASR_TARGET_DATA_LINE.open(Constants.IVW_ASR_AUDIO_FORMAT);
  107. Constants.IVW_ASR_TARGET_DATA_LINE.start();
  108. while (true) {
  109. // System.err.println(AIMain.len + "" + AIMain.audioDataByteArray);
  110. if (AIMain.len == -1) {
  111. status = 2;// 标志读取完毕
  112. }
  113. switch (status) {
  114. case StatusFirstFrame: // 第一帧音频status = 0
  115. JsonObject frame = new JsonObject();
  116. JsonObject business = new JsonObject(); //第一帧必须发送
  117. JsonObject common = new JsonObject(); //第一帧必须发送
  118. JsonObject data = new JsonObject(); //每一帧都要发送
  119. // 填充common
  120. common.addProperty("app_id", appid);
  121. //填充business
  122. business.addProperty("language", "zh_cn");//
  123. //business.addProperty("language", "en_us");//英文
  124. //business.addProperty("language", "ja_jp");//日语,在控制台可添加试用或购买
  125. //business.addProperty("language", "ko_kr");//韩语,在控制台可添加试用或购买
  126. //business.addProperty("language", "ru-ru");//俄语,在控制台可添加试用或购买
  127. //business.addProperty("ptt", 1);
  128. business.addProperty("domain", "iat");
  129. //mandarin中文普通话 广东话cantonese
  130. business.addProperty("accent", "mandarin");//中文方言请在控制台添加试用,添加后即展示相应参数值cantonese//mandarin
  131. //business.addProperty("nunum", 0);
  132. //business.addProperty("ptt", 1);//标点符号
  133. //business.addProperty("rlang", "zh-hk"); // zh-cn :简体中文(默认值)zh-hk :繁体香港(若未授权不生效,在控制台可免费开通)
  134. business.addProperty("vinfo", 1);
  135. business.addProperty("dwa", "wpgs");//动态修正(若未授权不生效,在控制台可免费开通)
  136. business.addProperty("vad_eos", 3000);
  137. //business.addProperty("fa_nbest", true);
  138. //business.addProperty("fa_sch", true);
  139. //business.addProperty("vinfo", 1);
  140. //business.addProperty("speex_size", 70);
  141. //business.addProperty("nbest", 5);// 句子多候选(若未授权不生效,在控制台可免费开通)
  142. //business.addProperty("wbest", 3);// 词级多候选(若未授权不生效,在控制台可免费开通)
  143. //填充data
  144. data.addProperty("status", StatusFirstFrame);
  145. data.addProperty("format", "audio/L16;rate=16000");
  146. //data.addProperty("encoding", "speex-wb");
  147. data.addProperty("encoding", "raw");
  148. data.addProperty("audio", Base64.getEncoder().encodeToString(Arrays.copyOf(AIMain.audioDataByteArray, AIMain.len)));
  149. //填充frame
  150. frame.add("common", common);
  151. frame.add("business", business);
  152. frame.add("data", data);
  153. // System.out.println("即将发送第一帧数据...");
  154. // System.err.println(frame.toString());
  155. webSocket.send(frame.toString());
  156. status = StatusContinueFrame; // 发送完第一帧改变status 为 1
  157. break;
  158. case StatusContinueFrame: //中间帧status = 1
  159. JsonObject frame1 = new JsonObject();
  160. JsonObject data1 = new JsonObject();
  161. data1.addProperty("status", StatusContinueFrame);
  162. data1.addProperty("format", "audio/L16;rate=16000");
  163. //data1.addProperty("encoding", "speex-wb");
  164. data1.addProperty("encoding", "raw");
  165. String temp = Base64.getEncoder().encodeToString(Arrays.copyOf(AIMain.audioDataByteArray, AIMain.len));
  166. data1.addProperty("audio", temp);
  167. frame1.add("data", data1);
  168. //System.out.println(temp);
  169. webSocket.send(frame1.toString());
  170. break;
  171. }
  172. try {
  173. Thread.sleep(200);
  174. if (!IAT_FLAG) {
  175. //System.out.println("本次会话结束");
  176. break;
  177. }
  178. } catch (Exception e) {
  179. e.printStackTrace();
  180. }
  181. }
  182. //说明读完了
  183. status = StatusLastFrame;
  184. JsonObject frame2 = new JsonObject();
  185. JsonObject data2 = new JsonObject();
  186. data2.addProperty("status", StatusLastFrame);
  187. data2.addProperty("audio", "");
  188. data2.addProperty("format", "audio/L16;rate=16000");
  189. //data2.addProperty("encoding", "speex-wb");
  190. data2.addProperty("encoding", "raw");
  191. frame2.add("data", data2);
  192. webSocket.send(frame2.toString());
  193. // System.err.println(frame2.toString());
  194. // System.out.println("all data is send");
  195. } catch (Exception e) {
  196. // TODO Auto-generated catch block
  197. e.printStackTrace();
  198. }
  199. }).start();
  200. }
  201. @Override
  202. public void onMessage(WebSocket webSocket, String text) {
  203. // System.out.println(text);
  204. super.onMessage(webSocket, text);
  205. ResponseData resp = json.fromJson(text, ResponseData.class);
  206. if (resp != null) {
  207. if (resp.getCode() != 0) {
  208. AIMain.ivwFlag = true; // 如果报错也需要恢复唤醒
  209. System.out.println("code=>" + resp.getCode() + " error=>" + resp.getMessage() + " sid=" + resp.getSid());
  210. System.out.println("错误码查询链接:https://www.xfyun.cn/document/error-code");
  211. return;
  212. }
  213. if (resp.getData() != null) {
  214. if (resp.getData().getResult() != null) {
  215. Text te = resp.getData().getResult().getText();
  216. //System.out.println(te.toString());
  217. try {
  218. decoder.decode(te);
  219. dateEnd = new Date();
  220. // System.out.println("耗时:" + (dateEnd.getTime() - dateBegin.getTime()) + "ms");
  221. System.out.println(Constants.YELLOW_BACKGROUND + "用户说话识别中:" + decoder.toString() + Constants.RESET);
  222. //System.err.println("中间识别JSON结果 ----" + text);
  223. } catch (Exception e) {
  224. e.printStackTrace();
  225. }
  226. }
  227. if (resp.getData().getStatus() == 2) {
  228. // todo resp.data.status ==2 说明数据全部返回完毕,可以关闭连接,释放资源
  229. //System.err.println("我的getStatus() == 2");
  230. // System.out.println("session end ");
  231. dateEnd = new Date();
  232. // System.out.println(sdf.format(dateBegin) + "开始");
  233. // System.out.println(sdf.format(dateEnd) + "结束");
  234. // System.out.println("耗时:" + (dateEnd.getTime() - dateBegin.getTime()) + "ms");
  235. System.out.println(Constants.YELLOW_BACKGROUND + "用户说话识别最终结果:" + decoder.toString() + Constants.RESET);
  236. AIMain.ivwFlag = true; // 恢复唤醒
  237. // System.out.println("本次识别sid ==》" + resp.getSid());
  238. try {
  239. BigModelNew.doSpark(decoder.toString()); // 调用大模型回答问题!!!
  240. } catch (Exception e) {
  241. throw new RuntimeException(e);
  242. }
  243. decoder.discard();
  244. webSocket.close(1000, "");
  245. IatMic.IAT_FLAG = false;
  246. // System.exit(0);
  247. } else {
  248. // todo 根据返回的数据处理
  249. }
  250. }
  251. }
  252. }
  253. @Override
  254. public void onFailure(WebSocket webSocket, Throwable t, Response response) {
  255. super.onFailure(webSocket, t, response);
  256. try {
  257. if (null != response) {
  258. int code = response.code();
  259. System.out.println("onFailure code:" + code);
  260. System.out.println("onFailure body:" + response.body().string());
  261. if (101 != code) {
  262. System.out.println("connection failed");
  263. System.exit(0);
  264. }
  265. }
  266. } catch (IOException e) {
  267. // TODO Auto-generated catch block
  268. e.printStackTrace();
  269. }
  270. }
  271. public static String getAuthUrl(String hostUrl, String apiKey, String apiSecret) throws Exception {
  272. URL url = new URL(hostUrl);
  273. SimpleDateFormat format = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z", Locale.US);
  274. format.setTimeZone(TimeZone.getTimeZone("GMT"));
  275. String date = format.format(new Date());
  276. //String date = format.format(new Date());
  277. //System.err.println(date);
  278. StringBuilder builder = new StringBuilder("host: ").append(url.getHost()).append("\n").//
  279. append("date: ").append(date).append("\n").//
  280. append("GET ").append(url.getPath()).append(" HTTP/1.1");
  281. //System.err.println(builder);
  282. Charset charset = Charset.forName("UTF-8");
  283. Mac mac = Mac.getInstance("hmacsha256");
  284. SecretKeySpec spec = new SecretKeySpec(apiSecret.getBytes(charset), "hmacsha256");
  285. mac.init(spec);
  286. byte[] hexDigits = mac.doFinal(builder.toString().getBytes(charset));
  287. String sha = Base64.getEncoder().encodeToString(hexDigits);
  288. //System.err.println(sha);
  289. String authorization = String.format("api_key=\"%s\", algorithm=\"%s\", headers=\"%s\", signature=\"%s\"", apiKey, "hmac-sha256", "host date request-line", sha);
  290. //System.err.println(authorization);
  291. HttpUrl httpUrl = HttpUrl.parse("https://" + url.getHost() + url.getPath()).newBuilder().//
  292. addQueryParameter("authorization", Base64.getEncoder().encodeToString(authorization.getBytes(charset))).//
  293. addQueryParameter("date", date).//
  294. addQueryParameter("host", url.getHost()).//
  295. build();
  296. return httpUrl.toString();
  297. }
  298. public static class ResponseData {
  299. private int code;
  300. private String message;
  301. private String sid;
  302. private Data data;
  303. public int getCode() {
  304. return code;
  305. }
  306. public String getMessage() {
  307. return this.message;
  308. }
  309. public String getSid() {
  310. return sid;
  311. }
  312. public Data getData() {
  313. return data;
  314. }
  315. }
  316. public static class Data {
  317. private int status;
  318. private Result result;
  319. public int getStatus() {
  320. return status;
  321. }
  322. public Result getResult() {
  323. return result;
  324. }
  325. }
  326. public static class Result {
  327. int bg;
  328. int ed;
  329. String pgs;
  330. int[] rg;
  331. int sn;
  332. Ws[] ws;
  333. boolean ls;
  334. JsonObject vad;
  335. public Text getText() {
  336. Text text = new Text();
  337. StringBuilder sb = new StringBuilder();
  338. for (Ws ws : this.ws) {
  339. sb.append(ws.cw[0].w);
  340. }
  341. text.sn = this.sn;
  342. text.text = sb.toString();
  343. text.sn = this.sn;
  344. text.rg = this.rg;
  345. text.pgs = this.pgs;
  346. text.bg = this.bg;
  347. text.ed = this.ed;
  348. text.ls = this.ls;
  349. text.vad = this.vad == null ? null : this.vad;
  350. return text;
  351. }
  352. }
  353. public static class Ws {
  354. Cw[] cw;
  355. int bg;
  356. int ed;
  357. }
  358. public static class Cw {
  359. int sc;
  360. String w;
  361. }
  362. public static class Text {
  363. int sn;
  364. int bg;
  365. int ed;
  366. String text;
  367. String pgs;
  368. int[] rg;
  369. boolean deleted;
  370. boolean ls;
  371. JsonObject vad;
  372. @Override
  373. public String toString() {
  374. return "Text{" + "bg=" + bg + ", ed=" + ed + ", ls=" + ls + ", sn=" + sn + ", text='" + text + '\'' + ", pgs=" + pgs + ", rg=" + Arrays.toString(rg) + ", deleted=" + deleted + ", vad=" + (vad == null ? "null" : vad.getAsJsonArray("ws").toString()) + '}';
  375. }
  376. }
  377. //解析返回数据,仅供参考
  378. public static class Decoder {
  379. private Text[] texts;
  380. private int defc = 10;
  381. public Decoder() {
  382. this.texts = new Text[this.defc];
  383. }
  384. public synchronized void decode(Text text) {
  385. if (text.sn >= this.defc) {
  386. this.resize();
  387. }
  388. if ("rpl".equals(text.pgs)) {
  389. for (int i = text.rg[0]; i <= text.rg[1]; i++) {
  390. this.texts[i].deleted = true;
  391. }
  392. }
  393. this.texts[text.sn] = text;
  394. }
  395. public String toString() {
  396. StringBuilder sb = new StringBuilder();
  397. for (Text t : this.texts) {
  398. if (t != null && !t.deleted) {
  399. sb.append(t.text);
  400. }
  401. }
  402. return sb.toString();
  403. }
  404. public void resize() {
  405. int oc = this.defc;
  406. this.defc <<= 1;
  407. Text[] old = this.texts;
  408. this.texts = new Text[this.defc];
  409. for (int i = 0; i < oc; i++) {
  410. this.texts[i] = old[i];
  411. }
  412. }
  413. public void discard() {
  414. for (int i = 0; i < this.texts.length; i++) {
  415. this.texts[i] = null;
  416. }
  417. }
  418. }
  419. }

六、大模型调用代码

  1. package com.day.ability;
  2. import com.day.AIMain;
  3. import com.day.util.MyUtil;
  4. import com.google.gson.Gson;
  5. import okhttp3.HttpUrl;
  6. import javax.crypto.Mac;
  7. import javax.crypto.spec.SecretKeySpec;
  8. import java.net.URL;
  9. import java.nio.charset.StandardCharsets;
  10. import java.text.SimpleDateFormat;
  11. import java.util.*;
  12. // 主函数入口
  13. public class BigModelNew {
  14. public static final String hostUrl = "https://spark-api.xf-yun.com/v3/completions";
  15. private static final String appid = "";
  16. private static final String apiSecret = "";
  17. private static final String apiKey = "";
  18. private static final Gson gson = new Gson();
  19. public static void main(String[] args) throws Exception {
  20. doSpark("我想吃鸡。");
  21. }
  22. public static void doSpark(String content) throws Exception {
  23. MyThread myThread = new MyThread(content);
  24. myThread.start();
  25. }
  26. static class MyThread extends Thread {
  27. String content;
  28. public MyThread(String content) {
  29. this.content = content;
  30. }
  31. public void run() {
  32. String authUrl = null;
  33. try {
  34. authUrl = getAuthUrl(hostUrl, apiKey, apiSecret);
  35. } catch (Exception e) {
  36. throw new RuntimeException(e);
  37. }
  38. // URL地址正确
  39. // System.err.println(authUrl);
  40. String json = "{\n" + " \"app_id\": \"" + appid + "\",\n" + " \"uid\": \"" + UUID.randomUUID().toString().substring(0, 10) + "\",\n" + " \"domain\": \"generalv2\",\n" + " \"temperature\": 0.5,\n" + " \"max_tokens\": 4096,\n" + " \"auditing\": \"default\",\n" + " \"stream\": true,\n" + " \"messages\": [\n" + " {\n" + " \"role\": \"user\",\n" + " \"content\": \"" + content + "\"\n" + " }\n" + " ]\n" + "}";
  41. // 发起Post请求
  42. String res = MyUtil.doPostJson(authUrl, null, json);
  43. String finalRes = "";
  44. String[] resArray = res.split("\n");
  45. for (int i = 0; i < resArray.length; i++) {
  46. if (resArray[i].contains("data:")) {
  47. String jsonStr = resArray[i].replace("data:", "");
  48. BigJsonParse bigJsonParse = gson.fromJson(jsonStr, BigJsonParse.class);
  49. List<Choices> choicesList = bigJsonParse.choices;
  50. if (choicesList != null && choicesList.size() > 0) {
  51. for (Choices choice : choicesList) {
  52. finalRes = finalRes + choice.content;
  53. }
  54. } else {
  55. finalRes = "您好,我是讯飞星火认知大模型";
  56. }
  57. }
  58. }
  59. System.out.println(finalRes);
  60. String temp = finalRes.replaceAll("\r\n", "").replaceAll("\n", "");
  61. System.out.println("*****************************************************************************************************");
  62. AIMain.startTts(temp);
  63. }
  64. }
  65. // 鉴权方法
  66. public static String getAuthUrl(String hostUrl, String apiKey, String apiSecret) throws Exception {
  67. URL url = new URL(hostUrl);
  68. // 时间
  69. SimpleDateFormat format = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss z", Locale.US);
  70. format.setTimeZone(TimeZone.getTimeZone("GMT"));
  71. String date = format.format(new Date());
  72. // date="Thu, 12 Oct 2023 03:05:28 GMT";
  73. // 拼接
  74. String preStr = "host: " + url.getHost() + "\n" + "date: " + date + "\n" + "POST " + url.getPath() + " HTTP/1.1";
  75. // System.err.println(preStr);
  76. // SHA256加密
  77. Mac mac = Mac.getInstance("hmacsha256");
  78. SecretKeySpec spec = new SecretKeySpec(apiSecret.getBytes(StandardCharsets.UTF_8), "hmacsha256");
  79. mac.init(spec);
  80. byte[] hexDigits = mac.doFinal(preStr.getBytes(StandardCharsets.UTF_8));
  81. // Base64加密
  82. String sha = Base64.getEncoder().encodeToString(hexDigits);
  83. // System.err.println(sha);
  84. // 拼接
  85. String authorization = String.format("api_key=\"%s\", algorithm=\"%s\", headers=\"%s\", signature=\"%s\"", apiKey, "hmac-sha256", "host date request-line", sha);
  86. // 拼接地址
  87. HttpUrl httpUrl = Objects.requireNonNull(HttpUrl.parse("https://" + url.getHost() + url.getPath())).newBuilder().//
  88. addQueryParameter("authorization", Base64.getEncoder().encodeToString(authorization.getBytes(StandardCharsets.UTF_8))).//
  89. addQueryParameter("date", date).//
  90. addQueryParameter("host", url.getHost()).//
  91. build();
  92. // System.err.println(httpUrl.toString());
  93. return httpUrl.toString();
  94. }
  95. }
  96. // JSON
  97. class BigJsonParse {
  98. List<Choices> choices;
  99. }
  100. class Choices {
  101. String content;
  102. }

七、HTTP POST请求代码

  1. package com.day.util;
  2. import org.apache.http.client.methods.CloseableHttpResponse;
  3. import org.apache.http.client.methods.HttpUriRequest;
  4. import org.apache.http.client.methods.RequestBuilder;
  5. import org.apache.http.entity.ContentType;
  6. import org.apache.http.entity.StringEntity;
  7. import org.apache.http.impl.client.CloseableHttpClient;
  8. import org.apache.http.impl.client.HttpClients;
  9. import org.apache.http.util.EntityUtils;
  10. import java.net.URI;
  11. import java.nio.charset.StandardCharsets;
  12. import java.util.Map;
  13. public class MyUtil {
  14. /**
  15. * 1.发起post请求
  16. */
  17. public static String doPostJson(String url, Map<String, String> urlParams, String json) {
  18. CloseableHttpClient closeableHttpClient = HttpClients.createDefault();
  19. CloseableHttpResponse closeableHttpResponse = null;
  20. String resultString = "";
  21. try {
  22. // 创建Http Post请求
  23. String asciiUrl = URI.create(url).toASCIIString();
  24. RequestBuilder builder = RequestBuilder.post(asciiUrl);
  25. builder.setCharset(StandardCharsets.UTF_8);
  26. if (urlParams != null) {
  27. for (Map.Entry<String, String> entry : urlParams.entrySet()) {
  28. builder.addParameter(entry.getKey(), entry.getValue());
  29. }
  30. }
  31. // 创建请求内容
  32. StringEntity entity = new StringEntity(json, ContentType.APPLICATION_JSON);
  33. builder.setEntity(entity);
  34. HttpUriRequest request = builder.build();
  35. // 执行http请求
  36. closeableHttpResponse = closeableHttpClient.execute(request);
  37. resultString = EntityUtils.toString(closeableHttpResponse.getEntity(), StandardCharsets.UTF_8);
  38. } catch (Exception e) {
  39. e.printStackTrace();
  40. } finally {
  41. try {
  42. if (closeableHttpResponse != null) {
  43. closeableHttpResponse.close();
  44. }
  45. if (closeableHttpClient != null) {
  46. closeableHttpClient.close();
  47. }
  48. } catch (Exception e) {
  49. e.printStackTrace();
  50. }
  51. }
  52. return resultString;
  53. }
  54. }

八、整体项目结构目录


本文转载自: https://blog.csdn.net/p6448777/article/details/135231771
版权归原作者 王者鳜錸 所有, 如有侵权,请联系我们删除。

“讯飞星火认知大模型智能语音交互调用”的评论:

还没有评论