Răsfoiți Sursa

处理豆包tts语音指令参数反编译
优化逻辑,移除无用日志
给tts都加入语音指令参数(可空)

liyanbo 3 săptămâni în urmă
părinte
comite
244ef80ea8

+ 2 - 2
byzs-module-ai/src/main/java/cn/iocoder/byzs/module/ai/controller/admin/tts/AiTtsController.java

@@ -116,9 +116,9 @@ public class AiTtsController {
     @PostMapping("/convert")
     @Operation(summary = "文本转语音")
     @PreAuthorize("@ss.hasPermission('ai:tts:convert')")
-    public CommonResult<String> convertTextToSpeech(@RequestParam("roleId") Long roleId, @RequestParam("content") String content) {
+    public CommonResult<String> convertTextToSpeech(@RequestParam("roleId") Long roleId, @RequestParam("content") String content, @RequestParam(value = "command", required = false) String command) {
         // 执行文本转语音
-        String audioUrl = ttsService.convertTextToSpeech(roleId, content);
+        String audioUrl = ttsService.convertTextToSpeech(roleId, content, command);
         // 返回结果
         return success(audioUrl);
     }

+ 3 - 2
byzs-module-ai/src/main/java/cn/iocoder/byzs/module/ai/service/tts/AiTtsService.java

@@ -69,12 +69,13 @@ public interface AiTtsService {
     List<AiTtsDO> getTtsSimpleListByStatus(Integer status);
 
     /**
-     * 文本转语音
+     * 文本转语音(带语音指令)
      *
      * @param roleId 角色编号
      * @param content 需要转语音的内容
+     * @param command 命令(语音指令)
      * @return 语音文件URL
      */
-    String convertTextToSpeech(Long roleId, String content);
+    String convertTextToSpeech(Long roleId, String content, String command);
 
 }

+ 2 - 3
byzs-module-ai/src/main/java/cn/iocoder/byzs/module/ai/service/tts/AiTtsServiceImpl.java

@@ -108,7 +108,7 @@ public class AiTtsServiceImpl implements AiTtsService {
     }
 
     @Override
-    public String convertTextToSpeech(Long roleId, String content) {
+    public String convertTextToSpeech(Long roleId, String content, String command) {
         // 1. 根据角色id查询角色信息
         AiChatRoleDO chatRole = chatRoleMapper.selectById(roleId);
         if (chatRole == null) {
@@ -130,7 +130,7 @@ public class AiTtsServiceImpl implements AiTtsService {
         if ("DouBao".equals(aiTtsDO.getPlatform())) {
             // 使用豆包TTS服务
             try {
-                mp3Data = douBaoTtsService.convertTextToSpeech(aiTtsDO, content);
+                mp3Data = douBaoTtsService.convertTextToSpeech(aiTtsDO, content, command);
             } catch (Exception e) {
                 throw new RuntimeException("豆包文本转语音失败", e);
             }
@@ -147,5 +147,4 @@ public class AiTtsServiceImpl implements AiTtsService {
         String filePath = fileApi.createFile(mp3Data);
         return filePath;
     }
-
 }

+ 59 - 81
byzs-module-ai/src/main/java/cn/iocoder/byzs/module/ai/service/tts/DouBaoTtsService.java

@@ -25,7 +25,7 @@ public class DouBaoTtsService {
     @Resource
     private YudaoAiProperties yudaoAiProperties;
 
-    public byte[] convertTextToSpeech(AiTtsDO aiTtsDO, String content) throws IOException {
+    public byte[] convertTextToSpeech(AiTtsDO aiTtsDO, String content, String command) throws IOException {
         YudaoAiProperties.DouBaoProperties doubaoProperties = yudaoAiProperties.getDoubao();
         if (doubaoProperties == null) {
             throw new IllegalArgumentException("豆包配置未设置");
@@ -36,14 +36,9 @@ public class DouBaoTtsService {
             throw new IllegalArgumentException("豆包TTS配置未设置");
         }
 
-        logger.info("豆包配置: enable={}, apiKey={}", doubaoProperties.getEnable(), doubaoProperties.getApiKey());
-        logger.info("豆包TTS配置: appId={}, accessKey={}, resourceId={}, baseUrl={}", 
-                doubaoTtsProperties.getAppId(), doubaoTtsProperties.getAccessKey(), 
-                doubaoTtsProperties.getResourceId(), doubaoTtsProperties.getBaseUrl());
-
         String ttsUrl = doubaoTtsProperties.getBaseUrl() != null ? doubaoTtsProperties.getBaseUrl() : "https://openspeech.bytedance.com/api/v3/tts/unidirectional";
         String appId = doubaoTtsProperties.getAppId();
-        String accessKey = doubaoTtsProperties.getAccessKey();
+        String accessKey = doubaoTtsProperties.getAccessToken() != null ? doubaoTtsProperties.getAccessToken() : doubaoTtsProperties.getAccessKey();
         String resourceId = doubaoTtsProperties.getResourceId() != null ? doubaoTtsProperties.getResourceId() : "seed-tts-2.0";
 
         if (appId == null || accessKey == null) {
@@ -51,9 +46,6 @@ public class DouBaoTtsService {
         }
 
         OkHttpClient client = new OkHttpClient();
-
-        // 构造请求体,按照接口文档要求组装JSON参数
-
         // 使用Map构建请求体,按照文档要求组装参数
         Map<String, Object> requestMap = new HashMap<>();
         Map<String, Object> reqParams = new HashMap<>();
@@ -63,7 +55,7 @@ public class DouBaoTtsService {
         Map<String, Object> audioParams = new HashMap<>();
         audioParams.put("format", "mp3"); // 输出音频格式
         audioParams.put("sample_rate", 16000); // 推荐采样率
-//        audioParams.put("emotion", "带有感情的朗读诗词,要深情的朗读。");
+//        audioParams.put("emotion", "开心");
 
         // 语速和音量参数
         if (aiTtsDO.getSpeechRate() != null) {
@@ -78,15 +70,19 @@ public class DouBaoTtsService {
         // 额外参数
         Map<String, Object> additions = new HashMap<>();
         //音调
-//        Map<String, Object> post_process = new HashMap<>();
-//        post_process.put("pitch", aiTtsDO.getVolume());
-//        additions.put("post_process", post_process);
+        Map<String, Object> post_process = new HashMap<>();
+        post_process.put("pitch", aiTtsDO.getVolume());
+        additions.put("post_process", post_process);
 
         //语音指令
-        String[] context_texts = {"带有感情的朗读诗词,要深情的朗读。"};
-        additions.put("context_texts", context_texts);
+        if (command != null && !command.isEmpty()) {
+            additions.put("context_texts", List.of(command));
+        }
 
-        reqParams.put("additions", additions);
+        // 将 additions 映射序列化为 JSON 字符串
+        ObjectMapper additionsMapper = new ObjectMapper();
+        String additionsJson = additionsMapper.writeValueAsString(additions);
+        reqParams.put("additions", additionsJson);
         requestMap.put("req_params", reqParams);
 
 
@@ -96,12 +92,8 @@ public class DouBaoTtsService {
 
         MediaType mediaType = MediaType.parse("application/json");
         RequestBody body = RequestBody.create(mediaType, requestBody);
-        
         String requestId = UUID.randomUUID().toString();
-        logger.info("发送豆包TTS请求,url: {}, appId: {}, resourceId: {}, requestId: {}", 
-                ttsUrl, appId, resourceId, requestId);
-        logger.debug("请求体: {}", requestBody);
-        
+
         // 构建请求
         Request request = new Request.Builder()
                 .url(ttsUrl)
@@ -113,16 +105,6 @@ public class DouBaoTtsService {
                 .post(body)
                 .build();
         
-        // 打印完整的请求头信息(不包含敏感信息)
-        logger.debug("请求头信息:");
-        for (String name : request.headers().names()) {
-            if (!name.equals("X-Api-Access-Key")) {
-                logger.debug("{}: {}", name, request.headers().get(name));
-            } else {
-                logger.debug("{}: ******", name);
-            }
-        }
-
         // 发送请求并流式接收响应
         try (Response response = client.newCall(request).execute()) {
             if (!response.isSuccessful()) {
@@ -133,61 +115,57 @@ public class DouBaoTtsService {
             }
 
             // 读取响应体并逐行解析JSON,处理SSE流式响应
-            try (InputStream inputStream = response.body().byteStream();
-                 java.io.BufferedReader reader = new java.io.BufferedReader(new java.io.InputStreamReader(inputStream))) {
-                String line;
-                StringBuilder base64AudioBuilder = new StringBuilder();
-                ObjectMapper objectMapper2 = new ObjectMapper();
-                boolean hasAudioData = false;
-                
-                while ((line = reader.readLine()) != null) {
-                    if (line.trim().isEmpty()) {
-                        continue;
-                    }
-                    
-                    logger.debug("豆包TTS响应行: {}", line);
-                    
-                    try {
-                        // 解析单行JSON
-                        Map<String, Object> responseMap = objectMapper2.readValue(line, Map.class);
-                        
-                        // 检查响应状态
-                        int code = (int) responseMap.get("code");
-                        if (code != 0 && code != 20000000) {
-                            String message = (String) responseMap.get("message");
-                            throw new IOException("豆包TTS服务返回错误: code=" + code + ", message=" + message);
+            if (response.body() != null) {
+                try (InputStream inputStream = response.body().byteStream();
+                     java.io.BufferedReader reader = new java.io.BufferedReader(new java.io.InputStreamReader(inputStream))) {
+                    String line;
+                    StringBuilder base64AudioBuilder = new StringBuilder();
+                    ObjectMapper objectMapper2 = new ObjectMapper();
+                    boolean hasAudioData = false;
+
+                    while ((line = reader.readLine()) != null) {
+                        if (line.trim().isEmpty()) {
+                            continue;
                         }
-                        
-                        // 提取音频数据
-                        Object data = responseMap.get("data");
-                        if (data != null && data instanceof String) {
-                            String chunk = data.toString();
-                            if (!chunk.isEmpty()) {
-                                base64AudioBuilder.append(chunk);
-                                hasAudioData = true;
-                                logger.debug("提取到音频数据块,长度: {} 字符,累计长度: {} 字符", 
-                                        chunk.length(), base64AudioBuilder.length());
+
+                        try {
+                            // 解析单行JSON
+                            Map<String, Object> responseMap = objectMapper2.readValue(line, Map.class);
+
+                            // 检查响应状态
+                            int code = (int) responseMap.get("code");
+                            if (code != 0 && code != 20000000) {
+                                String message = (String) responseMap.get("message");
+                                throw new IOException("豆包TTS服务返回错误: code=" + code + ", message=" + message);
+                            }
+
+                            // 提取音频数据
+                            Object data = responseMap.get("data");
+                            if (data instanceof String) {
+                                String chunk = data.toString();
+                                if (!chunk.isEmpty()) {
+                                    base64AudioBuilder.append(chunk);
+                                    hasAudioData = true;
+                                }
                             }
+                        } catch (Exception e) {
+                            logger.warn("解析响应行失败: {}", e.getMessage());
                         }
-                    } catch (Exception e) {
-                        logger.warn("解析响应行失败: {}", e.getMessage());
                     }
-                }
-                
-                if (hasAudioData && base64AudioBuilder.length() > 0) {
-                    String base64Audio = base64AudioBuilder.toString();
-                    logger.info("成功提取完整base64音频数据,总长度: {} 字符", base64Audio.length());
-                    
-                    // 解码base64音频数据
-                    byte[] audioBytes = java.util.Base64.getDecoder().decode(base64Audio);
-                    logger.info("成功解码音频数据,长度: {} 字节", audioBytes.length);
-                    return audioBytes;
-                } else {
-                    // 没有音频数据
-                    logger.warn("豆包TTS响应没有音频数据");
-                    throw new IOException("豆包TTS响应没有音频数据");
+
+                    if (hasAudioData && !base64AudioBuilder.isEmpty()) {
+                        String base64Audio = base64AudioBuilder.toString();
+
+                        // 解码base64音频数据
+                        return java.util.Base64.getDecoder().decode(base64Audio);
+                    } else {
+                        // 没有音频数据
+                        logger.warn("豆包TTS响应没有音频数据");
+                        throw new IOException("豆包TTS响应没有音频数据");
+                    }
                 }
             }
         }
+        return new byte[0];
     }
 }

+ 1 - 1
byzs-web/src/main/java/cn/iocoder/byzs/module/web/controller/admin/ai/WebAiController.java

@@ -129,7 +129,7 @@ public class WebAiController {
     @Operation(summary = "文本转语音")
     public CommonResult<String> convertTextToSpeech(@RequestBody @Valid TtsVo ttsVo) {
         // 执行文本转语音
-        String audioUrl = ttsService.convertTextToSpeech(ttsVo.getRoleId(), ttsVo.getContent());
+        String audioUrl = ttsService.convertTextToSpeech(ttsVo.getRoleId(), ttsVo.getContent(), ttsVo.getCommand());
         // 返回结果
         return success(audioUrl);
     }

+ 4 - 0
byzs-web/src/main/java/cn/iocoder/byzs/module/web/controller/admin/ai/vo/TtsVo.java

@@ -14,4 +14,8 @@ public class TtsVo {
      * 内容
      */
     private String content;
+    /**
+     * 命令(语音指令)
+     */
+    private String command;
 }