Sfoglia il codice sorgente

优化tts接口,新增豆包语音合成类型

liyanbo 4 settimane fa
parent
commit
20957a8f8f

+ 8 - 0
byzs-module-ai/pom.xml

@@ -134,6 +134,14 @@
 <!--            <artifactId>volcengine-java-sdk-core</artifactId>-->
 <!--            <version>1.0.5</version>-->
 <!--        </dependency>-->
+        <!-- 火山引擎豆包 语音识别v3 -->
+        <dependency>
+            <groupId>org.java-websocket</groupId>
+            <artifactId>Java-WebSocket</artifactId>
+            <version>1.5.5</version>
+        </dependency>
+
+
 
         <!-- 向量存储:https://db-engines.com/en/ranking/vector+dbms -->
         <dependency>

+ 16 - 1
byzs-module-ai/src/main/java/cn/iocoder/byzs/module/ai/framework/ai/config/YudaoAiProperties.java

@@ -78,6 +78,7 @@ public class YudaoAiProperties {
 
         private String enable;
         private String apiKey;
+        private String accessKey;
         /**
          * 基础地址
          */
@@ -87,6 +88,20 @@ public class YudaoAiProperties {
         private Double temperature;
         private Integer maxTokens;
         private Double topP;
+        
+        /**
+         * TTS配置
+         */
+        private TtsProperties tts;
+        
+        @Data
+        public static class TtsProperties { // Doubao TTS settings, exposed as the "tts" property of the enclosing properties class
+            private String appId; // sent by DouBaoTtsService as the X-Api-App-Id header; required there
+            private String accessToken; // NOTE(review): not read by DouBaoTtsService.convertTextToSpeech — confirm whether still needed
+            private String accessKey; // secret; sent by DouBaoTtsService as the X-Api-Access-Key header; required there
+            private String resourceId; // sent as X-Api-Resource-Id; DouBaoTtsService falls back to "seed-tts-2.0" when null
+            private String baseUrl; // full TTS endpoint URL; DouBaoTtsService falls back to its built-in default when null
+        }
 
     }
 
@@ -165,4 +180,4 @@ public class YudaoAiProperties {
 
     }
 
-}
+}

+ 23 - 32
byzs-module-ai/src/main/java/cn/iocoder/byzs/module/ai/service/tts/AiTtsServiceImpl.java

@@ -37,7 +37,10 @@ public class AiTtsServiceImpl implements AiTtsService {
     private AiChatRoleMapper chatRoleMapper;
 
     @Resource
-    private org.springframework.beans.factory.ObjectProvider<StreamTtsService> streamTtsServiceProvider;
+    private DouBaoTtsService douBaoTtsService;
+
+    @Resource
+    private AliyunTtsService aliyunTtsService;
 
     @Resource
     private FileApi fileApi;
@@ -122,39 +125,27 @@ public class AiTtsServiceImpl implements AiTtsService {
             throw exception(TTS_NOT_EXISTS);
         }
 
-        // 3. 使用StreamTtsService将文本转语音
-        StreamTtsService streamTtsService = streamTtsServiceProvider.getObject();
-        try {
-            // 创建音频数据缓冲区
-            java.io.ByteArrayOutputStream audioOutputStream = new java.io.ByteArrayOutputStream();
-            // 设置音频数据回调
-            streamTtsService.setAudioDataCallback(audioData -> {
-                try {
-                    audioOutputStream.write(audioData);
-                } catch (java.io.IOException e) {
-                    throw new RuntimeException("写入音频数据失败", e);
-                }
-            });
-
-            // 开始TTS语音合成
-            streamTtsService.startTts(aiTtsDO, OutputFormatEnum.MP3);
-            // 发送文本
-            streamTtsService.sendText(content);
-            // 停止TTS
-            streamTtsService.stopTts();
-
-            // 4. 存储语音文件并上传到服务器
-            byte[] mp3Data = audioOutputStream.toByteArray();
-            String filePath = fileApi.createFile(mp3Data);
-            return filePath;
-        } catch (Exception e) {
-            throw new RuntimeException("文本转语音失败", e);
-        } finally {
-            // 确保资源被释放
-            if (streamTtsService != null) {
-                streamTtsService.stopTts();
+        byte[] mp3Data;
+        // 3. 根据平台选择不同的TTS服务
+        if ("DouBao".equals(aiTtsDO.getPlatform())) {
+            // 使用豆包TTS服务
+            try {
+                mp3Data = douBaoTtsService.convertTextToSpeech(aiTtsDO, content);
+            } catch (Exception e) {
+                throw new RuntimeException("豆包文本转语音失败", e);
+            }
+        } else {
+            // 使用阿里云TTS服务
+            try {
+                mp3Data = aliyunTtsService.convertTextToSpeech(aiTtsDO, content);
+            } catch (Exception e) {
+                throw new RuntimeException("阿里云文本转语音失败", e);
             }
         }
+
+        // 4. 存储语音文件并上传到服务器
+        String filePath = fileApi.createFile(mp3Data);
+        return filePath;
     }
 
 }

+ 49 - 0
byzs-module-ai/src/main/java/cn/iocoder/byzs/module/ai/service/tts/AliyunTtsService.java

@@ -0,0 +1,49 @@
+package cn.iocoder.byzs.module.ai.service.tts;
+
+import cn.iocoder.byzs.module.ai.dal.dataobject.tts.AiTtsDO;
+import cn.iocoder.byzs.module.ai.util.tts.StreamTtsService;
+import com.alibaba.nls.client.protocol.OutputFormatEnum;
+import org.springframework.beans.factory.ObjectProvider;
+import org.springframework.stereotype.Service;
+
+import javax.annotation.Resource;
+import java.io.ByteArrayOutputStream;
+
+@Service
+public class AliyunTtsService {
+
+    @Resource
+    private ObjectProvider<StreamTtsService> streamTtsServiceProvider;
+
+    public byte[] convertTextToSpeech(AiTtsDO aiTtsDO, String content) {
+        StreamTtsService streamTtsService = streamTtsServiceProvider.getObject();
+        try {
+            // 创建音频数据缓冲区
+            ByteArrayOutputStream audioOutputStream = new ByteArrayOutputStream();
+            // 设置音频数据回调
+            streamTtsService.setAudioDataCallback(audioData -> {
+                try {
+                    audioOutputStream.write(audioData);
+                } catch (Exception e) {
+                    throw new RuntimeException("写入音频数据失败", e);
+                }
+            });
+
+            // 开始TTS语音合成
+            streamTtsService.startTts(aiTtsDO, OutputFormatEnum.MP3);
+            // 发送文本
+            streamTtsService.sendText(content);
+            // 停止TTS
+            streamTtsService.stopTts();
+
+            return audioOutputStream.toByteArray();
+        } catch (Exception e) {
+            throw new RuntimeException("文本转语音失败", e);
+        } finally {
+            // 确保资源被释放
+            if (streamTtsService != null) {
+                streamTtsService.stopTts();
+            }
+        }
+    }
+}

+ 193 - 0
byzs-module-ai/src/main/java/cn/iocoder/byzs/module/ai/service/tts/DouBaoTtsService.java

@@ -0,0 +1,193 @@
+package cn.iocoder.byzs.module.ai.service.tts;
+
+import cn.iocoder.byzs.module.ai.dal.dataobject.tts.AiTtsDO;
+import cn.iocoder.byzs.module.ai.framework.ai.config.YudaoAiProperties;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import okhttp3.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.springframework.stereotype.Service;
+
+import javax.annotation.Resource;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+
+@Service
+public class DouBaoTtsService {
+
+    private static final Logger logger = LoggerFactory.getLogger(DouBaoTtsService.class);
+
+    @Resource
+    private YudaoAiProperties yudaoAiProperties;
+
+    public byte[] convertTextToSpeech(AiTtsDO aiTtsDO, String content) throws IOException {
+        YudaoAiProperties.DouBaoProperties doubaoProperties = yudaoAiProperties.getDoubao();
+        if (doubaoProperties == null) {
+            throw new IllegalArgumentException("豆包配置未设置");
+        }
+        
+        YudaoAiProperties.DouBaoProperties.TtsProperties doubaoTtsProperties = doubaoProperties.getTts();
+        if (doubaoTtsProperties == null) {
+            throw new IllegalArgumentException("豆包TTS配置未设置");
+        }
+
+        logger.info("豆包配置: enable={}, apiKey={}", doubaoProperties.getEnable(), doubaoProperties.getApiKey());
+        logger.info("豆包TTS配置: appId={}, accessKey={}, resourceId={}, baseUrl={}", 
+                doubaoTtsProperties.getAppId(), doubaoTtsProperties.getAccessKey(), 
+                doubaoTtsProperties.getResourceId(), doubaoTtsProperties.getBaseUrl());
+
+        String ttsUrl = doubaoTtsProperties.getBaseUrl() != null ? doubaoTtsProperties.getBaseUrl() : "https://openspeech.bytedance.com/api/v3/tts/unidirectional";
+        String appId = doubaoTtsProperties.getAppId();
+        String accessKey = doubaoTtsProperties.getAccessKey();
+        String resourceId = doubaoTtsProperties.getResourceId() != null ? doubaoTtsProperties.getResourceId() : "seed-tts-2.0";
+
+        if (appId == null || accessKey == null) {
+            throw new IllegalArgumentException("豆包TTS配置不完整,缺少appId或accessKey");
+        }
+
+        OkHttpClient client = new OkHttpClient();
+
+        // 构造请求体,按照接口文档要求组装JSON参数
+
+        // 使用Map构建请求体,按照文档要求组装参数
+        Map<String, Object> requestMap = new HashMap<>();
+        Map<String, Object> reqParams = new HashMap<>();
+        reqParams.put("speaker", aiTtsDO.getModel()); // 使用配置的音色
+        reqParams.put("text", content); // 待合成文本
+        
+        Map<String, Object> audioParams = new HashMap<>();
+        audioParams.put("format", "mp3"); // 输出音频格式
+        audioParams.put("sample_rate", 16000); // 推荐采样率
+//        audioParams.put("emotion", "带有感情的朗读诗词,要深情的朗读。");
+
+        // 语速和音量参数
+        if (aiTtsDO.getSpeechRate() != null) {
+            reqParams.put("speech_rate", aiTtsDO.getSpeechRate());
+        }
+        if (aiTtsDO.getVolume() != null) {
+            reqParams.put("loudness_rate", aiTtsDO.getVolume());
+        }
+
+        reqParams.put("audio_params", audioParams);
+
+        // 额外参数
+        Map<String, Object> additions = new HashMap<>();
+        //音调
+//        Map<String, Object> post_process = new HashMap<>();
+//        post_process.put("pitch", aiTtsDO.getVolume());
+//        additions.put("post_process", post_process);
+
+        //语音指令
+        String[] context_texts = {"带有感情的朗读诗词,要深情的朗读。"};
+        additions.put("context_texts", context_texts);
+
+        reqParams.put("additions", additions);
+        requestMap.put("req_params", reqParams);
+
+
+        // 转换为JSON字符串
+        ObjectMapper objectMapper = new ObjectMapper();
+        String requestBody = objectMapper.writeValueAsString(requestMap);
+
+        MediaType mediaType = MediaType.parse("application/json");
+        RequestBody body = RequestBody.create(mediaType, requestBody);
+        
+        String requestId = UUID.randomUUID().toString();
+        logger.info("发送豆包TTS请求,url: {}, appId: {}, resourceId: {}, requestId: {}", 
+                ttsUrl, appId, resourceId, requestId);
+        logger.debug("请求体: {}", requestBody);
+        
+        // 构建请求
+        Request request = new Request.Builder()
+                .url(ttsUrl)
+                .addHeader("X-Api-App-Id", appId)
+                .addHeader("X-Api-Access-Key", accessKey)
+                .addHeader("X-Api-Resource-Id", resourceId)
+                .addHeader("X-Api-Request-Id", requestId)
+                .addHeader("Content-Type", "application/json")
+                .post(body)
+                .build();
+        
+        // 打印完整的请求头信息(不包含敏感信息)
+        logger.debug("请求头信息:");
+        for (String name : request.headers().names()) {
+            if (!name.equals("X-Api-Access-Key")) {
+                logger.debug("{}: {}", name, request.headers().get(name));
+            } else {
+                logger.debug("{}: ******", name);
+            }
+        }
+
+        // 发送请求并流式接收响应
+        try (Response response = client.newCall(request).execute()) {
+            if (!response.isSuccessful()) {
+                String errorBody = response.body() != null ? response.body().string() : "无响应体";
+                logger.error("豆包TTS请求失败,状态码: {}, 响应: {}, 错误体: {}", 
+                        response.code(), response.message(), errorBody);
+                throw new IOException("请求失败: " + response + ",错误体: " + errorBody);
+            }
+
+            // 读取响应体并逐行解析JSON,处理SSE流式响应
+            try (InputStream inputStream = response.body().byteStream();
+                 java.io.BufferedReader reader = new java.io.BufferedReader(new java.io.InputStreamReader(inputStream))) {
+                String line;
+                StringBuilder base64AudioBuilder = new StringBuilder();
+                ObjectMapper objectMapper2 = new ObjectMapper();
+                boolean hasAudioData = false;
+                
+                while ((line = reader.readLine()) != null) {
+                    if (line.trim().isEmpty()) {
+                        continue;
+                    }
+                    
+                    logger.debug("豆包TTS响应行: {}", line);
+                    
+                    try {
+                        // 解析单行JSON
+                        Map<String, Object> responseMap = objectMapper2.readValue(line, Map.class);
+                        
+                        // 检查响应状态
+                        int code = (int) responseMap.get("code");
+                        if (code != 0 && code != 20000000) {
+                            String message = (String) responseMap.get("message");
+                            throw new IOException("豆包TTS服务返回错误: code=" + code + ", message=" + message);
+                        }
+                        
+                        // 提取音频数据
+                        Object data = responseMap.get("data");
+                        if (data != null && data instanceof String) {
+                            String chunk = data.toString();
+                            if (!chunk.isEmpty()) {
+                                base64AudioBuilder.append(chunk);
+                                hasAudioData = true;
+                                logger.debug("提取到音频数据块,长度: {} 字符,累计长度: {} 字符", 
+                                        chunk.length(), base64AudioBuilder.length());
+                            }
+                        }
+                    } catch (Exception e) {
+                        logger.warn("解析响应行失败: {}", e.getMessage());
+                    }
+                }
+                
+                if (hasAudioData && base64AudioBuilder.length() > 0) {
+                    String base64Audio = base64AudioBuilder.toString();
+                    logger.info("成功提取完整base64音频数据,总长度: {} 字符", base64Audio.length());
+                    
+                    // 解码base64音频数据
+                    byte[] audioBytes = java.util.Base64.getDecoder().decode(base64Audio);
+                    logger.info("成功解码音频数据,长度: {} 字节", audioBytes.length);
+                    return audioBytes;
+                } else {
+                    // 没有音频数据
+                    logger.warn("豆包TTS响应没有音频数据");
+                    throw new IOException("豆包TTS响应没有音频数据");
+                }
+            }
+        }
+    }
+}

+ 7 - 1
byzs-server/src/main/resources/application.yaml

@@ -205,11 +205,17 @@ byzs:
       enable: true
       api-key: sk-5b612c071f904fd59808dc07c9a4f1b8
       model: deepseek-chat
-    doubao: # 字节豆包
+    doubao: # 字节豆包(真实)
       enable: true
       api-key: 702a7b51-8b6b-483c-8488-ea9f5bc7dc25
       model: doubao-1-5-lite-32k-250115
       image-model: dab-official-text2image-v1 # 文生图模型名称
+      tts: # TTS config (project: default; speech synthesis v3)
+        appId: 8082193636
+        accessToken: nTp5pr10TFW1hb5LzZZfZcjjnu-HseX4 # NOTE(review): secret committed in plain text — move to an environment variable / secret store and rotate
+        accessKey: I9noz2pmWLGjLtBys0WJr6V-yBAx_Z6z # NOTE(review): secret committed in plain text — move to an environment variable / secret store and rotate
+        resourceId: seed-tts-2.0
+        baseUrl: https://openspeech.bytedance.com/api/v3/tts/unidirectional
     hunyuan: # 腾讯混元
       enable: true
       api-key: sk-abc