4 settimane fa · 20957a8f8f
--- a/byzs-module-ai/pom.xml
+++ b/byzs-module-ai/pom.xml
@@ -134,6 +134,14 @@
 
				 <!--            <artifactId>volcengine-java-sdk-core</artifactId>-->
			
 
				 <!--            <version>1.0.5</version>-->
			
 
				 <!--        </dependency>-->
			
 
				+        <!-- 火山引擎豆包 语音识别v3 -->
			
 
				+        <dependency>
			
 
				+            <groupId>org.java-websocket</groupId>
			
 
				+            <artifactId>Java-WebSocket</artifactId>
			
 
				+            <version>1.5.5</version>
			
 
				+        </dependency>
			
 
				+
			
 
				+
			
 
				 
			
 
				         <!-- 向量存储：https://db-engines.com/en/ranking/vector+dbms -->
			
 
				         <dependency>
			
--- a/byzs-module-ai/src/main/java/cn/iocoder/byzs/module/ai/framework/ai/config/YudaoAiProperties.java
+++ b/byzs-module-ai/src/main/java/cn/iocoder/byzs/module/ai/framework/ai/config/YudaoAiProperties.java
@@ -78,6 +78,7 @@ public class YudaoAiProperties {
 
				 
			
 
				         private String enable;
			
 
				         private String apiKey;
			
 
				+        private String accessKey;
			
 
				         /**
			
 
				          * 基础地址
			
 
				          */
			
@@ -87,6 +88,20 @@ public class YudaoAiProperties {
 
				         private Double temperature;
			
 
				         private Integer maxTokens;
			
 
				         private Double topP;
			
 
				+        
			
 
				+        /**
			
 
				+         * TTS配置
			
 
				+         */
			
 
				+        private TtsProperties tts;
			
 
				+        
			
 
				+        @Data
			
 
				+        public static class TtsProperties { // Connection settings for the DouBao TTS HTTP API; read by DouBaoTtsService.
			
 
				+            private String appId; // Sent as the X-Api-App-Id request header; DouBaoTtsService rejects a null value.
			
 
				+            private String accessToken; // NOTE(review): not read by the DouBaoTtsService code in this change - confirm whether it is still needed.
			
 
				+            private String accessKey; // Sent as the X-Api-Access-Key request header; DouBaoTtsService rejects a null value.
			
 
				+            private String resourceId; // Sent as the X-Api-Resource-Id request header; DouBaoTtsService uses "seed-tts-2.0" when this is null.
			
 
				+            private String baseUrl; // Full request URL; DouBaoTtsService uses its built-in default URL when this is null.
			
 
				+        }
			
 
				 
			
 
				     }
			
 
				 
			
@@ -165,4 +180,4 @@ public class YudaoAiProperties {
 
				 
			
 
				     }
			
 
				 
			
 
				-}
			
 
				+}
			
--- a/byzs-module-ai/src/main/java/cn/iocoder/byzs/module/ai/service/tts/AiTtsServiceImpl.java
+++ b/byzs-module-ai/src/main/java/cn/iocoder/byzs/module/ai/service/tts/AiTtsServiceImpl.java
@@ -37,7 +37,10 @@ public class AiTtsServiceImpl implements AiTtsService {
 
				     private AiChatRoleMapper chatRoleMapper;
			
 
				 
			
 
				     @Resource
			
 
				-    private org.springframework.beans.factory.ObjectProvider<StreamTtsService> streamTtsServiceProvider;
			
 
				+    private DouBaoTtsService douBaoTtsService;
			
 
				+
			
 
				+    @Resource
			
 
				+    private AliyunTtsService aliyunTtsService;
			
 
				 
			
 
				     @Resource
			
 
				     private FileApi fileApi;
			
@@ -122,39 +125,27 @@ public class AiTtsServiceImpl implements AiTtsService {
 
				             throw exception(TTS_NOT_EXISTS);
			
 
				         }
			
 
				 
			
 
				-        // 3. 使用StreamTtsService将文本转语音
			
 
				-        StreamTtsService streamTtsService = streamTtsServiceProvider.getObject();
			
 
				-        try {
			
 
				-            // 创建音频数据缓冲区
			
 
				-            java.io.ByteArrayOutputStream audioOutputStream = new java.io.ByteArrayOutputStream();
			
 
				-            // 设置音频数据回调
			
 
				-            streamTtsService.setAudioDataCallback(audioData -> {
			
 
				-                try {
			
 
				-                    audioOutputStream.write(audioData);
			
 
				-                } catch (java.io.IOException e) {
			
 
				-                    throw new RuntimeException("写入音频数据失败", e);
			
 
				-                }
			
 
				-            });
			
 
				-
			
 
				-            // 开始TTS语音合成
			
 
				-            streamTtsService.startTts(aiTtsDO, OutputFormatEnum.MP3);
			
 
				-            // 发送文本
			
 
				-            streamTtsService.sendText(content);
			
 
				-            // 停止TTS
			
 
				-            streamTtsService.stopTts();
			
 
				-
			
 
				-            // 4. 存储语音文件并上传到服务器
			
 
				-            byte[] mp3Data = audioOutputStream.toByteArray();
			
 
				-            String filePath = fileApi.createFile(mp3Data);
			
 
				-            return filePath;
			
 
				-        } catch (Exception e) {
			
 
				-            throw new RuntimeException("文本转语音失败", e);
			
 
				-        } finally {
			
 
				-            // 确保资源被释放
			
 
				-            if (streamTtsService != null) {
			
 
				-                streamTtsService.stopTts();
			
 
				+        byte[] mp3Data;
			
 
				+        // 3. 根据平台选择不同的TTS服务
			
 
				+        if ("DouBao".equals(aiTtsDO.getPlatform())) {
			
 
				+            // 使用豆包TTS服务
			
 
				+            try {
			
 
				+                mp3Data = douBaoTtsService.convertTextToSpeech(aiTtsDO, content);
			
 
				+            } catch (Exception e) {
			
 
				+                throw new RuntimeException("豆包文本转语音失败", e);
			
 
				+            }
			
 
				+        } else {
			
 
				+            // 使用阿里云TTS服务
			
 
				+            try {
			
 
				+                mp3Data = aliyunTtsService.convertTextToSpeech(aiTtsDO, content);
			
 
				+            } catch (Exception e) {
			
 
				+                throw new RuntimeException("阿里云文本转语音失败", e);
			
 
				             }
			
 
				         }
			
 
				+
			
 
				+        // 4. 存储语音文件并上传到服务器
			
 
				+        String filePath = fileApi.createFile(mp3Data);
			
 
				+        return filePath;
			
 
				     }
			
 
				 
			
 
				 }
			
--- a/byzs-module-ai/src/main/java/cn/iocoder/byzs/module/ai/service/tts/AliyunTtsService.java
+++ b/byzs-module-ai/src/main/java/cn/iocoder/byzs/module/ai/service/tts/AliyunTtsService.java
@@ -0,0 +1,49 @@
 
				+package cn.iocoder.byzs.module.ai.service.tts;
			
 
				+
			
 
				+import cn.iocoder.byzs.module.ai.dal.dataobject.tts.AiTtsDO;
			
 
				+import cn.iocoder.byzs.module.ai.util.tts.StreamTtsService;
			
 
				+import com.alibaba.nls.client.protocol.OutputFormatEnum;
			
 
				+import org.springframework.beans.factory.ObjectProvider;
			
 
				+import org.springframework.stereotype.Service;
			
 
				+
			
 
				+import javax.annotation.Resource;
			
 
				+import java.io.ByteArrayOutputStream;
			
 
				+
			
 
				+@Service
			
 
				+public class AliyunTtsService {
			
 
				+
			
 
				+    @Resource
			
 
				+    private ObjectProvider<StreamTtsService> streamTtsServiceProvider;
			
 
				+
			
 
				+    public byte[] convertTextToSpeech(AiTtsDO aiTtsDO, String content) {
			
 
				+        StreamTtsService streamTtsService = streamTtsServiceProvider.getObject();
			
 
				+        try {
			
 
				+            // 创建音频数据缓冲区
			
 
				+            ByteArrayOutputStream audioOutputStream = new ByteArrayOutputStream();
			
 
				+            // 设置音频数据回调
			
 
				+            streamTtsService.setAudioDataCallback(audioData -> {
			
 
				+                try {
			
 
				+                    audioOutputStream.write(audioData);
			
 
				+                } catch (Exception e) {
			
 
				+                    throw new RuntimeException("写入音频数据失败", e);
			
 
				+                }
			
 
				+            });
			
 
				+
			
 
				+            // 开始TTS语音合成
			
 
				+            streamTtsService.startTts(aiTtsDO, OutputFormatEnum.MP3);
			
 
				+            // 发送文本
			
 
				+            streamTtsService.sendText(content);
			
 
				+            // 停止TTS
			
 
				+            streamTtsService.stopTts();
			
 
				+
			
 
				+            return audioOutputStream.toByteArray();
			
 
				+        } catch (Exception e) {
			
 
				+            throw new RuntimeException("文本转语音失败", e);
			
 
				+        } finally {
			
 
				+            // 确保资源被释放
			
 
				+            if (streamTtsService != null) {
			
 
				+                streamTtsService.stopTts();
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+}
			
--- a/byzs-module-ai/src/main/java/cn/iocoder/byzs/module/ai/service/tts/DouBaoTtsService.java
+++ b/byzs-module-ai/src/main/java/cn/iocoder/byzs/module/ai/service/tts/DouBaoTtsService.java
@@ -0,0 +1,193 @@
 
				+package cn.iocoder.byzs.module.ai.service.tts;
			
 
				+
			
 
				+import cn.iocoder.byzs.module.ai.dal.dataobject.tts.AiTtsDO;
			
 
				+import cn.iocoder.byzs.module.ai.framework.ai.config.YudaoAiProperties;
			
 
				+import com.fasterxml.jackson.databind.ObjectMapper;
			
 
				+import okhttp3.*;
			
 
				+import org.slf4j.Logger;
			
 
				+import org.slf4j.LoggerFactory;
			
 
				+import org.springframework.stereotype.Service;
			
 
				+
			
 
				+import javax.annotation.Resource;
			
 
				+import java.io.ByteArrayOutputStream;
			
 
				+import java.io.IOException;
			
 
				+import java.io.InputStream;
			
 
				+import java.util.HashMap;
			
 
				+import java.util.List;
			
 
				+import java.util.Map;
			
 
				+import java.util.UUID;
			
 
				+
			
 
				+@Service
			
 
				+public class DouBaoTtsService {
			
 
				+
			
 
				+    private static final Logger logger = LoggerFactory.getLogger(DouBaoTtsService.class);
			
 
				+
			
 
				+    @Resource
			
 
				+    private YudaoAiProperties yudaoAiProperties;
			
 
				+
			
 
				+    public byte[] convertTextToSpeech(AiTtsDO aiTtsDO, String content) throws IOException {
			
 
				+        YudaoAiProperties.DouBaoProperties doubaoProperties = yudaoAiProperties.getDoubao();
			
 
				+        if (doubaoProperties == null) {
			
 
				+            throw new IllegalArgumentException("豆包配置未设置");
			
 
				+        }
			
 
				+        
			
 
				+        YudaoAiProperties.DouBaoProperties.TtsProperties doubaoTtsProperties = doubaoProperties.getTts();
			
 
				+        if (doubaoTtsProperties == null) {
			
 
				+            throw new IllegalArgumentException("豆包TTS配置未设置");
			
 
				+        }
			
 
				+
			
 
				+        logger.info("豆包配置: enable={}, apiKey={}", doubaoProperties.getEnable(), doubaoProperties.getApiKey());
			
 
				+        logger.info("豆包TTS配置: appId={}, accessKey={}, resourceId={}, baseUrl={}", 
			
 
				+                doubaoTtsProperties.getAppId(), doubaoTtsProperties.getAccessKey(), 
			
 
				+                doubaoTtsProperties.getResourceId(), doubaoTtsProperties.getBaseUrl());
			
 
				+
			
 
				+        String ttsUrl = doubaoTtsProperties.getBaseUrl() != null ? doubaoTtsProperties.getBaseUrl() : "https://openspeech.bytedance.com/api/v3/tts/unidirectional";
			
 
				+        String appId = doubaoTtsProperties.getAppId();
			
 
				+        String accessKey = doubaoTtsProperties.getAccessKey();
			
 
				+        String resourceId = doubaoTtsProperties.getResourceId() != null ? doubaoTtsProperties.getResourceId() : "seed-tts-2.0";
			
 
				+
			
 
				+        if (appId == null || accessKey == null) {
			
 
				+            throw new IllegalArgumentException("豆包TTS配置不完整，缺少appId或accessKey");
			
 
				+        }
			
 
				+
			
 
				+        OkHttpClient client = new OkHttpClient();
			
 
				+
			
 
				+        // 构造请求体，按照接口文档要求组装JSON参数
			
 
				+
			
 
				+        // 使用Map构建请求体，按照文档要求组装参数
			
 
				+        Map<String, Object> requestMap = new HashMap<>();
			
 
				+        Map<String, Object> reqParams = new HashMap<>();
			
 
				+        reqParams.put("speaker", aiTtsDO.getModel()); // 使用配置的音色
			
 
				+        reqParams.put("text", content); // 待合成文本
			
 
				+        
			
 
				+        Map<String, Object> audioParams = new HashMap<>();
			
 
				+        audioParams.put("format", "mp3"); // 输出音频格式
			
 
				+        audioParams.put("sample_rate", 16000); // 推荐采样率
			
 
				+//        audioParams.put("emotion", "带有感情的朗读诗词，要深情的朗读。");
			
 
				+
			
 
				+        // 语速和音量参数
			
 
				+        if (aiTtsDO.getSpeechRate() != null) {
			
 
				+            reqParams.put("speech_rate", aiTtsDO.getSpeechRate());
			
 
				+        }
			
 
				+        if (aiTtsDO.getVolume() != null) {
			
 
				+            reqParams.put("loudness_rate", aiTtsDO.getVolume());
			
 
				+        }
			
 
				+
			
 
				+        reqParams.put("audio_params", audioParams);
			
 
				+
			
 
				+        // 额外参数
			
 
				+        Map<String, Object> additions = new HashMap<>();
			
 
				+        //音调
			
 
				+//        Map<String, Object> post_process = new HashMap<>();
			
 
				+//        post_process.put("pitch", aiTtsDO.getVolume());
			
 
				+//        additions.put("post_process", post_process);
			
 
				+
			
 
				+        //语音指令
			
 
				+        String[] context_texts = {"带有感情的朗读诗词，要深情的朗读。"};
			
 
				+        additions.put("context_texts", context_texts);
			
 
				+
			
 
				+        reqParams.put("additions", additions);
			
 
				+        requestMap.put("req_params", reqParams);
			
 
				+
			
 
				+
			
 
				+        // 转换为JSON字符串
			
 
				+        ObjectMapper objectMapper = new ObjectMapper();
			
 
				+        String requestBody = objectMapper.writeValueAsString(requestMap);
			
 
				+
			
 
				+        MediaType mediaType = MediaType.parse("application/json");
			
 
				+        RequestBody body = RequestBody.create(mediaType, requestBody);
			
 
				+        
			
 
				+        String requestId = UUID.randomUUID().toString();
			
 
				+        logger.info("发送豆包TTS请求，url: {}, appId: {}, resourceId: {}, requestId: {}", 
			
 
				+                ttsUrl, appId, resourceId, requestId);
			
 
				+        logger.debug("请求体: {}", requestBody);
			
 
				+        
			
 
				+        // 构建请求
			
 
				+        Request request = new Request.Builder()
			
 
				+                .url(ttsUrl)
			
 
				+                .addHeader("X-Api-App-Id", appId)
			
 
				+                .addHeader("X-Api-Access-Key", accessKey)
			
 
				+                .addHeader("X-Api-Resource-Id", resourceId)
			
 
				+                .addHeader("X-Api-Request-Id", requestId)
			
 
				+                .addHeader("Content-Type", "application/json")
			
 
				+                .post(body)
			
 
				+                .build();
			
 
				+        
			
 
				+        // 打印完整的请求头信息（不包含敏感信息）
			
 
				+        logger.debug("请求头信息:");
			
 
				+        for (String name : request.headers().names()) {
			
 
				+            if (!name.equals("X-Api-Access-Key")) {
			
 
				+                logger.debug("{}: {}", name, request.headers().get(name));
			
 
				+            } else {
			
 
				+                logger.debug("{}: ******", name);
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				+        // 发送请求并流式接收响应
			
 
				+        try (Response response = client.newCall(request).execute()) {
			
 
				+            if (!response.isSuccessful()) {
			
 
				+                String errorBody = response.body() != null ? response.body().string() : "无响应体";
			
 
				+                logger.error("豆包TTS请求失败，状态码: {}, 响应: {}, 错误体: {}", 
			
 
				+                        response.code(), response.message(), errorBody);
			
 
				+                throw new IOException("请求失败: " + response + "，错误体: " + errorBody);
			
 
				+            }
			
 
				+
			
 
				+            // 读取响应体并逐行解析JSON，处理SSE流式响应
			
 
				+            try (InputStream inputStream = response.body().byteStream();
			
 
				+                 java.io.BufferedReader reader = new java.io.BufferedReader(new java.io.InputStreamReader(inputStream))) {
			
 
				+                String line;
			
 
				+                StringBuilder base64AudioBuilder = new StringBuilder();
			
 
				+                ObjectMapper objectMapper2 = new ObjectMapper();
			
 
				+                boolean hasAudioData = false;
			
 
				+                
			
 
				+                while ((line = reader.readLine()) != null) {
			
 
				+                    if (line.trim().isEmpty()) {
			
 
				+                        continue;
			
 
				+                    }
			
 
				+                    
			
 
				+                    logger.debug("豆包TTS响应行: {}", line);
			
 
				+                    
			
 
				+                    try {
			
 
				+                        // 解析单行JSON
			
 
				+                        Map<String, Object> responseMap = objectMapper2.readValue(line, Map.class);
			
 
				+                        
			
 
				+                        // 检查响应状态
			
 
				+                        int code = (int) responseMap.get("code");
			
 
				+                        if (code != 0 && code != 20000000) {
			
 
				+                            String message = (String) responseMap.get("message");
			
 
				+                            throw new IOException("豆包TTS服务返回错误: code=" + code + ", message=" + message);
			
 
				+                        }
			
 
				+                        
			
 
				+                        // 提取音频数据
			
 
				+                        Object data = responseMap.get("data");
			
 
				+                        if (data != null && data instanceof String) {
			
 
				+                            String chunk = data.toString();
			
 
				+                            if (!chunk.isEmpty()) {
			
 
				+                                base64AudioBuilder.append(chunk);
			
 
				+                                hasAudioData = true;
			
 
				+                                logger.debug("提取到音频数据块，长度: {} 字符，累计长度: {} 字符", 
			
 
				+                                        chunk.length(), base64AudioBuilder.length());
			
 
				+                            }
			
 
				+                        }
			
 
				+                    } catch (Exception e) {
			
 
				+                        logger.warn("解析响应行失败: {}", e.getMessage());
			
 
				+                    }
			
 
				+                }
			
 
				+                
			
 
				+                if (hasAudioData && base64AudioBuilder.length() > 0) {
			
 
				+                    String base64Audio = base64AudioBuilder.toString();
			
 
				+                    logger.info("成功提取完整base64音频数据，总长度: {} 字符", base64Audio.length());
			
 
				+                    
			
 
				+                    // 解码base64音频数据
			
 
				+                    byte[] audioBytes = java.util.Base64.getDecoder().decode(base64Audio);
			
 
				+                    logger.info("成功解码音频数据，长度: {} 字节", audioBytes.length);
			
 
				+                    return audioBytes;
			
 
				+                } else {
			
 
				+                    // 没有音频数据
			
 
				+                    logger.warn("豆包TTS响应没有音频数据");
			
 
				+                    throw new IOException("豆包TTS响应没有音频数据");
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+}
			
--- a/byzs-server/src/main/resources/application.yaml
+++ b/byzs-server/src/main/resources/application.yaml
@@ -205,11 +205,17 @@ byzs:
 
				       enable: true
			
 
				       api-key: sk-5b612c071f904fd59808dc07c9a4f1b8
			
 
				       model: deepseek-chat
			
 
				-    doubao: # 字节豆包
			
 
				+    doubao: # 字节豆包（真实）
			
 
				       enable: true
			
 
				       api-key: 702a7b51-8b6b-483c-8488-ea9f5bc7dc25
			
 
				       model: doubao-1-5-lite-32k-250115
			
 
				       image-model: dab-official-text2image-v1 # 文生图模型名称
			
 
				+      tts: # TTS 配置(项目：default；语音合成v3)
			
 
				+        appId: 8082193636
			
 
				+        accessToken: nTp5pr10TFW1hb5LzZZfZcjjnu-HseX4
			
 
				+        accessKey: I9noz2pmWLGjLtBys0WJr6V-yBAx_Z6z
			
 
				+        resourceId: seed-tts-2.0
			
 
				+        baseUrl: https://openspeech.bytedance.com/api/v3/tts/unidirectional
			
 
				     hunyuan: # 腾讯混元
			
 
				       enable: true
			
 
				       api-key: sk-abc