|
|
@@ -0,0 +1,193 @@
|
|
|
+package cn.iocoder.byzs.module.ai.service.tts;
|
|
|
+
|
|
|
+import cn.iocoder.byzs.module.ai.dal.dataobject.tts.AiTtsDO;
|
|
|
+import cn.iocoder.byzs.module.ai.framework.ai.config.YudaoAiProperties;
|
|
|
+import com.fasterxml.jackson.databind.ObjectMapper;
|
|
|
+import okhttp3.*;
|
|
|
+import org.slf4j.Logger;
|
|
|
+import org.slf4j.LoggerFactory;
|
|
|
+import org.springframework.stereotype.Service;
|
|
|
+
|
|
|
+import javax.annotation.Resource;
|
|
|
+import java.io.ByteArrayOutputStream;
|
|
|
+import java.io.IOException;
|
|
|
+import java.io.InputStream;
|
|
|
+import java.util.HashMap;
|
|
|
+import java.util.List;
|
|
|
+import java.util.Map;
|
|
|
+import java.util.UUID;
|
|
|
+
|
|
|
+@Service
|
|
|
+public class DouBaoTtsService {
|
|
|
+
|
|
|
+ private static final Logger logger = LoggerFactory.getLogger(DouBaoTtsService.class);
|
|
|
+
|
|
|
+ @Resource
|
|
|
+ private YudaoAiProperties yudaoAiProperties;
|
|
|
+
|
|
|
+ public byte[] convertTextToSpeech(AiTtsDO aiTtsDO, String content) throws IOException {
|
|
|
+ YudaoAiProperties.DouBaoProperties doubaoProperties = yudaoAiProperties.getDoubao();
|
|
|
+ if (doubaoProperties == null) {
|
|
|
+ throw new IllegalArgumentException("豆包配置未设置");
|
|
|
+ }
|
|
|
+
|
|
|
+ YudaoAiProperties.DouBaoProperties.TtsProperties doubaoTtsProperties = doubaoProperties.getTts();
|
|
|
+ if (doubaoTtsProperties == null) {
|
|
|
+ throw new IllegalArgumentException("豆包TTS配置未设置");
|
|
|
+ }
|
|
|
+
|
|
|
+ logger.info("豆包配置: enable={}, apiKey={}", doubaoProperties.getEnable(), doubaoProperties.getApiKey());
|
|
|
+ logger.info("豆包TTS配置: appId={}, accessKey={}, resourceId={}, baseUrl={}",
|
|
|
+ doubaoTtsProperties.getAppId(), doubaoTtsProperties.getAccessKey(),
|
|
|
+ doubaoTtsProperties.getResourceId(), doubaoTtsProperties.getBaseUrl());
|
|
|
+
|
|
|
+ String ttsUrl = doubaoTtsProperties.getBaseUrl() != null ? doubaoTtsProperties.getBaseUrl() : "https://openspeech.bytedance.com/api/v3/tts/unidirectional";
|
|
|
+ String appId = doubaoTtsProperties.getAppId();
|
|
|
+ String accessKey = doubaoTtsProperties.getAccessKey();
|
|
|
+ String resourceId = doubaoTtsProperties.getResourceId() != null ? doubaoTtsProperties.getResourceId() : "seed-tts-2.0";
|
|
|
+
|
|
|
+ if (appId == null || accessKey == null) {
|
|
|
+ throw new IllegalArgumentException("豆包TTS配置不完整,缺少appId或accessKey");
|
|
|
+ }
|
|
|
+
|
|
|
+ OkHttpClient client = new OkHttpClient();
|
|
|
+
|
|
|
+ // 构造请求体,按照接口文档要求组装JSON参数
|
|
|
+
|
|
|
+ // 使用Map构建请求体,按照文档要求组装参数
|
|
|
+ Map<String, Object> requestMap = new HashMap<>();
|
|
|
+ Map<String, Object> reqParams = new HashMap<>();
|
|
|
+ reqParams.put("speaker", aiTtsDO.getModel()); // 使用配置的音色
|
|
|
+ reqParams.put("text", content); // 待合成文本
|
|
|
+
|
|
|
+ Map<String, Object> audioParams = new HashMap<>();
|
|
|
+ audioParams.put("format", "mp3"); // 输出音频格式
|
|
|
+ audioParams.put("sample_rate", 16000); // 推荐采样率
|
|
|
+// audioParams.put("emotion", "带有感情的朗读诗词,要深情的朗读。");
|
|
|
+
|
|
|
+ // 语速和音量参数
|
|
|
+ if (aiTtsDO.getSpeechRate() != null) {
|
|
|
+ reqParams.put("speech_rate", aiTtsDO.getSpeechRate());
|
|
|
+ }
|
|
|
+ if (aiTtsDO.getVolume() != null) {
|
|
|
+ reqParams.put("loudness_rate", aiTtsDO.getVolume());
|
|
|
+ }
|
|
|
+
|
|
|
+ reqParams.put("audio_params", audioParams);
|
|
|
+
|
|
|
+ // 额外参数
|
|
|
+ Map<String, Object> additions = new HashMap<>();
|
|
|
+ //音调
|
|
|
+// Map<String, Object> post_process = new HashMap<>();
|
|
|
+// post_process.put("pitch", aiTtsDO.getVolume());
|
|
|
+// additions.put("post_process", post_process);
|
|
|
+
|
|
|
+ //语音指令
|
|
|
+ String[] context_texts = {"带有感情的朗读诗词,要深情的朗读。"};
|
|
|
+ additions.put("context_texts", context_texts);
|
|
|
+
|
|
|
+ reqParams.put("additions", additions);
|
|
|
+ requestMap.put("req_params", reqParams);
|
|
|
+
|
|
|
+
|
|
|
+ // 转换为JSON字符串
|
|
|
+ ObjectMapper objectMapper = new ObjectMapper();
|
|
|
+ String requestBody = objectMapper.writeValueAsString(requestMap);
|
|
|
+
|
|
|
+ MediaType mediaType = MediaType.parse("application/json");
|
|
|
+ RequestBody body = RequestBody.create(mediaType, requestBody);
|
|
|
+
|
|
|
+ String requestId = UUID.randomUUID().toString();
|
|
|
+ logger.info("发送豆包TTS请求,url: {}, appId: {}, resourceId: {}, requestId: {}",
|
|
|
+ ttsUrl, appId, resourceId, requestId);
|
|
|
+ logger.debug("请求体: {}", requestBody);
|
|
|
+
|
|
|
+ // 构建请求
|
|
|
+ Request request = new Request.Builder()
|
|
|
+ .url(ttsUrl)
|
|
|
+ .addHeader("X-Api-App-Id", appId)
|
|
|
+ .addHeader("X-Api-Access-Key", accessKey)
|
|
|
+ .addHeader("X-Api-Resource-Id", resourceId)
|
|
|
+ .addHeader("X-Api-Request-Id", requestId)
|
|
|
+ .addHeader("Content-Type", "application/json")
|
|
|
+ .post(body)
|
|
|
+ .build();
|
|
|
+
|
|
|
+ // 打印完整的请求头信息(不包含敏感信息)
|
|
|
+ logger.debug("请求头信息:");
|
|
|
+ for (String name : request.headers().names()) {
|
|
|
+ if (!name.equals("X-Api-Access-Key")) {
|
|
|
+ logger.debug("{}: {}", name, request.headers().get(name));
|
|
|
+ } else {
|
|
|
+ logger.debug("{}: ******", name);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ // 发送请求并流式接收响应
|
|
|
+ try (Response response = client.newCall(request).execute()) {
|
|
|
+ if (!response.isSuccessful()) {
|
|
|
+ String errorBody = response.body() != null ? response.body().string() : "无响应体";
|
|
|
+ logger.error("豆包TTS请求失败,状态码: {}, 响应: {}, 错误体: {}",
|
|
|
+ response.code(), response.message(), errorBody);
|
|
|
+ throw new IOException("请求失败: " + response + ",错误体: " + errorBody);
|
|
|
+ }
|
|
|
+
|
|
|
+ // 读取响应体并逐行解析JSON,处理SSE流式响应
|
|
|
+ try (InputStream inputStream = response.body().byteStream();
|
|
|
+ java.io.BufferedReader reader = new java.io.BufferedReader(new java.io.InputStreamReader(inputStream))) {
|
|
|
+ String line;
|
|
|
+ StringBuilder base64AudioBuilder = new StringBuilder();
|
|
|
+ ObjectMapper objectMapper2 = new ObjectMapper();
|
|
|
+ boolean hasAudioData = false;
|
|
|
+
|
|
|
+ while ((line = reader.readLine()) != null) {
|
|
|
+ if (line.trim().isEmpty()) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
+ logger.debug("豆包TTS响应行: {}", line);
|
|
|
+
|
|
|
+ try {
|
|
|
+ // 解析单行JSON
|
|
|
+ Map<String, Object> responseMap = objectMapper2.readValue(line, Map.class);
|
|
|
+
|
|
|
+ // 检查响应状态
|
|
|
+ int code = (int) responseMap.get("code");
|
|
|
+ if (code != 0 && code != 20000000) {
|
|
|
+ String message = (String) responseMap.get("message");
|
|
|
+ throw new IOException("豆包TTS服务返回错误: code=" + code + ", message=" + message);
|
|
|
+ }
|
|
|
+
|
|
|
+ // 提取音频数据
|
|
|
+ Object data = responseMap.get("data");
|
|
|
+ if (data != null && data instanceof String) {
|
|
|
+ String chunk = data.toString();
|
|
|
+ if (!chunk.isEmpty()) {
|
|
|
+ base64AudioBuilder.append(chunk);
|
|
|
+ hasAudioData = true;
|
|
|
+ logger.debug("提取到音频数据块,长度: {} 字符,累计长度: {} 字符",
|
|
|
+ chunk.length(), base64AudioBuilder.length());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } catch (Exception e) {
|
|
|
+ logger.warn("解析响应行失败: {}", e.getMessage());
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ if (hasAudioData && base64AudioBuilder.length() > 0) {
|
|
|
+ String base64Audio = base64AudioBuilder.toString();
|
|
|
+ logger.info("成功提取完整base64音频数据,总长度: {} 字符", base64Audio.length());
|
|
|
+
|
|
|
+ // 解码base64音频数据
|
|
|
+ byte[] audioBytes = java.util.Base64.getDecoder().decode(base64Audio);
|
|
|
+ logger.info("成功解码音频数据,长度: {} 字节", audioBytes.length);
|
|
|
+ return audioBytes;
|
|
|
+ } else {
|
|
|
+ // 没有音频数据
|
|
|
+ logger.warn("豆包TTS响应没有音频数据");
|
|
|
+ throw new IOException("豆包TTS响应没有音频数据");
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|