Переглянути джерело

优化语音识别组件,整理公用功能,减少父组件代码逻辑冗余:
1、新增 inputSelector 属性,用于指定输入框的选择器
2、新增 recordingStartText 和 recordingStartCursorPos 变量,用于记录录音开始时的文本和光标位置
3、新增 recordInputState 方法,用于记录录音开始时的输入框状态
4、toggleSpeechInput 方法在开始录音时调用 recordInputState 方法
5、onresult 事件处理改为发送一个对象,包含原始文本(originalText)、处理后文本(processedText)和光标位置(cursorPos)

liyanbo 1 місяць тому
батько
коміт
c3c1e0744e

+ 28 - 19
src/components/ai/image/ImageToImage.vue

@@ -110,11 +110,12 @@
            <ImageUpload v-model="uploadedImage" ref="imageUploadRef"/>
           <!-- 语音输入按钮 -->
           <VoiceInput
-              @voiceRecognized="handleVoiceRecognized"
-              @recordingStatusChanged="handleRecordingStatusChanged"
-              lang="zh-CN"
-              maxDuration="10"
-          />
+                inputSelector="input[type='text']"
+                lang="zh-CN"
+                maxDuration="10"
+                @voiceRecognized="handleVoiceRecognized"
+                @recordingStatusChanged="handleRecordingStatusChanged"
+              />
 
           <!-- 终止按钮 -->
           <div
@@ -235,9 +236,9 @@ const voiceRecognizedText = ref(""); // 实时语音识别结果
 // 用于控制输入框显示的内容
 const displayedPrompt = computed({
   get() {
-    // 录音时,显示inputMessage.value + 实时语音识别结果
+    // 录音时,只显示inputMessage.value(已经在handleVoiceRecognized中实时更新)
     if (isVoiceRecording.value) {
-      return inputMessage.value ? `${inputMessage.value} ${voiceRecognizedText.value}` : voiceRecognizedText.value;
+      return inputMessage.value;
     }
     // 不录音时,只显示inputMessage.value
     return inputMessage.value;
@@ -251,13 +252,24 @@ const displayedPrompt = computed({
 });
 
 // 语音输入识别结果处理
-const handleVoiceRecognized = (text) => {
+const handleVoiceRecognized = (data) => {
   if (isVoiceRecording.value) {
-    // 在同一次录音过程中,只更新临时变量,不修改inputMessage.value
-    voiceRecognizedText.value = text;
+    // 在同一次录音过程中,实时更新文本框内容
+    voiceRecognizedText.value = data.originalText;
+    inputMessage.value = data.processedText;
   } else {
-    // 在录音结束时,将最终的语音内容追加到inputMessage.value
-    inputMessage.value = inputMessage.value ? `${inputMessage.value} ${text}` : text;
+    // 在录音结束时,将最终的语音内容插入到光标位置
+    const input = document.querySelector('input[type="text"]');
+    if (input) {
+      inputMessage.value = data.processedText;
+      // 重新设置光标位置到插入文本的末尾
+      setTimeout(() => {
+        input.selectionStart = input.selectionEnd = data.cursorPos;
+      }, 0);
+    } else {
+      // 如果没有找到输入框,直接替换整个内容
+      inputMessage.value = data.originalText;
+    }
     // 清空临时变量
     voiceRecognizedText.value = "";
   }
@@ -268,13 +280,10 @@ const handleRecordingStatusChanged = (status) => {
   const wasRecording = isVoiceRecording.value;
   isVoiceRecording.value = status;
 
-  // 如果是从录音状态切换到非录音状态,需要将临时的语音识别结果追加到inputMessage.value
-  if (wasRecording && !isVoiceRecording.value) {
-    if (voiceRecognizedText.value) {
-      inputMessage.value = inputMessage.value ? `${inputMessage.value} ${voiceRecognizedText.value}` : voiceRecognizedText.value;
-      // 清空临时变量
-      voiceRecognizedText.value = "";
-    }
+  // 如果是从录音状态切换到非录音状态,清空临时变量
+  if (wasRecording && !status) {
+    // 清空临时变量
+    voiceRecognizedText.value = "";
   }
 };
 

+ 28 - 19
src/components/ai/image/TextToImage.vue

@@ -113,11 +113,12 @@
             />
           <!-- 语音输入按钮 -->
           <VoiceInput
-              @voiceRecognized="handleVoiceRecognized"
-              @recordingStatusChanged="handleRecordingStatusChanged"
-              lang="zh-CN"
-              maxDuration="10"
-          />
+                inputSelector="input[type='text']"
+                lang="zh-CN"
+                maxDuration="10"
+                @voiceRecognized="handleVoiceRecognized"
+                @recordingStatusChanged="handleRecordingStatusChanged"
+              />
           <!-- 终止按钮 -->
           <div
             v-if="conversationInProgress"
@@ -217,9 +218,9 @@ const voiceRecognizedText = ref(""); // 实时语音识别结果
 // 用于控制输入框显示的内容
 const displayedPrompt = computed({
   get() {
-    // 录音时,显示inputMessage.value + 实时语音识别结果
+    // 录音时,只显示inputMessage.value(已经在handleVoiceRecognized中实时更新)
     if (isVoiceRecording.value) {
-      return inputMessage.value ? `${inputMessage.value} ${voiceRecognizedText.value}` : voiceRecognizedText.value;
+      return inputMessage.value;
     }
     // 不录音时,只显示inputMessage.value
     return inputMessage.value;
@@ -233,13 +234,24 @@ const displayedPrompt = computed({
 });
 
 // 语音输入识别结果处理
-const handleVoiceRecognized = (text) => {
+const handleVoiceRecognized = (data) => {
   if (isVoiceRecording.value) {
-    // 在同一次录音过程中,只更新临时变量,不修改inputMessage.value
-    voiceRecognizedText.value = text;
+    // 在同一次录音过程中,实时更新文本框内容
+    voiceRecognizedText.value = data.originalText;
+    inputMessage.value = data.processedText;
   } else {
-    // 在录音结束时,将最终的语音内容追加到inputMessage.value
-    inputMessage.value = inputMessage.value ? `${inputMessage.value} ${text}` : text;
+    // 在录音结束时,将最终的语音内容插入到光标位置
+    const input = document.querySelector('input[type="text"]');
+    if (input) {
+      inputMessage.value = data.processedText;
+      // 重新设置光标位置到插入文本的末尾
+      setTimeout(() => {
+        input.selectionStart = input.selectionEnd = data.cursorPos;
+      }, 0);
+    } else {
+      // 如果没有找到输入框,直接替换整个内容
+      inputMessage.value = data.originalText;
+    }
     // 清空临时变量
     voiceRecognizedText.value = "";
   }
@@ -250,13 +262,10 @@ const handleRecordingStatusChanged = (status) => {
   const wasRecording = isVoiceRecording.value;
   isVoiceRecording.value = status;
 
-  // 如果是从录音状态切换到非录音状态,需要将临时的语音识别结果追加到inputMessage.value
-  if (wasRecording && !isVoiceRecording.value) {
-    if (voiceRecognizedText.value) {
-      inputMessage.value = inputMessage.value ? `${inputMessage.value} ${voiceRecognizedText.value}` : voiceRecognizedText.value;
-      // 清空临时变量
-      voiceRecognizedText.value = "";
-    }
+  // 如果是从录音状态切换到非录音状态,清空临时变量
+  if (wasRecording && !status) {
+    // 清空临时变量
+    voiceRecognizedText.value = "";
   }
 };
 

+ 26 - 17
src/components/ai/text/TextToText.vue

@@ -43,10 +43,11 @@
               />
                 <!-- 语音输入按钮 -->
               <VoiceInput
-                  @voiceRecognized="handleVoiceRecognized"
-                  @recordingStatusChanged="handleRecordingStatusChanged"
-                  lang="zh-CN"
-                  maxDuration="10"
+                inputSelector="input[type='text']"
+                lang="zh-CN"
+                maxDuration="10"
+                @voiceRecognized="handleVoiceRecognized"
+                @recordingStatusChanged="handleRecordingStatusChanged"
               />
 
               <!-- 终止问答按钮 -->
@@ -205,9 +206,9 @@ const voiceRecognizedText = ref(""); // 实时语音识别结果
 // 用于控制输入框显示的内容
 const displayedPrompt = computed({
   get() {
-    // 录音时,显示prompt.value + 实时语音识别结果
+    // 录音时,只显示prompt.value(已经在handleVoiceRecognized中实时更新)
     if (isVoiceRecording.value) {
-      return prompt.value ? `${prompt.value} ${voiceRecognizedText.value}` : voiceRecognizedText.value;
+      return prompt.value;
     }
     // 不录音时,只显示prompt.value
     return prompt.value;
@@ -256,13 +257,24 @@ const getConversation = async (id) => {
 };
 
 // 语音输入识别结果处理
-const handleVoiceRecognized = (text) => {
+const handleVoiceRecognized = (data) => {
   if (isVoiceRecording.value) {
-    // 在同一次录音过程中,只更新临时变量,不修改prompt.value
-    voiceRecognizedText.value = text;
+    // 在同一次录音过程中,实时更新文本框内容
+    voiceRecognizedText.value = data.originalText;
+    prompt.value = data.processedText;
   } else {
-    // 在录音结束时,将最终的语音内容追加到prompt.value
-    prompt.value = prompt.value ? `${prompt.value} ${text}` : text;
+    // 在录音结束时,将最终的语音内容插入到光标位置
+    const input = document.querySelector('input[type="text"]');
+    if (input) {
+      prompt.value = data.processedText;
+      // 重新设置光标位置到插入文本的末尾
+      setTimeout(() => {
+        input.selectionStart = input.selectionEnd = data.cursorPos;
+      }, 0);
+    } else {
+      // 如果没有找到输入框,直接替换整个内容
+      prompt.value = data.originalText;
+    }
     // 清空临时变量
     voiceRecognizedText.value = "";
   }
@@ -273,13 +285,10 @@ const handleRecordingStatusChanged = (isRecording) => {
   const wasRecording = isVoiceRecording.value;
   isVoiceRecording.value = isRecording;
 
-  // 如果是从录音状态切换到非录音状态,需要将临时的语音识别结果追加到prompt.value
+  // 如果是从录音状态切换到非录音状态,清空临时变量
   if (wasRecording && !isRecording) {
-    if (voiceRecognizedText.value) {
-      prompt.value = prompt.value ? `${prompt.value} ${voiceRecognizedText.value}` : voiceRecognizedText.value;
-      // 清空临时变量
-      voiceRecognizedText.value = "";
-    }
+    // 清空临时变量
+    voiceRecognizedText.value = "";
   }
 };
 

+ 28 - 19
src/components/ai/video/ImageToVideo.vue

@@ -75,11 +75,12 @@
              <ImageUpload v-model="uploadedImage" ref="imageUploadRef"/>
             <!-- 语音输入按钮 -->
                 <VoiceInput
-                    @voiceRecognized="handleVoiceRecognized"
-                    @recordingStatusChanged="handleRecordingStatusChanged"
-                    lang="zh-CN"
-                    maxDuration="10"
-                />
+                inputSelector="input[type='text']"
+                lang="zh-CN"
+                maxDuration="10"
+                @voiceRecognized="handleVoiceRecognized"
+                @recordingStatusChanged="handleRecordingStatusChanged"
+              />
 
             <!-- 终止按钮 -->
             <div
@@ -183,9 +184,9 @@ const voiceRecognizedText = ref(""); // 实时语音识别结果
 // 用于控制输入框显示的内容
 const displayedPrompt = computed({
   get() {
-    // 录音时,显示inputMessage.value + 实时语音识别结果
+    // 录音时,只显示inputMessage.value(已经在handleVoiceRecognized中实时更新)
     if (isVoiceRecording.value) {
-      return inputMessage.value ? `${inputMessage.value} ${voiceRecognizedText.value}` : voiceRecognizedText.value;
+      return inputMessage.value;
     }
     // 不录音时,只显示inputMessage.value
     return inputMessage.value;
@@ -199,13 +200,24 @@ const displayedPrompt = computed({
 });
 
 // 语音输入识别结果处理
-const handleVoiceRecognized = (text) => {
+const handleVoiceRecognized = (data) => {
   if (isVoiceRecording.value) {
-    // 在同一次录音过程中,只更新临时变量,不修改inputMessage.value
-    voiceRecognizedText.value = text;
+    // 在同一次录音过程中,实时更新文本框内容
+    voiceRecognizedText.value = data.originalText;
+    inputMessage.value = data.processedText;
   } else {
-    // 在录音结束时,将最终的语音内容追加到inputMessage.value
-    inputMessage.value = inputMessage.value ? `${inputMessage.value} ${text}` : text;
+    // 在录音结束时,将最终的语音内容插入到光标位置
+    const input = document.querySelector('input[type="text"]');
+    if (input) {
+      inputMessage.value = data.processedText;
+      // 重新设置光标位置到插入文本的末尾
+      setTimeout(() => {
+        input.selectionStart = input.selectionEnd = data.cursorPos;
+      }, 0);
+    } else {
+      // 如果没有找到输入框,直接替换整个内容
+      inputMessage.value = data.originalText;
+    }
     // 清空临时变量
     voiceRecognizedText.value = "";
   }
@@ -216,13 +228,10 @@ const handleRecordingStatusChanged = (status) => {
   const wasRecording = isVoiceRecording.value;
   isVoiceRecording.value = status;
 
-  // 如果是从录音状态切换到非录音状态,需要将临时的语音识别结果追加到inputMessage.value
-  if (wasRecording && !isVoiceRecording.value) {
-    if (voiceRecognizedText.value) {
-      inputMessage.value = inputMessage.value ? `${inputMessage.value} ${voiceRecognizedText.value}` : voiceRecognizedText.value;
-      // 清空临时变量
-      voiceRecognizedText.value = "";
-    }
+  // 如果是从录音状态切换到非录音状态,清空临时变量
+  if (wasRecording && !status) {
+    // 清空临时变量
+    voiceRecognizedText.value = "";
   }
 };
 

+ 28 - 19
src/components/ai/vision/VisionThink.vue

@@ -77,11 +77,12 @@
           <ImageUpload v-model="uploadedImage" ref="imageUploadRef"/>
           <!-- 语音输入按钮 -->
           <VoiceInput
-              @voiceRecognized="handleVoiceRecognized"
-              @recordingStatusChanged="handleRecordingStatusChanged"
-              lang="zh-CN"
-              maxDuration="10"
-          />
+                inputSelector="input[type='text']"
+                lang="zh-CN"
+                maxDuration="10"
+                @voiceRecognized="handleVoiceRecognized"
+                @recordingStatusChanged="handleRecordingStatusChanged"
+              />
 
           <!-- 终止按钮 -->
           <div
@@ -180,9 +181,9 @@ const voiceRecognizedText = ref(""); // 实时语音识别结果
 // 用于控制输入框显示的内容
 const displayedPrompt = computed({
   get() {
-    // 录音时,显示inputMessage.value + 实时语音识别结果
+    // 录音时,只显示inputMessage.value(已经在handleVoiceRecognized中实时更新)
     if (isVoiceRecording.value) {
-      return inputMessage.value ? `${inputMessage.value} ${voiceRecognizedText.value}` : voiceRecognizedText.value;
+      return inputMessage.value;
     }
     // 不录音时,只显示inputMessage.value
     return inputMessage.value;
@@ -196,13 +197,24 @@ const displayedPrompt = computed({
 });
 
 // 语音输入识别结果处理
-const handleVoiceRecognized = (text) => {
+const handleVoiceRecognized = (data) => {
   if (isVoiceRecording.value) {
-    // 在同一次录音过程中,只更新临时变量,不修改inputMessage.value
-    voiceRecognizedText.value = text;
+    // 在同一次录音过程中,实时更新文本框内容
+    voiceRecognizedText.value = data.originalText;
+    inputMessage.value = data.processedText;
   } else {
-    // 在录音结束时,将最终的语音内容追加到inputMessage.value
-    inputMessage.value = inputMessage.value ? `${inputMessage.value} ${text}` : text;
+    // 在录音结束时,将最终的语音内容插入到光标位置
+    const input = document.querySelector('input[type="text"]');
+    if (input) {
+      inputMessage.value = data.processedText;
+      // 重新设置光标位置到插入文本的末尾
+      setTimeout(() => {
+        input.selectionStart = input.selectionEnd = data.cursorPos;
+      }, 0);
+    } else {
+      // 如果没有找到输入框,直接替换整个内容
+      inputMessage.value = data.originalText;
+    }
     // 清空临时变量
     voiceRecognizedText.value = "";
   }
@@ -213,13 +225,10 @@ const handleRecordingStatusChanged = (status) => {
   const wasRecording = isVoiceRecording.value;
   isVoiceRecording.value = status;
 
-  // 如果是从录音状态切换到非录音状态,需要将临时的语音识别结果追加到inputMessage.value
-  if (wasRecording && !isVoiceRecording.value) {
-    if (voiceRecognizedText.value) {
-      inputMessage.value = inputMessage.value ? `${inputMessage.value} ${voiceRecognizedText.value}` : voiceRecognizedText.value;
-      // 清空临时变量
-      voiceRecognizedText.value = "";
-    }
+  // 如果是从录音状态切换到非录音状态,清空临时变量
+  if (wasRecording && !status) {
+    // 清空临时变量
+    voiceRecognizedText.value = "";
   }
 };
 

+ 49 - 6
src/components/ai/voice/VoiceInput.vue

@@ -42,6 +42,11 @@ const props = defineProps({
   maxDuration: {
     type: Number,
     default: 10
+  },
+  // 输入框选择器,用于获取输入框元素
+  inputSelector: {
+    type: String,
+    default: 'input[type="text"]'
   }
 })
 
@@ -55,6 +60,8 @@ const countdown = ref(0) // 倒计时剩余秒数
 const countdownTimer = ref(null) // 倒计时定时器
 const isBrowserSupported = ref(true) // 浏览器是否支持语音识别
 const mediaStream = ref(null) // 媒体流引用,用于释放资源
+const recordingStartText = ref('') // 录音开始时的原始文本
+const recordingStartCursorPos = ref(0) // 录音开始时的光标位置
 
 // 检测浏览器是否支持语音识别
 const checkBrowserSupport = () => {
@@ -77,17 +84,38 @@ const initSpeechRecognition = () => {
 
   const instance = new SpeechRecognition()
   instance.lang = props.lang
-  instance.interimResults = true
-  instance.continuous = true
+  instance.interimResults = true//是否返回临时结果
+  instance.maxAlternatives = 1//返回的最大候选结果数
+  instance.continuous = true//收录音频时是否连续识别
 
   instance.onresult = (event) => {
     // 遍历所有结果,包括临时结果
-    for (let i = event.resultIndex; i < event.results.length; i++) {
-      const transcript = event.results[i][0].transcript
+    let fullTranscript = ''
+    for (let i = 0; i < event.results.length; i++) {
+      fullTranscript += event.results[i][0].transcript
+    }
+    // 只在录音状态下发送识别结果
+    if (isRecording.value) {
+      // 计算处理后的文本
+      let processedText = ''
+      let cursorPos = 0
+      if (recordingStartCursorPos.value >= recordingStartText.value.length) {
+        // 光标位置在文本末尾或找不到输入框时,追加到末尾
+        processedText = recordingStartText.value + fullTranscript
+        cursorPos = recordingStartText.value.length + fullTranscript.length
+      } else {
+        // 光标位置在文本中间时,插入到光标位置
+        processedText = recordingStartText.value.substring(0, recordingStartCursorPos.value) + fullTranscript + recordingStartText.value.substring(recordingStartCursorPos.value)
+        cursorPos = recordingStartCursorPos.value + fullTranscript.length
+      }
       // 无论是否是最终结果,实时识别结果
-      emit('voiceRecognized', transcript)
+      emit('voiceRecognized', {
+        originalText: fullTranscript,
+        processedText: processedText,
+        cursorPos: cursorPos
+      })
       // 打印语音识别结果
-      console.log('语音输入文字:', transcript)
+      console.log('语音输入文字:', fullTranscript)
     }
   }
 
@@ -144,6 +172,19 @@ const getBrowserInfo = () => {
   return { name: 'Unknown', version: 'Unknown' }
 }
 
+// 记录录音开始时的输入框状态
+const recordInputState = () => {
+  const input = document.querySelector(props.inputSelector)
+  if (input) {
+    recordingStartCursorPos.value = input.selectionStart
+    recordingStartText.value = input.value
+  } else {
+    // 找不到输入框时,默认在最后面追加
+    recordingStartCursorPos.value = 999999
+    recordingStartText.value = ''
+  }
+}
+
 // 切换录音状态
 const toggleSpeechInput = () => {
   const browser = getBrowserInfo()
@@ -164,6 +205,8 @@ const toggleSpeechInput = () => {
     emit('recordingStatusChanged', false)
     recognition.value?.stop()
   } else {
+    // 记录输入框状态
+    recordInputState()
     // 初始化倒计时前再次清除定时器(防止快速点击)
     clearInterval(countdownTimer.value)
     countdown.value = props.maxDuration // 设置最大录音时间

+ 27 - 18
src/components/videopage/DialogComponents.vue

@@ -131,11 +131,12 @@
               <template #prepend>
                <div class="voice-input-wrapper">
                  <VoiceInput
-                   @voiceRecognized="handleVoiceRecognized"
-                   @recordingStatusChanged="handleRecordingStatusChanged"
-                   lang="zh-CN"
-                   maxDuration="10"
-                 />
+              inputSelector=".user-input input"
+              lang="zh-CN"
+              maxDuration="10"
+              @voiceRecognized="handleVoiceRecognized"
+              @recordingStatusChanged="handleRecordingStatusChanged"
+            />
                </div>
               </template>
 
@@ -223,9 +224,9 @@ const voiceRecognizedText = ref(""); // 实时语音识别结果
 // 用于控制输入框显示的内容
 const displayedPrompt = computed({
   get() {
-    // 录音时,显示prompt.value + 实时语音识别结果
+    // 录音时,只显示prompt.value(已经在handleVoiceRecognized中实时更新)
     if (isVoiceRecording.value) {
-      return prompt.value ? `${prompt.value} ${voiceRecognizedText.value}` : voiceRecognizedText.value;
+      return prompt.value;
     }
     // 不录音时,只显示prompt.value
     return prompt.value;
@@ -376,13 +377,24 @@ const sendMessage = async () => {
 
 
 // 语音输入识别结果处理
-const handleVoiceRecognized = (text) => {
+const handleVoiceRecognized = (data) => {
   if (isVoiceRecording.value) {
-    // 在同一次录音过程中,只更新临时变量,不修改prompt.value
-    voiceRecognizedText.value = text;
+    // 在同一次录音过程中,实时更新文本框内容
+    voiceRecognizedText.value = data.originalText;
+    prompt.value = data.processedText;
   } else {
-    // 在录音结束时,将最终的语音内容追加到prompt.value
-    prompt.value = prompt.value ? `${prompt.value} ${text}` : text;
+    // 在录音结束时,将最终的语音内容插入到光标位置
+    const input = document.querySelector('.user-input input');
+    if (input) {
+      prompt.value = data.processedText;
+      // 重新设置光标位置到插入文本的末尾
+      setTimeout(() => {
+        input.selectionStart = input.selectionEnd = data.cursorPos;
+      }, 0);
+    } else {
+      // 如果没有找到输入框,直接替换整个内容
+      prompt.value = data.originalText;
+    }
     // 清空临时变量
     voiceRecognizedText.value = "";
   }
@@ -393,13 +405,10 @@ const handleRecordingStatusChanged = (isRecording) => {
   const wasRecording = isVoiceRecording.value;
   isVoiceRecording.value = isRecording;
 
-  // 如果是从录音状态切换到非录音状态,需要将临时的语音识别结果追加到prompt.value
+  // 如果是从录音状态切换到非录音状态,清空临时变量
   if (wasRecording && !isRecording) {
-    if (voiceRecognizedText.value) {
-      prompt.value = prompt.value ? `${prompt.value} ${voiceRecognizedText.value}` : voiceRecognizedText.value;
-      // 清空临时变量
-      voiceRecognizedText.value = "";
-    }
+    // 清空临时变量
+    voiceRecognizedText.value = "";
   }
 };
 

+ 11 - 37
src/views/AIPage/aiGenerate/DialogContent.vue

@@ -99,11 +99,12 @@
         </div>
         <!-- 语音输入按钮 -->
         <div class="voice-input-outer" v-if="currentDialogue.type === 'user'" :class="{ 'recording': isVoiceRecording }">
-          <VoiceInput 
-            @voiceRecognized="handleVoiceRecognized" 
-            @recordingStatusChanged="handleRecordingStatusChanged"
+          <VoiceInput
+            inputSelector=".user-input-textarea"
             lang="zh-CN"
             maxDuration="10"
+            @voiceRecognized="handleVoiceRecognized"
+            @recordingStatusChanged="handleRecordingStatusChanged"
           />
         </div>
         <!-- 语音输入按钮占位符 -->
@@ -165,10 +166,6 @@ const userInput = ref('')
 const isVoiceRecording = ref(false)
 // 实时语音识别结果
 const voiceRecognizedText = ref("")
-// 录音开始时的光标位置
-const recordingStartCursorPos = ref(0)
-// 录音开始时的原始文本
-const recordingStartText = ref("")
 // 诗词显示状态
 const showPoem = ref(false)
 // 当前诗词内容
@@ -204,35 +201,24 @@ const currentBackgroundImage = computed(() => {
 const currentDialogueCache = ref(null)
 
 // 方法
-const handleVoiceRecognized = (text) => {
-  console.log('语音识别结果:', text)
+const handleVoiceRecognized = (data) => {
+  console.log('语音识别结果:', data.originalText)
   if (isVoiceRecording.value) {
     // 在同一次录音过程中,实时更新文本框内容
-    voiceRecognizedText.value = text
-    const textarea = document.querySelector('.user-input-textarea')
-    if (textarea) {
-      // 使用录音开始时的原始文本和光标位置
-      const startPos = recordingStartCursorPos.value
-      const originalText = recordingStartText.value
-      // 在光标位置插入实时识别结果
-      userInput.value = originalText.substring(0, startPos) + text + originalText.substring(startPos)
-    }
+    voiceRecognizedText.value = data.originalText
+    userInput.value = data.processedText
   } else {
     // 在录音结束时,将最终的语音内容追加到userInput.value
     const textarea = document.querySelector('.user-input-textarea')
     if (textarea) {
-      // 使用录音开始时的光标位置和原始文本
-      const startPos = recordingStartCursorPos.value
-      const originalText = recordingStartText.value
-      // 在光标位置插入文本
-      userInput.value = originalText.substring(0, startPos) + text + originalText.substring(startPos)
+      userInput.value = data.processedText
       // 重新设置光标位置到插入文本的末尾
       setTimeout(() => {
-        textarea.selectionStart = textarea.selectionEnd = startPos + text.length
+        textarea.selectionStart = textarea.selectionEnd = data.cursorPos
       }, 0)
     } else {
       // 如果没有找到输入框,直接替换整个内容
-      userInput.value = text
+      userInput.value = data.originalText
     }
     // 清空临时变量
     voiceRecognizedText.value = ""
@@ -245,18 +231,6 @@ const handleRecordingStatusChanged = (isRecording) => {
   const wasRecording = isVoiceRecording.value
   isVoiceRecording.value = isRecording
 
-  // 如果是从未录音状态切换到录音状态,记录当前光标位置和文本内容
-  if (!wasRecording && isRecording) {
-    const textarea = document.querySelector('.user-input-textarea')
-    if (textarea) {
-      recordingStartCursorPos.value = textarea.selectionStart
-      recordingStartText.value = userInput.value
-    } else {
-      recordingStartCursorPos.value = 0
-      recordingStartText.value = userInput.value
-    }
-  }
-
   // 如果是从录音状态切换到非录音状态,只需要清空临时变量
   if (wasRecording && !isRecording) {
     // 清空临时变量