Kaynağa Gözat

加入直接对接前端写死的多模态视觉理解大模型

liyanbo 3 ay önce
ebeveyn
işleme
67880823bb

+ 1 - 1
byzs-module-ai/pom.xml

@@ -123,7 +123,7 @@
         </dependency>
 
         <!-- 豆包  -->
-        <!-- 火山引擎豆包 SDK(取消注释并添加) -->
+        <!-- 火山引擎豆包 SDK -->
         <dependency>
             <groupId>com.volcengine</groupId>
             <artifactId>volcengine-java-sdk-ark-runtime</artifactId>

+ 25 - 0
byzs-module-ai/src/main/java/cn/iocoder/byzs/module/ai/controller/admin/vision/vo/AiVisionThinkReqVO.java

@@ -0,0 +1,25 @@
+package cn.iocoder.byzs.module.ai.controller.admin.vision.vo;
+
+import com.fhs.core.trans.vo.VO;
+import io.swagger.v3.oas.annotations.media.Schema;
+import jakarta.validation.constraints.NotEmpty;
+import jakarta.validation.constraints.NotNull;
+import jakarta.validation.constraints.Size;
+import lombok.Data;
+
+import java.util.List;
+
+/**
+ * Request body of the admin "vision understanding" endpoint: a text prompt plus the
+ * images the model should look at.
+ */
+@Schema(description = "管理后台 - AI 视觉理解 Request VO")
+@Data
+public class AiVisionThinkReqVO implements VO {
+
+    /** Identifier of the AI model requested by the caller; must be present. */
+    @Schema(description = "模型编号", example = "1")
+    @NotNull(message = "模型编号不能为空")
+    private Long modelId;
+
+    /** Text instruction sent with the images; must be non-empty and at most 1200 characters. */
+    @Schema(description = "提示词", example = "请描述这张图片")
+    @NotEmpty(message = "提示词不能为空")
+    @Size(max = 1200, message = "提示词最大 1200")
+    private String prompt;
+
+    /** Images to analyse; the list itself must be present, an empty list is accepted. */
+    @Schema(description = "图片列表")
+    @NotNull(message = "图片列表不能为空")
+    private List<String> promptImage;
+}

+ 25 - 0
byzs-module-ai/src/main/java/cn/iocoder/byzs/module/ai/controller/admin/vision/vo/AiVisionThinkRespVO.java

@@ -0,0 +1,25 @@
+package cn.iocoder.byzs.module.ai.controller.admin.vision.vo;
+
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
+import com.fhs.core.trans.vo.VO;
+import io.swagger.v3.oas.annotations.media.Schema;
+import lombok.Data;
+import org.springframework.data.annotation.Id;
+
+/**
+ * Response payload of the admin "vision understanding" endpoint.
+ */
+@Data
+@Schema(description = "管理后台 - AI 视觉理解 Response VO")
+public class AiVisionThinkRespVO implements VO {
+
+    // Placeholder primary key, always initialised to 1. According to the original author's
+    // note, the framework insists on an @Id-annotated field; the value has no business meaning.
+    @Schema(description = "主键ID(仅满足框架序列化要求)", example = "1")
+    @Id
+    private Long id = 1L;
+
+    // Text answer produced for the submitted images and prompt.
+    @Schema(description = "返回结果", example = "识别到图片内容为:一只黑色的猫")
+    private String result;
+}

+ 2 - 1
byzs-module-ai/src/main/java/cn/iocoder/byzs/module/ai/enums/model/AiModelTypeEnum.java

@@ -22,7 +22,8 @@ public enum AiModelTypeEnum implements ArrayValuable<Integer> {
     EMBEDDING(5, "向量"),
     RERANK(6, "重排序"),
     IMAGE_EDIT(7, "图片编辑"),
-    VIDEO_IMAGE(8, "图生视频");
+    VIDEO_IMAGE(8, "图生视频"),
+    VISION_THINK(9, "视觉理解");
 
     /**
      * 类型

+ 99 - 0
byzs-web/src/main/java/cn/iocoder/byzs/module/web/controller/admin/ai/WebAiController.java

@@ -19,6 +19,8 @@ import cn.iocoder.byzs.module.ai.controller.admin.video.vo.AiVideoDrawReqVO;
 import cn.iocoder.byzs.module.ai.controller.admin.video.vo.AiVideoRespVO;
 import cn.iocoder.byzs.module.ai.controller.admin.virtualdevice.vo.VirtualDevicePageReqVO;
 import cn.iocoder.byzs.module.ai.controller.admin.virtualdevice.vo.VirtualDeviceRespVO;
+import cn.iocoder.byzs.module.ai.controller.admin.vision.vo.AiVisionThinkReqVO;
+import cn.iocoder.byzs.module.ai.controller.admin.vision.vo.AiVisionThinkRespVO;
 import cn.iocoder.byzs.module.ai.dal.dataobject.image.AiImageDO;
 import cn.iocoder.byzs.module.ai.dal.dataobject.mapgame.MapGameDO;
 import cn.iocoder.byzs.module.ai.dal.dataobject.model.AiChatRoleDO;
@@ -36,6 +38,18 @@ import cn.iocoder.byzs.module.ai.service.virtualdevice.VirtualDeviceService;
 import cn.iocoder.byzs.module.web.controller.admin.ai.vo.MapGameVO;
 import cn.iocoder.byzs.module.web.controller.admin.ai.vo.WebAiChatRoleVO;
 import cn.iocoder.byzs.module.web.service.ai.WebAiServiceImpl;
+import com.volcengine.ark.runtime.model.responses.constant.ResponsesConstants;
+import com.volcengine.ark.runtime.model.responses.content.InputContentItemImage;
+import com.volcengine.ark.runtime.model.responses.content.InputContentItemText;
+import com.volcengine.ark.runtime.model.responses.content.OutputContentItemText;
+import com.volcengine.ark.runtime.model.responses.item.BaseItem;
+import com.volcengine.ark.runtime.model.responses.item.ItemEasyMessage;
+import com.volcengine.ark.runtime.model.responses.item.ItemOutputMessage;
+import com.volcengine.ark.runtime.model.responses.item.MessageContent;
+import com.volcengine.ark.runtime.model.responses.request.CreateResponsesRequest;
+import com.volcengine.ark.runtime.model.responses.request.ResponsesInput;
+import com.volcengine.ark.runtime.model.responses.response.ResponseObject;
+import com.volcengine.ark.runtime.service.ArkService;
 import io.swagger.v3.oas.annotations.Operation;
 import io.swagger.v3.oas.annotations.Parameter;
 import io.swagger.v3.oas.annotations.tags.Tag;
@@ -49,6 +63,7 @@ import org.springframework.web.bind.annotation.*;
 import reactor.core.publisher.Flux;
 
 import java.util.List;
+import java.util.Optional;
 
 import static cn.iocoder.byzs.framework.common.pojo.CommonResult.success;
 import static cn.iocoder.byzs.framework.security.core.util.SecurityFrameworkUtils.getLoginUserId;
@@ -184,6 +199,90 @@ public class WebAiController {
     }
 
 
+    // ================ 视觉理解 ================
+
+    /**
+     * Sends the caller's images and prompt to the Ark "responses" API in a single user
+     * message and returns the text extracted from the reply.
+     *
+     * <p>NOTE(review): the API key, base URL and model name are hard-coded below, and the
+     * request's {@code modelId} is validated but never used to pick a model. The key is a
+     * secret committed to source control; it should be rotated and loaded from configuration.
+     *
+     * @param visionThinkReqVO validated request carrying the prompt and the image list
+     * @return success result whose {@code result} field holds the extracted text
+     */
+    @Operation(summary = "视觉理解-思考")
+    @PostMapping("/vision-think")
+    public CommonResult<AiVisionThinkRespVO> thinkVideo(@Valid @RequestBody AiVisionThinkReqVO visionThinkReqVO) {
+
+        // TODO(security): move this credential out of the source code
+        String apiKey = "4bb6060f-b4a1-4e0a-bf7b-71dcaa6311cb";
+        ArkService arkService = ArkService.builder()
+                .apiKey(apiKey)
+                .baseUrl("https://ark.cn-beijing.volces.com/api/v3")
+                .build();
+
+        try {
+            MessageContent.Builder messageContentBuilder = MessageContent.builder();
+
+            // Images go first, in the order the caller supplied them
+            List<String> promptImages = visionThinkReqVO.getPromptImage();
+            if (promptImages != null) {
+                for (String imageUrl : promptImages) {
+                    messageContentBuilder.addListItem(
+                            InputContentItemImage.builder().imageUrl(imageUrl).build()
+                    );
+                }
+            }
+
+            // The text prompt follows the images
+            String prompt = visionThinkReqVO.getPrompt();
+            if (prompt != null && !prompt.isEmpty()) {
+                messageContentBuilder.addListItem(
+                        InputContentItemText.builder().text(prompt).build()
+                );
+            }
+
+            CreateResponsesRequest request = CreateResponsesRequest.builder()
+                    .model("doubao-seed-1-8-251228")
+                    .input(ResponsesInput.builder().addListItem(
+                            ItemEasyMessage.builder().role(ResponsesConstants.MESSAGE_ROLE_USER).content(
+                                    messageContentBuilder.build()
+                            ).build()
+                    ).build())
+                    .build();
+            ResponseObject resp = arkService.createResponse(request);
+
+            AiVisionThinkRespVO response = new AiVisionThinkRespVO();
+            response.setResult(extractTextFromResponse(resp));
+            return success(response);
+        } finally {
+            // Always release the per-request SDK executor, even when the remote call
+            // or the response handling throws
+            arkService.shutdownExecutor();
+        }
+    }
+
+    /**
+     * Extracts the first available text fragment from an Ark "responses" API result.
+     *
+     * <p>The first output item that is an {@link ItemOutputMessage} of type {@code "message"}
+     * is selected; within it, the first {@link OutputContentItemText} carrying a non-null
+     * text is returned.
+     *
+     * @param resp the API response, may be {@code null}
+     * @return the extracted text, or an empty string when nothing suitable is found; never {@code null}
+     */
+    public static String extractTextFromResponse(ResponseObject resp) {
+        if (resp == null || resp.getOutput() == null) {
+            return "";
+        }
+        List<BaseItem> outputList = (List<BaseItem>) resp.getOutput();
+
+        // Find the first message item among the outputs
+        ItemOutputMessage messageItem = null;
+        for (BaseItem item : outputList) {
+            if (item instanceof ItemOutputMessage && "message".equals(item.getType())) {
+                messageItem = (ItemOutputMessage) item;
+                break;
+            }
+        }
+        if (messageItem == null || messageItem.getContent() == null) {
+            return "";
+        }
+
+        // Return the first text content that actually carries a value; a text item with a
+        // null payload is skipped so the method keeps its "never null" contract
+        List<?> contentList = (List<?>) messageItem.getContent();
+        for (Object content : contentList) {
+            if (content instanceof OutputContentItemText) {
+                String text = ((OutputContentItemText) content).getText();
+                if (text != null) {
+                    return text;
+                }
+            }
+        }
+
+        // Fallback when the message has no usable text content
+        return "";
+    }
+
     // ================ 虚拟实验室 ================
 
     @GetMapping("/selectVirtualDevice")