feat(API): 扩展聊天接口支持多模态消息

请求参数扩展：
- 新增 displayMessage 字段，用于在数据库中存储原始用户输入
- 新增 images 字段，支持用户上传的图片（Base64 格式）
- 新增 uploadedImages 和 uploadedDocuments 字段，用于持久化

多模态消息处理：
- Claude API 支持 image 类型内容块
- Codex API 支持 input_image 格式
- 保存用户消息时一并存储上传的图片和文档

系统提示词增强：
- 添加文档深度分析规范
- 定义七步文档分析框架
- 包含批判性评价和实践价值分析指导
2025-12-20 12:14:18 +08:00 · 2025-12-20 12:14:18 +08:00 · 00b8589e03
commit 00b8589e03
parent acf17557c2
1 changed files with 216 additions and 12 deletions
--- a/src/app/api/chat/route.ts
+++ b/src/app/api/chat/route.ts
@ -8,9 +8,25 @@ import { executeTool } from '@/services/tools';
 /**
  * JSON body accepted by `POST /api/chat`.
  */
 interface ChatRequest {
  /** Identifier of the conversation this turn belongs to. */
  conversationId: string;
  /** Message text for this turn. */
  message: string;
  /**
   * Original user input. When present it is what gets stored as the message
   * content and used for the conversation title; otherwise `message` is used.
   */
  displayMessage?: string;
  model?: string;
  /** Tool names for this turn; the handler falls back to the conversation's tools. */
  tools?: string[];
  /** Thinking toggle; the handler falls back to the conversation's setting. */
  enableThinking?: boolean;
  /** Images uploaded by the user that are forwarded to the AI (`data` is Base64). */
  images?: Array<{
    type: 'image';
    media_type: string;
    data: string;
  }>;
  /** URLs of the uploaded images, persisted with the user message for display. */
  uploadedImages?: Array<string>;
  /** Documents uploaded by the user, persisted with the user message. */
  uploadedDocuments?: Array<{
    name: string;
    size: number;
    type: string;
    content: string;
  }>;
 }
 // 消息内容块类型（Claude）
@ -123,13 +139,78 @@ const DEFAULT_SYSTEM_PROMPT = `你是一个专业、友好的 AI 助手。请遵
 2. **代码完整性**：确保一段代码中包含所有需要展示的图表内容
 3. **使用默认样式**：系统已默认配置 seaborn-whitegrid 风格，无需在代码中再次设置 plt.style.use()
 4. **中文支持**：系统已配置中文字体（Noto Sans SC），可以直接使用中文标签和标题
-5. **不要重复绘图**：即使是展示不同数据，也应该在一张图表中使用子图(subplot)展示，而不是分开多次绘制`;
+5. **不要重复绘图**：即使是展示不同数据，也应该在一张图表中使用子图(subplot)展示，而不是分开多次绘制
 ## 文档深度分析规范（重要）
 当用户上传文档/文件并要求分析时，请按以下框架进行**深度分析**，不要简单总结：
 ### 1. 📋 文档概览
 - 文档类型、主题、作者/来源
 - 核心论点或目的
 - 目标受众和适用场景
 ### 2. 🏗️ 内容结构分析
 - 文档的整体结构和逻辑框架
 - 各章节/段落的主要内容概述
 - 内容之间的逻辑关系和递进层次
 ### 3. 💡 核心观点提取
 - 列出文档中的所有关键观点（至少5-10个要点）
 - 每个观点配以原文引用或具体依据
 - 分析观点的创新性、独特性和价值
 ### 4. 🔍 深度思考与洞察
 - 作者的写作意图和深层目的
 - 文档中隐含的假设或前提条件
 - 潜在的偏见、局限性或争议点
 - 与相关领域知识的联系和对比
 ### 5. 🎯 实践价值分析
 - 文档的实用性和可操作性评估
 - 具体的应用场景和使用建议
 - 实施步骤、注意事项和潜在风险
 ### 6. ⚖️ 批判性评价
 - 优点、亮点和值得学习之处
 - 不足之处和可改进的建议
 - 与同类内容/观点的横向对比
 ### 7. 📝 总结与延伸
 - 核心要点的结构化总结
 - 延伸阅读/深入学习的建议
 - 相关问题的探讨和思考
 **文档分析注意事项**：
 - 分析要详尽深入，每个部分至少2-3段有实质内容的文字
 - 使用具体的原文引用来支持分析观点
 - 提供独到见解和批判性思考，不要只做简单摘要复述
 - 整体分析字数不少于1500字，确保分析的深度和广度
 - 使用清晰的标题层级和列表格式，便于阅读
 - 分析文档时**不需要执行代码**，除非用户明确要求数据处理或可视化`;
 // POST /api/chat - 发送消息并获取 AI 回复
 export async function POST(request: Request) {
  try {
    const body: ChatRequest = await request.json();
-    const { conversationId, message, model, tools, enableThinking } = body;
+    const { conversationId, message, displayMessage, model, tools, enableThinking, images, uploadedImages, uploadedDocuments } = body;
    // 调试日志：确认接收到的图片数据
    console.log('[API/chat] Received request with:', {
      conversationId,
      messageLength: message?.length,
      displayMessageLength: displayMessage?.length,
      model,
      tools,
      enableThinking,
      imagesCount: images?.length || 0,
      uploadedImagesCount: uploadedImages?.length || 0,
      uploadedDocumentsCount: uploadedDocuments?.length || 0,
      images: images ? images.map(img => ({
        type: img.type,
        media_type: img.media_type,
        dataLength: img.data?.length || 0,
      })) : undefined,
    });
    // 获取用户设置
    const settings = await db.query.userSettings.findFirst({
@ -161,14 +242,17 @@ export async function POST(request: Request) {
      orderBy: (messages, { asc }) => [asc(messages.createdAt)],
    });
-    // 保存用户消息
+    // 保存用户消息（包括上传的图片和文档）
    // 使用 displayMessage（原始用户输入）作为显示内容，如果没有则使用 message
    const userMessageId = nanoid();
    await db.insert(messages).values({
      messageId: userMessageId,
      conversationId,
      role: 'user',
-      content: message,
+      content: displayMessage || message, // 使用原始用户输入作为显示内容
      status: 'completed',
      uploadedImages: uploadedImages && uploadedImages.length > 0 ? uploadedImages : null,
      uploadedDocuments: uploadedDocuments && uploadedDocuments.length > 0 ? uploadedDocuments : null,
    });
    // 准备 AI 消息 ID
@ -216,6 +300,7 @@ export async function POST(request: Request) {
              tools: tools || (conversation.tools as string[]) || [],
              controller,
              encoder,
              images, // 传递用户上传的图片
            });
            fullContent = result.fullContent;
@ -235,6 +320,7 @@ export async function POST(request: Request) {
              enableThinking: enableThinking ?? conversation.enableThinking ?? false,
              controller,
              encoder,
              images, // 传递用户上传的图片
            });
            fullContent = result.fullContent;
@ -256,6 +342,7 @@ export async function POST(request: Request) {
          });
          // 更新对话信息
          const titleSource = displayMessage || message;
          await db
            .update(conversations)
            .set({
@ -264,7 +351,7 @@ export async function POST(request: Request) {
              lastMessageAt: new Date(),
              updatedAt: new Date(),
              title: (conversation.messageCount || 0) === 0
-                ? message.slice(0, 50) + (message.length > 50 ? '...' : '')
+                ? titleSource.slice(0, 50) + (titleSource.length > 50 ? '...' : '')
                : conversation.title,
            })
            .where(eq(conversations.conversationId, conversationId));
@ -318,15 +405,26 @@ interface CodexChatParams {
  tools: string[];
  controller: ReadableStreamDefaultController;
  encoder: TextEncoder;
  // 用户上传的图片
  images?: {
    type: 'image';
    media_type: string;
    data: string;
  }[];
 }
 // Input item type for the Codex Response API.
 // Every item is a `message` with a role and its content.
 interface CodexInputItem {
  type: 'message';
  role: 'user' | 'assistant' | 'system';
  // This commit widens `content` from plain text to plain text OR an array of
  // multimodal parts (CodexMultimodalContent), so a user turn can carry images.
-  content: string;
+  content: string | CodexMultimodalContent[];
 }
 // Codex multimodal content parts.
 /** Text part of a multimodal Codex message. */
 interface CodexInputTextPart {
  type: 'input_text';
  text: string;
 }
 /** Image part of a multimodal Codex message; `image_url` carries the image reference (a data URL in this file). */
 interface CodexInputImagePart {
  type: 'input_image';
  image_url: string;
 }
 /** One part of a multimodal Codex message: either text or an image. */
 type CodexMultimodalContent = CodexInputTextPart | CodexInputImagePart;
 // Codex Response API 的工具调用类型
 interface CodexFunctionCall {
  call_id: string;
@ -350,6 +448,7 @@ async function handleCodexChat(params: CodexChatParams): Promise<{
    tools,
    controller,
    encoder,
    images,
  } = params;
  // 构建 Codex Response API 格式的输入
@ -359,9 +458,53 @@ async function handleCodexChat(params: CodexChatParams): Promise<{
      role: msg.role as 'user' | 'assistant',
      content: msg.content,
    })),
    { type: 'message' as const, role: 'user' as const, content: message },
  ];
  // Attach the current user turn (with multimodal content when images were uploaded).
  //
  // The `inputItems` literal built just above already ends with the current
  // user message as plain text. Unconditionally pushing the turn again here
  // would send the same user message to the model twice. So: when the trailing
  // item is that plain-text copy, replace it in place; only append a new item
  // when no such copy exists.
  {
    const trailingIndex = inputItems.length - 1;
    const trailingItem = trailingIndex >= 0 ? inputItems[trailingIndex] : undefined;
    const currentTurnAlreadyQueued =
      trailingItem !== undefined &&
      trailingItem.role === 'user' &&
      trailingItem.content === message;

    // Plain text by default; upgraded to an array of parts when images exist.
    let currentTurnContent: string | CodexMultimodalContent[] = message;

    if (images && images.length > 0) {
      console.log('[handleCodexChat] Building multimodal message with', images.length, 'images');
      const multimodalContent: CodexMultimodalContent[] = [];

      // Images go first, in upload order.
      for (const img of images) {
        console.log('[handleCodexChat] Adding image:', {
          type: img.type,
          media_type: img.media_type,
          dataLength: img.data?.length || 0,
        });
        // Codex/OpenAI format: the image is passed as a data URL.
        multimodalContent.push({
          type: 'input_image',
          image_url: `data:${img.media_type};base64,${img.data}`,
        });
      }

      // The text part follows the images; an empty message adds no text part.
      if (message) {
        multimodalContent.push({
          type: 'input_text',
          text: message,
        });
      }

      console.log('[handleCodexChat] Multimodal content blocks:', multimodalContent.length);
      currentTurnContent = multimodalContent;
    } else {
      console.log('[handleCodexChat] No images, using simple text message');
    }

    const currentTurn = {
      type: 'message' as const,
      role: 'user' as const,
      content: currentTurnContent,
    };

    if (currentTurnAlreadyQueued) {
      // Replace the plain-text copy instead of duplicating the turn.
      inputItems[trailingIndex] = currentTurn;
    } else {
      inputItems.push(currentTurn);
    }
  }
  // 构建 Codex Response API 格式的工具定义
  const codexTools = buildCodexToolDefinitions(tools);
@ -599,6 +742,12 @@ interface ClaudeChatParams {
  enableThinking: boolean;
  controller: ReadableStreamDefaultController;
  encoder: TextEncoder;
  // 用户上传的图片
  images?: {
    type: 'image';
    media_type: string;
    data: string;
  }[];
 }
 async function handleClaudeChat(params: ClaudeChatParams): Promise<{
@ -619,6 +768,7 @@ async function handleClaudeChat(params: ClaudeChatParams): Promise<{
    enableThinking,
    controller,
    encoder,
    images,
  } = params;
  // 构建消息历史
@ -627,11 +777,51 @@ async function handleClaudeChat(params: ClaudeChatParams): Promise<{
    content: msg.content,
  }));
-  // 添加当前用户消息
+  // 添加当前用户消息（支持多模态内容）
  // Append the current user turn to `messageHistory`: an array of content
  // blocks (images first, then text) when images were uploaded, otherwise the
  // plain message string.
  if (images && images.length > 0) {
    console.log('[handleClaudeChat] Building multimodal message with', images.length, 'images');
    // Images are present: build a multimodal message.
    const multimodalContent: ContentBlock[] = [];
    // Add the images first.
    for (const img of images) {
      console.log('[handleClaudeChat] Adding image:', {
        type: img.type,
        media_type: img.media_type,
        dataLength: img.data?.length || 0,
      });
      // NOTE(review): the `as unknown as 'text'` cast plus the directive below
      // work around ContentBlock not declaring an image variant. Extending
      // ContentBlock with an image block type would remove both — confirm
      // against the ContentBlock definition elsewhere in this file.
      multimodalContent.push({
        type: 'image' as unknown as 'text',
        // @ts-expect-error - the Claude API supports the image type, but the TypeScript type definition is incomplete
        source: {
          type: 'base64',
          media_type: img.media_type,
          data: img.data,
        },
      });
    }
    // Then add the text; an empty message adds no text block.
    if (message) {
      multimodalContent.push({
        type: 'text',
        text: message,
      });
    }
    console.log('[handleClaudeChat] Multimodal content blocks:', multimodalContent.length);
    messageHistory.push({
      role: 'user',
      content: multimodalContent,
    });
  } else {
    console.log('[handleClaudeChat] No images, using simple text message');
    // No images: use a simple text message.
    messageHistory.push({
      role: 'user',
      content: message,
    });
  }
  // 构建工具定义
  const toolDefinitions = buildClaudeToolDefinitions(tools);
@ -657,6 +847,20 @@ async function handleClaudeChat(params: ClaudeChatParams): Promise<{
      messages: currentMessages,
    };
    // 调试日志：查看发送给 Claude API 的消息内容
    console.log('[handleClaudeChat] Sending to Claude API:', {
      model,
      messagesCount: currentMessages.length,
      lastMessage: currentMessages.length > 0 ? {
        role: currentMessages[currentMessages.length - 1].role,
        contentType: typeof currentMessages[currentMessages.length - 1].content,
        contentIsArray: Array.isArray(currentMessages[currentMessages.length - 1].content),
        contentLength: Array.isArray(currentMessages[currentMessages.length - 1].content)
          ? (currentMessages[currentMessages.length - 1].content as unknown[]).length
          : (currentMessages[currentMessages.length - 1].content as string).length,
      } : null,
    });
    if (toolDefinitions.length > 0) {
      requestBody.tools = toolDefinitions;
    }