feat(API): add document parsing and multimodal input support to the chat endpoint

- Support PDFs via Claude's native document content type
- Integrate Office document parsing (Word/Excel)
- Support mixed image + document + text input
- Improve error handling: notify the user when document parsing fails
- Append parsed document content to the AI context
Author: gaoziman
Date: 2025-12-22 23:22:54 +08:00
Commit: 98dacf273c (parent: c776fb95b7)


@@ -6,6 +6,24 @@ import { nanoid } from 'nanoid';
import { executeTool } from '@/services/tools';
import { getCurrentUser } from '@/lib/auth';
import { decryptApiKey } from '@/lib/crypto';
import { parseDocument, type ParsedDocument } from '@/lib/document-parser';

// PDF document payload (sent by the frontend; passed straight through to the Claude API)
interface PdfDocumentData {
  name: string;
  size: number;
  data: string; // Base64-encoded
  media_type: 'application/pdf';
}

// Office document payload (sent by the frontend; parsed on the backend)
interface OfficeDocumentData {
  name: string;
  size: number;
  data: string; // Base64-encoded
  type: 'word' | 'excel';
  mimeType: string;
}

interface ChatRequest {
  conversationId: string;

@@ -29,6 +47,10 @@ interface ChatRequest {
    type: string;
    content: string;
  }[];
  // PDF documents (passed directly to the Claude API via the native document type)
  pdfDocuments?: PdfDocumentData[];
  // Office documents (Word/Excel); parsed on the backend
  officeDocuments?: OfficeDocumentData[];
}

// Message content block types (Claude)
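Taken together, a request that exercises the new fields might look like the sketch below. Every value is a placeholder, and it assumes the remaining ChatRequest fields not shown in this hunk are optional:

// Hypothetical request body matching the ChatRequest shape above (placeholder values).
const exampleBody = {
  conversationId: 'conv_123',
  message: 'Summarize the attached report and spreadsheet.',
  model: 'claude-sonnet-4', // placeholder model id
  pdfDocuments: [
    { name: 'report.pdf', size: 102_400, data: '<base64>', media_type: 'application/pdf' as const },
  ],
  officeDocuments: [
    {
      name: 'budget.xlsx',
      size: 20_480,
      data: '<base64>',
      type: 'excel' as const,
      mimeType: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    },
  ],
};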
@@ -240,9 +262,9 @@ const DEFAULT_SYSTEM_PROMPT = `你是一个专业、友好的 AI 助手。请遵
export async function POST(request: Request) {
  try {
    const body: ChatRequest = await request.json();
    const { conversationId, message, displayMessage, model, tools, enableThinking, images, uploadedImages, uploadedDocuments, pdfDocuments, officeDocuments } = body;

    // Debug log: confirm the image and document data received
    console.log('[API/chat] Received request with:', {
      conversationId,
      messageLength: message?.length,
@@ -253,11 +275,24 @@ export async function POST(request: Request) {
      imagesCount: images?.length || 0,
      uploadedImagesCount: uploadedImages?.length || 0,
      uploadedDocumentsCount: uploadedDocuments?.length || 0,
      pdfDocumentsCount: pdfDocuments?.length || 0,
      officeDocumentsCount: officeDocuments?.length || 0,
      images: images ? images.map(img => ({
        type: img.type,
        media_type: img.media_type,
        dataLength: img.data?.length || 0,
      })) : undefined,
      pdfDocuments: pdfDocuments ? pdfDocuments.map(doc => ({
        name: doc.name,
        size: doc.size,
        dataLength: doc.data?.length || 0,
      })) : undefined,
      officeDocuments: officeDocuments ? officeDocuments.map(doc => ({
        name: doc.name,
        size: doc.size,
        type: doc.type,
        dataLength: doc.data?.length || 0,
      })) : undefined,
    });

    // Get the currently signed-in user
@@ -360,12 +395,43 @@ export async function POST(request: Request) {
    // Read the temperature setting
    const temperature = parseFloat(conversation.temperature || settings.temperature || '0.7');

    // Parse Office documents (Word/Excel)
    const parsedOfficeDocuments: ParsedDocument[] = [];
    const documentParseErrors: { name: string; error: string }[] = []; // documents that failed to parse
    if (officeDocuments && officeDocuments.length > 0) {
      console.log('[API/chat] 📄 Parsing Office documents, count:', officeDocuments.length);
      for (const doc of officeDocuments) {
        try {
          const parsed = await parseDocument(doc.data, doc.name, doc.mimeType);
          if (parsed) {
            parsedOfficeDocuments.push(parsed);
            console.log('[API/chat] ✅ Parsed document:', doc.name, 'content length:', parsed.content.length);
          }
        } catch (parseError) {
          const errorMsg = parseError instanceof Error ? parseError.message : 'Unknown error';
          console.error('[API/chat] ❌ Failed to parse document:', doc.name, parseError);
          documentParseErrors.push({ name: doc.name, error: errorMsg });
        }
      }
    }

    let fullContent = '';
    let thinkingContent = '';
    let totalInputTokens = 0;
    let totalOutputTokens = 0;
    let usedTools: string[] = []; // names of the tools that were used

    // If any documents failed to parse, prepend a warning to the content
    if (documentParseErrors.length > 0) {
      const errorMessages = documentParseErrors.map(e => `- ${e.name}: ${e.error}`).join('\n');
      const warningText = `⚠️ The following documents failed to parse and their content cannot be analyzed:\n${errorMessages}\n\n`;
      fullContent = warningText;
      safeWriter.write({
        type: 'text',
        content: warningText,
      });
    }

    // [Important] Handler selection priority:
    // 1. Check apiFormat === 'openai' first: if the user picked the "OpenAI compatible" format,
    //    all models (including Codex models) go through the /v1/chat/completions endpoint
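parseDocument comes from '@/lib/document-parser', which this commit does not include. Below is a minimal sketch of what it could look like; the name/type/content fields mirror how ParsedDocument is used in this diff, but the choice of the mammoth and xlsx packages is an assumption, and the real implementation may differ:

// lib/document-parser.ts (hypothetical sketch, not the actual implementation)
import mammoth from 'mammoth';
import * as XLSX from 'xlsx';

export interface ParsedDocument {
  name: string;
  type: 'word' | 'excel';
  content: string;
}

export async function parseDocument(
  base64Data: string,
  name: string,
  mimeType: string,
): Promise<ParsedDocument | null> {
  const buffer = Buffer.from(base64Data, 'base64');

  if (mimeType.includes('wordprocessingml')) {
    // .docx: extract plain text with mammoth
    const result = await mammoth.extractRawText({ buffer });
    return { name, type: 'word', content: result.value };
  }

  if (mimeType.includes('spreadsheetml')) {
    // .xlsx: render each sheet as a CSV block with SheetJS
    const workbook = XLSX.read(buffer, { type: 'buffer' });
    const content = workbook.SheetNames
      .map(sheet => `# ${sheet}\n${XLSX.utils.sheet_to_csv(workbook.Sheets[sheet])}`)
      .join('\n\n');
    return { name, type: 'excel', content };
  }

  return null; // unsupported type; the caller skips null results
}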
@@ -391,6 +457,8 @@ export async function POST(request: Request) {
        encoder,
        images,
        metasoApiKey: decryptedMetasoApiKey,
        pdfDocuments,
        parsedOfficeDocuments,
      });

      fullContent = result.fullContent;
@@ -416,6 +484,8 @@ export async function POST(request: Request) {
        encoder,
        images, // pass along the user's uploaded images
        metasoApiKey: decryptedMetasoApiKey,
        pdfDocuments,
        parsedOfficeDocuments,
      });

      fullContent = result.fullContent;
@@ -439,6 +509,8 @@ export async function POST(request: Request) {
        encoder,
        images, // pass along the user's uploaded images
        metasoApiKey: decryptedMetasoApiKey,
        pdfDocuments,
        parsedOfficeDocuments,
      });

      fullContent = result.fullContent;
@@ -534,6 +606,10 @@ interface CodexChatParams {
  }[];
  // Metaso API key
  metasoApiKey?: string;
  // PDF documents (passed directly to the API)
  pdfDocuments?: PdfDocumentData[];
  // Parsed Office documents
  parsedOfficeDocuments?: ParsedDocument[];
}
// Input item types for the Codex Response API
@@ -574,6 +650,8 @@ async function handleCodexChat(params: CodexChatParams): Promise<{
    encoder,
    images,
    metasoApiKey,
    pdfDocuments,
    parsedOfficeDocuments,
  } = params;
  // Create a safe stream writer
@@ -591,13 +669,37 @@ async function handleCodexChat(params: CodexChatParams): Promise<{
  ];

  // Append the current user message (multimodal content supported)
  const hasImages = images && images.length > 0;
  const hasPdfDocuments = pdfDocuments && pdfDocuments.length > 0;
  const hasOfficeDocuments = parsedOfficeDocuments && parsedOfficeDocuments.length > 0;

  // Build the message text (user message, PDF notice, and parsed Office document content)
  let textContent = message;

  // The Codex/OpenAI format has no native PDF support; add a notice instead
  if (hasPdfDocuments) {
    const pdfInfo = pdfDocuments!.map(doc => `[PDF document: ${doc.name}]`).join('\n');
    textContent = `${textContent}\n\nNote: the user uploaded the following PDF files, but the current API format does not support native PDF parsing. Switch to the native Claude format for full PDF support.\n${pdfInfo}`;
    console.log('[handleCodexChat] PDF documents detected but not supported in Codex format');
  }

  // Append the parsed Office document content
  if (hasOfficeDocuments) {
    const officeContents = parsedOfficeDocuments!.map(doc => {
      const typeLabel = doc.type === 'word' ? 'Word' : doc.type === 'excel' ? 'Excel' : 'Document';
      return `\n\n--- ${typeLabel} file: ${doc.name} ---\n${doc.content}\n--- end of file ---`;
    }).join('\n');
    textContent = textContent + officeContents;
    console.log('[handleCodexChat] Added Office documents content, total length:', textContent.length);
  }

  if (hasImages) {
    console.log('[handleCodexChat] Building multimodal message with', images!.length, 'images');
    // If there are images, build a multimodal message
    const multimodalContent: CodexMultimodalContent[] = [];

    // Add the images first
    for (const img of images!) {
      console.log('[handleCodexChat] Adding image:', {
        type: img.type,
        media_type: img.media_type,
@@ -612,10 +714,10 @@ async function handleCodexChat(params: CodexChatParams): Promise<{
    }

    // Then add the text
    if (textContent) {
      multimodalContent.push({
        type: 'input_text',
        text: textContent,
      });
    }

@@ -626,12 +728,12 @@ async function handleCodexChat(params: CodexChatParams): Promise<{
      content: multimodalContent,
    });
  } else {
    console.log('[handleCodexChat] No images, using text message');
    // No images: use a plain text message
    inputItems.push({
      type: 'message' as const,
      role: 'user' as const,
      content: textContent,
    });
  }
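For a request carrying one PDF and one parsed Excel file, the textContent assembled above would come out roughly as follows (the question, file names, and cell values are placeholders):

// Illustrative value of textContent after the two append steps above:
//
//   What changed this quarter?
//
//   Note: the user uploaded the following PDF files, but the current API format
//   does not support native PDF parsing. Switch to the native Claude format for
//   full PDF support.
//   [PDF document: report.pdf]
//
//   --- Excel file: budget.xlsx ---
//   Month,Spend
//   January,1200
//   --- end of file ---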
@@ -912,6 +1014,10 @@ interface ClaudeChatParams {
  }[];
  // Metaso API key
  metasoApiKey?: string;
  // PDF documents (passed directly to the Claude API via the native document type)
  pdfDocuments?: PdfDocumentData[];
  // Parsed Office documents
  parsedOfficeDocuments?: ParsedDocument[];
}

async function handleClaudeChat(params: ClaudeChatParams): Promise<{
@@ -935,6 +1041,8 @@ async function handleClaudeChat(params: ClaudeChatParams): Promise<{
    encoder,
    images,
    metasoApiKey,
    pdfDocuments,
    parsedOfficeDocuments,
  } = params;

  // Create a safe stream writer
@@ -948,35 +1056,72 @@ async function handleClaudeChat(params: ClaudeChatParams): Promise<{
    content: msg.content,
  }));

  // Append the current user message (multimodal content supported: images, PDFs, Office documents)
  const hasImages = images && images.length > 0;
  const hasPdfDocuments = pdfDocuments && pdfDocuments.length > 0;
  const hasOfficeDocuments = parsedOfficeDocuments && parsedOfficeDocuments.length > 0;

  if (hasImages || hasPdfDocuments || hasOfficeDocuments) {
    console.log('[handleClaudeChat] Building multimodal message with:', {
      images: images?.length || 0,
      pdfDocuments: pdfDocuments?.length || 0,
      officeDocuments: parsedOfficeDocuments?.length || 0,
    });

    // Build a multimodal message
    const multimodalContent: ContentBlock[] = [];

    // 1. Add PDF documents (using Claude's native document type)
    if (hasPdfDocuments) {
      for (const pdf of pdfDocuments!) {
        console.log('[handleClaudeChat] Adding PDF document:', pdf.name, 'size:', pdf.size);
        multimodalContent.push({
          type: 'document' as unknown as 'text',
          // @ts-expect-error - the Claude API supports the document type, but the TypeScript definitions are incomplete
          source: {
            type: 'base64',
            media_type: 'application/pdf',
            data: pdf.data,
          },
        });
      }
    }

    // 2. Add images
    if (hasImages) {
      for (const img of images!) {
        console.log('[handleClaudeChat] Adding image:', {
          type: img.type,
          media_type: img.media_type,
          dataLength: img.data?.length || 0,
        });
        multimodalContent.push({
          type: 'image' as unknown as 'text',
          // @ts-expect-error - the Claude API supports the image type, but the TypeScript definitions are incomplete
          source: {
            type: 'base64',
            media_type: img.media_type,
            data: img.data,
          },
        });
      }
    }

    // 3. Build the message text (user message plus parsed Office document content)
    let textContent = message;
    if (hasOfficeDocuments) {
      const officeContents = parsedOfficeDocuments!.map(doc => {
        const typeLabel = doc.type === 'word' ? 'Word' : doc.type === 'excel' ? 'Excel' : 'Document';
        return `\n\n--- ${typeLabel} file: ${doc.name} ---\n${doc.content}\n--- end of file ---`;
      }).join('\n');
      textContent = message + officeContents;
      console.log('[handleClaudeChat] Added Office documents content, total length:', textContent.length);
    }

    // 4. Add the text content
    if (textContent) {
      multimodalContent.push({
        type: 'text',
        text: textContent,
      });
    }

@@ -986,8 +1131,8 @@ async function handleClaudeChat(params: ClaudeChatParams): Promise<{
      content: multimodalContent,
    });
  } else {
    console.log('[handleClaudeChat] No multimodal content, using simple text message');
    // No multimodal content: use a simple text message
    messageHistory.push({
      role: 'user',
      content: message,
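The document block pushed above matches the base64 PDF input shape of Anthropic's Messages API. For reference, a self-contained sketch with the official SDK is shown below; the model id is a placeholder, and recent @anthropic-ai/sdk releases may already type document blocks natively, which would make the casts and @ts-expect-error above unnecessary:

// Standalone sketch of Claude's native PDF input (placeholder values).
import Anthropic from '@anthropic-ai/sdk';

const client = new Anthropic({ apiKey: process.env.ANTHROPIC_API_KEY });

async function askAboutPdf(pdfBase64: string) {
  return client.messages.create({
    model: 'claude-sonnet-4', // placeholder model id
    max_tokens: 1024,
    messages: [{
      role: 'user',
      content: [
        {
          type: 'document',
          source: { type: 'base64', media_type: 'application/pdf', data: pdfBase64 },
        },
        { type: 'text', text: 'Summarize this PDF.' },
      ],
    }],
  });
}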
@@ -1323,6 +1468,10 @@ interface OpenAICompatibleChatParams {
  }[];
  // Metaso API key
  metasoApiKey?: string;
  // PDF documents
  pdfDocuments?: PdfDocumentData[];
  // Parsed Office documents
  parsedOfficeDocuments?: ParsedDocument[];
}

// OpenAI message format
@@ -1368,6 +1517,8 @@ async function handleOpenAICompatibleChat(params: OpenAICompatibleChatParams): Promise<{
    encoder,
    images,
    metasoApiKey,
    pdfDocuments,
    parsedOfficeDocuments,
  } = params;

  // Create a safe stream writer
@@ -1385,12 +1536,36 @@ async function handleOpenAICompatibleChat(params: OpenAICompatibleChatParams): Promise<{
  ];

  // Append the current user message (multimodal content supported)
  const hasImages = images && images.length > 0;
  const hasPdfDocuments = pdfDocuments && pdfDocuments.length > 0;
  const hasOfficeDocuments = parsedOfficeDocuments && parsedOfficeDocuments.length > 0;

  // Build the message text (user message, PDF notice, and parsed Office document content)
  let textContent = message;

  // The OpenAI format has no native PDF support; add a notice instead
  if (hasPdfDocuments) {
    const pdfInfo = pdfDocuments!.map(doc => `[PDF document: ${doc.name}]`).join('\n');
    textContent = `${textContent}\n\nNote: the user uploaded the following PDF files, but the current API format does not support native PDF parsing. Switch to the native Claude format for full PDF support.\n${pdfInfo}`;
    console.log('[handleOpenAICompatibleChat] PDF documents detected but not supported in OpenAI format');
  }

  // Append the parsed Office document content
  if (hasOfficeDocuments) {
    const officeContents = parsedOfficeDocuments!.map(doc => {
      const typeLabel = doc.type === 'word' ? 'Word' : doc.type === 'excel' ? 'Excel' : 'Document';
      return `\n\n--- ${typeLabel} file: ${doc.name} ---\n${doc.content}\n--- end of file ---`;
    }).join('\n');
    textContent = textContent + officeContents;
    console.log('[handleOpenAICompatibleChat] Added Office documents content, total length:', textContent.length);
  }

  if (hasImages) {
    console.log('[handleOpenAICompatibleChat] Building multimodal message with', images!.length, 'images');
    const multimodalContent: OpenAIMessageContent[] = [];

    // Add the images
    for (const img of images!) {
      console.log('[handleOpenAICompatibleChat] Adding image:', {
        type: img.type,
        media_type: img.media_type,
@@ -1405,10 +1580,10 @@ async function handleOpenAICompatibleChat(params: OpenAICompatibleChatParams): Promise<{
    }

    // Add the text
    if (textContent) {
      multimodalContent.push({
        type: 'text',
        text: textContent,
      });
    }

@@ -1417,10 +1592,10 @@ async function handleOpenAICompatibleChat(params: OpenAICompatibleChatParams): Promise<{
      content: multimodalContent,
    });
  } else {
    console.log('[handleOpenAICompatibleChat] No images, using text message');
    openaiMessages.push({
      role: 'user',
      content: textContent,
    });
  }
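End to end, a frontend caller would base64-encode the file and post it alongside the message. A hypothetical client-side sketch follows; the /api/chat path is inferred from the [API/chat] log prefix above, and everything else is a placeholder:

// Hypothetical browser-side caller (not part of this commit; placeholder values).
async function sendWithExcel(file: File, message: string): Promise<ReadableStream | null> {
  const bytes = new Uint8Array(await file.arrayBuffer());
  // Note: spreading a large file can overflow the call stack; chunk the conversion in production.
  const base64 = btoa(String.fromCharCode(...bytes));

  const res = await fetch('/api/chat', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      conversationId: 'conv_123', // placeholder
      message,
      officeDocuments: [{
        name: file.name,
        size: file.size,
        data: base64,
        type: 'excel',
        mimeType: file.type,
      }],
    }),
  });
  return res.body; // the endpoint streams its response
}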