From c776fb95b7b324c93d3b4d24ab96bd7a601c70c4 Mon Sep 17 00:00:00 2001 From: gaoziman <2942894660@qq.com> Date: Mon, 22 Dec 2025 23:22:39 +0800 Subject: [PATCH] =?UTF-8?q?feat(=E5=89=8D=E7=AB=AF):=20=E8=81=8A=E5=A4=A9?= =?UTF-8?q?=E9=92=A9=E5=AD=90=E6=94=AF=E6=8C=81=E6=96=87=E6=A1=A3=E4=B8=8A?= =?UTF-8?q?=E4=BC=A0=E5=92=8C=E8=A7=A3=E6=9E=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 集成文档类型检测工具 - 支持PDF/Word/Excel文件上传 - PDF文档转Base64传给后端API - Office文档(Word/Excel)传给后端解析 - 文本文件直接读取内容 - 添加文档大小验证 --- src/hooks/useStreamChat.ts | 97 +++++++++++++++++++++++++++++++++++++- 1 file changed, 96 insertions(+), 1 deletion(-) diff --git a/src/hooks/useStreamChat.ts b/src/hooks/useStreamChat.ts index 93cb9df..e0bf493 100644 --- a/src/hooks/useStreamChat.ts +++ b/src/hooks/useStreamChat.ts @@ -2,6 +2,16 @@ import { useState, useCallback, useRef } from 'react'; import { executePythonInPyodide, type LoadingCallback } from '@/services/tools/pyodideRunner'; +import { + detectDocumentType, + isPdfFile, + isOfficeDocument, + validateDocumentSize, + fileToBase64 as documentFileToBase64, + type PdfDocumentData, + type OfficeDocumentData, + getFileMimeType, +} from '@/utils/document-utils'; export interface StreamMessage { type: 'thinking' | 'text' | 'tool_use_start' | 'tool_execution_result' | 'tool_search_images' | 'tool_search_videos' | 'pyodide_execution_required' | 'tool_used' | 'done' | 'error'; @@ -263,16 +273,23 @@ export function useStreamChat() { const uploadedDocuments: UploadedDocument[] = []; const imageContents: { type: 'image'; media_type: string; data: string }[] = []; const documentContents: { name: string; content: string }[] = []; + // PDF 文档(直接传给 Claude API) + const pdfDocuments: PdfDocumentData[] = []; + // Office 文档(Word/Excel,需要后端解析) + const officeDocuments: OfficeDocumentData[] = []; if (files && files.length > 0) { console.log('[useStreamChat] Processing files:', files.length); for (const fileInfo of files) { + const docType = detectDocumentType(fileInfo.file); console.log('[useStreamChat] File info:', { name: fileInfo.file.name, type: fileInfo.file.type, size: fileInfo.file.size, isImage: fileInfo.file.type.startsWith('image/'), + docType: docType, }); + // 处理图片文件 if (fileInfo.file.type.startsWith('image/')) { try { @@ -291,6 +308,67 @@ export function useStreamChat() { console.error('Failed to convert image to base64:', err); } } + // 处理 PDF 文件(使用 Claude 原生支持) + else if (isPdfFile(fileInfo.file)) { + // 验证文件大小 + const validation = validateDocumentSize(fileInfo.file); + if (!validation.valid) { + console.error('[useStreamChat] PDF validation failed:', validation.error); + // 可以选择抛出错误或显示提示 + continue; + } + + try { + const base64 = await documentFileToBase64(fileInfo.file); + console.log('[useStreamChat] PDF converted to base64, length:', base64.length); + pdfDocuments.push({ + name: fileInfo.file.name, + size: fileInfo.file.size, + data: base64, + media_type: 'application/pdf', + }); + // 保存到 uploadedDocuments 用于前端显示 + uploadedDocuments.push({ + name: fileInfo.file.name, + size: fileInfo.file.size, + type: 'pdf', + content: `[PDF 文档: ${fileInfo.file.name}]`, // PDF 内容由 Claude 直接处理 + }); + } catch (err) { + console.error('Failed to convert PDF to base64:', err); + } + } + // 处理 Office 文档(Word/Excel,需要后端解析) + else if (isOfficeDocument(fileInfo.file)) { + // 验证文件大小 + const validation = validateDocumentSize(fileInfo.file); + if (!validation.valid) { + console.error('[useStreamChat] Office document validation failed:', validation.error); + continue; + } + + try { + const base64 = await documentFileToBase64(fileInfo.file); + const mimeType = getFileMimeType(fileInfo.file); + console.log('[useStreamChat] Office document converted to base64, length:', base64.length, 'type:', docType); + officeDocuments.push({ + name: fileInfo.file.name, + size: fileInfo.file.size, + data: base64, + type: docType as 'word' | 'excel', + mimeType: mimeType, + }); + // 保存到 uploadedDocuments 用于前端显示 + uploadedDocuments.push({ + name: fileInfo.file.name, + size: fileInfo.file.size, + type: docType, + content: `[${docType === 'word' ? 'Word' : 'Excel'} 文档: ${fileInfo.file.name}]`, // 内容由后端解析后补充 + }); + } catch (err) { + console.error('Failed to convert Office document to base64:', err); + } + } // 处理文本类文件 else if (isTextFile(fileInfo.file)) { try { @@ -348,7 +426,7 @@ export function useStreamChat() { abortControllerRef.current = new AbortController(); try { - // 调试日志:确认图片数据 + // 调试日志:确认图片和文档数据 console.log('[useStreamChat] Sending request with:', { conversationId, messageLength: finalMessage.length, @@ -356,11 +434,24 @@ export function useStreamChat() { tools, enableThinking, imagesCount: imageContents.length, + pdfDocumentsCount: pdfDocuments.length, + officeDocumentsCount: officeDocuments.length, images: imageContents.length > 0 ? imageContents.map(img => ({ type: img.type, media_type: img.media_type, dataLength: img.data.length, })) : undefined, + pdfDocuments: pdfDocuments.length > 0 ? pdfDocuments.map(doc => ({ + name: doc.name, + size: doc.size, + dataLength: doc.data.length, + })) : undefined, + officeDocuments: officeDocuments.length > 0 ? officeDocuments.map(doc => ({ + name: doc.name, + size: doc.size, + type: doc.type, + dataLength: doc.data.length, + })) : undefined, }); const response = await fetch('/api/chat', { @@ -381,6 +472,10 @@ export function useStreamChat() { uploadedImages: uploadedImages.length > 0 ? uploadedImages : undefined, // 传递上传的文档用于保存到数据库 uploadedDocuments: uploadedDocuments.length > 0 ? uploadedDocuments : undefined, + // 传递 PDF 文档给后端(用于 Claude 原生 document 类型) + pdfDocuments: pdfDocuments.length > 0 ? pdfDocuments : undefined, + // 传递 Office 文档给后端(需要后端解析) + officeDocuments: officeDocuments.length > 0 ? officeDocuments : undefined, }), signal: abortControllerRef.current.signal, });