feat(前端): 聊天钩子支持文档上传和解析

- 集成文档类型检测工具
- 支持PDF/Word/Excel文件上传
- PDF文档转Base64传给后端API
- Office文档(Word/Excel)传给后端解析
- 文本文件直接读取内容
- 添加文档大小验证
This commit is contained in:
gaoziman 2025-12-22 23:22:39 +08:00
parent 2344024e84
commit c776fb95b7

View File

@ -2,6 +2,16 @@
import { useState, useCallback, useRef } from 'react'; import { useState, useCallback, useRef } from 'react';
import { executePythonInPyodide, type LoadingCallback } from '@/services/tools/pyodideRunner'; import { executePythonInPyodide, type LoadingCallback } from '@/services/tools/pyodideRunner';
import {
detectDocumentType,
isPdfFile,
isOfficeDocument,
validateDocumentSize,
fileToBase64 as documentFileToBase64,
type PdfDocumentData,
type OfficeDocumentData,
getFileMimeType,
} from '@/utils/document-utils';
export interface StreamMessage { export interface StreamMessage {
type: 'thinking' | 'text' | 'tool_use_start' | 'tool_execution_result' | 'tool_search_images' | 'tool_search_videos' | 'pyodide_execution_required' | 'tool_used' | 'done' | 'error'; type: 'thinking' | 'text' | 'tool_use_start' | 'tool_execution_result' | 'tool_search_images' | 'tool_search_videos' | 'pyodide_execution_required' | 'tool_used' | 'done' | 'error';
@ -263,16 +273,23 @@ export function useStreamChat() {
const uploadedDocuments: UploadedDocument[] = []; const uploadedDocuments: UploadedDocument[] = [];
const imageContents: { type: 'image'; media_type: string; data: string }[] = []; const imageContents: { type: 'image'; media_type: string; data: string }[] = [];
const documentContents: { name: string; content: string }[] = []; const documentContents: { name: string; content: string }[] = [];
// PDF 文档(直接传给 Claude API)
const pdfDocuments: PdfDocumentData[] = [];
// Office 文档(Word/Excel)需要后端解析
const officeDocuments: OfficeDocumentData[] = [];
if (files && files.length > 0) { if (files && files.length > 0) {
console.log('[useStreamChat] Processing files:', files.length); console.log('[useStreamChat] Processing files:', files.length);
for (const fileInfo of files) { for (const fileInfo of files) {
const docType = detectDocumentType(fileInfo.file);
console.log('[useStreamChat] File info:', { console.log('[useStreamChat] File info:', {
name: fileInfo.file.name, name: fileInfo.file.name,
type: fileInfo.file.type, type: fileInfo.file.type,
size: fileInfo.file.size, size: fileInfo.file.size,
isImage: fileInfo.file.type.startsWith('image/'), isImage: fileInfo.file.type.startsWith('image/'),
docType: docType,
}); });
// 处理图片文件 // 处理图片文件
if (fileInfo.file.type.startsWith('image/')) { if (fileInfo.file.type.startsWith('image/')) {
try { try {
@ -291,6 +308,67 @@ export function useStreamChat() {
console.error('Failed to convert image to base64:', err); console.error('Failed to convert image to base64:', err);
} }
} }
// 处理 PDF 文件(使用 Claude 原生支持)
else if (isPdfFile(fileInfo.file)) {
// 验证文件大小
const validation = validateDocumentSize(fileInfo.file);
if (!validation.valid) {
console.error('[useStreamChat] PDF validation failed:', validation.error);
// 可以选择抛出错误或显示提示
continue;
}
try {
const base64 = await documentFileToBase64(fileInfo.file);
console.log('[useStreamChat] PDF converted to base64, length:', base64.length);
pdfDocuments.push({
name: fileInfo.file.name,
size: fileInfo.file.size,
data: base64,
media_type: 'application/pdf',
});
// 保存到 uploadedDocuments 用于前端显示
uploadedDocuments.push({
name: fileInfo.file.name,
size: fileInfo.file.size,
type: 'pdf',
content: `[PDF 文档: ${fileInfo.file.name}]`, // PDF 内容由 Claude 直接处理
});
} catch (err) {
console.error('Failed to convert PDF to base64:', err);
}
}
// 处理 Office 文档(Word/Excel)需要后端解析
else if (isOfficeDocument(fileInfo.file)) {
// 验证文件大小
const validation = validateDocumentSize(fileInfo.file);
if (!validation.valid) {
console.error('[useStreamChat] Office document validation failed:', validation.error);
continue;
}
try {
const base64 = await documentFileToBase64(fileInfo.file);
const mimeType = getFileMimeType(fileInfo.file);
console.log('[useStreamChat] Office document converted to base64, length:', base64.length, 'type:', docType);
officeDocuments.push({
name: fileInfo.file.name,
size: fileInfo.file.size,
data: base64,
type: docType as 'word' | 'excel',
mimeType: mimeType,
});
// 保存到 uploadedDocuments 用于前端显示
uploadedDocuments.push({
name: fileInfo.file.name,
size: fileInfo.file.size,
type: docType,
content: `[${docType === 'word' ? 'Word' : 'Excel'} 文档: ${fileInfo.file.name}]`, // 内容由后端解析后补充
});
} catch (err) {
console.error('Failed to convert Office document to base64:', err);
}
}
// 处理文本类文件 // 处理文本类文件
else if (isTextFile(fileInfo.file)) { else if (isTextFile(fileInfo.file)) {
try { try {
@ -348,7 +426,7 @@ export function useStreamChat() {
abortControllerRef.current = new AbortController(); abortControllerRef.current = new AbortController();
try { try {
// 调试日志:确认图片数据 // 调试日志:确认图片和文档数据
console.log('[useStreamChat] Sending request with:', { console.log('[useStreamChat] Sending request with:', {
conversationId, conversationId,
messageLength: finalMessage.length, messageLength: finalMessage.length,
@ -356,11 +434,24 @@ export function useStreamChat() {
tools, tools,
enableThinking, enableThinking,
imagesCount: imageContents.length, imagesCount: imageContents.length,
pdfDocumentsCount: pdfDocuments.length,
officeDocumentsCount: officeDocuments.length,
images: imageContents.length > 0 ? imageContents.map(img => ({ images: imageContents.length > 0 ? imageContents.map(img => ({
type: img.type, type: img.type,
media_type: img.media_type, media_type: img.media_type,
dataLength: img.data.length, dataLength: img.data.length,
})) : undefined, })) : undefined,
pdfDocuments: pdfDocuments.length > 0 ? pdfDocuments.map(doc => ({
name: doc.name,
size: doc.size,
dataLength: doc.data.length,
})) : undefined,
officeDocuments: officeDocuments.length > 0 ? officeDocuments.map(doc => ({
name: doc.name,
size: doc.size,
type: doc.type,
dataLength: doc.data.length,
})) : undefined,
}); });
const response = await fetch('/api/chat', { const response = await fetch('/api/chat', {
@ -381,6 +472,10 @@ export function useStreamChat() {
uploadedImages: uploadedImages.length > 0 ? uploadedImages : undefined, uploadedImages: uploadedImages.length > 0 ? uploadedImages : undefined,
// 传递上传的文档用于保存到数据库 // 传递上传的文档用于保存到数据库
uploadedDocuments: uploadedDocuments.length > 0 ? uploadedDocuments : undefined, uploadedDocuments: uploadedDocuments.length > 0 ? uploadedDocuments : undefined,
// 传递 PDF 文档给后端(用于 Claude 原生 document 类型)
pdfDocuments: pdfDocuments.length > 0 ? pdfDocuments : undefined,
// 传递 Office 文档给后端(需要后端解析)
officeDocuments: officeDocuments.length > 0 ? officeDocuments : undefined,
}), }),
signal: abortControllerRef.current.signal, signal: abortControllerRef.current.signal,
}); });