Compare commits

...

4 Commits

Author SHA1 Message Date
gaoziman
98dacf273c feat(API): 聊天接口支持文档解析和多模态输入
- 支持PDF文档使用Claude原生document类型
- 集成Office文档解析(Word/Excel)
- 支持图片+文档+文本混合输入
- 改进错误处理: 文档解析失败时通知用户
- 解析后的文档内容添加到AI上下文
2025-12-22 23:22:54 +08:00
gaoziman
c776fb95b7 feat(前端): 聊天钩子支持文档上传和解析
- 集成文档类型检测工具
- 支持PDF/Word/Excel文件上传
- PDF文档转Base64传给后端API
- Office文档(Word/Excel)传给后端解析
- 文本文件直接读取内容
- 添加文档大小验证
2025-12-22 23:22:39 +08:00
gaoziman
2344024e84 feat(依赖): 添加word-extractor库支持.doc文件解析
- 安装 word-extractor@1.0.4 依赖
- 新增 src/types/word-extractor.d.ts 类型声明
  - WordExtractor 类
  - Document 类及其方法类型
  - 支持 Buffer 和文件路径两种输入方式
2025-12-22 23:22:26 +08:00
gaoziman
fa260137ac feat(文档解析): 添加PDF/Word/Excel文档解析工具
- 新增 src/lib/document-parser.ts: 后端文档解析器
  - 支持 Word 文档(.doc/.docx)解析
  - 支持 Excel 文档(.xlsx)转Markdown表格
  - PDF文档直接传递给Claude API原生处理

- 新增 src/utils/document-utils.ts: 前端文档工具
  - 文档类型检测函数
  - 文件大小验证
  - Base64编码转换
2025-12-22 23:22:13 +08:00
7 changed files with 1192 additions and 43 deletions

View File

@ -19,6 +19,7 @@
"drizzle-orm": "^0.45.1",
"jose": "^6.1.3",
"lucide-react": "^0.561.0",
"mammoth": "^1.11.0",
"nanoid": "^5.1.6",
"next": "16.0.10",
"nodemailer": "^7.0.11",
@ -29,6 +30,8 @@
"react-markdown": "^10.1.0",
"remark-gfm": "^4.0.1",
"sonner": "^2.0.7",
"word-extractor": "^1.0.4",
"xlsx": "^0.18.5",
"zod": "^4.2.1"
},
"devDependencies": {

View File

@ -26,6 +26,9 @@ importers:
lucide-react:
specifier: ^0.561.0
version: 0.561.0(react@19.2.1)
mammoth:
specifier: ^1.11.0
version: 1.11.0
nanoid:
specifier: ^5.1.6
version: 5.1.6
@ -56,6 +59,12 @@ importers:
sonner:
specifier: ^2.0.7
version: 2.0.7(react-dom@19.2.1(react@19.2.1))(react@19.2.1)
word-extractor:
specifier: ^1.0.4
version: 1.0.4
xlsx:
specifier: ^0.18.5
version: 0.18.5
zod:
specifier: ^4.2.1
version: 4.2.1
@ -1526,6 +1535,10 @@ packages:
cpu: [x64]
os: [win32]
'@xmldom/xmldom@0.8.11':
resolution: {integrity: sha512-cQzWCtO6C8TQiYl1ruKNn2U6Ao4o4WBBcbL61yJl84x+j5sOWWFU9X7DpND8XZG3daDppSsigMdfAIl2upQBRw==}
engines: {node: '>=10.0.0'}
acorn-jsx@5.3.2:
resolution: {integrity: sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==}
peerDependencies:
@ -1536,6 +1549,10 @@ packages:
engines: {node: '>=0.4.0'}
hasBin: true
adler-32@1.3.1:
resolution: {integrity: sha512-ynZ4w/nUUv5rrsR8UUGoe1VC9hZj6V5hU9Qw1HlMDJGEJw5S7TfTErWTjMys6M7vr0YWcPqs3qAr4ss0nDfP+A==}
engines: {node: '>=0.8'}
ajv@6.12.6:
resolution: {integrity: sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==}
@ -1543,6 +1560,9 @@ packages:
resolution: {integrity: sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==}
engines: {node: '>=8'}
argparse@1.0.10:
resolution: {integrity: sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==}
argparse@2.0.1:
resolution: {integrity: sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==}
@ -1607,6 +1627,9 @@ packages:
balanced-match@1.0.2:
resolution: {integrity: sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==}
base64-js@1.5.1:
resolution: {integrity: sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==}
baseline-browser-mapping@2.9.9:
resolution: {integrity: sha512-V8fbOCSeOFvlDj7LLChUcqbZrdKD9RU/VR260piF1790vT0mfLSwGc/Qzxv3IqiTukOpNtItePa0HBpMAj7MDg==}
hasBin: true
@ -1615,6 +1638,9 @@ packages:
resolution: {integrity: sha512-GlF5wPWnSa/X5LKM1o0wz0suXIINz1iHRLvTS+sLyi7XPbe5ycmYI3DlZqVGZZtDgl4DmasFg7gOB3JYbphV5g==}
hasBin: true
bluebird@3.4.7:
resolution: {integrity: sha512-iD3898SR7sWVRHbiQv+sHUtHnMvC1o3nW5rAcqnq3uOn07DSAppZYUkIGslDz6gXC7HfunPe7YVBgoEJASPcHA==}
bowser@2.13.1:
resolution: {integrity: sha512-OHawaAbjwx6rqICCKgSG0SAnT05bzd7ppyKLVUITZpANBaaMFBAsaNkto3LoQ31tyFP5kNujE8Cdx85G9VzOkw==}
@ -1633,6 +1659,9 @@ packages:
engines: {node: ^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7}
hasBin: true
buffer-crc32@0.2.13:
resolution: {integrity: sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==}
buffer-from@1.1.2:
resolution: {integrity: sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==}
@ -1658,6 +1687,10 @@ packages:
ccount@2.0.1:
resolution: {integrity: sha512-eyrF0jiFpY+3drT6383f1qhkbGsLSifNAjA61IUjZjmLCWjItY6LB9ft9YhoDgwfmclB2zhu51Lc7+95b8NRAg==}
cfb@1.2.2:
resolution: {integrity: sha512-KfdUZsSOw19/ObEWasvBP/Ac4reZvAGauZhs6S/gqNhXhI7cKwvlH7ulj+dOEYnca4bm4SGo8C1bTAQvnTjgQA==}
engines: {node: '>=0.8'}
chalk@4.1.2:
resolution: {integrity: sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==}
engines: {node: '>=10'}
@ -1681,6 +1714,10 @@ packages:
resolution: {integrity: sha512-eYm0QWBtUrBWZWG0d386OGAw16Z995PiOVo2B7bjWSbHedGl5e0ZWaq65kOGgUSNesEIDkB9ISbTg/JK9dhCZA==}
engines: {node: '>=6'}
codepage@1.15.0:
resolution: {integrity: sha512-3g6NUTPd/YtuuGrhMnOMRjFc+LJw/bnMp3+0r/Wcz3IXUuCosKRJvMphm5+Q+bvTVGcJJuRvVLuYba+WojaFaA==}
engines: {node: '>=0.8'}
color-convert@2.0.1:
resolution: {integrity: sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==}
engines: {node: '>=7.0.0'}
@ -1697,6 +1734,14 @@ packages:
convert-source-map@2.0.0:
resolution: {integrity: sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==}
core-util-is@1.0.3:
resolution: {integrity: sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==}
crc-32@1.2.2:
resolution: {integrity: sha512-ROmzCKrTnOwybPcJApAA6WBWij23HVfGVNKqqrZpuyZOHqK2CwHSvpGuyt/UNNvaIjEd8X5IFGp4Mh+Ie1IHJQ==}
engines: {node: '>=0.8'}
hasBin: true
cross-spawn@7.0.6:
resolution: {integrity: sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==}
engines: {node: '>= 8'}
@ -1761,6 +1806,9 @@ packages:
devlop@1.1.0:
resolution: {integrity: sha512-RWmIqhcFf1lRYBvNmr7qTNuyCt/7/ns2jbpp1+PalgE/rDQcBT0fioSMUpJ93irlUhC5hrg4cYqe6U+0ImW0rA==}
dingbat-to-unicode@1.0.1:
resolution: {integrity: sha512-98l0sW87ZT58pU4i61wa2OHwxbiYSbuxsCBozaVnYX2iCnr3bLM3fIes1/ej7h1YdOKuKt/MLs706TVnALA65w==}
doctrine@2.1.0:
resolution: {integrity: sha512-35mSku4ZXK0vfCuHEDAwt55dg2jNajHZ1odvF+8SSr82EsZY4QmXfuWso8oEd8zRhVObSN18aM0CjSdoBX7zIw==}
engines: {node: '>=0.10.0'}
@ -1861,6 +1909,9 @@ packages:
sqlite3:
optional: true
duck@0.1.12:
resolution: {integrity: sha512-wkctla1O6VfP89gQ+J/yDesM0S7B7XLXjKGzXxMDVFg7uEn706niAtyYovKbyq1oT9YwDcly721/iUWoc8MVRg==}
dunder-proto@1.0.1:
resolution: {integrity: sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==}
engines: {node: '>= 0.4'}
@ -2081,6 +2132,9 @@ packages:
fastq@1.19.1:
resolution: {integrity: sha512-GwLTyxkCXjXbxqIhTsMI2Nui8huMPtnxg7krajPJAjnEG/iiOS7i+zCtWGZR9G0NBKbXKh6X9m9UIsYX/N6vvQ==}
fd-slicer@1.1.0:
resolution: {integrity: sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g==}
fdir@6.5.0:
resolution: {integrity: sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==}
engines: {node: '>=12.0.0'}
@ -2113,6 +2167,10 @@ packages:
resolution: {integrity: sha512-dKx12eRCVIzqCxFGplyFKJMPvLEWgmNtUrpTiJIR5u97zEhRG8ySrtboPHZXx7daLxQVrl643cTzbab2tkQjxg==}
engines: {node: '>= 0.4'}
frac@1.1.2:
resolution: {integrity: sha512-w/XBfkibaTl3YDqASwfDUqkna4Z2p9cFSr1aHDt0WoMTECnRfBOv2WArlZILlqgWlmdIlALXGpM2AOhEk5W3IA==}
engines: {node: '>=0.8'}
fsevents@2.3.3:
resolution: {integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==}
engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0}
@ -2228,6 +2286,9 @@ packages:
resolution: {integrity: sha512-Hs59xBNfUIunMFgWAbGX5cq6893IbWg4KnrjbYwX3tx0ztorVgTDA6B2sxf8ejHJ4wz8BqGUMYlnzNBer5NvGg==}
engines: {node: '>= 4'}
immediate@3.0.6:
resolution: {integrity: sha512-XXOFtyqDjNDAQxVfYxuF7g9Il/IbWmmlQg2MYKOH8ExIT1qg6xc4zyS3HaEEATgs1btfzxq15ciUiY7gjSXRGQ==}
import-fresh@3.3.1:
resolution: {integrity: sha512-TR3KfrTZTYLPB6jUjfx6MF9WcWrHL9su5TObK4ZkYgBdWKPOFoSoQIdEuTuR82pmtxH2spWG9h6etwfr1pLBqQ==}
engines: {node: '>=6'}
@ -2236,6 +2297,9 @@ packages:
resolution: {integrity: sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==}
engines: {node: '>=0.8.19'}
inherits@2.0.4:
resolution: {integrity: sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==}
inline-style-parser@0.2.7:
resolution: {integrity: sha512-Nb2ctOyNR8DqQoR0OwRG95uNWIC0C1lCgf5Naz5H6Ji72KZ8OcFZLz2P5sNgwlyoJ8Yif11oMuYs5pBQa86csA==}
@ -2362,6 +2426,9 @@ packages:
resolution: {integrity: sha512-mfcwb6IzQyOKTs84CQMrOwW4gQcaTOAWJ0zzJCl2WSPDrWk/OzDaImWFH3djXhb24g4eudZfLRozAvPGw4d9hQ==}
engines: {node: '>= 0.4'}
isarray@1.0.0:
resolution: {integrity: sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==}
isarray@2.0.5:
resolution: {integrity: sha512-xHjhDr3cNBK0BzdUJSPXZntQUx/mwMS5Rw4A7lPJ90XGAO6ISP/ePDNuo0vhqOZU+UD5JoodwCAAoZQd3FeAKw==}
@ -2413,6 +2480,9 @@ packages:
resolution: {integrity: sha512-ZZow9HBI5O6EPgSJLUb8n2NKgmVWTwCvHGwFuJlMjvLFqlGG6pjirPhtdsseaLZjSibD8eegzmYpUZwoIlj2cQ==}
engines: {node: '>=4.0'}
jszip@3.10.1:
resolution: {integrity: sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g==}
keyv@4.5.4:
resolution: {integrity: sha512-oxVHkHR/EJf2CNXnWxRLW6mg7JyCCUcG0DtEGmL2ctUo1PNTin1PUil+r/+4r5MpVgC/fn1kjsx7mjSujKqIpw==}
@ -2427,6 +2497,9 @@ packages:
resolution: {integrity: sha512-+bT2uH4E5LGE7h/n3evcS/sQlJXCpIp6ym8OWJ5eV6+67Dsql/LaaT7qJBAt2rzfoa/5QBGBhxDix1dMt2kQKQ==}
engines: {node: '>= 0.8.0'}
lie@3.3.0:
resolution: {integrity: sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ==}
lightningcss-android-arm64@1.30.2:
resolution: {integrity: sha512-BH9sEdOCahSgmkVhBLeU7Hc9DWeZ1Eb6wNS6Da8igvUwAe0sqROHddIlvU06q3WyXVEOYDZ6ykBZQnjTbmo4+A==}
engines: {node: '>= 12.0.0'}
@ -2511,6 +2584,9 @@ packages:
resolution: {integrity: sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==}
hasBin: true
lop@0.4.2:
resolution: {integrity: sha512-RefILVDQ4DKoRZsJ4Pj22TxE3omDO47yFpkIBoDKzkqPRISs5U1cnAdg/5583YPkWPaLIYHOKRMQSvjFsO26cw==}
lru-cache@5.1.1:
resolution: {integrity: sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==}
@ -2522,6 +2598,11 @@ packages:
magic-string@0.30.21:
resolution: {integrity: sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ==}
mammoth@1.11.0:
resolution: {integrity: sha512-BcEqqY/BOwIcI1iR5tqyVlqc3KIaMRa4egSoK83YAVrBf6+yqdAAbtUcFDCWX8Zef8/fgNZ6rl4VUv+vVX8ddQ==}
engines: {node: '>=12.0.0'}
hasBin: true
markdown-table@3.0.4:
resolution: {integrity: sha512-wiYz4+JrLyb/DqW2hkFJxP7Vd7JuTDm77fvbM8VfEQdmSMqcImWeeRbHwZjBjIFki/VaMK2BhFi7oUUZeM5bqw==}
@ -2757,6 +2838,9 @@ packages:
resolution: {integrity: sha512-gXah6aZrcUxjWg2zR2MwouP2eHlCBzdV4pygudehaKXSGW4v2AsRQUK+lwwXhii6KFZcunEnmSUoYp5CXibxtA==}
engines: {node: '>= 0.4'}
option@0.2.4:
resolution: {integrity: sha512-pkEqbDyl8ou5cpq+VsnQbe/WlEy5qS7xPzMS1U55OCG9KPvwFD46zDbxQIj3egJSFc3D+XhYOPUzz49zQAVy7A==}
optionator@0.9.4:
resolution: {integrity: sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g==}
engines: {node: '>= 0.8.0'}
@ -2773,6 +2857,9 @@ packages:
resolution: {integrity: sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw==}
engines: {node: '>=10'}
pako@1.0.11:
resolution: {integrity: sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==}
parent-module@1.0.1:
resolution: {integrity: sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==}
engines: {node: '>=6'}
@ -2784,6 +2871,10 @@ packages:
resolution: {integrity: sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==}
engines: {node: '>=8'}
path-is-absolute@1.0.1:
resolution: {integrity: sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==}
engines: {node: '>=0.10.0'}
path-key@3.1.1:
resolution: {integrity: sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==}
engines: {node: '>=8'}
@ -2791,6 +2882,9 @@ packages:
path-parse@1.0.7:
resolution: {integrity: sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==}
pend@1.2.0:
resolution: {integrity: sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg==}
pg-cloudflare@1.2.7:
resolution: {integrity: sha512-YgCtzMH0ptvZJslLM1ffsY4EuGaU0cx4XSdXLRFae8bPP4dS5xL1tNB3k2o/N64cHJpwU7dxKli/nZ2lUa5fLg==}
@ -2872,6 +2966,9 @@ packages:
resolution: {integrity: sha512-DEvV2ZF2r2/63V+tK8hQvrR2ZGn10srHbXviTlcv7Kpzw8jWiNTqbVgjO3IY8RxrrOUF8VPMQQFysYYYv0YZxw==}
engines: {node: '>=6'}
process-nextick-args@2.0.1:
resolution: {integrity: sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==}
prop-types@15.8.1:
resolution: {integrity: sha512-oj87CgZICdulUohogVAR7AjlC0327U4el4L6eAvOqCeudMDVU0NThNaV+b9Df4dXgSP1gXMTnPdhfe/2qDH5cg==}
@ -2903,6 +3000,9 @@ packages:
resolution: {integrity: sha512-DGrYcCWK7tvYMnWh79yrPHt+vdx9tY+1gPZa7nJQtO/p8bLTDaHp4dzwEhQB7pZ4Xe3ok4XKuEPrVuc+wlpkmw==}
engines: {node: '>=0.10.0'}
readable-stream@2.3.8:
resolution: {integrity: sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==}
reflect.getprototypeof@1.0.10:
resolution: {integrity: sha512-00o4I+DVrefhv+nX0ulyi3biSHCPDe+yLv5o/p6d/UVlirijB8E16FtfwSAi4g3tcqrQ4lRAqQSoFEZJehYEcw==}
engines: {node: '>= 0.4'}
@ -2950,6 +3050,9 @@ packages:
resolution: {integrity: sha512-AURm5f0jYEOydBj7VQlVvDrjeFgthDdEF5H1dP+6mNpoXOMo1quQqJ4wvJDyRZ9+pO3kGWoOdmV08cSv2aJV6Q==}
engines: {node: '>=0.4'}
safe-buffer@5.1.2:
resolution: {integrity: sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==}
safe-push-apply@1.0.0:
resolution: {integrity: sha512-iKE9w/Z7xCzUMIZqdBsp6pEQvwuEebH4vdpjcDWnyzaI6yl6O9FHvVpmGelvEHNsoY6wGblkxR6Zty/h00WiSA==}
engines: {node: '>= 0.4'}
@ -2958,6 +3061,10 @@ packages:
resolution: {integrity: sha512-x/+Cz4YrimQxQccJf5mKEbIa1NzeCRNI5Ecl/ekmlYaampdNLPalVyIcCZNNH3MvmqBugV5TMYZXv0ljslUlaw==}
engines: {node: '>= 0.4'}
saxes@5.0.1:
resolution: {integrity: sha512-5LBh1Tls8c9xgGjw3QrMwETmTMVk0oFgvrFSvWx62llR2hcEInrKNZ2GZCCuuy2lvWrdl5jhbpeqc5hRYKFOcw==}
engines: {node: '>=10'}
scheduler@0.27.0:
resolution: {integrity: sha512-eNv+WrVbKu1f3vbYJT/xtiF5syA5HPIMtf9IgY/nKg0sWqzAUEvqY/xm7OcZc/qafLx/iO9FgOmeSAp4v5ti/Q==}
@ -2982,6 +3089,9 @@ packages:
resolution: {integrity: sha512-RJRdvCo6IAnPdsvP/7m6bsQqNnn1FCBX5ZNtFL98MmFF/4xAIJTIg1YbHW5DC2W5SKZanrC6i4HsJqlajw/dZw==}
engines: {node: '>= 0.4'}
setimmediate@1.0.5:
resolution: {integrity: sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==}
sharp@0.34.5:
resolution: {integrity: sha512-Ou9I5Ft9WNcCbXrU9cMgPBcCK8LiwLqcbywW3t4oDV37n1pzpuNLsYiAV8eODnjbtQlSDwZ2cUEeQz4E54Hltg==}
engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0}
@ -3034,6 +3144,13 @@ packages:
resolution: {integrity: sha512-UcjcJOWknrNkF6PLX83qcHM6KHgVKNkV62Y8a5uYDVv9ydGQVwAHMKqHdJje1VTWpljG0WYpCDhrCdAOYH4TWg==}
engines: {node: '>= 10.x'}
sprintf-js@1.0.3:
resolution: {integrity: sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==}
ssf@0.11.2:
resolution: {integrity: sha512-+idbmIXoYET47hH+d7dfm2epdOMUDjqcB4648sTZ+t2JwoyBFL/insLfB/racrDmsKB3diwsDA696pZMieAC5g==}
engines: {node: '>=0.8'}
stable-hash@0.0.5:
resolution: {integrity: sha512-+L3ccpzibovGXFK+Ap/f8LOS0ahMrHTf3xu7mMLSpEGU0EO9ucaysSylKo9eRDFNhWve/y275iPmIZ4z39a9iA==}
@ -3064,6 +3181,9 @@ packages:
resolution: {integrity: sha512-UXSH262CSZY1tfu3G3Secr6uGLCFVPMhIqHjlgCUtCCcgihYc/xKs9djMTMUOb2j1mVSeU8EU6NWc/iQKU6Gfg==}
engines: {node: '>= 0.4'}
string_decoder@1.1.1:
resolution: {integrity: sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==}
stringify-entities@4.0.4:
resolution: {integrity: sha512-IwfBptatlO+QCJUo19AqvrPNqlVMpW9YEL2LIVY+Rpv2qsjCGxaDLNRgeGsQWJhfItebuJhsGSLjaBbNSQ+ieg==}
@ -3179,6 +3299,9 @@ packages:
resolution: {integrity: sha512-nWJ91DjeOkej/TA8pXQ3myruKpKEYgqvpw9lz4OPHj/NWFNluYrjbz9j01CJ8yKQd2g4jFoOkINCTW2I5LEEyw==}
engines: {node: '>= 0.4'}
underscore@1.13.7:
resolution: {integrity: sha512-GMXzWtsc57XAtguZgaQViUOzs0KTkk8ojr3/xAxXLITqf/3EMwxC0inyETfDFjH/Krbhuep0HNbbjI9i/q3F3g==}
undici-types@6.21.0:
resolution: {integrity: sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==}
@ -3212,6 +3335,9 @@ packages:
uri-js@4.4.1:
resolution: {integrity: sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==}
util-deprecate@1.0.2:
resolution: {integrity: sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==}
vfile-message@4.0.3:
resolution: {integrity: sha512-QTHzsGd1EhbZs4AsQ20JX1rC3cOlt/IWJruk893DfLRr57lcnOeMaWG4K0JrRta4mIJZKth2Au3mM3u03/JWKw==}
@ -3239,10 +3365,33 @@ packages:
engines: {node: '>= 8'}
hasBin: true
wmf@1.0.2:
resolution: {integrity: sha512-/p9K7bEh0Dj6WbXg4JG0xvLQmIadrner1bi45VMJTfnbVHsc7yIajZyoSoK60/dtVBs12Fm6WkUI5/3WAVsNMw==}
engines: {node: '>=0.8'}
word-extractor@1.0.4:
resolution: {integrity: sha512-PyAGZQ2gjnVA5kcZAOAxoYciCMaAvu0dbVlw/zxHphhy+3be8cDeYKHJPO8iedIM3Sx0arA/ugKTJyXhZNgo6g==}
word-wrap@1.2.5:
resolution: {integrity: sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA==}
engines: {node: '>=0.10.0'}
word@0.3.0:
resolution: {integrity: sha512-OELeY0Q61OXpdUfTp+oweA/vtLVg5VDOXh+3he3PNzLGG/y0oylSOC1xRVj0+l4vQ3tj/bB1HVHv1ocXkQceFA==}
engines: {node: '>=0.8'}
xlsx@0.18.5:
resolution: {integrity: sha512-dmg3LCjBPHZnQp5/F/+nnTa+miPJxUXB6vtk42YjBBKayDNagxGEeIdWApkYPOf3Z3pm3k62Knjzp7lMeTEtFQ==}
engines: {node: '>=0.8'}
hasBin: true
xmlbuilder@10.1.1:
resolution: {integrity: sha512-OyzrcFLL/nb6fMGHbiRDuPup9ljBycsdCypwuyg5AAHvyWzGfChJpCXMG88AGTIMFhGZ9RccFN1e6lhg3hkwKg==}
engines: {node: '>=4.0'}
xmlchars@2.2.0:
resolution: {integrity: sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==}
xtend@4.0.2:
resolution: {integrity: sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==}
engines: {node: '>=0.4'}
@ -3250,6 +3399,9 @@ packages:
yallist@3.1.1:
resolution: {integrity: sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==}
yauzl@2.10.0:
resolution: {integrity: sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g==}
yocto-queue@0.1.0:
resolution: {integrity: sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==}
engines: {node: '>=10'}
@ -4807,12 +4959,16 @@ snapshots:
'@unrs/resolver-binding-win32-x64-msvc@1.11.1':
optional: true
'@xmldom/xmldom@0.8.11': {}
acorn-jsx@5.3.2(acorn@8.15.0):
dependencies:
acorn: 8.15.0
acorn@8.15.0: {}
adler-32@1.3.1: {}
ajv@6.12.6:
dependencies:
fast-deep-equal: 3.1.3
@ -4824,6 +4980,10 @@ snapshots:
dependencies:
color-convert: 2.0.1
argparse@1.0.10:
dependencies:
sprintf-js: 1.0.3
argparse@2.0.1: {}
aria-query@5.3.2: {}
@ -4911,10 +5071,14 @@ snapshots:
balanced-match@1.0.2: {}
base64-js@1.5.1: {}
baseline-browser-mapping@2.9.9: {}
bcryptjs@3.0.3: {}
bluebird@3.4.7: {}
bowser@2.13.1: {}
brace-expansion@1.1.12:
@ -4938,6 +5102,8 @@ snapshots:
node-releases: 2.0.27
update-browserslist-db: 1.2.3(browserslist@4.28.1)
buffer-crc32@0.2.13: {}
buffer-from@1.1.2: {}
call-bind-apply-helpers@1.0.2:
@ -4963,6 +5129,11 @@ snapshots:
ccount@2.0.1: {}
cfb@1.2.2:
dependencies:
adler-32: 1.3.1
crc-32: 1.2.2
chalk@4.1.2:
dependencies:
ansi-styles: 4.3.0
@ -4980,6 +5151,8 @@ snapshots:
clsx@2.1.1: {}
codepage@1.15.0: {}
color-convert@2.0.1:
dependencies:
color-name: 1.1.4
@ -4992,6 +5165,10 @@ snapshots:
convert-source-map@2.0.0: {}
core-util-is@1.0.3: {}
crc-32@1.2.2: {}
cross-spawn@7.0.6:
dependencies:
path-key: 3.1.1
@ -5054,6 +5231,8 @@ snapshots:
dependencies:
dequal: 2.0.3
dingbat-to-unicode@1.0.1: {}
doctrine@2.1.0:
dependencies:
esutils: 2.0.3
@ -5072,6 +5251,10 @@ snapshots:
'@types/pg': 8.16.0
pg: 8.16.3
duck@0.1.12:
dependencies:
underscore: 1.13.7
dunder-proto@1.0.1:
dependencies:
call-bind-apply-helpers: 1.0.2
@ -5513,6 +5696,10 @@ snapshots:
dependencies:
reusify: 1.1.0
fd-slicer@1.1.0:
dependencies:
pend: 1.2.0
fdir@6.5.0(picomatch@4.0.3):
optionalDependencies:
picomatch: 4.0.3
@ -5541,6 +5728,8 @@ snapshots:
dependencies:
is-callable: 1.2.7
frac@1.1.2: {}
fsevents@2.3.3:
optional: true
@ -5668,6 +5857,8 @@ snapshots:
ignore@7.0.5: {}
immediate@3.0.6: {}
import-fresh@3.3.1:
dependencies:
parent-module: 1.0.1
@ -5675,6 +5866,8 @@ snapshots:
imurmurhash@0.1.4: {}
inherits@2.0.4: {}
inline-style-parser@0.2.7: {}
internal-slot@1.1.0:
@ -5808,6 +6001,8 @@ snapshots:
call-bound: 1.0.4
get-intrinsic: 1.3.0
isarray@1.0.0: {}
isarray@2.0.5: {}
isexe@2.0.0: {}
@ -5852,6 +6047,13 @@ snapshots:
object.assign: 4.1.7
object.values: 1.2.1
jszip@3.10.1:
dependencies:
lie: 3.3.0
pako: 1.0.11
readable-stream: 2.3.8
setimmediate: 1.0.5
keyv@4.5.4:
dependencies:
json-buffer: 3.0.1
@ -5867,6 +6069,10 @@ snapshots:
prelude-ls: 1.2.1
type-check: 0.4.0
lie@3.3.0:
dependencies:
immediate: 3.0.6
lightningcss-android-arm64@1.30.2:
optional: true
@ -5928,6 +6134,12 @@ snapshots:
dependencies:
js-tokens: 4.0.0
lop@0.4.2:
dependencies:
duck: 0.1.12
option: 0.2.4
underscore: 1.13.7
lru-cache@5.1.1:
dependencies:
yallist: 3.1.1
@ -5940,6 +6152,19 @@ snapshots:
dependencies:
'@jridgewell/sourcemap-codec': 1.5.5
mammoth@1.11.0:
dependencies:
'@xmldom/xmldom': 0.8.11
argparse: 1.0.10
base64-js: 1.5.1
bluebird: 3.4.7
dingbat-to-unicode: 1.0.1
jszip: 3.10.1
lop: 0.4.2
path-is-absolute: 1.0.1
underscore: 1.13.7
xmlbuilder: 10.1.1
markdown-table@3.0.4: {}
math-intrinsics@1.1.0: {}
@ -6384,6 +6609,8 @@ snapshots:
define-properties: 1.2.1
es-object-atoms: 1.1.1
option@0.2.4: {}
optionator@0.9.4:
dependencies:
deep-is: 0.1.4
@ -6407,6 +6634,8 @@ snapshots:
dependencies:
p-limit: 3.1.0
pako@1.0.11: {}
parent-module@1.0.1:
dependencies:
callsites: 3.1.0
@ -6423,10 +6652,14 @@ snapshots:
path-exists@4.0.0: {}
path-is-absolute@1.0.1: {}
path-key@3.1.1: {}
path-parse@1.0.7: {}
pend@1.2.0: {}
pg-cloudflare@1.2.7:
optional: true
@ -6496,6 +6729,8 @@ snapshots:
prismjs@1.30.0: {}
process-nextick-args@2.0.1: {}
prop-types@15.8.1:
dependencies:
loose-envify: 1.4.0
@ -6535,6 +6770,16 @@ snapshots:
react@19.2.1: {}
readable-stream@2.3.8:
dependencies:
core-util-is: 1.0.3
inherits: 2.0.4
isarray: 1.0.0
process-nextick-args: 2.0.1
safe-buffer: 5.1.2
string_decoder: 1.1.1
util-deprecate: 1.0.2
reflect.getprototypeof@1.0.10:
dependencies:
call-bind: 1.0.8
@ -6619,6 +6864,8 @@ snapshots:
has-symbols: 1.1.0
isarray: 2.0.5
safe-buffer@5.1.2: {}
safe-push-apply@1.0.0:
dependencies:
es-errors: 1.3.0
@ -6630,6 +6877,10 @@ snapshots:
es-errors: 1.3.0
is-regex: 1.2.1
saxes@5.0.1:
dependencies:
xmlchars: 2.2.0
scheduler@0.27.0: {}
semver@6.3.1: {}
@ -6658,6 +6909,8 @@ snapshots:
es-errors: 1.3.0
es-object-atoms: 1.1.1
setimmediate@1.0.5: {}
sharp@0.34.5:
dependencies:
'@img/colour': 1.0.0
@ -6742,6 +6995,12 @@ snapshots:
split2@4.2.0: {}
sprintf-js@1.0.3: {}
ssf@0.11.2:
dependencies:
frac: 1.1.2
stable-hash@0.0.5: {}
stop-iteration-iterator@1.1.0:
@ -6799,6 +7058,10 @@ snapshots:
define-properties: 1.2.1
es-object-atoms: 1.1.1
string_decoder@1.1.1:
dependencies:
safe-buffer: 5.1.2
stringify-entities@4.0.4:
dependencies:
character-entities-html4: 2.1.0
@ -6925,6 +7188,8 @@ snapshots:
has-symbols: 1.1.0
which-boxed-primitive: 1.1.1
underscore@1.13.7: {}
undici-types@6.21.0: {}
unified@11.0.5:
@ -6994,6 +7259,8 @@ snapshots:
dependencies:
punycode: 2.3.1
util-deprecate@1.0.2: {}
vfile-message@4.0.3:
dependencies:
'@types/unist': 3.0.3
@ -7049,12 +7316,40 @@ snapshots:
dependencies:
isexe: 2.0.0
wmf@1.0.2: {}
word-extractor@1.0.4:
dependencies:
saxes: 5.0.1
yauzl: 2.10.0
word-wrap@1.2.5: {}
word@0.3.0: {}
xlsx@0.18.5:
dependencies:
adler-32: 1.3.1
cfb: 1.2.2
codepage: 1.15.0
crc-32: 1.2.2
ssf: 0.11.2
wmf: 1.0.2
word: 0.3.0
xmlbuilder@10.1.1: {}
xmlchars@2.2.0: {}
xtend@4.0.2: {}
yallist@3.1.1: {}
yauzl@2.10.0:
dependencies:
buffer-crc32: 0.2.13
fd-slicer: 1.1.0
yocto-queue@0.1.0: {}
zod-validation-error@4.0.2(zod@4.2.1):

View File

@ -6,6 +6,24 @@ import { nanoid } from 'nanoid';
import { executeTool } from '@/services/tools';
import { getCurrentUser } from '@/lib/auth';
import { decryptApiKey } from '@/lib/crypto';
import { parseDocument, type ParsedDocument } from '@/lib/document-parser';
/**
 * A PDF attachment as sent by the front end.
 *
 * The payload is not parsed on the server: the Claude handler forwards
 * `data` unchanged as a base64 `document` source, while the Codex handler
 * only mentions the file by `name` in the prompt text.
 */
interface PdfDocumentData {
  /** MIME type of the attachment; fixed to the PDF literal. */
  media_type: 'application/pdf';
  /** File contents, Base64-encoded. */
  data: string;
  /** File name, used in logs and in prompt notices. */
  name: string;
  /** File size as reported by the client (presumably bytes — confirm against the uploader). */
  size: number;
}
/**
 * An Office attachment (Word or Excel) as sent by the front end.
 *
 * Unlike PDFs, these must be parsed on the server: the request handler
 * passes `data`, `name` and `mimeType` to `parseDocument` and feeds the
 * extracted text into the model prompt.
 */
interface OfficeDocumentData {
  /** Document family, as classified by the client. */
  type: 'word' | 'excel';
  /** MIME type of the uploaded file, handed to the parser. */
  mimeType: string;
  /** File contents, Base64-encoded. */
  data: string;
  /** File name, used in logs and parse-failure messages. */
  name: string;
  /** File size as reported by the client (presumably bytes — confirm against the uploader). */
  size: number;
}
interface ChatRequest {
conversationId: string;
@ -29,6 +47,10 @@ interface ChatRequest {
type: string;
content: string;
}[];
// PDF 文档(直接传给 Claude API 使用原生 document 类型)
pdfDocuments?: PdfDocumentData[];
// Office 文档Word/Excel需要后端解析
officeDocuments?: OfficeDocumentData[];
}
// Message content block types (Claude)
@ -240,9 +262,9 @@ const DEFAULT_SYSTEM_PROMPT = `你是一个专业、友好的 AI 助手。请遵
export async function POST(request: Request) {
try {
const body: ChatRequest = await request.json();
const { conversationId, message, displayMessage, model, tools, enableThinking, images, uploadedImages, uploadedDocuments } = body;
const { conversationId, message, displayMessage, model, tools, enableThinking, images, uploadedImages, uploadedDocuments, pdfDocuments, officeDocuments } = body;
// 调试日志:确认接收到的图片数据
// 调试日志:确认接收到的图片和文档数据
console.log('[API/chat] Received request with:', {
conversationId,
messageLength: message?.length,
@ -253,11 +275,24 @@ export async function POST(request: Request) {
imagesCount: images?.length || 0,
uploadedImagesCount: uploadedImages?.length || 0,
uploadedDocumentsCount: uploadedDocuments?.length || 0,
pdfDocumentsCount: pdfDocuments?.length || 0,
officeDocumentsCount: officeDocuments?.length || 0,
images: images ? images.map(img => ({
type: img.type,
media_type: img.media_type,
dataLength: img.data?.length || 0,
})) : undefined,
pdfDocuments: pdfDocuments ? pdfDocuments.map(doc => ({
name: doc.name,
size: doc.size,
dataLength: doc.data?.length || 0,
})) : undefined,
officeDocuments: officeDocuments ? officeDocuments.map(doc => ({
name: doc.name,
size: doc.size,
type: doc.type,
dataLength: doc.data?.length || 0,
})) : undefined,
});
// 获取当前登录用户
@ -360,12 +395,43 @@ export async function POST(request: Request) {
// 获取温度参数
const temperature = parseFloat(conversation.temperature || settings.temperature || '0.7');
// 解析 Office 文档Word/Excel
const parsedOfficeDocuments: ParsedDocument[] = [];
const documentParseErrors: { name: string; error: string }[] = []; // 记录解析失败的文档
if (officeDocuments && officeDocuments.length > 0) {
console.log('[API/chat] 📄 开始解析 Office 文档,数量:', officeDocuments.length);
for (const doc of officeDocuments) {
try {
const parsed = await parseDocument(doc.data, doc.name, doc.mimeType);
if (parsed) {
parsedOfficeDocuments.push(parsed);
console.log('[API/chat] ✅ 成功解析文档:', doc.name, '内容长度:', parsed.content.length);
}
} catch (parseError) {
const errorMsg = parseError instanceof Error ? parseError.message : '未知错误';
console.error('[API/chat] ❌ 解析文档失败:', doc.name, parseError);
documentParseErrors.push({ name: doc.name, error: errorMsg });
}
}
}
let fullContent = '';
let thinkingContent = '';
let totalInputTokens = 0;
let totalOutputTokens = 0;
let usedTools: string[] = []; // 收集使用过的工具名称
// 如果有文档解析失败,将警告添加到内容开头
if (documentParseErrors.length > 0) {
const errorMessages = documentParseErrors.map(e => `- ${e.name}: ${e.error}`).join('\n');
const warningText = `⚠️ 以下文档解析失败,无法分析其内容:\n${errorMessages}\n\n`;
fullContent = warningText;
safeWriter.write({
type: 'text',
content: warningText,
});
}
// 【重要】处理器选择优先级说明:
// 1. 首先检查 apiFormat === 'openai':如果用户选择了 "OpenAI 兼容" 格式,
// 则所有模型(包括 Codex 模型)都统一使用 /v1/chat/completions 端点
@ -391,6 +457,8 @@ export async function POST(request: Request) {
encoder,
images,
metasoApiKey: decryptedMetasoApiKey,
pdfDocuments,
parsedOfficeDocuments,
});
fullContent = result.fullContent;
@ -416,6 +484,8 @@ export async function POST(request: Request) {
encoder,
images, // 传递用户上传的图片
metasoApiKey: decryptedMetasoApiKey,
pdfDocuments,
parsedOfficeDocuments,
});
fullContent = result.fullContent;
@ -439,6 +509,8 @@ export async function POST(request: Request) {
encoder,
images, // 传递用户上传的图片
metasoApiKey: decryptedMetasoApiKey,
pdfDocuments,
parsedOfficeDocuments,
});
fullContent = result.fullContent;
@ -534,6 +606,10 @@ interface CodexChatParams {
}[];
// 秘塔 API Key
metasoApiKey?: string;
// PDF 文档(直接传给 API
pdfDocuments?: PdfDocumentData[];
// 解析后的 Office 文档
parsedOfficeDocuments?: ParsedDocument[];
}
// Codex Response API 的输入项类型
@ -574,6 +650,8 @@ async function handleCodexChat(params: CodexChatParams): Promise<{
encoder,
images,
metasoApiKey,
pdfDocuments,
parsedOfficeDocuments,
} = params;
// 创建安全的 stream 写入器
@ -591,13 +669,37 @@ async function handleCodexChat(params: CodexChatParams): Promise<{
];
// 添加当前用户消息(支持多模态内容)
if (images && images.length > 0) {
console.log('[handleCodexChat] Building multimodal message with', images.length, 'images');
const hasImages = images && images.length > 0;
const hasPdfDocuments = pdfDocuments && pdfDocuments.length > 0;
const hasOfficeDocuments = parsedOfficeDocuments && parsedOfficeDocuments.length > 0;
// 构建消息文本包含用户消息、PDF 提示和解析后的 Office 文档内容)
let textContent = message;
// Codex/OpenAI 格式不支持原生 PDF添加提示信息
if (hasPdfDocuments) {
const pdfInfo = pdfDocuments!.map(doc => `[PDF 文档: ${doc.name}]`).join('\n');
textContent = `${textContent}\n\n注意用户上传了以下 PDF 文件,但当前 API 格式不支持原生 PDF 解析。建议切换到 Claude 原生格式以获得完整的 PDF 支持。\n${pdfInfo}`;
console.log('[handleCodexChat] PDF documents detected but not supported in Codex format');
}
// 添加解析后的 Office 文档内容
if (hasOfficeDocuments) {
const officeContents = parsedOfficeDocuments!.map(doc => {
const typeLabel = doc.type === 'word' ? 'Word' : doc.type === 'excel' ? 'Excel' : '文档';
return `\n\n--- ${typeLabel} 文件:${doc.name} ---\n${doc.content}\n--- 文件结束 ---`;
}).join('\n');
textContent = textContent + officeContents;
console.log('[handleCodexChat] Added Office documents content, total length:', textContent.length);
}
if (hasImages) {
console.log('[handleCodexChat] Building multimodal message with', images!.length, 'images');
// 如果有图片,构建多模态消息
const multimodalContent: CodexMultimodalContent[] = [];
// 先添加图片
for (const img of images) {
for (const img of images!) {
console.log('[handleCodexChat] Adding image:', {
type: img.type,
media_type: img.media_type,
@ -612,10 +714,10 @@ async function handleCodexChat(params: CodexChatParams): Promise<{
}
// 再添加文本
if (message) {
if (textContent) {
multimodalContent.push({
type: 'input_text',
text: message,
text: textContent,
});
}
@ -626,12 +728,12 @@ async function handleCodexChat(params: CodexChatParams): Promise<{
content: multimodalContent,
});
} else {
console.log('[handleCodexChat] No images, using simple text message');
// 没有图片,使用简单文本消息
console.log('[handleCodexChat] No images, using text message');
// 没有图片,使用文本消息
inputItems.push({
type: 'message' as const,
role: 'user' as const,
content: message,
content: textContent,
});
}
@ -912,6 +1014,10 @@ interface ClaudeChatParams {
}[];
// 秘塔 API Key
metasoApiKey?: string;
// PDF 文档(直接传给 Claude API 使用原生 document 类型)
pdfDocuments?: PdfDocumentData[];
// 解析后的 Office 文档
parsedOfficeDocuments?: ParsedDocument[];
}
async function handleClaudeChat(params: ClaudeChatParams): Promise<{
@ -935,6 +1041,8 @@ async function handleClaudeChat(params: ClaudeChatParams): Promise<{
encoder,
images,
metasoApiKey,
pdfDocuments,
parsedOfficeDocuments,
} = params;
// 创建安全的 stream 写入器
@ -948,14 +1056,39 @@ async function handleClaudeChat(params: ClaudeChatParams): Promise<{
content: msg.content,
}));
// 添加当前用户消息(支持多模态内容)
if (images && images.length > 0) {
console.log('[handleClaudeChat] Building multimodal message with', images.length, 'images');
// 如果有图片,构建多模态消息
// 添加当前用户消息支持多模态内容图片、PDF、Office文档
const hasImages = images && images.length > 0;
const hasPdfDocuments = pdfDocuments && pdfDocuments.length > 0;
const hasOfficeDocuments = parsedOfficeDocuments && parsedOfficeDocuments.length > 0;
if (hasImages || hasPdfDocuments || hasOfficeDocuments) {
console.log('[handleClaudeChat] Building multimodal message with:', {
images: images?.length || 0,
pdfDocuments: pdfDocuments?.length || 0,
officeDocuments: parsedOfficeDocuments?.length || 0,
});
// 构建多模态消息
const multimodalContent: ContentBlock[] = [];
// 先添加图片
for (const img of images) {
// 1. 添加 PDF 文档(使用 Claude 原生 document 类型)
if (hasPdfDocuments) {
for (const pdf of pdfDocuments!) {
console.log('[handleClaudeChat] Adding PDF document:', pdf.name, 'size:', pdf.size);
multimodalContent.push({
type: 'document' as unknown as 'text',
// @ts-expect-error - Claude API 支持 document 类型但 TypeScript 类型定义不完整
source: {
type: 'base64',
media_type: 'application/pdf',
data: pdf.data,
},
});
}
}
// 2. 添加图片
if (hasImages) {
for (const img of images!) {
console.log('[handleClaudeChat] Adding image:', {
type: img.type,
media_type: img.media_type,
@ -971,12 +1104,24 @@ async function handleClaudeChat(params: ClaudeChatParams): Promise<{
},
});
}
}
// 再添加文本
if (message) {
// 3. 构建消息文本(包含用户消息和解析后的 Office 文档内容)
let textContent = message;
if (hasOfficeDocuments) {
const officeContents = parsedOfficeDocuments!.map(doc => {
const typeLabel = doc.type === 'word' ? 'Word' : doc.type === 'excel' ? 'Excel' : '文档';
return `\n\n--- ${typeLabel} 文件:${doc.name} ---\n${doc.content}\n--- 文件结束 ---`;
}).join('\n');
textContent = message + officeContents;
console.log('[handleClaudeChat] Added Office documents content, total length:', textContent.length);
}
// 4. 添加文本内容
if (textContent) {
multimodalContent.push({
type: 'text',
text: message,
text: textContent,
});
}
@ -986,8 +1131,8 @@ async function handleClaudeChat(params: ClaudeChatParams): Promise<{
content: multimodalContent,
});
} else {
console.log('[handleClaudeChat] No images, using simple text message');
// 没有图片,使用简单文本消息
console.log('[handleClaudeChat] No multimodal content, using simple text message');
// 没有多模态内容,使用简单文本消息
messageHistory.push({
role: 'user',
content: message,
@ -1323,6 +1468,10 @@ interface OpenAICompatibleChatParams {
}[];
// 秘塔 API Key
metasoApiKey?: string;
// PDF 文档
pdfDocuments?: PdfDocumentData[];
// 解析后的 Office 文档
parsedOfficeDocuments?: ParsedDocument[];
}
// OpenAI 消息格式
@ -1368,6 +1517,8 @@ async function handleOpenAICompatibleChat(params: OpenAICompatibleChatParams): P
encoder,
images,
metasoApiKey,
pdfDocuments,
parsedOfficeDocuments,
} = params;
// 创建安全的 stream 写入器
@ -1385,12 +1536,36 @@ async function handleOpenAICompatibleChat(params: OpenAICompatibleChatParams): P
];
// 添加当前用户消息(支持多模态)
if (images && images.length > 0) {
console.log('[handleOpenAICompatibleChat] Building multimodal message with', images.length, 'images');
const hasImages = images && images.length > 0;
const hasPdfDocuments = pdfDocuments && pdfDocuments.length > 0;
const hasOfficeDocuments = parsedOfficeDocuments && parsedOfficeDocuments.length > 0;
// 构建消息文本包含用户消息、PDF 提示和解析后的 Office 文档内容)
let textContent = message;
// OpenAI 格式不支持原生 PDF添加提示信息
if (hasPdfDocuments) {
const pdfInfo = pdfDocuments!.map(doc => `[PDF 文档: ${doc.name}]`).join('\n');
textContent = `${textContent}\n\n注意用户上传了以下 PDF 文件,但当前 API 格式不支持原生 PDF 解析。建议切换到 Claude 原生格式以获得完整的 PDF 支持。\n${pdfInfo}`;
console.log('[handleOpenAICompatibleChat] PDF documents detected but not supported in OpenAI format');
}
// 添加解析后的 Office 文档内容
if (hasOfficeDocuments) {
const officeContents = parsedOfficeDocuments!.map(doc => {
const typeLabel = doc.type === 'word' ? 'Word' : doc.type === 'excel' ? 'Excel' : '文档';
return `\n\n--- ${typeLabel} 文件:${doc.name} ---\n${doc.content}\n--- 文件结束 ---`;
}).join('\n');
textContent = textContent + officeContents;
console.log('[handleOpenAICompatibleChat] Added Office documents content, total length:', textContent.length);
}
if (hasImages) {
console.log('[handleOpenAICompatibleChat] Building multimodal message with', images!.length, 'images');
const multimodalContent: OpenAIMessageContent[] = [];
// 添加图片
for (const img of images) {
for (const img of images!) {
console.log('[handleOpenAICompatibleChat] Adding image:', {
type: img.type,
media_type: img.media_type,
@ -1405,10 +1580,10 @@ async function handleOpenAICompatibleChat(params: OpenAICompatibleChatParams): P
}
// 添加文本
if (message) {
if (textContent) {
multimodalContent.push({
type: 'text',
text: message,
text: textContent,
});
}
@ -1417,10 +1592,10 @@ async function handleOpenAICompatibleChat(params: OpenAICompatibleChatParams): P
content: multimodalContent,
});
} else {
console.log('[handleOpenAICompatibleChat] No images, using simple text message');
console.log('[handleOpenAICompatibleChat] No images, using text message');
openaiMessages.push({
role: 'user',
content: message,
content: textContent,
});
}

View File

@ -2,6 +2,16 @@
import { useState, useCallback, useRef } from 'react';
import { executePythonInPyodide, type LoadingCallback } from '@/services/tools/pyodideRunner';
import {
detectDocumentType,
isPdfFile,
isOfficeDocument,
validateDocumentSize,
fileToBase64 as documentFileToBase64,
type PdfDocumentData,
type OfficeDocumentData,
getFileMimeType,
} from '@/utils/document-utils';
export interface StreamMessage {
type: 'thinking' | 'text' | 'tool_use_start' | 'tool_execution_result' | 'tool_search_images' | 'tool_search_videos' | 'pyodide_execution_required' | 'tool_used' | 'done' | 'error';
@ -263,16 +273,23 @@ export function useStreamChat() {
const uploadedDocuments: UploadedDocument[] = [];
const imageContents: { type: 'image'; media_type: string; data: string }[] = [];
const documentContents: { name: string; content: string }[] = [];
// PDF 文档(直接传给 Claude API
const pdfDocuments: PdfDocumentData[] = [];
// Office 文档Word/Excel需要后端解析
const officeDocuments: OfficeDocumentData[] = [];
if (files && files.length > 0) {
console.log('[useStreamChat] Processing files:', files.length);
for (const fileInfo of files) {
const docType = detectDocumentType(fileInfo.file);
console.log('[useStreamChat] File info:', {
name: fileInfo.file.name,
type: fileInfo.file.type,
size: fileInfo.file.size,
isImage: fileInfo.file.type.startsWith('image/'),
docType: docType,
});
// 处理图片文件
if (fileInfo.file.type.startsWith('image/')) {
try {
@ -291,6 +308,67 @@ export function useStreamChat() {
console.error('Failed to convert image to base64:', err);
}
}
// 处理 PDF 文件(使用 Claude 原生支持)
else if (isPdfFile(fileInfo.file)) {
// 验证文件大小
const validation = validateDocumentSize(fileInfo.file);
if (!validation.valid) {
console.error('[useStreamChat] PDF validation failed:', validation.error);
// 可以选择抛出错误或显示提示
continue;
}
try {
const base64 = await documentFileToBase64(fileInfo.file);
console.log('[useStreamChat] PDF converted to base64, length:', base64.length);
pdfDocuments.push({
name: fileInfo.file.name,
size: fileInfo.file.size,
data: base64,
media_type: 'application/pdf',
});
// 保存到 uploadedDocuments 用于前端显示
uploadedDocuments.push({
name: fileInfo.file.name,
size: fileInfo.file.size,
type: 'pdf',
content: `[PDF 文档: ${fileInfo.file.name}]`, // PDF 内容由 Claude 直接处理
});
} catch (err) {
console.error('Failed to convert PDF to base64:', err);
}
}
// 处理 Office 文档Word/Excel需要后端解析
else if (isOfficeDocument(fileInfo.file)) {
// 验证文件大小
const validation = validateDocumentSize(fileInfo.file);
if (!validation.valid) {
console.error('[useStreamChat] Office document validation failed:', validation.error);
continue;
}
try {
const base64 = await documentFileToBase64(fileInfo.file);
const mimeType = getFileMimeType(fileInfo.file);
console.log('[useStreamChat] Office document converted to base64, length:', base64.length, 'type:', docType);
officeDocuments.push({
name: fileInfo.file.name,
size: fileInfo.file.size,
data: base64,
type: docType as 'word' | 'excel',
mimeType: mimeType,
});
// 保存到 uploadedDocuments 用于前端显示
uploadedDocuments.push({
name: fileInfo.file.name,
size: fileInfo.file.size,
type: docType,
content: `[${docType === 'word' ? 'Word' : 'Excel'} 文档: ${fileInfo.file.name}]`, // 内容由后端解析后补充
});
} catch (err) {
console.error('Failed to convert Office document to base64:', err);
}
}
// 处理文本类文件
else if (isTextFile(fileInfo.file)) {
try {
@ -348,7 +426,7 @@ export function useStreamChat() {
abortControllerRef.current = new AbortController();
try {
// 调试日志:确认图片数据
// 调试日志:确认图片和文档数据
console.log('[useStreamChat] Sending request with:', {
conversationId,
messageLength: finalMessage.length,
@ -356,11 +434,24 @@ export function useStreamChat() {
tools,
enableThinking,
imagesCount: imageContents.length,
pdfDocumentsCount: pdfDocuments.length,
officeDocumentsCount: officeDocuments.length,
images: imageContents.length > 0 ? imageContents.map(img => ({
type: img.type,
media_type: img.media_type,
dataLength: img.data.length,
})) : undefined,
pdfDocuments: pdfDocuments.length > 0 ? pdfDocuments.map(doc => ({
name: doc.name,
size: doc.size,
dataLength: doc.data.length,
})) : undefined,
officeDocuments: officeDocuments.length > 0 ? officeDocuments.map(doc => ({
name: doc.name,
size: doc.size,
type: doc.type,
dataLength: doc.data.length,
})) : undefined,
});
const response = await fetch('/api/chat', {
@ -381,6 +472,10 @@ export function useStreamChat() {
uploadedImages: uploadedImages.length > 0 ? uploadedImages : undefined,
// 传递上传的文档用于保存到数据库
uploadedDocuments: uploadedDocuments.length > 0 ? uploadedDocuments : undefined,
// 传递 PDF 文档给后端(用于 Claude 原生 document 类型)
pdfDocuments: pdfDocuments.length > 0 ? pdfDocuments : undefined,
// 传递 Office 文档给后端(需要后端解析)
officeDocuments: officeDocuments.length > 0 ? officeDocuments : undefined,
}),
signal: abortControllerRef.current.signal,
});

336
src/lib/document-parser.ts Normal file
View File

@ -0,0 +1,336 @@
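/**
 * Document parsing helpers.
 * Handles PDF, Word (.doc/.docx) and Excel (.xlsx) uploads; PDFs are only
 * detected here and are not parsed by this module.
 */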
import mammoth from 'mammoth';
import * as XLSX from 'xlsx';
import WordExtractor from 'word-extractor';
// Per-document-type limits: maximum file size in bytes, plus page/row/sheet caps.
// NOTE(review): `maxPages` is not referenced by any code visible in this file.
export const DOCUMENT_LIMITS = {
pdf: {
maxSize: 32 * 1024 * 1024, // 32MB (Claude API limit)
maxPages: 100, // Claude API maximum page count
},
word: {
maxSize: 20 * 1024 * 1024, // 20MB
},
excel: {
maxSize: 20 * 1024 * 1024, // 20MB
maxRows: 10000, // maximum rows rendered per worksheet
maxSheets: 10, // maximum number of worksheets rendered
},
};
// Supported document categories; 'unknown' marks a file that matched none of them.
export type DocumentType = 'pdf' | 'word' | 'excel' | 'unknown';
/**
 * A document file described by its metadata together with its Base64-encoded content.
 */
export interface DocumentFile {
name: string;
size: number;
type: DocumentType;
mimeType: string;
data: string; // Base64-encoded file content
}
/**
 * A PDF document carried as Base64 data with a fixed `application/pdf` media type.
 * NOTE(review): the original note mentions the Claude API as the consumer — confirm against the caller.
 */
export interface PdfDocument {
name: string;
size: number;
data: string; // Base64-encoded
media_type: 'application/pdf';
}
/**
 * Result of parsing a document: its identity plus the text extracted from it.
 * The parsers in this file set `size` to the byte length of the decoded file.
 */
export interface ParsedDocument {
name: string;
size: number;
type: DocumentType;
content: string; // extracted text content
metadata?: {
sheets?: string[]; // Excel worksheet names
pageCount?: number; // page count
};
}
/**
 * Classifies a file as PDF, Word or Excel from its MIME type or its file extension.
 *
 * The MIME type is compared case-insensitively. The extension is the text after the
 * last dot of the name; a name that contains no dot has no extension, so a file that
 * is literally named "pdf" or "doc" is not misclassified.
 *
 * @param file - Object exposing the file `name` and its MIME `type`.
 * @returns The detected document type, or `'unknown'` when nothing matches.
 */
export function detectDocumentType(file: { name: string; type: string }): DocumentType {
  const mimeType = file.type.toLowerCase();
  // Only treat the trailing segment as an extension when the name actually has a dot.
  const dotIndex = file.name.lastIndexOf('.');
  const extension = dotIndex === -1 ? '' : file.name.slice(dotIndex + 1).toLowerCase();

  // PDF
  if (mimeType === 'application/pdf' || extension === 'pdf') {
    return 'pdf';
  }

  // Word documents (.docx and legacy .doc)
  if (
    mimeType === 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' ||
    mimeType === 'application/msword' ||
    extension === 'docx' ||
    extension === 'doc'
  ) {
    return 'word';
  }

  // Excel workbooks (.xlsx and legacy .xls)
  if (
    mimeType === 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' ||
    mimeType === 'application/vnd.ms-excel' ||
    extension === 'xlsx' ||
    extension === 'xls'
  ) {
    return 'excel';
  }

  return 'unknown';
}
/**
 * Tells whether the file is one of the supported document types.
 *
 * @param file - Object exposing the file `name` and its MIME `type`.
 * @returns `true` unless the file is classified as `'unknown'`.
 */
export function isSupportedDocument(file: { name: string; type: string }): boolean {
  return detectDocumentType(file) !== 'unknown';
}
/**
 * Tells whether the file is classified as a PDF.
 *
 * @param file - Object exposing the file `name` and its MIME `type`.
 */
export function isPdfFile(file: { name: string; type: string }): boolean {
  const kind = detectDocumentType(file);
  return kind === 'pdf';
}
/**
 * Tells whether the file is classified as a Word document.
 *
 * @param file - Object exposing the file `name` and its MIME `type`.
 */
export function isWordFile(file: { name: string; type: string }): boolean {
  const kind = detectDocumentType(file);
  return kind === 'word';
}
/**
 * Tells whether the file is classified as an Excel workbook.
 *
 * @param file - Object exposing the file `name` and its MIME `type`.
 */
export function isExcelFile(file: { name: string; type: string }): boolean {
  const kind = detectDocumentType(file);
  return kind === 'excel';
}
/**
 * Checks a file's size against the limit configured for its document type.
 *
 * Files of an unrecognised type have no configured limit and are always reported valid.
 *
 * @param file - Object exposing the file `name`, MIME `type` and `size` in bytes.
 * @returns `{ valid: true }` when within the limit, otherwise `{ valid: false, error }`
 *          where `error` is a user-facing message naming the file and the limit in MB.
 */
export function validateDocumentSize(
  file: { name: string; type: string; size: number }
): { valid: boolean; error?: string } {
  const kind = detectDocumentType(file);
  if (kind === 'unknown') {
    return { valid: true };
  }

  // Display label used at the start of the error message for each type.
  const labels = { pdf: 'PDF', word: 'Word', excel: 'Excel' } as const;
  const limitBytes = DOCUMENT_LIMITS[kind].maxSize;

  if (file.size > limitBytes) {
    return {
      valid: false,
      error: `${labels[kind]} 文件 "${file.name}" 超过 ${limitBytes / 1024 / 1024}MB 限制`,
    };
  }

  return { valid: true };
}
/**
 * Decodes a Base64 string into a Node.js Buffer.
 *
 * @param base64 - Base64-encoded data (without any `data:` URL prefix).
 * @returns The decoded bytes.
 */
export function base64ToBuffer(base64: string): Buffer {
  const decoded = Buffer.from(base64, 'base64');
  return decoded;
}
/**
 * Extracts the text of a Word document (.doc or .docx).
 *
 * - Legacy binary documents are read with word-extractor; body text is followed by
 *   footnotes and endnotes when those are non-empty.
 * - Everything else is read with mammoth as raw text.
 *
 * A document is treated as legacy binary when its name ends in `.doc` OR when its
 * content starts with the OLE compound-file signature. The content check covers files
 * that were accepted by MIME type but whose name lacks the `.doc` extension, which
 * would otherwise be handed to mammoth and fail.
 *
 * @param base64Data - Base64-encoded file content.
 * @param fileName - Original file name; used for format detection, logging and errors.
 * @returns The parsed document; `size` is the byte length of the decoded file.
 * @throws Error wrapping any extraction failure, with the file name in the message.
 */
export async function parseWordDocument(base64Data: string, fileName: string): Promise<ParsedDocument> {
  try {
    const buffer = base64ToBuffer(base64Data);
    const extension = fileName.split('.').pop()?.toLowerCase();

    // First eight bytes of every OLE compound file (the Word 97-2003 container).
    const oleSignature = [0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1];
    const hasOleSignature =
      buffer.length >= oleSignature.length &&
      oleSignature.every((byte, index) => buffer[index] === byte);

    let content = '';

    if (extension === 'doc' || hasOleSignature) {
      // Legacy Word 97-2003 format: handled by word-extractor.
      console.log('[parseWordDocument] Using word-extractor for .doc file:', fileName);
      const extractor = new WordExtractor();
      const doc = await extractor.extract(buffer);

      // Main body text.
      content = doc.getBody();

      // Append footnotes and endnotes only when they contain something.
      const footnotes = doc.getFootnotes();
      const endnotes = doc.getEndnotes();
      if (footnotes && footnotes.trim()) {
        content += '\n\n--- 脚注 ---\n' + footnotes;
      }
      if (endnotes && endnotes.trim()) {
        content += '\n\n--- 尾注 ---\n' + endnotes;
      }
      console.log('[parseWordDocument] Successfully extracted .doc content, length:', content.length);
    } else {
      // Office Open XML format: handled by mammoth.
      console.log('[parseWordDocument] Using mammoth for .docx file:', fileName);
      const result = await mammoth.extractRawText({ buffer });
      content = result.value;
      console.log('[parseWordDocument] Successfully extracted .docx content, length:', content.length);
    }

    return {
      name: fileName,
      size: buffer.length,
      type: 'word',
      content,
    };
  } catch (error) {
    console.error('[parseWordDocument] Error:', error);
    const errorMsg = error instanceof Error ? error.message : '未知错误';
    throw new Error(`解析 Word 文档 "${fileName}" 失败: ${errorMsg}`);
  }
}
/**
 * Renders one worksheet row as a single Markdown table line.
 *
 * Pipes are escaped and line breaks are flattened to spaces so a cell cannot break the
 * table. Only null/undefined become empty text, so values such as 0 and false remain
 * visible.
 */
function toMarkdownTableRow(cells: readonly unknown[]): string {
  const rendered = cells.map(cell =>
    String(cell ?? '')
      .replace(/\|/g, '\\|')
      .replace(/\r\n|\r|\n/g, ' ')
  );
  return '| ' + rendered.join(' | ') + ' |\n';
}

/**
 * Parses an Excel workbook and renders each worksheet as a Markdown table.
 *
 * At most `DOCUMENT_LIMITS.excel.maxSheets` worksheets are rendered, and at most
 * `DOCUMENT_LIMITS.excel.maxRows` rows per worksheet; a note is appended whenever
 * something was cut off. The first row of each worksheet is used as the table header.
 *
 * @param base64Data - Base64-encoded workbook content.
 * @param fileName - Original file name, copied into the result.
 * @returns The parsed document; `metadata.sheets` lists the worksheets that were visited
 *          and `size` is the byte length of the decoded file.
 * @throws Error wrapping any failure raised while reading or converting the workbook.
 */
export async function parseExcelDocument(base64Data: string, fileName: string): Promise<ParsedDocument> {
  try {
    const buffer = base64ToBuffer(base64Data);

    // Read the workbook from the decoded bytes.
    const workbook = XLSX.read(buffer, { type: 'buffer' });
    const sheets: string[] = [];
    const contentParts: string[] = [];

    // Visit the worksheets, capped at the configured maximum.
    const sheetNames = workbook.SheetNames.slice(0, DOCUMENT_LIMITS.excel.maxSheets);
    for (const sheetName of sheetNames) {
      sheets.push(sheetName);
      const worksheet = workbook.Sheets[sheetName];

      // Worksheet extent; a sheet without '!ref' is treated as the single cell A1.
      const range = XLSX.utils.decode_range(worksheet['!ref'] || 'A1');
      const rowCount = range.e.r - range.s.r + 1;

      // Clamp the last row so no more than maxRows rows are converted.
      const maxRow = Math.min(range.e.r, range.s.r + DOCUMENT_LIMITS.excel.maxRows - 1);
      const limitedRange = {
        ...range,
        e: { ...range.e, r: maxRow },
      };

      // header: 1 yields an array of row arrays; defval fills empty cells with ''.
      const jsonData = XLSX.utils.sheet_to_json(worksheet, {
        range: limitedRange,
        header: 1,
        defval: '',
      }) as unknown[][];
      if (jsonData.length === 0) continue;

      const headers = jsonData[0] ?? [];
      const lines: string[] = [
        `\n### 工作表: ${sheetName}\n\n`,
        toMarkdownTableRow(headers),
        '| ' + headers.map(() => '---').join(' | ') + ' |\n',
      ];
      for (const row of jsonData.slice(1)) {
        lines.push(toMarkdownTableRow(row));
      }
      if (rowCount > DOCUMENT_LIMITS.excel.maxRows) {
        lines.push(`\n*(已截断,原表格共 ${rowCount} 行,仅显示前 ${DOCUMENT_LIMITS.excel.maxRows} 行)*\n`);
      }
      contentParts.push(lines.join(''));
    }

    // Mention worksheets that were skipped because of the sheet cap.
    if (workbook.SheetNames.length > DOCUMENT_LIMITS.excel.maxSheets) {
      contentParts.push(`\n*(共 ${workbook.SheetNames.length} 个工作表,仅显示前 ${DOCUMENT_LIMITS.excel.maxSheets} 个)*\n`);
    }

    return {
      name: fileName,
      size: buffer.length,
      type: 'excel',
      content: contentParts.join('\n'),
      metadata: {
        sheets,
      },
    };
  } catch (error) {
    console.error('[parseExcelDocument] Error:', error);
    throw new Error(`解析 Excel 文档失败: ${error instanceof Error ? error.message : '未知错误'}`);
  }
}
/**
 * Dispatches a document to the parser matching its detected type.
 *
 * @param base64Data - Base64-encoded file content.
 * @param fileName - Original file name.
 * @param mimeType - MIME type reported for the file.
 * @returns The parsed document for Word and Excel files; `null` for PDFs (which are
 *          not parsed here) and for unrecognised types.
 */
export async function parseDocument(
  base64Data: string,
  fileName: string,
  mimeType: string
): Promise<ParsedDocument | null> {
  const kind = detectDocumentType({ name: fileName, type: mimeType });

  if (kind === 'word') {
    return parseWordDocument(base64Data, fileName);
  }
  if (kind === 'excel') {
    return parseExcelDocument(base64Data, fileName);
  }

  // PDFs are intentionally not parsed in this module, and unknown types have no parser.
  return null;
}

59
src/types/word-extractor.d.ts vendored Normal file
View File

@ -0,0 +1,59 @@
/**
 * Type declarations for the `word-extractor` package.
 * NOTE(review): the original note mentions both .doc and .docx support — confirm against the package docs.
 */
declare module 'word-extractor' {
interface ExtractOptions {
/** If true (the default), converts common Unicode quotation marks to standard ASCII characters. */
filterUnicode?: boolean;
}
interface HeaderOptions extends ExtractOptions {
/** If true (the default), returns headers and footers together as a single string. */
includeFooters?: boolean;
}
interface TextboxOptions extends ExtractOptions {
/** If true (the default), includes text box content found in headers and footers. */
includeHeadersAndFooters?: boolean;
/** If true (the default), includes text box content found in the document body. */
includeBody?: boolean;
}
/**
 * Document - an extracted Word document whose parts are read through the getters below.
 */
class Document {
/** Returns the document body text. */
getBody(options?: ExtractOptions): string;
/** Returns the footnotes text. */
getFootnotes(options?: ExtractOptions): string;
/** Returns the endnotes text. */
getEndnotes(options?: ExtractOptions): string;
/** Returns the headers text. */
getHeaders(options?: HeaderOptions): string;
/** Returns the footers text. */
getFooters(options?: ExtractOptions): string;
/** Returns the annotations (comments) text. */
getAnnotations(options?: ExtractOptions): string;
/** Returns the text box content. */
getTextboxes(options?: TextboxOptions): string;
}
/**
 * WordExtractor - extracts content from Word documents.
 * NOTE(review): per the original note it handles .doc (OLE) and .docx (Open XML) — confirm.
 */
class WordExtractor {
constructor();
/**
 * Extracts a Word document.
 * @param source - File path or Buffer holding the document.
 * @returns Promise resolving to the extracted Document.
 */
extract(source: string | Buffer): Promise<Document>;
}
export = WordExtractor;
}

186
src/utils/document-utils.ts Normal file
View File

@ -0,0 +1,186 @@
/**
 * Front-end document helpers:
 * document type detection, file size validation and Base64 encoding.
 */
// Per-document-type limits (the original note says these are kept in sync with the backend).
// NOTE(review): `maxPages` is not referenced by any code visible in this file.
export const DOCUMENT_LIMITS = {
pdf: {
maxSize: 32 * 1024 * 1024, // 32MB (Claude API limit)
maxPages: 100, // Claude API maximum page count
},
word: {
maxSize: 20 * 1024 * 1024, // 20MB
},
excel: {
maxSize: 20 * 1024 * 1024, // 20MB
},
};
// Supported document categories; 'unknown' marks a file that matched none of them.
export type DocumentType = 'pdf' | 'word' | 'excel' | 'unknown';
/**
 * A PDF document carried as Base64 data with a fixed `application/pdf` media type.
 */
export interface PdfDocumentData {
name: string;
size: number;
data: string; // Base64-encoded
media_type: 'application/pdf';
}
/**
 * A Word or Excel document carried as Base64 data together with its MIME type.
 */
export interface OfficeDocumentData {
name: string;
size: number;
data: string; // Base64-encoded
type: 'word' | 'excel';
mimeType: string;
}
/**
 * Classifies a file as PDF, Word or Excel from its MIME type or its file extension.
 *
 * The MIME type is compared case-insensitively. The extension is the text after the
 * last dot of the name; a name that contains no dot has no extension, so a file that
 * is literally named "pdf" or "doc" is not misclassified.
 *
 * @param file - The browser `File` to classify.
 * @returns The detected document type, or `'unknown'` when nothing matches.
 */
export function detectDocumentType(file: File): DocumentType {
  const mimeType = file.type.toLowerCase();
  // Only treat the trailing segment as an extension when the name actually has a dot.
  const dotIndex = file.name.lastIndexOf('.');
  const extension = dotIndex === -1 ? '' : file.name.slice(dotIndex + 1).toLowerCase();

  // PDF
  if (mimeType === 'application/pdf' || extension === 'pdf') {
    return 'pdf';
  }

  // Word documents (.docx and legacy .doc)
  if (
    mimeType === 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' ||
    mimeType === 'application/msword' ||
    extension === 'docx' ||
    extension === 'doc'
  ) {
    return 'word';
  }

  // Excel workbooks (.xlsx and legacy .xls)
  if (
    mimeType === 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' ||
    mimeType === 'application/vnd.ms-excel' ||
    extension === 'xlsx' ||
    extension === 'xls'
  ) {
    return 'excel';
  }

  return 'unknown';
}
/**
 * Tells whether the file is one of the supported document types.
 *
 * @param file - The browser `File` to check.
 * @returns `true` unless the file is classified as `'unknown'`.
 */
export function isSupportedDocument(file: File): boolean {
  return detectDocumentType(file) !== 'unknown';
}
/**
 * Tells whether the file is classified as a PDF.
 *
 * @param file - The browser `File` to check.
 */
export function isPdfFile(file: File): boolean {
  const kind = detectDocumentType(file);
  return kind === 'pdf';
}
/**
 * Tells whether the file is classified as a Word document.
 *
 * @param file - The browser `File` to check.
 */
export function isWordFile(file: File): boolean {
  const kind = detectDocumentType(file);
  return kind === 'word';
}
/**
 * Tells whether the file is classified as an Excel workbook.
 *
 * @param file - The browser `File` to check.
 */
export function isExcelFile(file: File): boolean {
  const kind = detectDocumentType(file);
  return kind === 'excel';
}
/**
 * Tells whether the file is an Office document, i.e. classified as Word or Excel.
 *
 * @param file - The browser `File` to check.
 */
export function isOfficeDocument(file: File): boolean {
  switch (detectDocumentType(file)) {
    case 'word':
    case 'excel':
      return true;
    default:
      return false;
  }
}
/**
 * Checks a file's size against the limit configured for its document type.
 *
 * Files of an unrecognised type have no configured limit and are always reported valid.
 *
 * @param file - The browser `File` to validate.
 * @returns `{ valid: true }` when within the limit, otherwise `{ valid: false, error }`
 *          where `error` is a user-facing message naming the file and the limit in MB.
 */
export function validateDocumentSize(file: File): { valid: boolean; error?: string } {
  const kind = detectDocumentType(file);
  if (kind === 'unknown') {
    return { valid: true };
  }

  // Display label used at the start of the error message for each type.
  const labels = { pdf: 'PDF', word: 'Word', excel: 'Excel' } as const;
  const limitBytes = DOCUMENT_LIMITS[kind].maxSize;

  if (file.size > limitBytes) {
    return {
      valid: false,
      error: `${labels[kind]} 文件 "${file.name}" 超过 ${limitBytes / 1024 / 1024}MB 限制`,
    };
  }

  return { valid: true };
}
/**
 * Reads a file and returns its content as a bare Base64 string (no `data:` URL prefix).
 *
 * The file is read as a data URL and everything up to and including the first comma is
 * dropped. When the data URL has no payload part (NOTE(review): some browsers reportedly
 * yield just `data:` for an empty file — confirm), an empty string is returned instead
 * of `undefined`.
 *
 * @param file - The browser `File` to encode.
 * @returns Promise resolving to the Base64 payload.
 * @throws Rejects with an `Error` when the read fails or yields a non-string result.
 */
export async function fileToBase64(file: File): Promise<string> {
  return new Promise<string>((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => {
      const result = reader.result;
      if (typeof result !== 'string') {
        reject(new Error(`Unexpected FileReader result for "${file.name}"`));
        return;
      }
      // Strip the "data:xxx;base64," prefix and keep only the Base64 data.
      const commaIndex = result.indexOf(',');
      resolve(commaIndex === -1 ? '' : result.slice(commaIndex + 1));
    };
    // Reject with the reader's error object rather than the raw ProgressEvent.
    reader.onerror = () => {
      reject(reader.error ?? new Error(`Failed to read file "${file.name}"`));
    };
    reader.readAsDataURL(file);
  });
}
/**
 * Returns the MIME type of a file, inferring it from the extension when the browser
 * did not provide one.
 *
 * The extension is the text after the last dot of the name; a name without a dot has
 * no extension. The lookup uses a Map so an extension that happens to match an
 * inherited object member (for example `constructor`) cannot return a non-string value.
 *
 * @param file - The browser `File` to inspect.
 * @returns `file.type` when non-empty, otherwise the MIME type known for the extension,
 *          otherwise `'application/octet-stream'`.
 */
export function getFileMimeType(file: File): string {
  if (file.type) return file.type;

  // Infer from the extension.
  const dotIndex = file.name.lastIndexOf('.');
  const extension = dotIndex === -1 ? '' : file.name.slice(dotIndex + 1).toLowerCase();
  const mimeTypes = new Map<string, string>([
    ['pdf', 'application/pdf'],
    ['doc', 'application/msword'],
    ['docx', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
    ['xls', 'application/vnd.ms-excel'],
    ['xlsx', 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'],
  ]);

  return mimeTypes.get(extension) ?? 'application/octet-stream';
}