From 2344024e849189696367003189207d88aab1e8c5 Mon Sep 17 00:00:00 2001 From: gaoziman <2942894660@qq.com> Date: Mon, 22 Dec 2025 23:22:26 +0800 Subject: [PATCH] =?UTF-8?q?feat(=E4=BE=9D=E8=B5=96):=20=E6=B7=BB=E5=8A=A0w?= =?UTF-8?q?ord-extractor=E5=BA=93=E6=94=AF=E6=8C=81.doc=E6=96=87=E4=BB=B6?= =?UTF-8?q?=E8=A7=A3=E6=9E=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 安装 word-extractor@1.0.4 依赖 - 新增 src/types/word-extractor.d.ts 类型声明 - WordExtractor 类 - Document 类及其方法类型 - 支持 Buffer 和文件路径两种输入方式 --- package.json | 3 + pnpm-lock.yaml | 295 ++++++++++++++++++++++++++++++++++ src/types/word-extractor.d.ts | 59 +++++++ 3 files changed, 357 insertions(+) create mode 100644 src/types/word-extractor.d.ts diff --git a/package.json b/package.json index 83d3596..dd867c8 100644 --- a/package.json +++ b/package.json @@ -19,6 +19,7 @@ "drizzle-orm": "^0.45.1", "jose": "^6.1.3", "lucide-react": "^0.561.0", + "mammoth": "^1.11.0", "nanoid": "^5.1.6", "next": "16.0.10", "nodemailer": "^7.0.11", @@ -29,6 +30,8 @@ "react-markdown": "^10.1.0", "remark-gfm": "^4.0.1", "sonner": "^2.0.7", + "word-extractor": "^1.0.4", + "xlsx": "^0.18.5", "zod": "^4.2.1" }, "devDependencies": { diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 452f8cd..eabd7da 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -26,6 +26,9 @@ importers: lucide-react: specifier: ^0.561.0 version: 0.561.0(react@19.2.1) + mammoth: + specifier: ^1.11.0 + version: 1.11.0 nanoid: specifier: ^5.1.6 version: 5.1.6 @@ -56,6 +59,12 @@ importers: sonner: specifier: ^2.0.7 version: 2.0.7(react-dom@19.2.1(react@19.2.1))(react@19.2.1) + word-extractor: + specifier: ^1.0.4 + version: 1.0.4 + xlsx: + specifier: ^0.18.5 + version: 0.18.5 zod: specifier: ^4.2.1 version: 4.2.1 @@ -1526,6 +1535,10 @@ packages: cpu: [x64] os: [win32] + '@xmldom/xmldom@0.8.11': + resolution: {integrity: sha512-cQzWCtO6C8TQiYl1ruKNn2U6Ao4o4WBBcbL61yJl84x+j5sOWWFU9X7DpND8XZG3daDppSsigMdfAIl2upQBRw==} + engines: {node: '>=10.0.0'} + acorn-jsx@5.3.2: resolution: {integrity: sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==} peerDependencies: @@ -1536,6 +1549,10 @@ packages: engines: {node: '>=0.4.0'} hasBin: true + adler-32@1.3.1: + resolution: {integrity: sha512-ynZ4w/nUUv5rrsR8UUGoe1VC9hZj6V5hU9Qw1HlMDJGEJw5S7TfTErWTjMys6M7vr0YWcPqs3qAr4ss0nDfP+A==} + engines: {node: '>=0.8'} + ajv@6.12.6: resolution: {integrity: sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==} @@ -1543,6 +1560,9 @@ packages: resolution: {integrity: sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==} engines: {node: '>=8'} + argparse@1.0.10: + resolution: {integrity: sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==} + argparse@2.0.1: resolution: {integrity: sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==} @@ -1607,6 +1627,9 @@ packages: balanced-match@1.0.2: resolution: {integrity: sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==} + base64-js@1.5.1: + resolution: {integrity: sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==} + baseline-browser-mapping@2.9.9: resolution: {integrity: sha512-V8fbOCSeOFvlDj7LLChUcqbZrdKD9RU/VR260piF1790vT0mfLSwGc/Qzxv3IqiTukOpNtItePa0HBpMAj7MDg==} hasBin: true @@ -1615,6 +1638,9 @@ packages: resolution: {integrity: sha512-GlF5wPWnSa/X5LKM1o0wz0suXIINz1iHRLvTS+sLyi7XPbe5ycmYI3DlZqVGZZtDgl4DmasFg7gOB3JYbphV5g==} hasBin: true + bluebird@3.4.7: + resolution: {integrity: sha512-iD3898SR7sWVRHbiQv+sHUtHnMvC1o3nW5rAcqnq3uOn07DSAppZYUkIGslDz6gXC7HfunPe7YVBgoEJASPcHA==} + bowser@2.13.1: resolution: {integrity: sha512-OHawaAbjwx6rqICCKgSG0SAnT05bzd7ppyKLVUITZpANBaaMFBAsaNkto3LoQ31tyFP5kNujE8Cdx85G9VzOkw==} @@ -1633,6 +1659,9 @@ packages: engines: {node: ^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7} hasBin: true + buffer-crc32@0.2.13: + resolution: {integrity: sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==} + buffer-from@1.1.2: resolution: {integrity: sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==} @@ -1658,6 +1687,10 @@ packages: ccount@2.0.1: resolution: {integrity: sha512-eyrF0jiFpY+3drT6383f1qhkbGsLSifNAjA61IUjZjmLCWjItY6LB9ft9YhoDgwfmclB2zhu51Lc7+95b8NRAg==} + cfb@1.2.2: + resolution: {integrity: sha512-KfdUZsSOw19/ObEWasvBP/Ac4reZvAGauZhs6S/gqNhXhI7cKwvlH7ulj+dOEYnca4bm4SGo8C1bTAQvnTjgQA==} + engines: {node: '>=0.8'} + chalk@4.1.2: resolution: {integrity: sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==} engines: {node: '>=10'} @@ -1681,6 +1714,10 @@ packages: resolution: {integrity: sha512-eYm0QWBtUrBWZWG0d386OGAw16Z995PiOVo2B7bjWSbHedGl5e0ZWaq65kOGgUSNesEIDkB9ISbTg/JK9dhCZA==} engines: {node: '>=6'} + codepage@1.15.0: + resolution: {integrity: sha512-3g6NUTPd/YtuuGrhMnOMRjFc+LJw/bnMp3+0r/Wcz3IXUuCosKRJvMphm5+Q+bvTVGcJJuRvVLuYba+WojaFaA==} + engines: {node: '>=0.8'} + color-convert@2.0.1: resolution: {integrity: sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==} engines: {node: '>=7.0.0'} @@ -1697,6 +1734,14 @@ packages: convert-source-map@2.0.0: resolution: {integrity: sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==} + core-util-is@1.0.3: + resolution: {integrity: sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==} + + crc-32@1.2.2: + resolution: {integrity: sha512-ROmzCKrTnOwybPcJApAA6WBWij23HVfGVNKqqrZpuyZOHqK2CwHSvpGuyt/UNNvaIjEd8X5IFGp4Mh+Ie1IHJQ==} + engines: {node: '>=0.8'} + hasBin: true + cross-spawn@7.0.6: resolution: {integrity: sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==} engines: {node: '>= 8'} @@ -1761,6 +1806,9 @@ packages: devlop@1.1.0: resolution: {integrity: sha512-RWmIqhcFf1lRYBvNmr7qTNuyCt/7/ns2jbpp1+PalgE/rDQcBT0fioSMUpJ93irlUhC5hrg4cYqe6U+0ImW0rA==} + dingbat-to-unicode@1.0.1: + resolution: {integrity: sha512-98l0sW87ZT58pU4i61wa2OHwxbiYSbuxsCBozaVnYX2iCnr3bLM3fIes1/ej7h1YdOKuKt/MLs706TVnALA65w==} + doctrine@2.1.0: resolution: {integrity: sha512-35mSku4ZXK0vfCuHEDAwt55dg2jNajHZ1odvF+8SSr82EsZY4QmXfuWso8oEd8zRhVObSN18aM0CjSdoBX7zIw==} engines: {node: '>=0.10.0'} @@ -1861,6 +1909,9 @@ packages: sqlite3: optional: true + duck@0.1.12: + resolution: {integrity: sha512-wkctla1O6VfP89gQ+J/yDesM0S7B7XLXjKGzXxMDVFg7uEn706niAtyYovKbyq1oT9YwDcly721/iUWoc8MVRg==} + dunder-proto@1.0.1: resolution: {integrity: sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==} engines: {node: '>= 0.4'} @@ -2081,6 +2132,9 @@ packages: fastq@1.19.1: resolution: {integrity: sha512-GwLTyxkCXjXbxqIhTsMI2Nui8huMPtnxg7krajPJAjnEG/iiOS7i+zCtWGZR9G0NBKbXKh6X9m9UIsYX/N6vvQ==} + fd-slicer@1.1.0: + resolution: {integrity: sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g==} + fdir@6.5.0: resolution: {integrity: sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==} engines: {node: '>=12.0.0'} @@ -2113,6 +2167,10 @@ packages: resolution: {integrity: sha512-dKx12eRCVIzqCxFGplyFKJMPvLEWgmNtUrpTiJIR5u97zEhRG8ySrtboPHZXx7daLxQVrl643cTzbab2tkQjxg==} engines: {node: '>= 0.4'} + frac@1.1.2: + resolution: {integrity: sha512-w/XBfkibaTl3YDqASwfDUqkna4Z2p9cFSr1aHDt0WoMTECnRfBOv2WArlZILlqgWlmdIlALXGpM2AOhEk5W3IA==} + engines: {node: '>=0.8'} + fsevents@2.3.3: resolution: {integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==} engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0} @@ -2228,6 +2286,9 @@ packages: resolution: {integrity: sha512-Hs59xBNfUIunMFgWAbGX5cq6893IbWg4KnrjbYwX3tx0ztorVgTDA6B2sxf8ejHJ4wz8BqGUMYlnzNBer5NvGg==} engines: {node: '>= 4'} + immediate@3.0.6: + resolution: {integrity: sha512-XXOFtyqDjNDAQxVfYxuF7g9Il/IbWmmlQg2MYKOH8ExIT1qg6xc4zyS3HaEEATgs1btfzxq15ciUiY7gjSXRGQ==} + import-fresh@3.3.1: resolution: {integrity: sha512-TR3KfrTZTYLPB6jUjfx6MF9WcWrHL9su5TObK4ZkYgBdWKPOFoSoQIdEuTuR82pmtxH2spWG9h6etwfr1pLBqQ==} engines: {node: '>=6'} @@ -2236,6 +2297,9 @@ packages: resolution: {integrity: sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==} engines: {node: '>=0.8.19'} + inherits@2.0.4: + resolution: {integrity: sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==} + inline-style-parser@0.2.7: resolution: {integrity: sha512-Nb2ctOyNR8DqQoR0OwRG95uNWIC0C1lCgf5Naz5H6Ji72KZ8OcFZLz2P5sNgwlyoJ8Yif11oMuYs5pBQa86csA==} @@ -2362,6 +2426,9 @@ packages: resolution: {integrity: sha512-mfcwb6IzQyOKTs84CQMrOwW4gQcaTOAWJ0zzJCl2WSPDrWk/OzDaImWFH3djXhb24g4eudZfLRozAvPGw4d9hQ==} engines: {node: '>= 0.4'} + isarray@1.0.0: + resolution: {integrity: sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==} + isarray@2.0.5: resolution: {integrity: sha512-xHjhDr3cNBK0BzdUJSPXZntQUx/mwMS5Rw4A7lPJ90XGAO6ISP/ePDNuo0vhqOZU+UD5JoodwCAAoZQd3FeAKw==} @@ -2413,6 +2480,9 @@ packages: resolution: {integrity: sha512-ZZow9HBI5O6EPgSJLUb8n2NKgmVWTwCvHGwFuJlMjvLFqlGG6pjirPhtdsseaLZjSibD8eegzmYpUZwoIlj2cQ==} engines: {node: '>=4.0'} + jszip@3.10.1: + resolution: {integrity: sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g==} + keyv@4.5.4: resolution: {integrity: sha512-oxVHkHR/EJf2CNXnWxRLW6mg7JyCCUcG0DtEGmL2ctUo1PNTin1PUil+r/+4r5MpVgC/fn1kjsx7mjSujKqIpw==} @@ -2427,6 +2497,9 @@ packages: resolution: {integrity: sha512-+bT2uH4E5LGE7h/n3evcS/sQlJXCpIp6ym8OWJ5eV6+67Dsql/LaaT7qJBAt2rzfoa/5QBGBhxDix1dMt2kQKQ==} engines: {node: '>= 0.8.0'} + lie@3.3.0: + resolution: {integrity: sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ==} + lightningcss-android-arm64@1.30.2: resolution: {integrity: sha512-BH9sEdOCahSgmkVhBLeU7Hc9DWeZ1Eb6wNS6Da8igvUwAe0sqROHddIlvU06q3WyXVEOYDZ6ykBZQnjTbmo4+A==} engines: {node: '>= 12.0.0'} @@ -2511,6 +2584,9 @@ packages: resolution: {integrity: sha512-lyuxPGr/Wfhrlem2CL/UcnUc1zcqKAImBDzukY7Y5F/yQiNdko6+fRLevlw1HgMySw7f611UIY408EtxRSoK3Q==} hasBin: true + lop@0.4.2: + resolution: {integrity: sha512-RefILVDQ4DKoRZsJ4Pj22TxE3omDO47yFpkIBoDKzkqPRISs5U1cnAdg/5583YPkWPaLIYHOKRMQSvjFsO26cw==} + lru-cache@5.1.1: resolution: {integrity: sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==} @@ -2522,6 +2598,11 @@ packages: magic-string@0.30.21: resolution: {integrity: sha512-vd2F4YUyEXKGcLHoq+TEyCjxueSeHnFxyyjNp80yg0XV4vUhnDer/lvvlqM/arB5bXQN5K2/3oinyCRyx8T2CQ==} + mammoth@1.11.0: + resolution: {integrity: sha512-BcEqqY/BOwIcI1iR5tqyVlqc3KIaMRa4egSoK83YAVrBf6+yqdAAbtUcFDCWX8Zef8/fgNZ6rl4VUv+vVX8ddQ==} + engines: {node: '>=12.0.0'} + hasBin: true + markdown-table@3.0.4: resolution: {integrity: sha512-wiYz4+JrLyb/DqW2hkFJxP7Vd7JuTDm77fvbM8VfEQdmSMqcImWeeRbHwZjBjIFki/VaMK2BhFi7oUUZeM5bqw==} @@ -2757,6 +2838,9 @@ packages: resolution: {integrity: sha512-gXah6aZrcUxjWg2zR2MwouP2eHlCBzdV4pygudehaKXSGW4v2AsRQUK+lwwXhii6KFZcunEnmSUoYp5CXibxtA==} engines: {node: '>= 0.4'} + option@0.2.4: + resolution: {integrity: sha512-pkEqbDyl8ou5cpq+VsnQbe/WlEy5qS7xPzMS1U55OCG9KPvwFD46zDbxQIj3egJSFc3D+XhYOPUzz49zQAVy7A==} + optionator@0.9.4: resolution: {integrity: sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g==} engines: {node: '>= 0.8.0'} @@ -2773,6 +2857,9 @@ packages: resolution: {integrity: sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw==} engines: {node: '>=10'} + pako@1.0.11: + resolution: {integrity: sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==} + parent-module@1.0.1: resolution: {integrity: sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==} engines: {node: '>=6'} @@ -2784,6 +2871,10 @@ packages: resolution: {integrity: sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==} engines: {node: '>=8'} + path-is-absolute@1.0.1: + resolution: {integrity: sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==} + engines: {node: '>=0.10.0'} + path-key@3.1.1: resolution: {integrity: sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==} engines: {node: '>=8'} @@ -2791,6 +2882,9 @@ packages: path-parse@1.0.7: resolution: {integrity: sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==} + pend@1.2.0: + resolution: {integrity: sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg==} + pg-cloudflare@1.2.7: resolution: {integrity: sha512-YgCtzMH0ptvZJslLM1ffsY4EuGaU0cx4XSdXLRFae8bPP4dS5xL1tNB3k2o/N64cHJpwU7dxKli/nZ2lUa5fLg==} @@ -2872,6 +2966,9 @@ packages: resolution: {integrity: sha512-DEvV2ZF2r2/63V+tK8hQvrR2ZGn10srHbXviTlcv7Kpzw8jWiNTqbVgjO3IY8RxrrOUF8VPMQQFysYYYv0YZxw==} engines: {node: '>=6'} + process-nextick-args@2.0.1: + resolution: {integrity: sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==} + prop-types@15.8.1: resolution: {integrity: sha512-oj87CgZICdulUohogVAR7AjlC0327U4el4L6eAvOqCeudMDVU0NThNaV+b9Df4dXgSP1gXMTnPdhfe/2qDH5cg==} @@ -2903,6 +3000,9 @@ packages: resolution: {integrity: sha512-DGrYcCWK7tvYMnWh79yrPHt+vdx9tY+1gPZa7nJQtO/p8bLTDaHp4dzwEhQB7pZ4Xe3ok4XKuEPrVuc+wlpkmw==} engines: {node: '>=0.10.0'} + readable-stream@2.3.8: + resolution: {integrity: sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==} + reflect.getprototypeof@1.0.10: resolution: {integrity: sha512-00o4I+DVrefhv+nX0ulyi3biSHCPDe+yLv5o/p6d/UVlirijB8E16FtfwSAi4g3tcqrQ4lRAqQSoFEZJehYEcw==} engines: {node: '>= 0.4'} @@ -2950,6 +3050,9 @@ packages: resolution: {integrity: sha512-AURm5f0jYEOydBj7VQlVvDrjeFgthDdEF5H1dP+6mNpoXOMo1quQqJ4wvJDyRZ9+pO3kGWoOdmV08cSv2aJV6Q==} engines: {node: '>=0.4'} + safe-buffer@5.1.2: + resolution: {integrity: sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==} + safe-push-apply@1.0.0: resolution: {integrity: sha512-iKE9w/Z7xCzUMIZqdBsp6pEQvwuEebH4vdpjcDWnyzaI6yl6O9FHvVpmGelvEHNsoY6wGblkxR6Zty/h00WiSA==} engines: {node: '>= 0.4'} @@ -2958,6 +3061,10 @@ packages: resolution: {integrity: sha512-x/+Cz4YrimQxQccJf5mKEbIa1NzeCRNI5Ecl/ekmlYaampdNLPalVyIcCZNNH3MvmqBugV5TMYZXv0ljslUlaw==} engines: {node: '>= 0.4'} + saxes@5.0.1: + resolution: {integrity: sha512-5LBh1Tls8c9xgGjw3QrMwETmTMVk0oFgvrFSvWx62llR2hcEInrKNZ2GZCCuuy2lvWrdl5jhbpeqc5hRYKFOcw==} + engines: {node: '>=10'} + scheduler@0.27.0: resolution: {integrity: sha512-eNv+WrVbKu1f3vbYJT/xtiF5syA5HPIMtf9IgY/nKg0sWqzAUEvqY/xm7OcZc/qafLx/iO9FgOmeSAp4v5ti/Q==} @@ -2982,6 +3089,9 @@ packages: resolution: {integrity: sha512-RJRdvCo6IAnPdsvP/7m6bsQqNnn1FCBX5ZNtFL98MmFF/4xAIJTIg1YbHW5DC2W5SKZanrC6i4HsJqlajw/dZw==} engines: {node: '>= 0.4'} + setimmediate@1.0.5: + resolution: {integrity: sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==} + sharp@0.34.5: resolution: {integrity: sha512-Ou9I5Ft9WNcCbXrU9cMgPBcCK8LiwLqcbywW3t4oDV37n1pzpuNLsYiAV8eODnjbtQlSDwZ2cUEeQz4E54Hltg==} engines: {node: ^18.17.0 || ^20.3.0 || >=21.0.0} @@ -3034,6 +3144,13 @@ packages: resolution: {integrity: sha512-UcjcJOWknrNkF6PLX83qcHM6KHgVKNkV62Y8a5uYDVv9ydGQVwAHMKqHdJje1VTWpljG0WYpCDhrCdAOYH4TWg==} engines: {node: '>= 10.x'} + sprintf-js@1.0.3: + resolution: {integrity: sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==} + + ssf@0.11.2: + resolution: {integrity: sha512-+idbmIXoYET47hH+d7dfm2epdOMUDjqcB4648sTZ+t2JwoyBFL/insLfB/racrDmsKB3diwsDA696pZMieAC5g==} + engines: {node: '>=0.8'} + stable-hash@0.0.5: resolution: {integrity: sha512-+L3ccpzibovGXFK+Ap/f8LOS0ahMrHTf3xu7mMLSpEGU0EO9ucaysSylKo9eRDFNhWve/y275iPmIZ4z39a9iA==} @@ -3064,6 +3181,9 @@ packages: resolution: {integrity: sha512-UXSH262CSZY1tfu3G3Secr6uGLCFVPMhIqHjlgCUtCCcgihYc/xKs9djMTMUOb2j1mVSeU8EU6NWc/iQKU6Gfg==} engines: {node: '>= 0.4'} + string_decoder@1.1.1: + resolution: {integrity: sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==} + stringify-entities@4.0.4: resolution: {integrity: sha512-IwfBptatlO+QCJUo19AqvrPNqlVMpW9YEL2LIVY+Rpv2qsjCGxaDLNRgeGsQWJhfItebuJhsGSLjaBbNSQ+ieg==} @@ -3179,6 +3299,9 @@ packages: resolution: {integrity: sha512-nWJ91DjeOkej/TA8pXQ3myruKpKEYgqvpw9lz4OPHj/NWFNluYrjbz9j01CJ8yKQd2g4jFoOkINCTW2I5LEEyw==} engines: {node: '>= 0.4'} + underscore@1.13.7: + resolution: {integrity: sha512-GMXzWtsc57XAtguZgaQViUOzs0KTkk8ojr3/xAxXLITqf/3EMwxC0inyETfDFjH/Krbhuep0HNbbjI9i/q3F3g==} + undici-types@6.21.0: resolution: {integrity: sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==} @@ -3212,6 +3335,9 @@ packages: uri-js@4.4.1: resolution: {integrity: sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==} + util-deprecate@1.0.2: + resolution: {integrity: sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==} + vfile-message@4.0.3: resolution: {integrity: sha512-QTHzsGd1EhbZs4AsQ20JX1rC3cOlt/IWJruk893DfLRr57lcnOeMaWG4K0JrRta4mIJZKth2Au3mM3u03/JWKw==} @@ -3239,10 +3365,33 @@ packages: engines: {node: '>= 8'} hasBin: true + wmf@1.0.2: + resolution: {integrity: sha512-/p9K7bEh0Dj6WbXg4JG0xvLQmIadrner1bi45VMJTfnbVHsc7yIajZyoSoK60/dtVBs12Fm6WkUI5/3WAVsNMw==} + engines: {node: '>=0.8'} + + word-extractor@1.0.4: + resolution: {integrity: sha512-PyAGZQ2gjnVA5kcZAOAxoYciCMaAvu0dbVlw/zxHphhy+3be8cDeYKHJPO8iedIM3Sx0arA/ugKTJyXhZNgo6g==} + word-wrap@1.2.5: resolution: {integrity: sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA==} engines: {node: '>=0.10.0'} + word@0.3.0: + resolution: {integrity: sha512-OELeY0Q61OXpdUfTp+oweA/vtLVg5VDOXh+3he3PNzLGG/y0oylSOC1xRVj0+l4vQ3tj/bB1HVHv1ocXkQceFA==} + engines: {node: '>=0.8'} + + xlsx@0.18.5: + resolution: {integrity: sha512-dmg3LCjBPHZnQp5/F/+nnTa+miPJxUXB6vtk42YjBBKayDNagxGEeIdWApkYPOf3Z3pm3k62Knjzp7lMeTEtFQ==} + engines: {node: '>=0.8'} + hasBin: true + + xmlbuilder@10.1.1: + resolution: {integrity: sha512-OyzrcFLL/nb6fMGHbiRDuPup9ljBycsdCypwuyg5AAHvyWzGfChJpCXMG88AGTIMFhGZ9RccFN1e6lhg3hkwKg==} + engines: {node: '>=4.0'} + + xmlchars@2.2.0: + resolution: {integrity: sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==} + xtend@4.0.2: resolution: {integrity: sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==} engines: {node: '>=0.4'} @@ -3250,6 +3399,9 @@ packages: yallist@3.1.1: resolution: {integrity: sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==} + yauzl@2.10.0: + resolution: {integrity: sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g==} + yocto-queue@0.1.0: resolution: {integrity: sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==} engines: {node: '>=10'} @@ -4807,12 +4959,16 @@ snapshots: '@unrs/resolver-binding-win32-x64-msvc@1.11.1': optional: true + '@xmldom/xmldom@0.8.11': {} + acorn-jsx@5.3.2(acorn@8.15.0): dependencies: acorn: 8.15.0 acorn@8.15.0: {} + adler-32@1.3.1: {} + ajv@6.12.6: dependencies: fast-deep-equal: 3.1.3 @@ -4824,6 +4980,10 @@ snapshots: dependencies: color-convert: 2.0.1 + argparse@1.0.10: + dependencies: + sprintf-js: 1.0.3 + argparse@2.0.1: {} aria-query@5.3.2: {} @@ -4911,10 +5071,14 @@ snapshots: balanced-match@1.0.2: {} + base64-js@1.5.1: {} + baseline-browser-mapping@2.9.9: {} bcryptjs@3.0.3: {} + bluebird@3.4.7: {} + bowser@2.13.1: {} brace-expansion@1.1.12: @@ -4938,6 +5102,8 @@ snapshots: node-releases: 2.0.27 update-browserslist-db: 1.2.3(browserslist@4.28.1) + buffer-crc32@0.2.13: {} + buffer-from@1.1.2: {} call-bind-apply-helpers@1.0.2: @@ -4963,6 +5129,11 @@ snapshots: ccount@2.0.1: {} + cfb@1.2.2: + dependencies: + adler-32: 1.3.1 + crc-32: 1.2.2 + chalk@4.1.2: dependencies: ansi-styles: 4.3.0 @@ -4980,6 +5151,8 @@ snapshots: clsx@2.1.1: {} + codepage@1.15.0: {} + color-convert@2.0.1: dependencies: color-name: 1.1.4 @@ -4992,6 +5165,10 @@ snapshots: convert-source-map@2.0.0: {} + core-util-is@1.0.3: {} + + crc-32@1.2.2: {} + cross-spawn@7.0.6: dependencies: path-key: 3.1.1 @@ -5054,6 +5231,8 @@ snapshots: dependencies: dequal: 2.0.3 + dingbat-to-unicode@1.0.1: {} + doctrine@2.1.0: dependencies: esutils: 2.0.3 @@ -5072,6 +5251,10 @@ snapshots: '@types/pg': 8.16.0 pg: 8.16.3 + duck@0.1.12: + dependencies: + underscore: 1.13.7 + dunder-proto@1.0.1: dependencies: call-bind-apply-helpers: 1.0.2 @@ -5513,6 +5696,10 @@ snapshots: dependencies: reusify: 1.1.0 + fd-slicer@1.1.0: + dependencies: + pend: 1.2.0 + fdir@6.5.0(picomatch@4.0.3): optionalDependencies: picomatch: 4.0.3 @@ -5541,6 +5728,8 @@ snapshots: dependencies: is-callable: 1.2.7 + frac@1.1.2: {} + fsevents@2.3.3: optional: true @@ -5668,6 +5857,8 @@ snapshots: ignore@7.0.5: {} + immediate@3.0.6: {} + import-fresh@3.3.1: dependencies: parent-module: 1.0.1 @@ -5675,6 +5866,8 @@ snapshots: imurmurhash@0.1.4: {} + inherits@2.0.4: {} + inline-style-parser@0.2.7: {} internal-slot@1.1.0: @@ -5808,6 +6001,8 @@ snapshots: call-bound: 1.0.4 get-intrinsic: 1.3.0 + isarray@1.0.0: {} + isarray@2.0.5: {} isexe@2.0.0: {} @@ -5852,6 +6047,13 @@ snapshots: object.assign: 4.1.7 object.values: 1.2.1 + jszip@3.10.1: + dependencies: + lie: 3.3.0 + pako: 1.0.11 + readable-stream: 2.3.8 + setimmediate: 1.0.5 + keyv@4.5.4: dependencies: json-buffer: 3.0.1 @@ -5867,6 +6069,10 @@ snapshots: prelude-ls: 1.2.1 type-check: 0.4.0 + lie@3.3.0: + dependencies: + immediate: 3.0.6 + lightningcss-android-arm64@1.30.2: optional: true @@ -5928,6 +6134,12 @@ snapshots: dependencies: js-tokens: 4.0.0 + lop@0.4.2: + dependencies: + duck: 0.1.12 + option: 0.2.4 + underscore: 1.13.7 + lru-cache@5.1.1: dependencies: yallist: 3.1.1 @@ -5940,6 +6152,19 @@ snapshots: dependencies: '@jridgewell/sourcemap-codec': 1.5.5 + mammoth@1.11.0: + dependencies: + '@xmldom/xmldom': 0.8.11 + argparse: 1.0.10 + base64-js: 1.5.1 + bluebird: 3.4.7 + dingbat-to-unicode: 1.0.1 + jszip: 3.10.1 + lop: 0.4.2 + path-is-absolute: 1.0.1 + underscore: 1.13.7 + xmlbuilder: 10.1.1 + markdown-table@3.0.4: {} math-intrinsics@1.1.0: {} @@ -6384,6 +6609,8 @@ snapshots: define-properties: 1.2.1 es-object-atoms: 1.1.1 + option@0.2.4: {} + optionator@0.9.4: dependencies: deep-is: 0.1.4 @@ -6407,6 +6634,8 @@ snapshots: dependencies: p-limit: 3.1.0 + pako@1.0.11: {} + parent-module@1.0.1: dependencies: callsites: 3.1.0 @@ -6423,10 +6652,14 @@ snapshots: path-exists@4.0.0: {} + path-is-absolute@1.0.1: {} + path-key@3.1.1: {} path-parse@1.0.7: {} + pend@1.2.0: {} + pg-cloudflare@1.2.7: optional: true @@ -6496,6 +6729,8 @@ snapshots: prismjs@1.30.0: {} + process-nextick-args@2.0.1: {} + prop-types@15.8.1: dependencies: loose-envify: 1.4.0 @@ -6535,6 +6770,16 @@ snapshots: react@19.2.1: {} + readable-stream@2.3.8: + dependencies: + core-util-is: 1.0.3 + inherits: 2.0.4 + isarray: 1.0.0 + process-nextick-args: 2.0.1 + safe-buffer: 5.1.2 + string_decoder: 1.1.1 + util-deprecate: 1.0.2 + reflect.getprototypeof@1.0.10: dependencies: call-bind: 1.0.8 @@ -6619,6 +6864,8 @@ snapshots: has-symbols: 1.1.0 isarray: 2.0.5 + safe-buffer@5.1.2: {} + safe-push-apply@1.0.0: dependencies: es-errors: 1.3.0 @@ -6630,6 +6877,10 @@ snapshots: es-errors: 1.3.0 is-regex: 1.2.1 + saxes@5.0.1: + dependencies: + xmlchars: 2.2.0 + scheduler@0.27.0: {} semver@6.3.1: {} @@ -6658,6 +6909,8 @@ snapshots: es-errors: 1.3.0 es-object-atoms: 1.1.1 + setimmediate@1.0.5: {} + sharp@0.34.5: dependencies: '@img/colour': 1.0.0 @@ -6742,6 +6995,12 @@ snapshots: split2@4.2.0: {} + sprintf-js@1.0.3: {} + + ssf@0.11.2: + dependencies: + frac: 1.1.2 + stable-hash@0.0.5: {} stop-iteration-iterator@1.1.0: @@ -6799,6 +7058,10 @@ snapshots: define-properties: 1.2.1 es-object-atoms: 1.1.1 + string_decoder@1.1.1: + dependencies: + safe-buffer: 5.1.2 + stringify-entities@4.0.4: dependencies: character-entities-html4: 2.1.0 @@ -6925,6 +7188,8 @@ snapshots: has-symbols: 1.1.0 which-boxed-primitive: 1.1.1 + underscore@1.13.7: {} + undici-types@6.21.0: {} unified@11.0.5: @@ -6994,6 +7259,8 @@ snapshots: dependencies: punycode: 2.3.1 + util-deprecate@1.0.2: {} + vfile-message@4.0.3: dependencies: '@types/unist': 3.0.3 @@ -7049,12 +7316,40 @@ snapshots: dependencies: isexe: 2.0.0 + wmf@1.0.2: {} + + word-extractor@1.0.4: + dependencies: + saxes: 5.0.1 + yauzl: 2.10.0 + word-wrap@1.2.5: {} + word@0.3.0: {} + + xlsx@0.18.5: + dependencies: + adler-32: 1.3.1 + cfb: 1.2.2 + codepage: 1.15.0 + crc-32: 1.2.2 + ssf: 0.11.2 + wmf: 1.0.2 + word: 0.3.0 + + xmlbuilder@10.1.1: {} + + xmlchars@2.2.0: {} + xtend@4.0.2: {} yallist@3.1.1: {} + yauzl@2.10.0: + dependencies: + buffer-crc32: 0.2.13 + fd-slicer: 1.1.0 + yocto-queue@0.1.0: {} zod-validation-error@4.0.2(zod@4.2.1): diff --git a/src/types/word-extractor.d.ts b/src/types/word-extractor.d.ts new file mode 100644 index 0000000..a19a8b5 --- /dev/null +++ b/src/types/word-extractor.d.ts @@ -0,0 +1,59 @@ +/** + * word-extractor 类型声明文件 + * 用于支持 .doc 和 .docx 文件解析 + */ + +declare module 'word-extractor' { + interface ExtractOptions { + /** 如果为 true(默认),将常见的 Unicode 引号转换为标准 ASCII 字符 */ + filterUnicode?: boolean; + } + + interface HeaderOptions extends ExtractOptions { + /** 如果为 true(默认),将页眉和页脚作为单个字符串返回 */ + includeFooters?: boolean; + } + + interface TextboxOptions extends ExtractOptions { + /** 如果为 true(默认),包含页眉和页脚中的文本框内容 */ + includeHeadersAndFooters?: boolean; + /** 如果为 true(默认),包含文档正文中的文本框内容 */ + includeBody?: boolean; + } + + /** + * Document 类 - 提取的 Word 文档内容 + */ + class Document { + /** 获取文档正文内容 */ + getBody(options?: ExtractOptions): string; + /** 获取脚注内容 */ + getFootnotes(options?: ExtractOptions): string; + /** 获取尾注内容 */ + getEndnotes(options?: ExtractOptions): string; + /** 获取页眉内容 */ + getHeaders(options?: HeaderOptions): string; + /** 获取页脚内容 */ + getFooters(options?: ExtractOptions): string; + /** 获取批注内容 */ + getAnnotations(options?: ExtractOptions): string; + /** 获取文本框内容 */ + getTextboxes(options?: TextboxOptions): string; + } + + /** + * WordExtractor 类 - Word 文档提取器 + * 支持 .doc(OLE 格式)和 .docx(Open XML 格式) + */ + class WordExtractor { + constructor(); + /** + * 提取 Word 文档内容 + * @param source - 文件路径或 Buffer + * @returns Promise + */ + extract(source: string | Buffer): Promise; + } + + export = WordExtractor; +}